From b288bbc8ab4ce464922a228995e8aa27aafcaf6d Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Mon, 4 May 2026 19:57:45 +0000 Subject: [PATCH 01/36] Implement groupby ``all``/``any`` via bool-coercion + min/max Both methods previously raised ``NotImplementedError``. Reduce ``all``/ ``any`` to ``min``/``max`` on a bool-coerced copy of the value columns: - Strings coerce as ``count_characters > 0`` so empty strings become ``False`` and nulls remain null (preserving them through the agg). - Numerics coerce as ``!= 0`` with the same null preservation. - ``skipna=False`` replaces nulls with ``True`` before the aggregation so that nulls don't flip ``all`` to ``False`` and trivially make ``any`` ``True``. - Empty groups (all-NA values, skipna=True) yield NA from min/max; pandas treats those as vacuously ``True`` for ``all`` and ``False`` for ``any``, so the result is filled accordingly. - ``min_count`` masks groups whose non-null count is below the threshold. Conftest update for ``test_string_dtype_all_na[*-all-*]`` and ``[*-any-*]`` (32 entries). The string-key DataFrame cases additionally rely on identity-based grouping-key column exclusion, which lands in a sibling PR; both must merge before the entries can be removed without xpassing. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- python/cudf/cudf/core/groupby/groupby.py | 110 ++++++++++++++++-- .../cudf/pandas/scripts/conftest-patch.py | 32 ----- .../cudf/tests/groupby/test_reductions.py | 45 +++++++ 3 files changed, 148 insertions(+), 39 deletions(-) diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 0ad04470a5f..ddbbc0caf9c 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -32,7 +32,10 @@ from cudf.core.column_accessor import ColumnAccessor from cudf.core.common import pipe from cudf.core.copy_types import GatherMap -from cudf.core.dtype.validators import is_dtype_obj_numeric +from cudf.core.dtype.validators import ( + is_dtype_obj_numeric, + is_dtype_obj_string, +) from cudf.core.dtypes import ( CategoricalDtype, DecimalDtype, @@ -2991,18 +2994,111 @@ def ewm(self, *args, **kwargs): def any(self, skipna: bool = True, min_count: int = 0, **kwargs: Any): """ Return True if any value in the group is truthful, else False. - - Currently not implemented. """ - raise NotImplementedError("any is currently not implemented") + return self._bool_reduce("any", skipna=skipna, min_count=min_count) def all(self, skipna: bool = True, min_count: int = 0, **kwargs: Any): """ Return True if all values in the group are truthful, else False. - - Currently not implemented. 
""" - raise NotImplementedError("all is currently not implemented") + return self._bool_reduce("all", skipna=skipna, min_count=min_count) + + def _bool_reduce(self, op: str, *, skipna: bool, min_count: int): + """Implement all/any as min/max on bool-coerced value columns.""" + from cudf.core.dataframe import DataFrame + from cudf.core.series import Series + + agg_name = {"all": "min", "any": "max"}[op] + # Empty-group fill value: vacuously True for all, vacuously False for any + fill_value = op == "all" + + is_series = isinstance(self.obj, Series) + + # Coerce each value column to a (nullable) bool column so that + # nulls are preserved through the aggregation (min/max skip + # nulls). For ``skipna=False``, nulls are replaced with True so + # they don't flip ``all`` to False and always make ``any`` True. + def _to_bool_col(col): + from cudf.core.column import ColumnBase + + if isinstance(col.dtype, pd.StringDtype) or is_dtype_obj_string( + col.dtype + ): + counts_plc = plc.strings.attributes.count_characters( + col.plc_column + ) + gt_plc = plc.binaryop.binary_operation( + counts_plc, + plc.Scalar.from_py(0), + plc.binaryop.BinaryOperator.GREATER, + plc.DataType(plc.TypeId.BOOL8), + ) + bool_col = ColumnBase.create(gt_plc, np.dtype(np.bool_)) + else: + # For numeric/bool inputs, cast to bool preserving nulls. 
+ ne_plc = plc.binaryop.binary_operation( + col.plc_column, + plc.Scalar.from_py(0), + plc.binaryop.BinaryOperator.NOT_EQUAL, + plc.DataType(plc.TypeId.BOOL8), + ) + bool_col = ColumnBase.create(ne_plc, np.dtype(np.bool_)) + if not skipna: + bool_col = bool_col.fillna(True) + return bool_col + + if is_series: + new_obj = Series._from_column( + _to_bool_col(self.obj._column), name=self.obj.name + ) + else: + new_data = { + col_name: _to_bool_col(self.obj._data[col_name]) + for col_name in self.grouping._values_column_names + } + new_obj = DataFrame._from_data(new_data, index=self.obj.index) + + # Reuse the same grouping so key columns match ``new_obj`` exactly, + # avoiding label-based lookup when the key column was excluded. + bool_gb = type(self)( + new_obj, + by=self.grouping, + level=None, + sort=self._sort, + as_index=self._as_index, + dropna=self._dropna, + ) + result = bool_gb.agg(agg_name) + + # Empty groups (skipna=True with all-NA values) yield NA from + # min/max — pandas treats these as ``True`` for ``all`` and + # ``False`` for ``any``. 
+ bool_np = np.dtype(np.bool_) + if isinstance(result, Series): + result = result.fillna(fill_value).astype(bool_np) + else: + for col_name in result._column_names: + result[col_name] = ( + result[col_name].fillna(fill_value).astype(bool_np) + ) + + if min_count and min_count > 0: + counts = self.agg("count") + if isinstance(result, Series): + count_series = ( + counts if isinstance(counts, Series) else counts.iloc[:, 0] + ) + result = result.where(count_series >= min_count, None) + else: + for col_name in result._column_names: + if col_name not in counts._column_names: + continue + count_col = counts._data[col_name] + mask = count_col < min_count + result[col_name] = result[col_name].where( + ~Series._from_column(mask), None + ) + return result class DataFrameGroupBy(GroupBy, GetAttrGetItemMixin): diff --git a/python/cudf/cudf/pandas/scripts/conftest-patch.py b/python/cudf/cudf/pandas/scripts/conftest-patch.py index 9817fb48c25..66dca3fdeb2 100644 --- a/python/cudf/cudf/pandas/scripts/conftest-patch.py +++ b/python/cudf/cudf/pandas/scripts/conftest-patch.py @@ -3857,14 +3857,6 @@ def pytest_unconfigure(config): "tests/groupby/test_reductions.py::test_nunique_with_empty_series": "TODO: Add a reason for failure", "tests/groupby/test_reductions.py::test_nunique_with_object": "TODO: Add a reason for failure", "tests/groupby/test_reductions.py::test_nunique_with_timegrouper": "TODO: Add a reason for failure", - "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=str[pyarrow]-all-False-False-0]": "AssertionError: DataFrame are different", - "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=str[pyarrow]-all-False-False-1]": "AssertionError: DataFrame are different", - "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=str[pyarrow]-all-True-False-0]": "AssertionError: DataFrame are different", - "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=str[pyarrow]-all-True-False-1]": "AssertionError: DataFrame are 
different", - "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=str[pyarrow]-any-False-False-0]": "AssertionError: DataFrame are different", - "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=str[pyarrow]-any-False-False-1]": "AssertionError: DataFrame are different", - "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=str[pyarrow]-any-True-False-0]": "AssertionError: DataFrame are different", - "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=str[pyarrow]-any-True-False-1]": "AssertionError: DataFrame are different", "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=str[pyarrow]-count-False-False-0]": "AssertionError: DataFrame are different", "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=str[pyarrow]-count-False-False-1]": "AssertionError: DataFrame are different", "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=str[pyarrow]-count-True-False-0]": "AssertionError: DataFrame are different", @@ -3897,14 +3889,6 @@ def pytest_unconfigure(config): "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=str[pyarrow]-sum-False-False-1]": "AssertionError: DataFrame are different", "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=str[pyarrow]-sum-True-False-0]": "AssertionError: DataFrame are different", "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=str[pyarrow]-sum-True-False-1]": "AssertionError: DataFrame are different", - "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=str[python]-all-False-False-0]": "AssertionError: DataFrame are different", - "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=str[python]-all-False-False-1]": "AssertionError: DataFrame are different", - "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=str[python]-all-True-False-0]": "AssertionError: DataFrame are different", - 
"tests/groupby/test_reductions.py::test_string_dtype_all_na[string=str[python]-all-True-False-1]": "AssertionError: DataFrame are different", - "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=str[python]-any-False-False-0]": "AssertionError: DataFrame are different", - "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=str[python]-any-False-False-1]": "AssertionError: DataFrame are different", - "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=str[python]-any-True-False-0]": "AssertionError: DataFrame are different", - "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=str[python]-any-True-False-1]": "AssertionError: DataFrame are different", "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=str[python]-count-False-False-0]": "AssertionError: DataFrame are different", "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=str[python]-count-False-False-1]": "AssertionError: DataFrame are different", "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=str[python]-count-True-False-0]": "AssertionError: DataFrame are different", @@ -3945,14 +3929,6 @@ def pytest_unconfigure(config): "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=str[python]-sum-False-False-1]": "AssertionError: DataFrame are different", "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=str[python]-sum-True-False-0]": "AssertionError: DataFrame are different", "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=str[python]-sum-True-False-1]": "AssertionError: DataFrame are different", - "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=string[pyarrow]-all-False-False-0]": "AssertionError: DataFrame are different", - "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=string[pyarrow]-all-False-False-1]": "AssertionError: DataFrame are different", - 
"tests/groupby/test_reductions.py::test_string_dtype_all_na[string=string[pyarrow]-all-True-False-0]": "AssertionError: DataFrame are different", - "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=string[pyarrow]-all-True-False-1]": "AssertionError: DataFrame are different", - "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=string[pyarrow]-any-False-False-0]": "AssertionError: DataFrame are different", - "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=string[pyarrow]-any-False-False-1]": "AssertionError: DataFrame are different", - "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=string[pyarrow]-any-True-False-0]": "AssertionError: DataFrame are different", - "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=string[pyarrow]-any-True-False-1]": "AssertionError: DataFrame are different", "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=string[pyarrow]-count-False-False-0]": "AssertionError: DataFrame are different", "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=string[pyarrow]-count-False-False-1]": "AssertionError: DataFrame are different", "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=string[pyarrow]-count-False-True-0]": "AssertionError: Attributes of Series are different", @@ -4001,14 +3977,6 @@ def pytest_unconfigure(config): "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=string[pyarrow]-sum-False-False-1]": "AssertionError: DataFrame are different", "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=string[pyarrow]-sum-True-False-0]": "AssertionError: DataFrame are different", "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=string[pyarrow]-sum-True-False-1]": "AssertionError: DataFrame are different", - "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=string[python]-all-False-False-0]": "AssertionError: DataFrame are different", - 
"tests/groupby/test_reductions.py::test_string_dtype_all_na[string=string[python]-all-False-False-1]": "AssertionError: DataFrame are different", - "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=string[python]-all-True-False-0]": "AssertionError: DataFrame are different", - "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=string[python]-all-True-False-1]": "AssertionError: DataFrame are different", - "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=string[python]-any-False-False-0]": "AssertionError: DataFrame are different", - "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=string[python]-any-False-False-1]": "AssertionError: DataFrame are different", - "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=string[python]-any-True-False-0]": "AssertionError: DataFrame are different", - "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=string[python]-any-True-False-1]": "AssertionError: DataFrame are different", "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=string[python]-count-False-False-0]": "AssertionError: DataFrame are different", "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=string[python]-count-False-False-1]": "AssertionError: DataFrame are different", "tests/groupby/test_reductions.py::test_string_dtype_all_na[string=string[python]-count-False-True-0]": "AssertionError: Attributes of Series are different", diff --git a/python/cudf/cudf/tests/groupby/test_reductions.py b/python/cudf/cudf/tests/groupby/test_reductions.py index fc664bae59a..2e9efe26f47 100644 --- a/python/cudf/cudf/tests/groupby/test_reductions.py +++ b/python/cudf/cudf/tests/groupby/test_reductions.py @@ -1189,3 +1189,48 @@ def test_string_groupby_key_index(): got = gdf.groupby("a", sort=True).count() assert_eq(expect, got, check_dtype=False) + + +@pytest.mark.parametrize("op", ["all", "any"]) +@pytest.mark.parametrize( + "data", + [ + [True, 
False, True, True, False, False], + [1, 0, 2, 3, 0, 0], + [1.0, 0.0, 2.5, 3.5, 0.0, 0.0], + ], +) +def test_groupby_all_any(op, data): + pdf = pd.DataFrame({"a": [1, 1, 2, 2, 3, 3], "b": data}) + gdf = cudf.from_pandas(pdf) + with cudf.option_context("mode.pandas_compatible", True): + got = getattr(gdf.groupby("a"), op)() + expect = getattr(pdf.groupby("a"), op)() + assert_eq(expect, got) + + +@pytest.mark.parametrize("op", ["all", "any"]) +def test_groupby_all_any_string(op): + pdf = pd.DataFrame( + {"a": [1, 1, 2, 2, 3, 3], "b": ["x", "", "", "", "y", "z"]} + ) + gdf = cudf.from_pandas(pdf) + with cudf.option_context("mode.pandas_compatible", True): + got = getattr(gdf.groupby("a"), op)() + expect = getattr(pdf.groupby("a"), op)() + assert_eq(expect, got) + + +@pytest.mark.parametrize("op", ["all", "any"]) +def test_groupby_all_any_empty(op): + pdf = pd.DataFrame( + { + "a": pd.array([], dtype="int64"), + "b": pd.array([], dtype="bool"), + } + ) + gdf = cudf.from_pandas(pdf) + with cudf.option_context("mode.pandas_compatible", True): + got = getattr(gdf.groupby("a"), op)() + expect = getattr(pdf.groupby("a"), op)() + assert_eq(expect, got, check_index_type=False) From 8992d3983daab6ae9c4db6f091d3c4a8e8f47f70 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 6 May 2026 15:49:03 -0500 Subject: [PATCH 02/36] Apply suggestions from code review Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- python/cudf/cudf/core/groupby/groupby.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index ddbbc0caf9c..8538953ea7e 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -3021,9 +3021,7 @@ def _bool_reduce(self, op: str, *, skipna: bool, min_count: int): def _to_bool_col(col): from cudf.core.column import ColumnBase - if isinstance(col.dtype, pd.StringDtype) or is_dtype_obj_string( 
- col.dtype - ): + if is_dtype_obj_string(col.dtype): counts_plc = plc.strings.attributes.count_characters( col.plc_column ) From 82852237e27b48b42b5f9afe961b236abe325740 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Mon, 4 May 2026 16:56:54 -0500 Subject: [PATCH 03/36] Reject pd.NA string-to-object casts in pandas-compatible mode (#22295) ## Description In pandas-compatible mode, reject casting nullable string columns that use `pd.NA` as their missing-value sentinel to numpy `object` dtype. This came from a pandas 3 compatibility issue in `cudf.pandas`: pandas preserves `pd.NA` when `StringDtype(na_value=pd.NA)` is cast to `object`, while cuDF's string-to-object path materializes nulls as Python `None`. Preserving that sentinel would require carrying source dtype metadata after the result has become plain `object`, which the review pointed out is not a good fit for the current column model. Instead, when `mode.pandas_compatible` is enabled, this PR now raises in `StringColumn.as_string_column` for: - `pd.StringDtype(..., na_value=pd.NA)` -> `object` - string `pd.ArrowDtype` -> `object` Outside pandas-compatible mode, the existing string-to-object cast behavior is unchanged. String dtypes that use `np.nan` as their missing-value sentinel and ordinary object string columns also keep the existing behavior. ## Changes - Add an explicit pandas-compatible-mode `NotImplementedError` for nullable `pd.NA` string-to-object casts in `python/cudf/cudf/core/column/string.py`. - Add focused coverage in `python/cudf/cudf/tests/series/methods/test_astype.py` for both pandas-compatible and non-pandas-compatible behavior. - Remove the previous per-instance `_PANDAS_NA_VALUE` override path. ## Checklist - [x] I am familiar with the [Contributing Guidelines](https://github.com/rapidsai/cudf/blob/HEAD/CONTRIBUTING.md). - [x] New or existing tests cover these changes. - [x] The documentation is up to date with these changes. 
--- python/cudf/cudf/core/column/string.py | 12 ++ .../cudf/pandas/scripts/conftest-patch.py | 126 ------------------ .../cudf/tests/series/methods/test_astype.py | 40 ++++++ 3 files changed, 52 insertions(+), 126 deletions(-) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 1e13c23ec8d..47ab9262ac5 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -338,6 +338,18 @@ def as_string_column(self, dtype: DtypeObj) -> Self: if isinstance(dtype, np.dtype) and dtype.kind == "U": dtype = np.dtype("object") if dtype != self.dtype: + if ( + cudf.get_option("mode.pandas_compatible") + and self.null_count != 0 + and isinstance(dtype, np.dtype) + and dtype == np.dtype("O") + and isinstance(self.dtype, (pd.StringDtype, pd.ArrowDtype)) + and self.dtype.na_value is pd.NA + ): + raise NotImplementedError( + "Casting nullable string columns with pd.NA to object " + "is not supported." + ) return cast(Self, ColumnBase.create(self.plc_column, dtype)) return self diff --git a/python/cudf/cudf/pandas/scripts/conftest-patch.py b/python/cudf/cudf/pandas/scripts/conftest-patch.py index 66dca3fdeb2..8e65569d557 100644 --- a/python/cudf/cudf/pandas/scripts/conftest-patch.py +++ b/python/cudf/cudf/pandas/scripts/conftest-patch.py @@ -1425,7 +1425,6 @@ def pytest_unconfigure(config): "tests/extension/test_arrow.py::TestArrowArray::test_grouping_grouper[uint32]": "AssertionError: ndarray Expected type , found instead", "tests/extension/test_arrow.py::TestArrowArray::test_grouping_grouper[uint64]": "AssertionError: ndarray Expected type , found instead", "tests/extension/test_arrow.py::TestArrowArray::test_grouping_grouper[uint8]": "AssertionError: ndarray Expected type , found instead", - "tests/extension/test_arrow.py::TestArrowArray::test_loc_setitem_with_expansion_preserves_ea_index_dtype[string]": "AssertionError: DataFrame.index are different", 
"tests/extension/test_arrow.py::TestArrowArray::test_reduce_frame[bool-prod-True]": "AssertionError: Attributes of ExtensionArray are different", "tests/extension/test_arrow.py::TestArrowArray::test_reduce_frame[bool-sum-True]": "AssertionError: Attributes of ExtensionArray are different", "tests/extension/test_arrow.py::TestArrowArray::test_reduce_frame[decimal128(7, 3)-mean-False]": "TODO: Add a reason for failure", @@ -1491,13 +1490,6 @@ def pytest_unconfigure(config): "tests/extension/test_arrow.py::TestArrowArray::test_reduce_series_numeric[uint64-kurt-True]": "TODO: Add a reason for failure", "tests/extension/test_arrow.py::TestArrowArray::test_reduce_series_numeric[uint8-kurt-False]": "TODO: Add a reason for failure", "tests/extension/test_arrow.py::TestArrowArray::test_reduce_series_numeric[uint8-kurt-True]": "TODO: Add a reason for failure", - "tests/extension/test_arrow.py::TestArrowArray::test_setitem_series[string-full_slice]": "AssertionError: Series are different", - "tests/extension/test_arrow.py::TestArrowArray::test_setitem_series[string-index]": "AssertionError: Series are different", - "tests/extension/test_arrow.py::TestArrowArray::test_setitem_series[string-list(range)]": "AssertionError: Series are different", - "tests/extension/test_arrow.py::TestArrowArray::test_setitem_series[string-list[index]]": "AssertionError: Series are different", - "tests/extension/test_arrow.py::TestArrowArray::test_setitem_series[string-mask]": "AssertionError: Series are different", - "tests/extension/test_arrow.py::TestArrowArray::test_setitem_series[string-null_slice]": "AssertionError: Series are different", - "tests/extension/test_arrow.py::TestArrowArray::test_setitem_series[string-range]": "AssertionError: Series are different", "tests/extension/test_arrow.py::TestArrowArray::test_setitem_with_expansion_row[string]": "AssertionError: Attributes of DataFrame.iloc[:, 0] (column name='data') are different", 
"tests/extension/test_arrow.py::TestArrowArray::test_unstack[decimal128(7, 3)-frame-index1]": "AssertionError: DataFrame.iloc[:, 1] (column name='('A', 'B')') are different", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[decimal128(7, 3)-frame-index2]": "AssertionError: DataFrame.iloc[:, 1] (column name='('A', 'B')') are different", @@ -1505,12 +1497,6 @@ def pytest_unconfigure(config): "tests/extension/test_arrow.py::TestArrowArray::test_unstack[decimal128(7, 3)-series-index1]": "AssertionError: DataFrame.iloc[:, 1] (column name='B') are different", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[decimal128(7, 3)-series-index2]": "AssertionError: DataFrame.iloc[:, 1] (column name='B') are different", "tests/extension/test_arrow.py::TestArrowArray::test_unstack[decimal128(7, 3)-series-index3]": "AssertionError: DataFrame.iloc[:, 0] (column name='A') are different", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[string-frame-index1]": "AssertionError: DataFrame.iloc[:, 1] (column name='('A', 'B')') are different", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[string-frame-index2]": "AssertionError: DataFrame.iloc[:, 1] (column name='('A', 'B')') are different", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[string-frame-index3]": "AssertionError: DataFrame.iloc[:, 0] (column name='('A', 'A')') are different", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[string-series-index1]": "AssertionError: DataFrame.iloc[:, 1] (column name='B') are different", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[string-series-index2]": "AssertionError: DataFrame.iloc[:, 1] (column name='B') are different", - "tests/extension/test_arrow.py::TestArrowArray::test_unstack[string-series-index3]": "AssertionError: DataFrame.iloc[:, 0] (column name='A') are different", "tests/extension/test_arrow.py::TestLogicalOps::test_kleene_and": "TODO: Add a reason for failure", 
"tests/extension/test_arrow.py::TestLogicalOps::test_kleene_and_scalar[False-expected3]": "TODO: Add a reason for failure", "tests/extension/test_arrow.py::TestLogicalOps::test_kleene_and_scalar[None-expected0]": "TODO: Add a reason for failure", @@ -2090,30 +2076,6 @@ def pytest_unconfigure(config): "tests/extension/test_string.py::TestStringArray::test_unstack[string=str[python]-True-series-index1]": "AssertionError: DataFrame.iloc[:, 1] (column name='B') are different", "tests/extension/test_string.py::TestStringArray::test_unstack[string=str[python]-True-series-index2]": "AssertionError: DataFrame.iloc[:, 1] (column name='B') are different", "tests/extension/test_string.py::TestStringArray::test_unstack[string=str[python]-True-series-index3]": "AssertionError: DataFrame.iloc[:, 0] (column name='A') are different", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[pyarrow]-False-frame-index1]": "AssertionError: DataFrame.iloc[:, 1] (column name='('A', 'B')') are different", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[pyarrow]-False-frame-index2]": "AssertionError: DataFrame.iloc[:, 1] (column name='('A', 'B')') are different", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[pyarrow]-False-frame-index3]": "AssertionError: DataFrame.iloc[:, 0] (column name='('A', 'A')') are different", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[pyarrow]-False-series-index1]": "AssertionError: DataFrame.iloc[:, 1] (column name='B') are different", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[pyarrow]-False-series-index2]": "AssertionError: DataFrame.iloc[:, 1] (column name='B') are different", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[pyarrow]-False-series-index3]": "AssertionError: DataFrame.iloc[:, 0] (column name='A') are different", - 
"tests/extension/test_string.py::TestStringArray::test_unstack[string=string[pyarrow]-True-frame-index1]": "AssertionError: DataFrame.iloc[:, 1] (column name='('A', 'B')') are different", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[pyarrow]-True-frame-index2]": "AssertionError: DataFrame.iloc[:, 1] (column name='('A', 'B')') are different", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[pyarrow]-True-frame-index3]": "AssertionError: DataFrame.iloc[:, 0] (column name='('A', 'A')') are different", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[pyarrow]-True-series-index1]": "AssertionError: DataFrame.iloc[:, 1] (column name='B') are different", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[pyarrow]-True-series-index2]": "AssertionError: DataFrame.iloc[:, 1] (column name='B') are different", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[pyarrow]-True-series-index3]": "AssertionError: DataFrame.iloc[:, 0] (column name='A') are different", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[python]-False-frame-index1]": "AssertionError: DataFrame.iloc[:, 1] (column name='('A', 'B')') are different", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[python]-False-frame-index2]": "AssertionError: DataFrame.iloc[:, 1] (column name='('A', 'B')') are different", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[python]-False-frame-index3]": "AssertionError: DataFrame.iloc[:, 0] (column name='('A', 'A')') are different", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[python]-False-series-index1]": "AssertionError: DataFrame.iloc[:, 1] (column name='B') are different", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[python]-False-series-index2]": "AssertionError: 
DataFrame.iloc[:, 1] (column name='B') are different", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[python]-False-series-index3]": "AssertionError: DataFrame.iloc[:, 0] (column name='A') are different", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[python]-True-frame-index1]": "AssertionError: DataFrame.iloc[:, 1] (column name='('A', 'B')') are different", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[python]-True-frame-index2]": "AssertionError: DataFrame.iloc[:, 1] (column name='('A', 'B')') are different", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[python]-True-frame-index3]": "AssertionError: DataFrame.iloc[:, 0] (column name='('A', 'A')') are different", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[python]-True-series-index1]": "AssertionError: DataFrame.iloc[:, 1] (column name='B') are different", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[python]-True-series-index2]": "AssertionError: DataFrame.iloc[:, 1] (column name='B') are different", - "tests/extension/test_string.py::TestStringArray::test_unstack[string=string[python]-True-series-index3]": "AssertionError: DataFrame.iloc[:, 0] (column name='A') are different", "tests/frame/constructors/test_from_records.py::TestFromRecords::test_from_records_empty": "AssertionError: Attributes of DataFrame.iloc[:, 0] (column name='a') are different", "tests/frame/constructors/test_from_records.py::TestFromRecords::test_from_records_empty_iterator_with_preserve_columns": "AssertionError: Attributes of DataFrame.iloc[:, 0] (column name='col_1') are different", "tests/frame/constructors/test_from_records.py::TestFromRecords::test_from_records_misc_brokenness": "AssertionError: Attributes of DataFrame.iloc[:, 0] (column name='a') are different", @@ -4475,7 +4437,6 @@ def pytest_unconfigure(config): 
"tests/indexes/numeric/test_indexing.py::TestGetIndexer::test_get_indexer_with_method_numeric_vs_bool[backfill]": "Failed: DID NOT RAISE ", "tests/indexes/numeric/test_indexing.py::TestGetIndexer::test_get_indexer_with_method_numeric_vs_bool[nearest]": "Failed: DID NOT RAISE ", "tests/indexes/numeric/test_indexing.py::TestGetIndexer::test_get_indexer_with_method_numeric_vs_bool[pad]": "Failed: DID NOT RAISE ", - "tests/indexes/numeric/test_indexing.py::TestGetIndexer::test_get_loc_masked_na_and_nan[False]": "ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()", "tests/indexes/numeric/test_numeric.py::TestFloatNumericIndex::test_equals_numeric": "TODO: Add a reason for failure", "tests/indexes/numeric/test_numeric.py::TestFloatNumericIndex::test_type_coercion_fail[int16]": "TODO: Add a reason for failure", "tests/indexes/numeric/test_numeric.py::TestFloatNumericIndex::test_type_coercion_fail[int32]": "TODO: Add a reason for failure", @@ -4558,12 +4519,6 @@ def pytest_unconfigure(config): "tests/indexes/test_any_index.py::TestConversion::test_to_series_with_arguments[uint8]": "TODO: Add a reason for failure", "tests/indexes/test_base.py::TestIndex::test_cached_properties_not_settable": "TODO: Add a reason for failure", "tests/indexes/test_base.py::TestIndex::test_constructor_dtypes_datetime[None-Index-values]": "TODO: Add a reason for failure", - "tests/indexes/test_base.py::TestIndex::test_empty_fancy[bool-float32]": "Failed: DID NOT RAISE ", - "tests/indexes/test_base.py::TestIndex::test_empty_fancy[bool-float64]": "Failed: DID NOT RAISE ", - "tests/indexes/test_base.py::TestIndex::test_empty_fancy[bool-int32]": "Failed: DID NOT RAISE ", - "tests/indexes/test_base.py::TestIndex::test_empty_fancy[bool-int64]": "Failed: DID NOT RAISE ", - "tests/indexes/test_base.py::TestIndex::test_empty_fancy[bool-uint32]": "Failed: DID NOT RAISE ", - "tests/indexes/test_base.py::TestIndex::test_empty_fancy[bool-uint64]": "Failed: 
DID NOT RAISE ", "tests/indexes/test_base.py::TestIndex::test_equals_op_mismatched_multiindex_raises[index0]": "TODO: Add a reason for failure", "tests/indexes/test_base.py::TestIndex::test_is_": "TODO: Add a reason for failure", "tests/indexes/test_base.py::TestIndex::test_is_object[string-True]": "AssertionError: assert True is False", @@ -5999,7 +5954,6 @@ def pytest_unconfigure(config): "tests/resample/test_resample_api.py::test_agg_with_lambda[df_resample-agg1]": "TODO: Add a reason for failure", "tests/resample/test_resample_api.py::test_agg_with_lambda[df_resample-agg2]": "TODO: Add a reason for failure", "tests/resample/test_resample_api.py::test_groupby_resample_api": "TODO: Add a reason for failure", - "tests/resample/test_resample_api.py::test_resample_group_keys": "TODO: Add a reason for failure", "tests/resample/test_resample_api.py::test_transform_frame[None]": "TODO: Add a reason for failure", "tests/resample/test_resample_api.py::test_transform_frame[date]": "TODO: Add a reason for failure", "tests/resample/test_resampler_grouper.py::test_apply_columns_multilevel": "AssertionError: (, None)", @@ -6200,8 +6154,6 @@ def pytest_unconfigure(config): "tests/reshape/merge/test_merge.py::TestMergeDtypes::test_merge_on_ints_floats[int_vals1-float_vals1-exp_vals1]": "TODO: Add a reason for failure", "tests/reshape/merge/test_merge.py::TestMergeDtypes::test_merge_on_ints_floats[int_vals2-float_vals2-exp_vals2]": "TODO: Add a reason for failure", "tests/reshape/merge/test_merge.py::TestMergeDtypes::test_merge_on_ints_floats_warning": "TODO: Add a reason for failure", - "tests/reshape/merge/test_merge.py::TestMergeDtypes::test_merge_with_intc_columns": "AssertionError: DataFrame.iloc[:, 0] (column name='a') are different", - "tests/reshape/merge/test_merge.py::TestMergeDtypes::test_merge_with_uintc_columns": "AssertionError: DataFrame.iloc[:, 0] (column name='a') are different", "tests/reshape/merge/test_merge.py::test_merge_arrow_and_numpy_dtypes[int64]": 
"AssertionError: Attributes of DataFrame.iloc[:, 0] (column name='a') are different", "tests/reshape/merge/test_merge.py::test_merge_different_index_names": "TODO: Add a reason for failure", "tests/reshape/merge/test_merge.py::test_merge_ea_and_non_ea[Float32-right]": "AssertionError: Attributes of DataFrame.iloc[:, 0] (column name='a') are different", @@ -7356,86 +7308,8 @@ def pytest_unconfigure(config): "tests/strings/test_split_partition.py::test_partition_series_stdlib[string=string[pyarrow]-rpartition]": "TODO: Add a reason for failure", "tests/strings/test_split_partition.py::test_partition_series_stdlib[string=string[python]-partition]": "TODO: Add a reason for failure", "tests/strings/test_split_partition.py::test_partition_series_stdlib[string=string[python]-rpartition]": "TODO: Add a reason for failure", - "tests/strings/test_split_partition.py::test_split_blank_string_with_non_empty[string=object]": "AssertionError: Attributes of DataFrame.iloc[:, 0] (column name='0') are different", "tests/strings/test_split_partition.py::test_split_nan_expand[string=object]": "AssertionError: DataFrame.iloc[:, 0] (column name='0') are different", - "tests/strings/test_split_partition.py::test_split_to_dataframe_unequal_splits[string=object]": "AssertionError: Attributes of DataFrame.iloc[:, 3] (column name='3') are different", - "tests/strings/test_string_array.py::test_string_array[string[pyarrow]-capitalize]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[pyarrow]-casefold]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[pyarrow]-cat1]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[pyarrow]-center]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[pyarrow]-extract0]": "AssertionError: Series are different", - 
"tests/strings/test_string_array.py::test_string_array[string[pyarrow]-extract1]": "AssertionError: DataFrame.iloc[:, 0] (column name='0') are different", - "tests/strings/test_string_array.py::test_string_array[string[pyarrow]-extractall]": "AssertionError: DataFrame.iloc[:, 0] (column name='0') are different", - "tests/strings/test_string_array.py::test_string_array[string[pyarrow]-get]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[pyarrow]-join]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[pyarrow]-ljust]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[pyarrow]-lower]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[pyarrow]-lstrip]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[pyarrow]-normalize]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[pyarrow]-pad]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[pyarrow]-partition1]": "AssertionError: DataFrame.iloc[:, 0] (column name='0') are different", - "tests/strings/test_string_array.py::test_string_array[string[pyarrow]-partition2]": "AssertionError: DataFrame.iloc[:, 0] (column name='0') are different", - "tests/strings/test_string_array.py::test_string_array[string[pyarrow]-removeprefix]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[pyarrow]-removesuffix]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[pyarrow]-repeat]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[pyarrow]-replace]": "AssertionError: Series are different", - 
"tests/strings/test_string_array.py::test_string_array[string[pyarrow]-rjust]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[pyarrow]-rpartition1]": "AssertionError: DataFrame.iloc[:, 0] (column name='0') are different", - "tests/strings/test_string_array.py::test_string_array[string[pyarrow]-rpartition2]": "AssertionError: DataFrame.iloc[:, 0] (column name='0') are different", - "tests/strings/test_string_array.py::test_string_array[string[pyarrow]-rstrip]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[pyarrow]-slice0]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[pyarrow]-slice1]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[pyarrow]-slice_replace0]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[pyarrow]-slice_replace1]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[pyarrow]-split1]": "AssertionError: DataFrame.iloc[:, 0] (column name='0') are different", - "tests/strings/test_string_array.py::test_string_array[string[pyarrow]-strip]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[pyarrow]-swapcase]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[pyarrow]-title]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[pyarrow]-translate]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[pyarrow]-upper]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[pyarrow]-wrap]": "AssertionError: Series are different", - 
"tests/strings/test_string_array.py::test_string_array[string[pyarrow]-zfill]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-capitalize]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-casefold]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-cat1]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-center]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-extract0]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-extract1]": "AssertionError: DataFrame.iloc[:, 0] (column name='0') are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-extractall]": "AssertionError: DataFrame.iloc[:, 0] (column name='0') are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-get]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-join]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-ljust]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-lower]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-lstrip]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-normalize]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-pad]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-partition1]": "AssertionError: 
DataFrame.iloc[:, 0] (column name='0') are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-partition2]": "AssertionError: DataFrame.iloc[:, 0] (column name='0') are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-removeprefix]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-removesuffix]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-repeat]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-replace]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-rjust]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-rpartition1]": "AssertionError: DataFrame.iloc[:, 0] (column name='0') are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-rpartition2]": "AssertionError: DataFrame.iloc[:, 0] (column name='0') are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-rstrip]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-slice0]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-slice1]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-slice_replace0]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-slice_replace1]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-split1]": "AssertionError: DataFrame.iloc[:, 0] (column name='0') are different", - 
"tests/strings/test_string_array.py::test_string_array[string[python]-strip]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-swapcase]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-title]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-translate]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-upper]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-wrap]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array[string[python]-zfill]": "AssertionError: Series are different", - "tests/strings/test_string_array.py::test_string_array_extract[string[pyarrow]]": "AssertionError: DataFrame.iloc[:, 0] (column name='0') are different", - "tests/strings/test_string_array.py::test_string_array_extract[string[python]]": "AssertionError: DataFrame.iloc[:, 0] (column name='0') are different", "tests/strings/test_strings.py::test_index_str_accessor_multiindex_raises": "TODO: Add a reason for failure", - "tests/strings/test_strings.py::test_split_join_roundtrip[string=str[pyarrow]]": "AssertionError: Series are different", - "tests/strings/test_strings.py::test_split_join_roundtrip[string=str[python]]": "AssertionError: Series are different", "tests/strings/test_strings.py::test_split_join_roundtrip[string=string[pyarrow]]": "AssertionError: Series are different", "tests/strings/test_strings.py::test_split_join_roundtrip[string=string[python]]": "AssertionError: Series are different", "tests/strings/test_strings.py::test_string_slice_out_of_bounds[string=object]": "AssertionError: Series are different", diff --git a/python/cudf/cudf/tests/series/methods/test_astype.py 
b/python/cudf/cudf/tests/series/methods/test_astype.py index 2f809b7c6d9..bf6a1b866e0 100644 --- a/python/cudf/cudf/tests/series/methods/test_astype.py +++ b/python/cudf/cudf/tests/series/methods/test_astype.py @@ -69,6 +69,46 @@ def test_series_typecast_to_object(): assert new_series[0] == "1970-01-01 00:00:00.000000001" +@pytest.mark.parametrize( + "dtype", + [ + pd.StringDtype(storage="python", na_value=pd.NA), + pd.StringDtype(storage="pyarrow", na_value=pd.NA), + pd.ArrowDtype(pa.string()), + ], +) +def test_string_astype_object_pd_na_pandas_compat(dtype): + sr = cudf.Series(["a", None, "b"], dtype=dtype) + + with cudf.option_context("mode.pandas_compatible", True): + with pytest.raises( + NotImplementedError, + match="Casting nullable string columns with pd.NA to object", + ): + sr.astype(object) + + with cudf.option_context("mode.pandas_compatible", False): + result = sr.astype(object) + assert result.dtype == np.dtype("object") + + +@pytest.mark.parametrize( + "dtype", + [ + pd.StringDtype(storage="python", na_value=pd.NA), + pd.StringDtype(storage="pyarrow", na_value=pd.NA), + pd.ArrowDtype(pa.string()), + ], +) +def test_string_astype_object_pd_na_pandas_compat_no_nulls(dtype): + sr = cudf.Series(["a", "b", "c"], dtype=dtype) + + with cudf.option_context("mode.pandas_compatible", True): + result = sr.astype(object) + assert result.dtype == np.dtype("object") + assert result.to_arrow().to_pylist() == ["a", "b", "c"] + + @pytest.mark.parametrize( "dtype", [ From ca6dddcf990ed72df0f64887eb3afbb1aaa647cc Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Mon, 4 May 2026 20:24:34 +0200 Subject: [PATCH 04/36] Remove legacy Dask-based streaming backends (#22358) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drops the legacy `Cluster.DISTRIBUTED` cluster and the entire `rapidsmpf.integrations.dask` execution path. The new `DaskEngine` (`Cluster.DASK`) is unaffected. 
Note: all removed components were under `experimental`, so no deprecation period is required. **What’s removed** * `Cluster.DISTRIBUTED` enum value and all dispatch paths (`rapidsmpf/core.py`, `parallel.py:get_scheduler`) * `experimental/dask_registers.py`, `experimental/spilling.py`, `experimental/rapidsmpf/dask.py` * `rapidsmpf_distributed_available()`, `StreamingExecutor.rapidsmpf_spill`, and `cluster_kind` plumbing in `shuffle.py` and `sort.py` * Legacy benchmark harness (`benchmarks/utils_legacy.py`) and the `utils.py` dispatch shim * Legacy test suite (`tests/experimental/legacy/`) and Dask registration test files **What stays** * `Cluster.DASK` / `DaskEngine` (`frontend/dask.py`), the supported Dask backend * `Cluster.SINGLE`, `SPMD`, and `RAY` streaming frontends * The task-graph backend (`Runtime.TASKS`). Authors: - Mads R. B. Kristensen (https://github.com/madsbk) Approvers: - Peter Andreas Entschev (https://github.com/pentschev) - Matthew Murray (https://github.com/Matt711) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/22358 --- ci/run_cudf_polars_experimental_pytests.sh | 8 +- python/cudf_polars/cudf_polars/callback.py | 12 +- .../experimental/benchmarks/utils.py | 59 +- .../experimental/benchmarks/utils_legacy.py | 2100 ----------------- .../experimental/dask_registers.py | 227 -- .../cudf_polars/experimental/io.py | 10 +- .../cudf_polars/experimental/parallel.py | 77 +- .../experimental/rapidsmpf/core.py | 31 +- .../experimental/rapidsmpf/dask.py | 194 -- .../cudf_polars/experimental/rapidsmpf/io.py | 2 +- .../cudf_polars/experimental/shuffle.py | 65 +- .../cudf_polars/experimental/sort.py | 65 +- .../cudf_polars/experimental/spilling.py | 148 -- .../cudf_polars/cudf_polars/utils/config.py | 97 +- python/cudf_polars/tests/conftest.py | 8 +- .../tests/experimental/legacy/__init__.py | 8 - .../tests/experimental/legacy/conftest.py | 46 - .../experimental/legacy/test_distributed.py | 67 - 
.../tests/experimental/legacy/test_explain.py | 89 - .../experimental/legacy/test_parallel.py | 127 - .../tests/experimental/legacy/test_shuffle.py | 101 - .../experimental/legacy/test_shuffler.py | 79 - .../tests/experimental/legacy/test_sort.py | 152 -- .../tests/experimental/test_dask_serialize.py | 129 - .../tests/experimental/test_dask_sizeof.py | 32 - .../tests/experimental/test_dask_tokenize.py | 32 - .../tests/experimental/test_io_multirank.py | 2 +- .../tests/experimental/test_sink.py | 13 - python/cudf_polars/tests/test_config.py | 77 +- python/cudf_polars/tests/test_profile.py | 5 +- 30 files changed, 106 insertions(+), 3956 deletions(-) delete mode 100644 python/cudf_polars/cudf_polars/experimental/benchmarks/utils_legacy.py delete mode 100644 python/cudf_polars/cudf_polars/experimental/dask_registers.py delete mode 100644 python/cudf_polars/cudf_polars/experimental/rapidsmpf/dask.py delete mode 100644 python/cudf_polars/cudf_polars/experimental/spilling.py delete mode 100644 python/cudf_polars/tests/experimental/legacy/__init__.py delete mode 100644 python/cudf_polars/tests/experimental/legacy/conftest.py delete mode 100644 python/cudf_polars/tests/experimental/legacy/test_distributed.py delete mode 100644 python/cudf_polars/tests/experimental/legacy/test_explain.py delete mode 100644 python/cudf_polars/tests/experimental/legacy/test_parallel.py delete mode 100644 python/cudf_polars/tests/experimental/legacy/test_shuffle.py delete mode 100644 python/cudf_polars/tests/experimental/legacy/test_shuffler.py delete mode 100644 python/cudf_polars/tests/experimental/legacy/test_sort.py delete mode 100644 python/cudf_polars/tests/experimental/test_dask_serialize.py delete mode 100644 python/cudf_polars/tests/experimental/test_dask_sizeof.py delete mode 100644 python/cudf_polars/tests/experimental/test_dask_tokenize.py diff --git a/ci/run_cudf_polars_experimental_pytests.sh b/ci/run_cudf_polars_experimental_pytests.sh index ea0fe69f37c..d0a4767bd99 100755 --- 
a/ci/run_cudf_polars_experimental_pytests.sh +++ b/ci/run_cudf_polars_experimental_pytests.sh @@ -11,10 +11,4 @@ set -euo pipefail cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/cudf_polars/ echo "Running the full cudf-polars test suite with both the in-memory and spmd engine" -python -m pytest --cache-clear "$@" tests --ignore=tests/experimental/legacy - -echo "Running experimental legacy tests with the 'rapidsmpf' runtime and a 'distributed' cluster" -python -m pytest --cache-clear "$@" "tests/experimental/legacy" \ - --executor streaming \ - --cluster distributed \ - --runtime rapidsmpf +python -m pytest --cache-clear "$@" tests diff --git a/python/cudf_polars/cudf_polars/callback.py b/python/cudf_polars/cudf_polars/callback.py index b06ba2d770f..fb915784f96 100644 --- a/python/cudf_polars/cudf_polars/callback.py +++ b/python/cudf_polars/cudf_polars/callback.py @@ -313,10 +313,8 @@ def _callback( if timer is not None: msg = textwrap.dedent("""\ LazyFrame.profile() is not supported with the streaming executor. - To profile execution with the streaming executor, use: - - - NVIDIA NSight Systems with the 'streaming' scheduler. - - Dask's built-in profiling tools with the 'distributed' scheduler. + To profile execution with the streaming executor, use NVIDIA + NSight Systems with the 'streaming' scheduler. """) raise NotImplementedError(msg) @@ -368,12 +366,6 @@ def execute_with_cudf( if timer is not None: timer.store(start, time.monotonic_ns(), "gpu-ir-translation") - if ( - memory_resource is None - and translator.config_options.executor.name == "streaming" - and translator.config_options.executor.cluster == "distributed" - ): # pragma: no cover; Requires distributed cluster - memory_resource = rmm.mr.get_current_device_resource() if len(ir_translation_errors): # TODO: Display these errors in user-friendly way. 
# tracked in https://github.com/rapidsai/cudf/issues/17051 diff --git a/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py b/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py index fce2966e7da..8591ed18cdd 100644 --- a/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py +++ b/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py @@ -1,56 +1,19 @@ # SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. # SPDX-License-Identifier: Apache-2.0 -""" -Benchmark utilities - forwarding shim. - -Dispatches to ``utils_new_frontends`` when ``--frontend`` appears in ``sys.argv``, -otherwise falls back to ``utils_legacy``. -""" +"""Benchmark utilities.""" from __future__ import annotations -import sys - - -def _use_new_frontend() -> bool: - # HACK: Inspect sys.argv to detect use of the new frontends - # (e.g. ``--frontend ray``) without full argument parsing. - # This only works when invoked from the CLI; direct imports always get the - # legacy path. TODO: Remove this shim once the legacy path is deleted. - args = sys.argv[1:] - has_frontend = "--frontend" in args - has_cluster = "--cluster" in args or "-c" in args - if has_frontend and has_cluster: - raise SystemExit( - "Error: --frontend and --cluster cannot be used together.\n" - " Use --frontend for the new frontend path.\n" - " Use --cluster for the legacy path." 
- ) - return has_frontend - - -if _use_new_frontend(): - from cudf_polars.experimental.benchmarks.utils_new_frontends import ( - COUNT_DTYPE, - QueryResult, - RunConfig, - build_parser, - get_data, - parse_args, - run_duckdb, - run_polars, - ) -else: - from cudf_polars.experimental.benchmarks.utils_legacy import ( # type: ignore[assignment] - COUNT_DTYPE, - QueryResult, - RunConfig, - build_parser, - get_data, - parse_args, - run_duckdb, - run_polars, - ) +from cudf_polars.experimental.benchmarks.utils_new_frontends import ( + COUNT_DTYPE, + QueryResult, + RunConfig, + build_parser, + get_data, + parse_args, + run_duckdb, + run_polars, +) __all__: list[str] = [ "COUNT_DTYPE", diff --git a/python/cudf_polars/cudf_polars/experimental/benchmarks/utils_legacy.py b/python/cudf_polars/cudf_polars/experimental/benchmarks/utils_legacy.py deleted file mode 100644 index 2d18b2747b5..00000000000 --- a/python/cudf_polars/cudf_polars/experimental/benchmarks/utils_legacy.py +++ /dev/null @@ -1,2100 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. -# SPDX-License-Identifier: Apache-2.0 - -"""Utility functions/classes for running the PDS-H and PDS-DS benchmarks.""" - -from __future__ import annotations - -import argparse -import dataclasses -import importlib -import io -import itertools -import json -import logging -import os -import pprint -import statistics -import sys -import textwrap -import time -import traceback -import uuid -from collections import defaultdict -from datetime import UTC, datetime -from pathlib import Path -from typing import TYPE_CHECKING, Any, Literal, assert_never - -import nvtx - -import polars as pl - -import rmm.statistics - -# The dtype for count() aggregations depends on the presence -# of the polars-runtime-64 package (`polars[rt64]`). 
-HAS_POLARS_RT_64 = pl.config.plr.RUNTIME_REPR == "rt64" -COUNT_DTYPE = pl.UInt64() if HAS_POLARS_RT_64 else pl.UInt32() - -try: - import duckdb - - duckdb_err = None -except ImportError as e: - duckdb = None - duckdb_err = e - -try: - import pynvml -except ImportError: - pynvml = None - -try: - from cudf_polars.dsl.ir import IRExecutionContext - from cudf_polars.dsl.tracing import Scope - from cudf_polars.dsl.translate import Translator - from cudf_polars.experimental.benchmarks.asserts import ( - ValidationError, - assert_tpch_result_equal, - ) - from cudf_polars.experimental.explain import explain_query - from cudf_polars.experimental.parallel import evaluate_streaming - from cudf_polars.utils.config import ConfigOptions - - CUDF_POLARS_AVAILABLE = True -except ImportError: - CUDF_POLARS_AVAILABLE = False - -if TYPE_CHECKING: - from collections.abc import Callable, Sequence - - from cudf_polars.experimental.explain import SerializablePlan - - -POLARS_VALIDATION_OPTIONS = { - "check_row_order": True, - "check_column_order": True, - "check_dtypes": True, - "check_exact": False, - "rel_tol": 1e-5, - "abs_tol": 1e-2, -} - - -def get_validation_options(args: Any) -> dict[str, Any]: - """Get validation options dict from parsed arguments.""" - return { - **POLARS_VALIDATION_OPTIONS, - "abs_tol": args.validation_abs_tol, - } - - -try: - import structlog - import structlog.contextvars - import structlog.processors - import structlog.stdlib -except ImportError: - _HAS_STRUCTLOG = False -else: - _HAS_STRUCTLOG = True - - -ExecutorType = Literal["in-memory", "streaming", "cpu"] - - -@dataclasses.dataclass -class ValidationResult: - """ - Result of a validation run. - - Parameters - ---------- - status - The status of the validation. Either 'Passed' or 'Failed'. - message - The message from the validation. This should be ``None`` if - the validation passed, and a string describing the failure otherwise. - details - Additional details about the validation failure. 
- """ - - status: Literal["Passed", "Failed"] - message: str | None - details: dict[str, Any] | None = None - - @classmethod - def from_error(cls, error: Exception) -> ValidationResult: - """ - Create a ValidationResult from some exception. - - Parameters - ---------- - error : Exception - The error to create a ValidationResult from. - - This will correctly propagate "message" and "details" from - ``cudf_polars.testing.asserts.ValidationError``. - - Returns - ------- - ValidationResult - The ValidationResult created from the error. - """ - match error: - case ValidationError(message=message, details=details): - return cls(status="Failed", message=message, details=details) - case _: - return cls(status="Failed", message=str(error)) - - -@dataclasses.dataclass -class ValidationMethod: - """ - Information about how the validation was performed. - - Parameters - ---------- - expected_source - A name indicating the source of the expected results. - - - 'polars-cpu': Run polars against the same data - - 'duckdb': Compare against pre-computed DuckDB results - - comparison_method - How the comparison was performed. Currently, only - 'polars' is supported, which indicates that ``polars.testing.assert_frame_equal`` - was used. - - comparison_options - Additional options passed to the comparison method, controlling - things like the tolerance for floating point comparisons. 
- """ - - expected_source: Literal["polars-cpu", "duckdb"] - comparison_method: Literal["polars"] - comparison_options: dict[str, Any] - - -@dataclasses.dataclass(kw_only=True) -class FailedRecord: - """Records a failed query iteration.""" - - query: int - iteration: int - status: Literal["error"] = "error" - traceback: str - - -@dataclasses.dataclass(kw_only=True) -class SuccessRecord: - """Results for a single run of a single PDS-H query.""" - - query: int - iteration: int - duration: float - shuffle_stats: dict[str, dict[str, int | float]] | None = None - traces: list[dict[str, Any]] | None = None - validation_result: ValidationResult | None = None - status: Literal["success"] = "success" - - @classmethod - def new( - cls, - query: int, - iteration: int, - duration: float, - shuffle_stats: dict[str, dict[str, int | float]] | None = None, - traces: list[dict[str, Any]] | None = None, - ) -> SuccessRecord: - """Create a Record from plain data.""" - return cls( - query=query, - iteration=iteration, - duration=duration, - shuffle_stats=shuffle_stats, - traces=traces, - ) - - -@dataclasses.dataclass -class QueryRunResult: - """Result of running a single query (all iterations).""" - - query_records: list[SuccessRecord | FailedRecord] - plan: SerializablePlan | None - iteration_failures: list[tuple[int, int]] - validation_failed: bool - - -@dataclasses.dataclass -class VersionInfo: - """Information about the commit of the software used to run the query.""" - - version: str - commit: str - - -@dataclasses.dataclass -class PackageVersions: - """Information about the versions of the software used to run the query.""" - - cudf_polars: str | VersionInfo - polars: str - python: str - rapidsmpf: str | VersionInfo | None - duckdb: str | None - - @classmethod - def collect(cls) -> PackageVersions: - """Collect the versions of the software used to run the query.""" - packages = [ - "cudf_polars", - "duckdb", - "polars", - "rapidsmpf", - ] - versions: dict[str, str | VersionInfo 
| None] = {} - for name in packages: - try: - package = importlib.import_module(name) - except (AttributeError, ImportError): - versions[name] = None - else: - if name in ("cudf_polars", "rapidsmpf"): - versions[name] = VersionInfo( - version=package.__version__, - commit=package.__git_commit__, - ) - else: - versions[name] = package.__version__ - - versions["python"] = ".".join(str(v) for v in sys.version_info[:3]) - # we manually ensure that only cudf-polars and rapidsmpf have a VersionInfo - return cls(**versions) # type: ignore[arg-type] - - -@dataclasses.dataclass -class GPUInfo: - """Information about a specific GPU.""" - - name: str - index: int - free_memory: int | None - used_memory: int | None - total_memory: int | None - - @classmethod - def from_index(cls, index: int) -> GPUInfo: - """Create a GPUInfo from an index.""" - pynvml.nvmlInit() - handle = pynvml.nvmlDeviceGetHandleByIndex(index) - try: - memory = pynvml.nvmlDeviceGetMemoryInfo(handle) - return cls( - name=pynvml.nvmlDeviceGetName(handle), - index=index, - free_memory=memory.free, - used_memory=memory.used, - total_memory=memory.total, - ) - except pynvml.NVMLError_NotSupported: - # Happens on systems without traditional GPU memory (e.g., Grace Hopper), - # where nvmlDeviceGetMemoryInfo is not supported. 
- # See: https://github.com/rapidsai/cudf/issues/19427 - return cls( - name=pynvml.nvmlDeviceGetName(handle), - index=index, - free_memory=None, - used_memory=None, - total_memory=None, - ) - - -@dataclasses.dataclass -class HardwareInfo: - """Information about the hardware used to run the query.""" - - gpus: list[GPUInfo] - # TODO: ucx - - @classmethod - def collect(cls) -> HardwareInfo: - """Collect the hardware information.""" - if pynvml is not None: - pynvml.nvmlInit() - gpus = [GPUInfo.from_index(i) for i in range(pynvml.nvmlDeviceGetCount())] - else: - # No GPUs -- probably running in CPU mode - gpus = [] - return cls(gpus=gpus) - - -def _infer_scale_factor(name: str, path: str | Path, suffix: str) -> int | float: - if "pdsh" in name: - supplier = get_data(path, "supplier", suffix) - num_rows = supplier.select(pl.len()).collect().item(0, 0) - return num_rows / 10_000 - - elif "pdsds" in name: - # TODO: Keep a map of SF-row_count because of nonlinear scaling - # See: https://www.tpc.org/TPC_Documents_Current_Versions/pdf/TPC-DS_v4.0.0.pdf pg.46 - customer = get_data(path, "promotion", suffix) - num_rows = customer.select(pl.len()).collect().item(0, 0) - return num_rows / 300 - - else: - raise ValueError(f"Invalid benchmark script name: '{name}'.") - - -@dataclasses.dataclass(kw_only=True) -class RunConfig: - """Results for a PDS-H or PDS-DS query run.""" - - engine_name: Literal["polars-cpu", "cudf-polars", "duckdb"] - queries: list[int] - suffix: str - executor: ExecutorType - runtime: str - stream_policy: str | None - cluster: str - n_workers: int - versions: PackageVersions = dataclasses.field( - default_factory=PackageVersions.collect - ) - records: dict[int, list[SuccessRecord | FailedRecord]] = dataclasses.field( - default_factory=dict - ) - plans: dict[int, SerializablePlan] = dataclasses.field(default_factory=dict) - dataset_path: Path - scale_factor: int | float - qualification: bool = False - shuffle: Literal["rapidsmpf", "tasks"] | None = None - 
gather_shuffle_stats: bool = False - broadcast_join_limit: int | None = None - blocksize: int | None = None - max_rows_per_partition: int | None = None - threads: int - iterations: int - timestamp: str = dataclasses.field( - default_factory=lambda: datetime.now(UTC).isoformat() - ) - hardware: HardwareInfo = dataclasses.field(default_factory=HardwareInfo.collect) - run_id: uuid.UUID = dataclasses.field(default_factory=uuid.uuid4) - rmm_async: bool - rapidsmpf_oom_protection: bool - rapidsmpf_spill: bool - spill_device: float - query_set: str - collect_traces: bool = False - dynamic_planning: bool | None = None - max_io_threads: int - native_parquet: bool - spill_to_pinned_memory: bool - extra_info: dict[str, Any] = dataclasses.field(default_factory=dict) - fallback_mode: str | None = None - validation_method: ValidationMethod | None = None - io_mode: Literal["cold", "lukewarm", "hot"] = "lukewarm" - duckdb_threads: int | None = None - duckdb_memory_limit: str | None = None - duckdb_temp_dir: str | None = None - - def __post_init__(self) -> None: # noqa: D105 - if self.gather_shuffle_stats and self.shuffle != "rapidsmpf": - raise ValueError( - "gather_shuffle_stats is only supported when shuffle='rapidsmpf'." - ) - if self.io_mode == "hot" and self.iterations < 2: - raise ValueError( - "--io-mode hot requires at least 2 iterations: " - "iteration 0 warms the cache, iterations 1+ are the hot measurements." 
- ) - - @classmethod - def from_args(cls, args: argparse.Namespace) -> RunConfig: - """Create a RunConfig from command line arguments.""" - executor: ExecutorType = args.executor - cluster = args.cluster - runtime = args.runtime - stream_policy = args.stream_policy - - # Handle "auto" stream policy - if stream_policy == "auto": - stream_policy = None - - # Deal with non-streaming executors - if executor == "in-memory" or executor == "cpu": - cluster = "single" - - path = args.path - name = args.query_set - scale_factor = args.scale - - if args.qualification and "pdsds" not in name: - raise ValueError("--qualification can only be used with PDS-DS benchmarks.") - - if scale_factor is None: - if "pdsds" in name: - raise ValueError( - "--scale is required for PDS-DS benchmarks.\n" - "TODO: This will be inferred once we maintain a map of scale factors to row counts." - ) - if path is None: - raise ValueError( - "Must specify --root and --scale if --path is not specified." - ) - # For PDS-H, infer scale factor based on row count - scale_factor = _infer_scale_factor(name, path, args.suffix) - if path is None: - path = f"{args.root}/scale-{scale_factor}" - - scale_factor = float(scale_factor) - try: - scale_factor_int = int(scale_factor) - except ValueError: - pass - else: - if scale_factor_int == scale_factor: - scale_factor = scale_factor_int - - skip_scale_factor_inference = ( - "LIBCUDF_IO_REROUTE_LOCAL_DIR_PATTERN" in os.environ - ) and ("LIBCUDF_IO_REROUTE_REMOTE_DIR_PATTERN" in os.environ) - - if ( - "pdsh" in name - and args.scale is not None - and skip_scale_factor_inference is False - ): - # Validate the user-supplied scale factor - sf_inf = _infer_scale_factor(name, path, args.suffix) - rel_error = abs((scale_factor - sf_inf) / sf_inf) - if rel_error > 0.01: - raise ValueError( - f"Specified scale factor is {args.scale}, " - f"but the inferred scale factor is {sf_inf}." 
- ) - - if args.validate_directory: - validation_method = ValidationMethod( - expected_source="duckdb", - comparison_method="polars", - comparison_options=get_validation_options(args), - ) - elif args.validate: - validation_method = ValidationMethod( - expected_source="polars-cpu" if args.baseline == "cpu" else "duckdb", - comparison_method="polars", - comparison_options=get_validation_options(args), - ) - else: - validation_method = None - - engine_name: Literal["polars-cpu", "cudf-polars", "duckdb"] - if args.engine == "duckdb": - engine_name = "duckdb" - elif args.engine == "polars": - if executor == "cpu": - engine_name = "polars-cpu" - else: - engine_name = "cudf-polars" - else: - raise ValueError(f"Invalid engine: {args.engine}") - - return cls( - engine_name=engine_name, - queries=args.query, - executor=executor, - cluster=cluster, - runtime=runtime, - stream_policy=stream_policy, - n_workers=args.n_workers, - shuffle=args.shuffle, - gather_shuffle_stats=args.rapidsmpf_dask_statistics, - broadcast_join_limit=args.broadcast_join_limit, - dataset_path=path, - scale_factor=scale_factor, - qualification=args.qualification, - blocksize=args.blocksize, - threads=args.threads, - iterations=args.iterations, - suffix=args.suffix, - rmm_async=args.rmm_async, - rapidsmpf_oom_protection=args.rapidsmpf_oom_protection, - spill_device=args.spill_device, - rapidsmpf_spill=args.rapidsmpf_spill, - max_rows_per_partition=args.max_rows_per_partition, - query_set=args.query_set, - collect_traces=args.collect_traces, - dynamic_planning=args.dynamic_planning, - max_io_threads=args.max_io_threads, - native_parquet=args.native_parquet, - extra_info=args.extra_info, - spill_to_pinned_memory=args.spill_to_pinned_memory, - fallback_mode=args.fallback_mode, - validation_method=validation_method, - io_mode=args.io_mode, - duckdb_threads=args.duckdb_threads, - duckdb_memory_limit=args.duckdb_memory_limit, - duckdb_temp_dir=args.duckdb_temp_dir, - ) - - def serialize(self, engine: 
pl.GPUEngine | None) -> dict: - """Serialize the run config to a dictionary.""" - result = dataclasses.asdict(self) - result["run_id"] = str(self.run_id) - - if engine is not None: - config_options = ConfigOptions.from_polars_engine(engine) - result["config_options"] = dataclasses.asdict(config_options) - return result - - def summarize(self) -> None: - """Print a summary of the results.""" - print("Iteration Summary") - print("=======================================") - - for query, records in self.records.items(): - print(f"query: {query}") - print(f"path: {self.dataset_path}") - print(f"scale_factor: {self.scale_factor}") - print(f"executor: {self.executor}") - print(f"stream_policy: {self.stream_policy}") - if self.executor == "streaming": - print(f"runtime: {self.runtime}") - print(f"cluster: {self.cluster}") - print(f"blocksize: {self.blocksize}") - print(f"shuffle_method: {self.shuffle}") - print(f"broadcast_join_limit: {self.broadcast_join_limit}") - if self.runtime == "rapidsmpf": - print(f"native_parquet: {self.native_parquet}") - print(f"dynamic_planning: {self.dynamic_planning}") - if self.cluster == "distributed": - print(f"n_workers: {self.n_workers}") - print(f"threads: {self.threads}") - print(f"rmm_async: {self.rmm_async}") - print(f"rapidsmpf_oom_protection: {self.rapidsmpf_oom_protection}") - print(f"spill_device: {self.spill_device}") - print(f"rapidsmpf_spill: {self.rapidsmpf_spill}") - valid_durations = [ - record.duration for record in records if record.status == "success" - ] - if len(valid_durations) > 0: - print(f"iterations: {self.iterations}") - print("---------------------------------------") - print(f"min time : {min(valid_durations):0.4f}") - print(f"max time : {max(valid_durations):0.4f}") - print(f"mean time: {statistics.mean(valid_durations):0.4f}") - print("=======================================") - any_success = any(record.status == "success" for record in records) - - if any_success: - total_mean_time = sum( - statistics.mean( 
- record.duration for record in records if record.status == "success" - ) - for records in self.records.values() - if records - ) - print(f"Total mean time across all queries: {total_mean_time:.4f} seconds") - else: - print("No successful queries") - - -def get_data(path: str | Path, table_name: str, suffix: str = "") -> pl.LazyFrame: - """Get table from dataset.""" - return pl.scan_parquet(f"{path}/{table_name}{suffix}") - - -def get_executor_options( - run_config: RunConfig, benchmark: Any = None -) -> dict[str, Any]: - """Generate executor_options for GPUEngine.""" - executor_options: dict[str, Any] = {} - - if run_config.executor == "streaming": - if run_config.blocksize: - executor_options["target_partition_size"] = run_config.blocksize - if run_config.max_rows_per_partition: - executor_options["max_rows_per_partition"] = ( - run_config.max_rows_per_partition - ) - if run_config.shuffle: - executor_options["shuffle_method"] = run_config.shuffle - if run_config.broadcast_join_limit: - executor_options["broadcast_join_limit"] = run_config.broadcast_join_limit - if run_config.rapidsmpf_spill: - executor_options["rapidsmpf_spill"] = run_config.rapidsmpf_spill - if run_config.fallback_mode: - executor_options["fallback_mode"] = run_config.fallback_mode - if run_config.cluster == "distributed": - executor_options["cluster"] = "distributed" - executor_options["client_device_threshold"] = run_config.spill_device - executor_options["runtime"] = run_config.runtime - executor_options["max_io_threads"] = run_config.max_io_threads - executor_options["spill_to_pinned_memory"] = run_config.spill_to_pinned_memory - if not run_config.dynamic_planning: - # Disable dynamic planning - executor_options["dynamic_planning"] = None - - if ( - benchmark - and benchmark.__name__ == "PDSHQueries" - and run_config.executor == "streaming" - and not run_config.dynamic_planning - ): - executor_options["unique_fraction"] = { - "c_custkey": 0.05, - "l_orderkey": 1.0, - "l_partkey": 0.1, - 
"o_custkey": 0.25, - } - - return executor_options - - -def print_query_plan( - q_id: int, - q: pl.LazyFrame, - args: argparse.Namespace, - run_config: RunConfig, - engine: None | pl.GPUEngine = None, - *, - print_plans: bool = True, -) -> tuple[str | None, str | None]: - """Print the query plan.""" - logical_plan = plan = None - if run_config.executor == "cpu": - if args.explain_logical: - logical_plan = q.explain() - if args.explain: - plan = q.show_graph(engine="streaming", plan_stage="physical") - elif CUDF_POLARS_AVAILABLE: - assert isinstance(engine, pl.GPUEngine) - if args.explain_logical: - logical_plan = explain_query(q, engine, physical=False) - if args.explain and run_config.executor == "streaming": - plan = explain_query(q, engine) - else: - raise RuntimeError( - "Cannot provide the logical or physical plan because cudf_polars is not installed." - ) - - if print_plans: - if logical_plan: - print(f"\nQuery {q_id} - Logical plan\n") - print(logical_plan) - if plan: - print(f"\nQuery {q_id} - Physical plan\n") - print(plan) - - return logical_plan, plan - - -def initialize_dask_cluster(run_config: RunConfig, args: argparse.Namespace): # type: ignore[no-untyped-def] - """ - Initialize a Dask distributed cluster. - - This function either creates a new LocalCUDACluster or connects to an - existing Dask cluster depending on the provided arguments. - - Parameters - ---------- - run_config : RunConfig - The run configuration. - args : argparse.Namespace - Parsed command line arguments. If ``args.scheduler_address`` or - ``args.scheduler_file`` is provided, we connect to an existing - cluster instead of creating a LocalCUDACluster. - - Returns - ------- - Client or None - A Dask distributed Client, or None if not using distributed mode. 
- """ - if run_config.cluster != "distributed": - return None - - from distributed import Client - - # Check if we should connect to an existing cluster - scheduler_address = args.scheduler_address - scheduler_file = args.scheduler_file - - if scheduler_address is not None: - # Connect to existing cluster via scheduler address - client = Client(address=scheduler_address) - n_workers = client.scheduler_info()["n_workers"] - print( - f"Connected to existing Dask cluster at {scheduler_address} " - f"with {n_workers} workers" - ) - elif scheduler_file is not None: - # Connect to existing cluster via scheduler file - client = Client(scheduler_file=scheduler_file) - n_workers = client.scheduler_info()["n_workers"] - print( - f"Connected to existing Dask cluster via scheduler file: {scheduler_file} " - f"with {n_workers} workers" - ) - else: - # Create a new LocalCUDACluster - from dask_cuda import LocalCUDACluster - - kwargs = { - "n_workers": run_config.n_workers, - "dashboard_address": ":8585", - "protocol": args.protocol, - "rmm_pool_size": args.rmm_pool_size, - "rmm_async": args.rmm_async, - "rmm_release_threshold": args.rmm_release_threshold, - "threads_per_worker": run_config.threads, - "memory_limit": args.worker_memory_limit, - } - - client = Client(LocalCUDACluster(**kwargs)) - client.wait_for_workers(run_config.n_workers) - - if run_config.shuffle != "tasks": - try: - from rapidsmpf.config import Options, get_environment_variables - from rapidsmpf.integrations.dask import bootstrap_dask_cluster - - bootstrap_dask_cluster( - client, - options=Options( - { - "dask_spill_device": str(run_config.spill_device), - "dask_spill_to_pinned_memory": str( - run_config.spill_to_pinned_memory - ), - "dask_statistics": str(args.rapidsmpf_dask_statistics), - "dask_print_statistics": str(args.rapidsmpf_print_statistics), - "dask_oom_protection": str(args.rapidsmpf_oom_protection), - } - | get_environment_variables() - ), - ) - # Setting this globally makes the peak statistics 
not meaningful - # across queries / iterations. But doing it per query isn't worth - # the effort right now. - client.run(rmm.statistics.enable_statistics) - except ImportError as err: - if run_config.shuffle == "rapidsmpf": - raise ImportError( - "rapidsmpf is required for shuffle='rapidsmpf' but is not installed." - ) from err - - return client - - -def drop_file_page_cache_recursively(path: os.PathLike | str) -> None: - """Drop the Linux page cache for all files under `path`.""" - try: - import kvikio - except ImportError as err: - raise RuntimeError( - "kvikio is required for cold-run page cache dropping. " - "Install it or switch to --io-mode lukewarm." - ) from err - p = Path(path).expanduser() - if p.is_file(): - kvikio.drop_file_page_cache(p) - return - for f in p.rglob("*"): - if f.is_file(): - kvikio.drop_file_page_cache(f) - - -def execute_query( - q_id: int, - i: int, - q: pl.LazyFrame, - run_config: RunConfig, - args: argparse.Namespace, - engine: None | pl.GPUEngine = None, -) -> tuple[pl.DataFrame, float]: - """Execute a query with NVTX annotation.""" - if run_config.io_mode == "cold": - drop_file_page_cache_recursively(run_config.dataset_path) - - with nvtx.annotate( - message=f"Query {q_id} - Iteration {i}", - domain="cudf_polars", - color="green", - ): - if run_config.executor == "cpu": - t0 = time.monotonic() - result = q.collect(engine="streaming") - t1 = time.monotonic() - - elif CUDF_POLARS_AVAILABLE: - assert isinstance(engine, pl.GPUEngine) - if args.debug: - translator = Translator(q._ldf.visit(), engine) - ir = translator.translate_ir() - context = IRExecutionContext() - if run_config.executor == "in-memory": - t0 = time.monotonic() - result = ir.evaluate( - cache={}, timer=None, context=context - ).to_polars() - t1 = time.monotonic() - elif run_config.executor == "streaming": - t0 = time.monotonic() - result = evaluate_streaming( - ir, - translator.config_options, - ) - t1 = time.monotonic() - else: - assert_never(run_config.executor) - 
else: - t0 = time.monotonic() - result = q.collect(engine=engine) - t1 = time.monotonic() - - else: - raise RuntimeError("The requested engine is not supported.") - - return result, t1 - t0 - - -def _query_type(num_queries: int) -> Callable[[str | int], list[int]]: - def parse(query: str | int) -> list[int]: - if isinstance(query, int): - return [query] - if query == "all": - return list(range(1, num_queries + 1)) - - result: set[int] = set() - for part in query.split(","): - if "-" in part: - start, end = part.split("-") - result.update(range(int(start), int(end) + 1)) - else: - result.add(int(part)) - return sorted(result) - - return parse - - -def build_parser(num_queries: int = 22) -> argparse.ArgumentParser: - """Build the argument parser for PDS-H/PDS-DS benchmarks.""" - parser = argparse.ArgumentParser( - prog="Cudf-Polars PDS-H Benchmarks", - description="Experimental streaming-executor benchmarks.", - formatter_class=argparse.RawTextHelpFormatter, - ) - parser.add_argument( - "query", - type=_query_type(num_queries), - help=textwrap.dedent("""\ - Query to run. One of the following: - - A single number (e.g. 11) - - A comma-separated list of query numbers (e.g. 1,3,7) - - A range of query number (e.g. 1-11,23-34) - - The string 'all' to run all queries (1 through 22)"""), - ) - parser.add_argument( - "--path", - type=str, - default=os.environ.get("PDSH_DATASET_PATH"), - help=textwrap.dedent("""\ - Path to the root directory of the PDS-H dataset. 
- Defaults to the PDSH_DATASET_PATH environment variable."""), - ) - parser.add_argument( - "--root", - type=str, - default=os.environ.get("PDSH_DATASET_ROOT"), - help="Root PDS-H dataset directory (ignored if --path is used).", - ) - parser.add_argument( - "--scale", - type=str, - default=None, - help="Dataset scale factor.", - ) - parser.add_argument( - "--qualification", - action="store_true", - help="Use TPC-DS qualification parameters from specification Appendix B (PDS-DS only).", - ) - parser.add_argument( - "--suffix", - type=str, - default=".parquet", - help=textwrap.dedent("""\ - File suffix for input table files. - Default: .parquet"""), - ) - parser.add_argument( - "-e", - "--executor", - default="streaming", - type=str, - choices=["in-memory", "streaming", "cpu"], - help=textwrap.dedent("""\ - Query executor backend: - - in-memory : Evaluate query in GPU memory - - streaming : Partitioned evaluation (default) - - cpu : Use Polars CPU engine"""), - ) - parser.add_argument( - "-c", - "--cluster", - default=None, - type=str, - choices=["single", "distributed"], - help=textwrap.dedent("""\ - Cluster type to use with the 'streaming' executor. - - single : Run locally in a single process - - distributed : Use Dask for multi-GPU execution"""), - ) - parser.add_argument( - "--runtime", - type=str, - choices=["tasks", "rapidsmpf"], - default="tasks", - help="Runtime to use for the streaming executor (tasks or rapidsmpf).", - ) - parser.add_argument( - "--stream-policy", - type=str, - choices=["auto", "default", "new", "pool"], - default="auto", - help=textwrap.dedent("""\ - CUDA stream policy (auto, default, new, pool). 
- Default: auto (use the default policy for the runtime)"""), - ) - parser.add_argument( - "--n-workers", - default=1, - type=int, - help="Number of Dask-CUDA workers (requires 'distributed' cluster).", - ) - external_cluster_group = parser.add_mutually_exclusive_group() - external_cluster_group.add_argument( - "--scheduler-address", - default=None, - type=str, - help=textwrap.dedent("""\ - Scheduler address for connecting to an existing Dask cluster. - If provided, a cluster is not created and worker - configuration options (--n-workers, --rmm-pool-size, etc.) - are ignored since the workers are assumed to be started separately."""), - ) - external_cluster_group.add_argument( - "--scheduler-file", - default=None, - type=str, - help=textwrap.dedent("""\ - Path to a scheduler file for connecting to an existing Dask cluster. - If provided, a cluster is not created and worker - configuration options (--n-workers, --rmm-pool-size, etc.) - are ignored since the workers are assumed to be started separately."""), - ) - parser.add_argument( - "--blocksize", - default=None, - type=int, - help="Target partition size, in bytes, for IO tasks.", - ) - parser.add_argument( - "--max-rows-per-partition", - default=None, - type=int, - help="The maximum number of rows to process per partition.", - ) - parser.add_argument( - "--iterations", - default=1, - type=int, - help="Number of times to run the same query.", - ) - parser.add_argument( - "--io-mode", - dest="io_mode", - default="lukewarm", - choices=["cold", "lukewarm", "hot"], - help=textwrap.dedent("""\ - Cache state control for each timed iteration: - - cold : Drop Linux page cache before each iteration (requires kvikio) - - lukewarm : No cache manipulation; OS cache state unchanged (default) - - hot : One untimed warmup iteration to populate cache before measured runs"""), - ) - parser.add_argument( - "--debug", - default=False, - action="store_true", - help="Debug run.", - ) - parser.add_argument( - "--protocol", - 
default="ucx", - type=str, - choices=["ucx"], - help="Communication protocol to use for Dask: ucx (uses ucxx)", - ) - parser.add_argument( - "--shuffle", - default=None, - type=str, - choices=[None, "rapidsmpf", "tasks"], - help="Shuffle method to use for distributed execution.", - ) - parser.add_argument( - "--broadcast-join-limit", - default=None, - type=int, - help="Set an explicit `broadcast_join_limit` option.", - ) - parser.add_argument( - "--threads", - default=1, - type=int, - help="Number of threads to use on each GPU.", - ) - parser.add_argument( - "--rmm-pool-size", - default=None, - type=float, - help=textwrap.dedent("""\ - Fraction of total GPU memory to allocate for RMM pool. - Default: 0.5 (50%% of GPU memory) when --no-rmm-async, - None when --rmm-async"""), - ) - parser.add_argument( - "--rmm-release-threshold", - default=None, - type=float, - help=textwrap.dedent("""\ - Passed to dask_cuda.LocalCUDACluster or CudaAsyncMemoryResource - to control the release threshold for RMM pool memory. - Default: None (no release threshold)"""), - ) - parser.add_argument( - "--worker-memory-limit", - default="auto", - type=str, - help=textwrap.dedent("""\ - Passed to dask_cuda.LocalCUDACluster to control the memory limit - of each Dask worker. Use 'auto' to let Dask determine the limit - automatically, or '0' for unlimited. - Default: auto"""), - ) - parser.add_argument( - "--rmm-async", - action=argparse.BooleanOptionalAction, - default=False, - help="Use RMM async memory resource. Note: only affects distributed cluster!", - ) - parser.add_argument( - "--rapidsmpf-oom-protection", - action=argparse.BooleanOptionalAction, - default=False, - help="Use rapidsmpf CUDA managed memory-based OOM protection.", - ) - parser.add_argument( - "--rapidsmpf-dask-statistics", - action=argparse.BooleanOptionalAction, - default=False, - help="Collect rapidsmpf shuffle statistics. 
The output will be stored in the 'shuffle_stats' field of each record.", - ) - parser.add_argument( - "--rapidsmpf-print-statistics", - action=argparse.BooleanOptionalAction, - default=False, - help="Print rapidsmpf shuffle statistics on each Dask worker upon completion.", - ) - parser.add_argument( - "--rapidsmpf-spill", - action=argparse.BooleanOptionalAction, - default=False, - help="Use rapidsmpf for general spilling.", - ) - parser.add_argument( - "--spill-device", - default=0.5, - type=float, - help="Rapidsmpf device spill threshold.", - ) - parser.add_argument( - "-o", - "--output", - type=argparse.FileType("at"), - default="pdsh_results.jsonl", - help="Output file path.", - ) - parser.add_argument( - "--summarize", - action=argparse.BooleanOptionalAction, - help="Summarize the results.", - default=True, - ) - parser.add_argument( - "--print-results", - action=argparse.BooleanOptionalAction, - help="Print the query results", - default=True, - ) - parser.add_argument( - "--explain", - action=argparse.BooleanOptionalAction, - help="Print an outline of the physical plan", - default=False, - ) - parser.add_argument( - "--explain-logical", - action=argparse.BooleanOptionalAction, - help="Print an outline of the logical plan", - default=False, - ) - parser.add_argument( - "--print-plans", - action=argparse.BooleanOptionalAction, - help="Print the query plans", - default=True, - ) - parser.add_argument( - "--validate", - action=argparse.BooleanOptionalAction, - default=False, - help=( - "Validate the result against CPU execution. This will " - "run the query with both GPU and baseline engine (CPU polars or DuckDB), collect the " - "results in memory, and compare them using polars'. " - "At larger scale factors, computing the expected result can be slow so " - "--validate-directory should be used instead." 
- ), - ) - parser.add_argument( - "--baseline", - choices=["duckdb", "cpu"], - default="duckdb", - help="Which engine to use as the baseline for validation.", - ) - - parser.add_argument( - "--collect-traces", - action=argparse.BooleanOptionalAction, - default=False, - help="Collect data tracing cudf-polars execution.", - ) - - parser.add_argument( - "--dynamic-planning", - action=argparse.BooleanOptionalAction, - default=True, - help="Enable dynamic physical-plan generation. Only available for the 'rapidsmpf' runtime.", - ) - parser.add_argument( - "--max-io-threads", - default=2, - type=int, - help="Maximum number of IO threads for rapidsmpf runtime.", - ) - parser.add_argument( - "--native-parquet", - action=argparse.BooleanOptionalAction, - default=False, - help="Use C++ read_parquet nodes for the rapidsmpf runtime.", - ) - parser.add_argument( - "--results-directory", - type=Path, - default=None, - help="Optional directory to write query results as parquet files.", - ) - parser.add_argument( - "--output-expected-directory", - type=Path, - default=None, - help="Optional directory to write expected results as parquet files, when computed from CPU-polars or DuckDB.", - ) - parser.add_argument( - "--validate-directory", - type=Path, - default=None, - help=( - "Validate the results against a directory with a pre-computed set of 'golden' results. " - "The directory should contain one parquet file per query, named 'qDD.parquet', where DD is the " - "zero-padded query number. The JSON output will include the validation results for each record." - ), - ) - parser.add_argument( - "--validation-abs-tol", - type=float, - default=0.01, - help="Absolute tolerance for assert_frame_equal validation. 
Default: 0.01", - ) - parser.add_argument( - "--spill-to-pinned-memory", - action=argparse.BooleanOptionalAction, - default=True, - help=textwrap.dedent("""\ - Whether RapidsMPF should spill to pinned host memory when available, - or use regular pageable host memory."""), - ) - parser.add_argument( - "--extra-info", - type=json.loads, - default={}, - help="Extra information to add to the output file (e.g. version information). Must be JSON-serializable.", - ) - parser.add_argument( - "--fallback-mode", - type=str, - choices=["warn", "raise", "silent"], - default=None, - help=textwrap.dedent("""\ - How to handle operations that don't support multiple partitions in streaming executor. - - warn : Emit a warning and fall back to single partition (default) - - raise : Raise an exception - - silent : Silently fall back to single partition"""), - ) - parser.add_argument( - "--duckdb-threads", - type=int, - default=None, - help="Number of threads for DuckDB to use. Defaults to os.cpu_count().", - ) - parser.add_argument( - "--duckdb-memory-limit", - type=str, - default=None, - help="DuckDB memory limit (e.g. '500GB'). 
If unset, DuckDB uses its default.", - ) - parser.add_argument( - "--duckdb-temp-dir", - type=str, - default=None, - help="Directory for DuckDB to spill temporary data to disk.", - ) - - return parser - - -def parse_args( - args: Sequence[str] | None = None, - num_queries: int = 22, - parser: argparse.ArgumentParser | None = None, -) -> argparse.Namespace: - """Parse command line arguments.""" - if parser is None: - parser = build_parser(num_queries) - parsed_args = parser.parse_args(args) - - if parsed_args.rmm_pool_size is None and not parsed_args.rmm_async: - # The default rmm pool size depends on the rmm_async flag - parsed_args.rmm_pool_size = 0.5 - - if parsed_args.validate_directory and parsed_args.validate: - raise ValueError("Specify either --validate-directory or --validate, not both.") - if ( - parsed_args.validate_directory is not None - and not parsed_args.validate_directory.exists() - ): - raise FileNotFoundError( - f"--validate-directory: {parsed_args.validate_directory} does not exist." 
- ) - if parsed_args.validate_directory: - validation_files = list_validation_files(parsed_args.validate_directory) - missing_files = [ - str(x) for x in set(parsed_args.query) - set(validation_files.keys()) - ] - - if missing_files: - raise ValueError(f"Missing files for queries: {','.join(missing_files)}") - - if ( - parsed_args.output_expected_directory - and not parsed_args.validate - and parsed_args.engine != "duckdb" - ): - raise ValueError("Must specify --validate to use --output-expected-directory.") - - if ( - parsed_args.suffix - and not parsed_args.suffix.startswith(".") - and not parsed_args.suffix.startswith("/") - ): - parsed_args.suffix = f".{parsed_args.suffix}" - - return parsed_args - - -def list_validation_files( - validate_directory: Path, -) -> dict[int, Path]: - """List the validation files in the given directory.""" - validation_files: dict[int, Path] = {} - for q_path in validate_directory.glob("q*.parquet"): - q_id = int(q_path.stem.lstrip("q").lstrip("_")) - validation_files[q_id] = q_path - return validation_files - - -def validate_result( - result: pl.DataFrame, - expected: pl.DataFrame, - sort_by: list[tuple[str, bool]], - limit: int | None = None, - sort_keys: list[tuple[pl.Expr, bool]] | None = None, - **kwargs: Any, -) -> ValidationResult: - """ - Validate the computed result against the expected answer. - - This takes care of special handling for validating TPC-H queries, - where multiple results might be considered correct. - - See Also - -------- - cudf_polars.testing.asserts.assert_tpch_result_equal - """ - try: - assert_tpch_result_equal( - result, - expected, - sort_by=sort_by, - limit=limit, - sort_keys=sort_keys, - **kwargs, - ) - except Exception as e: - return ValidationResult.from_error(e) - else: - return ValidationResult(status="Passed", message=None) - - -@dataclasses.dataclass -class QueryResult: - """ - Representation of a query's result. - - Parameters - ---------- - frame: pl.LazyFrame - The result of the query. 
- sort_by: list[tuple[str, bool]] - The columns that the query sorts by. Each tuple contains (column_name, descending_flag). - Used for the ties/limit boundary logic in validation. - sort_keys: list[tuple[pl.Expr, bool]] | None - Optional Polars expressions for the sortedness check. Use this when the query - sorts by a conditional expression (e.g. ``CASE WHEN lochierarchy = 0 THEN i_category END``) - that cannot be represented as a plain column name in ``sort_by``. When provided, - these expressions are evaluated against the output and used only for the sortedness - check; ``sort_by`` still drives the ties/limit boundary logic. - limit: int | None - The limit of the query, if any. - - """ - - frame: pl.LazyFrame - sort_by: list[tuple[str, bool]] - limit: int | None = None - nulls_last: bool = True - sort_keys: list[tuple[pl.Expr, bool]] | None = None - - -def check_input_data_type( - run_config: RunConfig, -) -> tuple[Literal["decimal", "float"], Literal["date", "timestamp"]]: - """ - Check the input data types columns with variable data types. - - Our queries might be run on datasets that use different data types for different - types of columns. Our validation supports: - - 1. 'decimal' or 'float' for non-integer numeric columns (e.g. 'c_acctbal') - 2. 'date' or 'timestamp' for date type columns (e.g. 'o_orderdate') - - For PDS-H, this is determined by the ``c_acctbal`` column in the - customer table. For PDS-DS, we use ``i_current_price`` from the item table. 
- """ - if run_config.query_set == "pdsds": - table, col = "item", "i_current_price" - else: - table, col = "customer", "c_acctbal" - path = f"{run_config.dataset_path}/{table}{run_config.suffix}" - t = pl.scan_parquet(path).select(pl.col(col)).collect_schema()[col] - - num_type: Literal["decimal", "float"] - date_type: Literal["date", "timestamp"] - if t.is_decimal(): - num_type = "decimal" - else: - num_type = "float" - - if run_config.query_set == "pdsds": - date_type = "date" - else: - path = f"{run_config.dataset_path}/orders{run_config.suffix}" - t = ( - pl.scan_parquet(path) - .select(pl.col("o_orderdate")) - .collect_schema()["o_orderdate"] - ) - - if t.to_python().__name__ == "date": - date_type = "date" - else: - date_type = "timestamp" - - return num_type, date_type - - -def run_polars_query_iteration( - q_id: int, - iteration: int, - q: pl.LazyFrame, - run_config: RunConfig, - args: argparse.Namespace, - engine: pl.GPUEngine | None, - expected: pl.DataFrame | None, - query_result: Any, - client: Any, - prepare_validation_result: Callable[[pl.DataFrame], pl.DataFrame] | None = None, - result_casts: list[pl.Expr] | None = None, -) -> SuccessRecord: - """Run a single query iteration. 
Caller must wrap in try/except.""" - result, duration = execute_query(q_id, iteration, q, run_config, args, engine) - - if expected is not None and prepare_validation_result is not None: - result = prepare_validation_result(result) - - if expected is not None and result_casts: - # Applying the casts to the polars result is - # a workaround we need because of a polars bug - # See https://github.com/pola-rs/polars/issues/27269 - # Once we support polars 1.40, we should remove this - result = result.with_columns(*result_casts) - - if run_config.shuffle == "rapidsmpf" and run_config.gather_shuffle_stats: - from rapidsmpf.integrations.dask.shuffler import ( - clear_shuffle_statistics, - gather_shuffle_statistics, - ) - - shuffle_stats = gather_shuffle_statistics(client) - clear_shuffle_statistics(client) - else: - shuffle_stats = None - - if expected is not None: - validation_result = validate_result( - result, - expected, - query_result.sort_by, - limit=query_result.limit, - nulls_last=query_result.nulls_last, - sort_keys=query_result.sort_keys, - **get_validation_options(args), - ) - else: - validation_result = None - - if args.print_results: - print(result) - - if args.results_directory is not None and iteration == 0: - results_dir = Path(args.results_directory) - results_dir.mkdir(parents=True, exist_ok=True) - output_path = results_dir / f"q_{q_id:02d}.parquet" - result.write_parquet(output_path) - - return SuccessRecord( - query=q_id, - iteration=iteration, - duration=duration, - shuffle_stats=shuffle_stats, - validation_result=validation_result, - ) - - -def run_polars_query( - q_id: int, - benchmark: Any, - run_config: RunConfig, - args: argparse.Namespace, - engine: pl.GPUEngine | None, - client: Any, - numeric_type: str, - date_type: str, - validation_files: dict[int, Path] | None, - prepare_validation_result: Callable[[pl.DataFrame], pl.DataFrame] | None = None, -) -> QueryRunResult: - """Run all iterations for a single query. 
Caller must wrap in try/except.""" - query_result = getattr(benchmark, f"q{q_id}")(run_config) - q = query_result.frame - - print_query_plan(q_id, q, args, run_config, engine, print_plans=args.print_plans) - plan = None - if (args.explain or args.explain_logical) and engine is not None: - from cudf_polars.experimental.explain import serialize_query - - plan = serialize_query(q, engine) - - casts = benchmark.EXPECTED_CASTS.get(q_id, []) - if numeric_type == "decimal": - casts.extend(benchmark.EXPECTED_CASTS_DECIMAL.get(q_id, [])) - if date_type == "timestamp": - casts.extend(benchmark.EXPECTED_CASTS_TIMESTAMP.get(q_id, [])) - - expected: pl.DataFrame | None = None - if args.validate: - if args.baseline == "cpu": - expected = q.collect() - elif args.baseline == "duckdb": - duckdb_queries_cls = benchmark().duckdb_queries - get_ddb = getattr(duckdb_queries_cls, f"q{q_id}") - base_sql = get_ddb(run_config) - expected = execute_duckdb_query( - base_sql, - run_config.dataset_path, - query_set=duckdb_queries_cls.name, - suffix=run_config.suffix, - run_config=run_config, - ).with_columns(*casts) - else: - raise ValueError(f"Invalid baseline: {args.baseline}") - elif validation_files is not None: - expected = pl.read_parquet(validation_files[q_id]).with_columns(*casts) - else: - expected = None - - if args.output_expected_directory is not None: - assert expected is not None, ( - "Expected result must be computed before writing to disk." 
- ) - expected_dir = Path(args.output_expected_directory) - expected_dir.mkdir(parents=True, exist_ok=True) - expected.write_parquet(expected_dir / f"q_{q_id:02d}.parquet") - - query_records: list[SuccessRecord | FailedRecord] = [] - iteration_failures: list[tuple[int, int]] = [] - validation_failed = False - record: SuccessRecord | FailedRecord - - for i in range(args.iterations): - if _HAS_STRUCTLOG and run_config.collect_traces: - setup_logging(q_id, i) - if client is not None: - client.run(setup_logging, q_id, i) - - try: - record = run_polars_query_iteration( - q_id=q_id, - iteration=i, - q=q, - run_config=run_config, - args=args, - engine=engine, - expected=expected, - query_result=query_result, - client=client, - prepare_validation_result=prepare_validation_result, - result_casts=casts if casts else None, - ) - except Exception: - print(f"❌ query={q_id} iteration={i} failed!") - print(traceback.format_exc()) - iteration_failures.append((q_id, i)) - record = FailedRecord( - query=q_id, - iteration=i, - status="error", - traceback=traceback.format_exc(), - ) - - else: - if record.validation_result and record.validation_result.status == "Failed": - validation_failed = True - print( - f"❌ Query {q_id} failed validation!\n{record.validation_result.message}" - ) - if record.validation_result.details: - pprint.pprint(record.validation_result.details) - else: - prefix = "✅ " if record.validation_result else "" - print( - f"{prefix}Query {q_id} - Iteration {i} finished in {record.duration:0.4f}s", - flush=True, - ) - - query_records.append(record) - - return QueryRunResult( - query_records=query_records, - plan=plan, - iteration_failures=iteration_failures, - validation_failed=validation_failed, - ) - - -def _run_query_loop( - benchmark: Any, - args: argparse.Namespace, - run_config: RunConfig, - engine: pl.GPUEngine | None, - client: Any, - numeric_type: str, - date_type: str, - validation_files: dict[int, Path] | None, - prepare_validation_result: 
Callable[[pl.DataFrame], pl.DataFrame] | None = None, -) -> tuple[ - defaultdict[int, list[SuccessRecord | FailedRecord]], - dict[int, Any], - list[int], - list[tuple[int, int]], -]: - """Execute all queries in ``run_config`` and return accumulated results.""" - records: defaultdict[int, list[SuccessRecord | FailedRecord]] = defaultdict(list) - plans: dict[int, SerializablePlan] = {} - validation_failures: list[int] = [] - query_failures: list[tuple[int, int]] = [] - - for q_id in run_config.queries: - try: - result = run_polars_query( - q_id=q_id, - benchmark=benchmark, - run_config=run_config, - args=args, - engine=engine, - client=client, - numeric_type=numeric_type, - date_type=date_type, - validation_files=validation_files, - prepare_validation_result=prepare_validation_result, - ) - except Exception: - print(f"❌ query={q_id} failed (setup or execution)!") - print(traceback.format_exc()) - query_failures.append((q_id, -1)) - record = FailedRecord( - query=q_id, - iteration=-1, - traceback=traceback.format_exc(), - ) - result = QueryRunResult( - query_records=[record], - plan=None, - iteration_failures=[], - validation_failed=False, - ) - - records[q_id] = result.query_records - if result.plan is not None: - plans[q_id] = result.plan - query_failures.extend(result.iteration_failures) - if result.validation_failed: - validation_failures.append(q_id) - - return records, plans, validation_failures, query_failures - - -def _consolidate_logs(run_config: RunConfig, client: Any) -> RunConfig: - """Merge structlog traces from the local process and Dask workers into run_config.""" - if not (_HAS_STRUCTLOG and run_config.collect_traces): - return run_config - - def gather_logs() -> str: - logger = logging.getLogger() - return logger.handlers[0].stream.getvalue() # type: ignore[attr-defined] - - if client is not None: - # Gather logs from both client (for Query Plan) and workers - worker_logs = "\n".join(client.run(gather_logs).values()) - client_logs = gather_logs() - 
all_logs = client_logs + "\n" + worker_logs - else: - all_logs = gather_logs() - - parsed_logs = [json.loads(log) for log in all_logs.splitlines() if log] - # Some other log records can end up in here. Filter those out. - scope_values = {s.value for s in Scope} - parsed_logs = [log for log in parsed_logs if log.get("scope") in scope_values] - # Now we want to augment the existing Records with the trace data. - - def group_key(x: dict) -> int: - return x["query_id"] - - def sort_key(x: dict) -> tuple[int, int]: - return x["query_id"], x["iteration"] - - grouped = itertools.groupby( - sorted(parsed_logs, key=sort_key), - key=group_key, - ) - - for query_id, run_logs_group in grouped: - run_logs = list(run_logs_group) - by_iteration = [ - list(x) - for _, x in itertools.groupby(run_logs, key=lambda x: x["iteration"]) - ] - run_records = run_config.records[query_id] - assert len(by_iteration) == len(run_records) # same number of iterations - all_traces = [list(iteration) for iteration in by_iteration] - - new_records: list[SuccessRecord | FailedRecord] = [] - for rec, traces in zip(run_records, all_traces, strict=True): - if rec.status == "success": - new_records.append(dataclasses.replace(rec, traces=traces)) - else: - new_records.append(rec) - - run_config.records[query_id] = new_records - - return run_config - - -def run_polars( - benchmark: Any, - args: argparse.Namespace, -) -> None: - """Run the queries using the given benchmark and executor options.""" - vars(args).update({"query_set": benchmark.name}) - run_config = RunConfig.from_args(args) - numeric_type, date_type = check_input_data_type(run_config) - validation_files = ( - list_validation_files(args.validate_directory) - if args.validate_directory is not None - else None - ) - parquet_options = ( - {"use_rapidsmpf_native": run_config.native_parquet} - if run_config.runtime == "rapidsmpf" - else {} - ) - match run_config.cluster: - case "single" | "distributed" | None: - run_polars_single_or_dask( - 
benchmark, - args, - run_config, - parquet_options, - numeric_type, - date_type, - validation_files, - ) - - -def run_polars_single_or_dask( - benchmark: Any, - args: argparse.Namespace, - run_config: RunConfig, - parquet_options: dict[str, Any], - numeric_type: str, - date_type: str, - validation_files: dict[int, Path] | None, -) -> None: - """Run benchmark queries using Dask or single-process execution.""" - client = initialize_dask_cluster(run_config, args) - if client is not None: - run_config = dataclasses.replace( - run_config, n_workers=client.scheduler_info()["n_workers"] - ) - - engine = None - if run_config.executor != "cpu": - executor_options = get_executor_options(run_config, benchmark=benchmark) - engine = pl.GPUEngine( - raise_on_fail=True, - memory_resource=rmm.mr.CudaAsyncMemoryResource( - release_threshold=args.rmm_release_threshold - ) - if run_config.rmm_async - else None, - cuda_stream_policy=run_config.stream_policy, - executor=run_config.executor, - executor_options=executor_options, - parquet_options=parquet_options, - ) - - records, plans, validation_failures, query_failures = _run_query_loop( - benchmark, - args, - run_config, - engine, - client, - numeric_type, - date_type, - validation_files, - ) - run_config = dataclasses.replace(run_config, records=dict(records), plans=plans) - run_config = _consolidate_logs(run_config, client=client) - if client is not None: - client.close(timeout=60) - - if args.summarize: - run_config.summarize() - - if args.validate and run_config.executor != "cpu": - print("\nValidation Summary") - print("==================") - if validation_failures: - print( - f"{len(validation_failures)} queries failed validation: {sorted(set(validation_failures))}" - ) - else: - print("✅ All validated queries passed.") - - args.output.write(json.dumps(run_config.serialize(engine=engine))) - args.output.write("\n") - - sys.exit(1 if (query_failures or validation_failures) else 0) - - -def setup_logging(query_id: int, iteration: 
int) -> None: # noqa: D103 - import cudf_polars.dsl.tracing - - if not cudf_polars.dsl.tracing.LOG_TRACES: - msg = ( - "Tracing requested via --collect-traces, but tracking is not enabled. " - "Verify that 'CUDF_POLARS_LOG_TRACES' is set and structlog is installed." - ) - raise RuntimeError(msg) - - if _HAS_STRUCTLOG: - # structlog uses contextvars to propagate context down to where log records - # are emitted. Ideally, we'd just set the contextvars here using - # structlog.bind_contextvars; for the distributed cluster we would need - # to use something like client.run to set the contextvars on the worker. - # However, there's an unfortunate conflict between structlog's use of - # context vars and how Dask Workers actually execute tasks, such that - # the contextvars set via `client.run` aren't visible to the actual - # tasks. - # - # So instead we make a new logger each time we need a new context, - # i.e. for each query/iteration pair. - - def make_injector( - query_id: int, iteration: int - ) -> Callable[[logging.Logger, str, dict[str, Any]], dict[str, Any]]: - def inject( - logger: Any, method_name: Any, event_dict: Any - ) -> dict[str, Any]: - event_dict["query_id"] = query_id - event_dict["iteration"] = iteration - return event_dict - - return inject - - shared_processors = [ - structlog.contextvars.merge_contextvars, - make_injector(query_id, iteration), - structlog.processors.add_log_level, - structlog.processors.CallsiteParameterAdder( - parameters=[ - structlog.processors.CallsiteParameter.PROCESS, - structlog.processors.CallsiteParameter.THREAD, - ], - ), - structlog.processors.StackInfoRenderer(), - structlog.dev.set_exc_info, - structlog.processors.TimeStamper(fmt="%Y-%m-%d %H:%M:%S.%f", utc=False), - ] - - # For logging to a file - json_renderer = structlog.processors.JSONRenderer() - - stream = io.StringIO() - json_file_handler = logging.StreamHandler(stream) - json_file_handler.setFormatter( - structlog.stdlib.ProcessorFormatter( - 
processor=json_renderer, - foreign_pre_chain=shared_processors, - ) - ) - - logging.basicConfig(level=logging.INFO, handlers=[json_file_handler]) - - structlog.configure( - processors=[ - *shared_processors, - structlog.stdlib.ProcessorFormatter.wrap_for_formatter, - ], - logger_factory=structlog.stdlib.LoggerFactory(), - wrapper_class=structlog.make_filtering_bound_logger(logging.INFO), - cache_logger_on_first_use=True, - ) - - -PDSDS_TABLE_NAMES: list[str] = [ - "call_center", - "catalog_page", - "catalog_returns", - "catalog_sales", - "customer", - "customer_address", - "customer_demographics", - "date_dim", - "household_demographics", - "income_band", - "inventory", - "item", - "promotion", - "reason", - "ship_mode", - "store", - "store_returns", - "store_sales", - "time_dim", - "warehouse", - "web_page", - "web_returns", - "web_sales", - "web_site", -] - -PDSH_TABLE_NAMES: list[str] = [ - "customer", - "lineitem", - "nation", - "orders", - "part", - "partsupp", - "region", - "supplier", -] - - -def _make_duckdb_config(run_config: RunConfig | None) -> dict[str, Any]: - """Build a DuckDB connection config dict from a RunConfig.""" - config: dict[str, Any] = { - "threads": run_config.duckdb_threads - if (run_config and run_config.duckdb_threads is not None) - else os.cpu_count(), - } - if run_config and run_config.duckdb_memory_limit is not None: - config["memory_limit"] = run_config.duckdb_memory_limit - if run_config and run_config.duckdb_temp_dir is not None: - config["temp_directory"] = run_config.duckdb_temp_dir - return config - - -def print_duckdb_plan( - q_id: int, - sql: str, - dataset_path: Path, - suffix: str, - query_set: str, - args: argparse.Namespace, - run_config: RunConfig | None = None, -) -> None: - """Print DuckDB query plan using EXPLAIN.""" - if duckdb is None: - raise ImportError(duckdb_err) - - if query_set == "pdsds": - tbl_names = PDSDS_TABLE_NAMES - else: - tbl_names = PDSH_TABLE_NAMES - - with 
duckdb.connect(config=_make_duckdb_config(run_config)) as conn: - for name in tbl_names: - pattern = f"{dataset_path}/{name}{suffix}" - conn.execute( - f"CREATE OR REPLACE VIEW {name} AS " - f"SELECT * FROM parquet_scan('{pattern}');" - ) - - if args.explain_logical and args.explain: - conn.execute("PRAGMA explain_output = 'all';") - elif args.explain_logical: - conn.execute("PRAGMA explain_output = 'optimized_only';") - else: - conn.execute("PRAGMA explain_output = 'physical_only';") - - print(f"\nDuckDB Query {q_id} - Plan\n") - - plan_rows = conn.execute(f"EXPLAIN {sql}").fetchall() - for _, line in plan_rows: - print(line) - - -def execute_duckdb_query( - query: str, - dataset_path: Path, - *, - suffix: str = ".parquet", - query_set: str = "pdsh", - run_config: RunConfig | None = None, -) -> pl.DataFrame: - """Execute a query with DuckDB.""" - if duckdb is None: - raise ImportError(duckdb_err) - if query_set == "pdsds": - tbl_names = PDSDS_TABLE_NAMES - else: - tbl_names = PDSH_TABLE_NAMES - with duckdb.connect(config=_make_duckdb_config(run_config)) as conn: - for name in tbl_names: - pattern = f"{dataset_path}/{name}{suffix}" - conn.execute( - f"CREATE OR REPLACE VIEW {name} AS " - f"SELECT * FROM parquet_scan('{pattern}');" - ) - return conn.execute(query).pl() - - -def run_duckdb(duckdb_queries_cls: Any, args: argparse.Namespace) -> None: - """Run the benchmark with DuckDB.""" - vars(args).update({"query_set": duckdb_queries_cls.name}) - run_config = RunConfig.from_args(args) - records: defaultdict[int, list[SuccessRecord | FailedRecord]] = defaultdict(list) - - for q_id in run_config.queries: - try: - get_q = getattr(duckdb_queries_cls, f"q{q_id}") - except AttributeError as err: - raise NotImplementedError(f"Query {q_id} not implemented.") from err - - sql = get_q(run_config) - - if args.explain or args.explain_logical: - print_duckdb_plan( - q_id=q_id, - sql=sql, - dataset_path=run_config.dataset_path, - suffix=run_config.suffix, - 
query_set=duckdb_queries_cls.name, - args=args, - run_config=run_config, - ) - - print(f"DuckDB Executing: {q_id}") - records[q_id] = [] - - for i in range(args.iterations): - if run_config.io_mode == "cold": - drop_file_page_cache_recursively(run_config.dataset_path) - t0 = time.time() - result = execute_duckdb_query( - sql, - run_config.dataset_path, - suffix=run_config.suffix, - query_set=duckdb_queries_cls.name, - run_config=run_config, - ) - t1 = time.time() - record = SuccessRecord(query=q_id, iteration=i, duration=t1 - t0) - if args.print_results: - print(result) - print(f"Query {q_id} - Iteration {i} finished in {record.duration:0.4f}s") - records[q_id].append(record) - if i == 0 and args.output_expected_directory is not None: - expected_dir = Path(args.output_expected_directory) - expected_dir.mkdir(parents=True, exist_ok=True) - result.write_parquet(expected_dir / f"q_{q_id:02d}.parquet") - - run_config = dataclasses.replace(run_config, records=dict(records)) - if args.summarize: - run_config.summarize() - - args.output.write(json.dumps(run_config.serialize(engine=None))) - args.output.write("\n") diff --git a/python/cudf_polars/cudf_polars/experimental/dask_registers.py b/python/cudf_polars/cudf_polars/experimental/dask_registers.py deleted file mode 100644 index 94334ccbc57..00000000000 --- a/python/cudf_polars/cudf_polars/experimental/dask_registers.py +++ /dev/null @@ -1,227 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. 
-# SPDX-License-Identifier: Apache-2.0 - -"""Dask function registrations such as serializers and dispatch implementations.""" - -from __future__ import annotations - -from typing import TYPE_CHECKING, Any, ClassVar, overload - -from dask.sizeof import sizeof as sizeof_dispatch -from dask.tokenize import normalize_token -from distributed.protocol import dask_deserialize, dask_serialize -from distributed.protocol.cuda import cuda_deserialize, cuda_serialize -from distributed.utils import log_errors - -import pylibcudf as plc -import rmm - -from cudf_polars.containers import Column, DataFrame, DataType -from cudf_polars.dsl.expressions.base import NamedExpr -from cudf_polars.utils.cuda_stream import get_dask_cuda_stream - -if TYPE_CHECKING: - from collections.abc import Hashable, Mapping - - from distributed import Client - - from rmm.pylibrmm.memory_resource import DeviceMemoryResource - from rmm.pylibrmm.stream import Stream - - from cudf_polars.typing import ColumnHeader, ColumnOptions, DataFrameHeader - - -__all__ = ["DaskRegisterManager", "register"] - - -class DaskRegisterManager: # pragma: no cover; Only used with Distributed cluster - """Manager to ensure ensure serializer is only registered once.""" - - _registered: bool = False - _client_run_executed: ClassVar[set[str]] = set() - - @classmethod - def register_once(cls) -> None: - """Register Dask/cudf-polars serializers in calling process.""" - if not cls._registered: - from cudf_polars.experimental.dask_registers import register - - register() - cls._registered = True - - @classmethod - def run_on_cluster(cls, client: Client) -> None: - """Run register on the workers and scheduler once.""" - if client.id not in cls._client_run_executed: - client.run(cls.register_once) - client.run_on_scheduler(cls.register_once) - cls._client_run_executed.add(client.id) - - -def register() -> None: - """Register dask serialization and dispatch functions.""" - - @overload - def serialize_column_or_frame( - x: DataFrame, - ) 
-> tuple[DataFrameHeader, list[memoryview]]: ... - - @overload - def serialize_column_or_frame( - x: Column, - ) -> tuple[ColumnHeader, list[memoryview]]: ... - - @cuda_serialize.register((Column, DataFrame)) - def serialize_column_or_frame( - x: DataFrame | Column, - ) -> tuple[ - DataFrameHeader | ColumnHeader, list[memoryview[bytes] | plc.gpumemoryview] - ]: - with log_errors(): - header, frames = x.serialize(stream=get_dask_cuda_stream()) - # Dask expect a list of frames - return header, list(frames) - - @cuda_deserialize.register(DataFrame) - def _( - header: DataFrameHeader, frames: tuple[memoryview[bytes], plc.gpumemoryview] - ) -> DataFrame: - with log_errors(): - metadata, gpudata = frames # TODO: check if this is a length-2 list... - return DataFrame.deserialize( - header, - (metadata, plc.gpumemoryview(gpudata)), - stream=get_dask_cuda_stream(), - ) - - @cuda_deserialize.register(Column) - def _( - header: ColumnHeader, frames: tuple[memoryview[bytes], plc.gpumemoryview] - ) -> Column: - with log_errors(): - metadata, gpudata = frames - return Column.deserialize( - header, - (metadata, plc.gpumemoryview(gpudata)), - stream=get_dask_cuda_stream(), - ) - - @overload - def dask_serialize_column_or_frame( - x: DataFrame, - ) -> tuple[DataFrameHeader, tuple[memoryview[bytes], memoryview[bytes]]]: ... - - @overload - def dask_serialize_column_or_frame( - x: Column, - ) -> tuple[ColumnHeader, tuple[memoryview[bytes], memoryview[bytes]]]: ... 
- - @dask_serialize.register(Column) - def dask_serialize_column_or_frame( - x: DataFrame | Column, - ) -> tuple[ - DataFrameHeader | ColumnHeader, tuple[memoryview[bytes], memoryview[bytes]] - ]: - stream = get_dask_cuda_stream() - with log_errors(): - header, (metadata, gpudata) = x.serialize(stream=stream) - - # For robustness, we check that the gpu data is contiguous - cai = gpudata.__cuda_array_interface__ - assert len(cai["shape"]) == 1 - assert cai["strides"] is None or cai["strides"] == (1,) - assert cai["typestr"] == "|u1" - nbytes = cai["shape"][0] - - # Copy the gpudata to host memory - gpudata_on_host: memoryview[bytes] = memoryview( - rmm.DeviceBuffer(ptr=gpudata.ptr, size=nbytes).copy_to_host() - ) - return header, (metadata, gpudata_on_host) - - @dask_deserialize.register(Column) - def _(header: ColumnHeader, frames: tuple[memoryview[bytes], memoryview]) -> Column: - with log_errors(): - assert len(frames) == 2 - # Copy the second frame (the gpudata in host memory) back to the gpu - new_frames = ( - frames[0], - plc.gpumemoryview(rmm.DeviceBuffer.to_device(frames[1])), - ) - return Column.deserialize(header, new_frames, stream=get_dask_cuda_stream()) - - @dask_serialize.register(DataFrame) - def _( - x: DataFrame, context: Mapping[str, Any] | None = None - ) -> tuple[DataFrameHeader, tuple[memoryview[bytes], memoryview[bytes]]]: - # Do regular serialization if no staging buffer is provided. - if context is None or "staging_device_buffer" not in context: - return dask_serialize_column_or_frame(x) - - # If a staging buffer is provided, we use `ChunkedPack` to - # serialize the dataframe using the provided staging buffer. - with log_errors(): - # Keyword arguments for `Column.__init__`. 
- columns_kwargs: list[ColumnOptions] = [ - col.serialize_ctor_kwargs() for col in x.columns - ] - header: DataFrameHeader = { - "columns_kwargs": columns_kwargs, - "frame_count": 2, - } - if "stream" not in context: - raise ValueError( - "context: stream must be given when staging_device_buffer is" - ) - if "device_mr" not in context: - raise ValueError( - "context: device_mr must be given when staging_device_buffer is" - ) - stream: Stream = context["stream"] - device_mr: DeviceMemoryResource = context["device_mr"] - buf: rmm.DeviceBuffer = context["staging_device_buffer"] - frame = plc.contiguous_split.ChunkedPack.create( - x.table, buf.nbytes, stream, device_mr - ).pack_to_host(buf) - return header, frame - - @dask_deserialize.register(DataFrame) - def _( - header: DataFrameHeader, frames: tuple[memoryview[bytes], memoryview] - ) -> DataFrame: - with log_errors(): - assert len(frames) == 2 - # Copy the second frame (the gpudata in host memory) back to the gpu - new_frames = ( - frames[0], - plc.gpumemoryview(rmm.DeviceBuffer.to_device(frames[1])), - ) - return DataFrame.deserialize( - header, new_frames, stream=get_dask_cuda_stream() - ) - - @sizeof_dispatch.register(Column) - def _(x: Column) -> int: - """The total size of the device buffers used by the DataFrame or Column.""" - return x.obj.device_buffer_size() - - @sizeof_dispatch.register(DataFrame) - def _(x: DataFrame) -> int: - """The total size of the device buffers used by the DataFrame or Column.""" - return sum(c.obj.device_buffer_size() for c in x.columns) - - # Register rapidsmpf serializer if it's installed. - try: - from rapidsmpf.integrations.dask.spilling import register_dask_serialize - - register_dask_serialize() # pragma: no cover; rapidsmpf dependency not included yet - except ImportError: # pragma: no cover - pass - - # Register the tokenizer for NamedExpr and DataType. 
This is a performance - # optimization that speeds up tokenization for the most common types seen in - # the Dask task graph. - @normalize_token.register(NamedExpr) - @normalize_token.register(DataType) - def _(x: NamedExpr | DataType) -> Hashable: - return hash(x) diff --git a/python/cudf_polars/cudf_polars/experimental/io.py b/python/cudf_polars/cudf_polars/experimental/io.py index 804bcfe2040..f45baa054dd 100644 --- a/python/cudf_polars/cudf_polars/experimental/io.py +++ b/python/cudf_polars/cudf_polars/experimental/io.py @@ -410,17 +410,17 @@ def _( if ( Path(ir.path).exists() and executor_options.sink_to_directory - and executor_options.cluster in (Cluster.SINGLE, Cluster.DISTRIBUTED) + and executor_options.cluster == Cluster.SINGLE ): - # This lowering-time check can't be performed with the new spmd / ray / dask + # This lowering-time check can't be performed with the spmd / ray / dask # clusters, which lower on each worker independently. There's a race condition # between each worker performing this check that the path doesn't yet exist, # and the sink operation creating the directory at the start of execution. raise NotImplementedError( - f"Trying to sink to an existing directory: {ir.path}." + f"Trying to sink to an existing directory: {ir.path}. " "Writing to an existing path is not supported when sinking " - "to a directory. If you are using the 'distributed' scheduler, " - "please remove the target directory before calling 'collect'. " + "to a directory. Please remove the target directory before " + "calling 'collect'." 
) sink_to_directory = executor_options.sink_to_directory diff --git a/python/cudf_polars/cudf_polars/experimental/parallel.py b/python/cudf_polars/cudf_polars/experimental/parallel.py index e9104b5e074..108d7822d60 100644 --- a/python/cudf_polars/cudf_polars/experimental/parallel.py +++ b/python/cudf_polars/cudf_polars/experimental/parallel.py @@ -109,7 +109,6 @@ def lower_ir_graph( def task_graph( ir: IR, partition_info: MutableMapping[IR, PartitionInfo], - config_options: ConfigOptions, ) -> tuple[MutableMapping[Any, Any], str | tuple[str, int]]: """ Construct a task graph for evaluation of an IR graph. @@ -121,16 +120,13 @@ def task_graph( partition_info A mapping from all unique IR nodes to the associated partitioning information. - config_options - GPUEngine configuration options. - context - Runtime context for IR node execution. Returns ------- graph - A Dask-compatible task graph for the entire - IR graph with root `ir`. + A task graph for the entire IR graph with root `ir`, + in dict-of-tuples form consumed by + :func:`~cudf_polars.experimental.scheduler.synchronous_scheduler`. Notes ----- @@ -138,9 +134,6 @@ def task_graph( graph with root `ir`, and extracts the tasks for each node with :func:`generate_ir_tasks`. - The output is passed into :func:`post_process_task_graph` to - add any additional processing that is specific to the executor. - See Also -------- generate_ir_tasks @@ -167,67 +160,9 @@ def task_graph( else: key = (key_name, 0) - graph = post_process_task_graph(graph, key, config_options) return graph, key -# The true type signature for get_scheduler() needs an overload. Not worth it. 
- - -def get_scheduler(config_options: ConfigOptions[StreamingExecutor]) -> Any: - """Get appropriate task scheduler.""" - cluster = config_options.executor.cluster - - if ( - cluster == "distributed" - ): # pragma: no cover; block depends on executor type and Distributed cluster - from distributed import get_client - - from cudf_polars.experimental.dask_registers import DaskRegisterManager - - client = get_client() - DaskRegisterManager.register_once() - DaskRegisterManager.run_on_cluster(client) - return client.get - elif cluster == "single": - from cudf_polars.experimental.scheduler import synchronous_scheduler - - return synchronous_scheduler - else: # pragma: no cover - raise ValueError(f"{cluster} not a supported cluster option.") - - -def post_process_task_graph( - graph: MutableMapping[Any, Any], - key: str | tuple[str, int], - config_options: ConfigOptions[StreamingExecutor], -) -> MutableMapping[Any, Any]: - """ - Post-process the task graph. - - Parameters - ---------- - graph - Task graph to post-process. - key - Output key for the graph. - config_options - GPUEngine configuration options. - - Returns - ------- - graph - A Dask-compatible task graph. - """ - if config_options.executor.rapidsmpf_spill: # pragma: no cover - from cudf_polars.experimental.spilling import wrap_dataframe_in_spillable - - return wrap_dataframe_in_spillable( - graph, ignore_key=key, config_options=config_options - ) - return graph - - def evaluate_rapidsmpf( ir: IR, config_options: ConfigOptions[StreamingExecutor], @@ -280,12 +215,14 @@ def evaluate_streaming( return evaluate_rapidsmpf(ir, config_options) else: # Using the default task engine. 
+ from cudf_polars.experimental.scheduler import synchronous_scheduler + stats = collect_statistics(ir, config_options) ir, partition_info = lower_ir_graph(ir, config_options, stats) - graph, key = task_graph(ir, partition_info, config_options) + graph, key = task_graph(ir, partition_info) - return get_scheduler(config_options)(graph, key).to_polars() + return synchronous_scheduler(graph, key).to_polars() @generate_ir_tasks.register(IR) diff --git a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/core.py b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/core.py index 57b916a6d9f..478c0a33beb 100644 --- a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/core.py +++ b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/core.py @@ -45,7 +45,6 @@ Union, ) from cudf_polars.dsl.traversal import CachingVisitor, traversal -from cudf_polars.experimental.base import PartitionInfo from cudf_polars.experimental.parallel import lower_ir_graph from cudf_polars.experimental.rapidsmpf.collectives import ReserveOpIDs from cudf_polars.experimental.rapidsmpf.dispatch import FanoutInfo @@ -55,7 +54,6 @@ ) from cudf_polars.experimental.rapidsmpf.tracing import log_query_plan from cudf_polars.experimental.rapidsmpf.utils import empty_table_chunk -from cudf_polars.experimental.repartition import Repartition from cudf_polars.experimental.statistics import collect_statistics from cudf_polars.utils.config import CUDAStreamPoolConfig @@ -70,7 +68,7 @@ import polars as pl from cudf_polars.dsl.ir import IR - from cudf_polars.experimental.base import StatsCollector + from cudf_polars.experimental.base import PartitionInfo, StatsCollector from cudf_polars.experimental.parallel import ConfigOptions from cudf_polars.experimental.rapidsmpf.dispatch import ( GenState, @@ -109,33 +107,6 @@ def evaluate_logical_plan( cudf_polars_query_id=str(query_id), ): match config_options.executor.cluster: - case "distributed": # pragma: no cover; block depends on executor type and Distributed 
cluster - # Legacy distributed execution: lower on the client, - # ship the lowered plan to workers. - from cudf_polars.experimental.rapidsmpf.dask import ( - evaluate_pipeline_dask, - ) - - stats = collect_statistics(ir, config_options) - ir, partition_info = lower_ir_graph(ir, config_options, stats) - - # Dask may return chunks in arbitrary order. - if not isinstance(ir, Repartition): - ir = Repartition(ir.schema, ir) - partition_info[ir] = PartitionInfo(count=1) - - with ReserveOpIDs(ir, config_options) as collective_id_map: - log_query_plan(ir, config_options) - result, metadata_collector = evaluate_pipeline_dask( - evaluate_pipeline, - ir, - partition_info, - config_options, - stats, - collective_id_map, - collect_metadata=collect_metadata, - query_id=query_id, - ) case "spmd": from cudf_polars.experimental.rapidsmpf.frontend.spmd import ( evaluate_pipeline_spmd_mode, diff --git a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/dask.py b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/dask.py deleted file mode 100644 index f13c488d5ea..00000000000 --- a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/dask.py +++ /dev/null @@ -1,194 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. 
-# SPDX-License-Identifier: Apache-2.0 -"""Dask-based execution with the streaming RapidsMPF runtime.""" - -from __future__ import annotations - -from typing import TYPE_CHECKING, Any, Protocol - -from distributed import get_client -from rapidsmpf.config import Options, get_environment_variables -from rapidsmpf.integrations.dask import bootstrap_dask_cluster, get_worker_context -from rapidsmpf.streaming.core.context import Context - -import polars as pl - -import cudf_polars.dsl.tracing -from cudf_polars.experimental.dask_registers import DaskRegisterManager - -if TYPE_CHECKING: - import uuid - from collections.abc import MutableMapping - - from distributed import Client - from rapidsmpf.communicator.communicator import Communicator - from rapidsmpf.streaming.cudf.channel_metadata import ChannelMetadata - - from cudf_polars.dsl.ir import IR - from cudf_polars.experimental.base import PartitionInfo, StatsCollector - from cudf_polars.experimental.parallel import ConfigOptions, StreamingExecutor - - -class EvaluatePipelineCallback(Protocol): - """Protocol for the evaluate_pipeline callback.""" - - def __call__( - self, - ir: IR, - partition_info: MutableMapping[IR, PartitionInfo], - config_options: ConfigOptions[StreamingExecutor], - stats: StatsCollector, - collective_id_map: dict[IR, list[int]], - comm: Communicator, - rmpf_context: Context | None = None, - *, - collect_metadata: bool = False, - query_id: uuid.UUID, - ) -> tuple[pl.DataFrame, list[ChannelMetadata] | None]: - """Evaluate a pipeline and return the result DataFrame and metadata.""" - ... 
- - -def get_dask_client() -> Client: - """Get a distributed Dask client.""" - client = get_client() - DaskRegisterManager.register_once() - DaskRegisterManager.run_on_cluster(client) - return client - - -def evaluate_pipeline_dask( - callback: EvaluatePipelineCallback, - ir: IR, - partition_info: MutableMapping[IR, PartitionInfo], - config_options: ConfigOptions, - stats: StatsCollector, - collective_id_map: dict[IR, list[int]], - *, - collect_metadata: bool = False, - query_id: uuid.UUID, -) -> tuple[pl.DataFrame, list[ChannelMetadata] | None]: - """ - Evaluate a RapidsMPF streaming pipeline on a Dask cluster. - - Parameters - ---------- - callback - The callback function to evaluate the pipeline. - ir - The IR node. - partition_info - The partition information. - config_options - The configuration options. - stats - The statistics collector. - collective_id_map - Mapping from Shuffle/Repartition/Join IR nodes to reserved collective IDs. - collect_metadata - Whether to collect metadata. - query_id - A unique identifier for the query. - - Returns - ------- - The output DataFrame and metadata collector. - """ - client = get_dask_client() - - # Make sure the cluster is bootstrapped. - # This is a no-op if the cluster is already bootstrapped. - # TODO: We can apply configuration options here. However, these - # options will be ignored if the cluster is already bootstrapped. 
- bootstrap_dask_cluster(client) - - result = client.run( - _evaluate_pipeline_dask, - callback, - ir, - partition_info, - config_options, - stats, - collective_id_map, - collect_metadata=collect_metadata, - query_id=query_id, - ) - dfs: list[pl.DataFrame] = [] - metadata_collector: list[ChannelMetadata] = [] - for df, md in result.values(): - dfs.append(df) - if md is not None: - metadata_collector.extend(md) - - return pl.concat(dfs), metadata_collector or None - - -def _evaluate_pipeline_dask( - callback: EvaluatePipelineCallback, - ir: IR, - partition_info: MutableMapping[IR, PartitionInfo], - config_options: ConfigOptions[StreamingExecutor], - stats: StatsCollector, - collective_id_map: dict[IR, list[int]], - dask_worker: Any = None, - *, - collect_metadata: bool = False, - query_id: uuid.UUID, -) -> tuple[pl.DataFrame, list[ChannelMetadata] | None]: - """ - Build and evaluate a RapidsMPF streaming pipeline. - - Parameters - ---------- - callback - The callback function to evaluate the pipeline. - ir - The IR node. - partition_info - The partition information. - config_options - The configuration options. - stats - The statistics collector. - collective_id_map - Mapping from Shuffle/Repartition/Join IR nodes to reserved collective IDs. - dask_worker - Dask worker reference. - This kwarg is automatically populated by Dask - when evaluate_pipeline is called with `client.run`. - collect_metadata - Whether to collect metadata. - query_id - A unique identifier for the query. - - Returns - ------- - The output DataFrame and metadata collector. 
- """ - assert dask_worker is not None, "Dask worker must be provided" - - # NOTE: The Dask-CUDA cluster must be bootstrapped - # ahead of time using bootstrap_dask_cluster - # (rapidsmpf.integrations.dask.bootstrap_dask_cluster) - options = Options( - {"num_streaming_threads": str(max(config_options.executor.max_io_threads, 1))} - | get_environment_variables() - ) - dask_context = get_worker_context(dask_worker) - assert dask_context.comm is not None - with ( - Context(dask_context.comm.logger, dask_context.br, options) as rmpf_context, - cudf_polars.dsl.tracing.bound_contextvars(query_id=str(query_id)), - ): - # IDs are already reserved by the caller, just pass them through - return callback( - ir, - partition_info, - config_options, - stats, - collective_id_map, - dask_context.comm, - rmpf_context, - collect_metadata=collect_metadata, - query_id=query_id, - ) diff --git a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/io.py b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/io.py index 985567621d6..d5005441910 100644 --- a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/io.py +++ b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/io.py @@ -185,7 +185,7 @@ async def dataframescan_node( distributed_scan If ``True``, the DataFrame is treated as a shared object and divided across workers so each rank reads a disjoint subset. This is normally - used in ``Cluster.DISTRIBUTED`` mode. + used in ``Cluster.RAY`` and ``Cluster.DASK`` modes. If ``False``, the DataFrame is treated as rank-local and each rank scans its local DataFrame in full. 
This is normally used in diff --git a/python/cudf_polars/cudf_polars/experimental/shuffle.py b/python/cudf_polars/cudf_polars/experimental/shuffle.py index efb134abf7c..8e24dd83fe6 100644 --- a/python/cudf_polars/cudf_polars/experimental/shuffle.py +++ b/python/cudf_polars/cudf_polars/experimental/shuffle.py @@ -6,7 +6,7 @@ import operator from functools import partial -from typing import TYPE_CHECKING, Any, Concatenate, Literal, TypeVar, TypedDict +from typing import TYPE_CHECKING, Any, Concatenate, TypeVar, TypedDict import pylibcudf as plc from rmm.pylibrmm.stream import DEFAULT_STREAM @@ -42,7 +42,6 @@ class ShuffleOptions(TypedDict): on: Sequence[str] column_names: Sequence[str] dtypes: Sequence[DataType] - cluster_kind: Literal["dask", "single"] # Experimental rapidsmpf shuffler integration @@ -61,12 +60,7 @@ def insert_partition( ) -> None: """Add cudf-polars DataFrame chunks to an RMP shuffler.""" from rapidsmpf.integrations.cudf.partition import partition_and_pack - - if options["cluster_kind"] == "dask": - from rapidsmpf.integrations.dask import get_worker_context - - else: - from rapidsmpf.integrations.single import get_worker_context + from rapidsmpf.integrations.single import get_worker_context context = get_worker_context() @@ -95,12 +89,7 @@ def extract_partition( unpack_and_concat, unspill_partitions, ) - - if options["cluster_kind"] == "dask": - from rapidsmpf.integrations.dask import get_worker_context - - else: - from rapidsmpf.integrations.single import get_worker_context + from rapidsmpf.integrations.single import get_worker_context context = get_worker_context() @@ -329,45 +318,27 @@ def _( shuffle_method = ir.shuffle_method # Try using rapidsmpf shuffler if we have "simple" shuffle - # keys, and the "shuffle_method" config is set to "rapidsmpf" + # keys, and the "shuffle_method" config is set to "rapidsmpf-single". 
_keys: list[Col] - if shuffle_method in ("rapidsmpf", "rapidsmpf-single") and len( + if shuffle_method == "rapidsmpf-single" and len( _keys := [ne.value for ne in ir.keys if isinstance(ne.value, Col)] ) == len(ir.keys): # pragma: no cover - cluster_kind: Literal["dask", "single"] - if shuffle_method == "rapidsmpf-single": - from rapidsmpf.integrations.single import rapidsmpf_shuffle_graph - - cluster_kind = "single" - else: - from rapidsmpf.integrations.dask import rapidsmpf_shuffle_graph - - cluster_kind = "dask" + from rapidsmpf.integrations.single import rapidsmpf_shuffle_graph shuffle_on = [k.name for k in _keys] - try: - return rapidsmpf_shuffle_graph( - get_key_name(ir.children[0]), - get_key_name(ir), - partition_info[ir.children[0]].count, - partition_info[ir].count, - RMPFIntegration, - { - "on": shuffle_on, - "column_names": list(ir.schema.keys()), - "dtypes": list(ir.schema.values()), - "cluster_kind": cluster_kind, - }, - ) - except ValueError as err: - # ValueError: rapidsmpf couldn't find a distributed client - if shuffle_method == "rapidsmpf": - # Only raise an error if the user specifically - # set the shuffle method to "rapidsmpf" - raise ValueError( - "The current Dask cluster does not support rapidsmpf shuffling." 
- ) from err + return rapidsmpf_shuffle_graph( + get_key_name(ir.children[0]), + get_key_name(ir), + partition_info[ir.children[0]].count, + partition_info[ir].count, + RMPFIntegration, + { + "on": shuffle_on, + "column_names": list(ir.schema.keys()), + "dtypes": list(ir.schema.values()), + }, + ) # Simple task-based fall-back return partial(_simple_shuffle_graph, context=context)( diff --git a/python/cudf_polars/cudf_polars/experimental/sort.py b/python/cudf_polars/cudf_polars/experimental/sort.py index 47105add263..6800fb4ab74 100644 --- a/python/cudf_polars/cudf_polars/experimental/sort.py +++ b/python/cudf_polars/cudf_polars/experimental/sort.py @@ -5,7 +5,7 @@ from __future__ import annotations from functools import partial -from typing import TYPE_CHECKING, Any, Literal, TypedDict +from typing import TYPE_CHECKING, Any, TypedDict import polars as pl @@ -295,7 +295,6 @@ class SortedShuffleOptions(TypedDict): null_order: Sequence[plc.types.NullOrder] column_names: Sequence[str] column_dtypes: Sequence[DataType] - cluster_kind: Literal["dask", "single"] # Experimental rapidsmpf shuffler integration @@ -313,12 +312,7 @@ def insert_partition( ) -> None: """Add cudf-polars DataFrame chunks to an RMP shuffler.""" from rapidsmpf.integrations.cudf.partition import split_and_pack - - if options["cluster_kind"] == "dask": - from rapidsmpf.integrations.dask import get_worker_context - - else: - from rapidsmpf.integrations.single import get_worker_context + from rapidsmpf.integrations.single import get_worker_context context = get_worker_context() @@ -360,12 +354,7 @@ def extract_partition( unpack_and_concat, unspill_partitions, ) - - if options["cluster_kind"] == "dask": - from rapidsmpf.integrations.dask import get_worker_context - - else: - from rapidsmpf.integrations.single import get_worker_context + from rapidsmpf.integrations.single import get_worker_context context = get_worker_context() @@ -646,41 +635,23 @@ def _( } # Try using rapidsmpf shuffler if we have 
"simple" shuffle - # keys, and the "shuffle_method" config is set to "rapidsmpf" + # keys, and the "shuffle_method" config is set to "rapidsmpf-single". shuffle_method = ir.shuffle_method - if shuffle_method in ("rapidsmpf", "rapidsmpf-single"): # pragma: no cover - try: - if shuffle_method == "rapidsmpf-single": - from rapidsmpf.integrations.single import rapidsmpf_shuffle_graph - - options["cluster_kind"] = "single" - else: - from rapidsmpf.integrations.dask import rapidsmpf_shuffle_graph - - options["cluster_kind"] = "dask" - graph.update( - rapidsmpf_shuffle_graph( - get_key_name(child), - get_key_name(ir), - partition_info[child].count, - partition_info[ir].count, - RMPFIntegrationSortedShuffle, - options, - sort_boundaries_name, - ) + if shuffle_method == "rapidsmpf-single": # pragma: no cover + from rapidsmpf.integrations.single import rapidsmpf_shuffle_graph + + graph.update( + rapidsmpf_shuffle_graph( + get_key_name(child), + get_key_name(ir), + partition_info[child].count, + partition_info[ir].count, + RMPFIntegrationSortedShuffle, + options, + sort_boundaries_name, ) - except (ImportError, ValueError) as err: - # ImportError: rapidsmpf is not installed - # ValueError: rapidsmpf couldn't find a distributed client - if shuffle_method == "rapidsmpf": # pragma: no cover - # Only raise an error if the user specifically - # set the shuffle method to "rapidsmpf" - raise ValueError( - "Rapidsmpf is not installed correctly or the current " - "Dask cluster does not support rapidsmpf shuffling." - ) from err - else: - return graph + ) + return graph # Simple task-based fall-back graph.update( diff --git a/python/cudf_polars/cudf_polars/experimental/spilling.py b/python/cudf_polars/cudf_polars/experimental/spilling.py deleted file mode 100644 index 6a5f73a68ed..00000000000 --- a/python/cudf_polars/cudf_polars/experimental/spilling.py +++ /dev/null @@ -1,148 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. 
-# SPDX-License-Identifier: Apache-2.0 -"""Spilling in multi-partition Dask execution using RAPIDSMPF.""" - -from __future__ import annotations - -from typing import TYPE_CHECKING, Any - -from dask.sizeof import sizeof -from distributed import get_worker -from rapidsmpf.integrations.dask.core import get_worker_context -from rapidsmpf.integrations.dask.spilling import SpillableWrapper -from rapidsmpf.memory.buffer import MemoryType - -from cudf_polars.containers import DataFrame - -if TYPE_CHECKING: - from collections.abc import Callable, MutableMapping - from typing import Any - - from cudf_polars.utils.config import ConfigOptions, StreamingExecutor - - -def wrap_arg(obj: Any) -> Any: - """ - Make `obj` spillable if it is a DataFrame. - - Parameters - ---------- - obj - The object to be wrapped (if it is a DataFrame). - - Returns - ------- - A SpillableWrapper if obj is a DataFrame, otherwise the original object. - """ - if isinstance(obj, DataFrame): - return SpillableWrapper(on_device=obj) - return obj - - -def unwrap_arg(obj: Any) -> Any: - """ - Unwraps a SpillableWrapper to retrieve the original object. - - Parameters - ---------- - obj - The object to be unwrapped. - - Returns - ------- - The unwrapped obj is a SpillableWrapper, otherwise the original object. - """ - if isinstance(obj, SpillableWrapper): - return obj.unspill() - return obj - - -def wrap_func_spillable( - func: Callable, - *, - make_func_output_spillable: bool, - target_partition_size: int, -) -> Callable: - """ - Wraps a function to handle spillable DataFrames. - - Parameters - ---------- - func - The function to be wrapped. - make_func_output_spillable - Whether to wrap the function's output in a SpillableWrapper. - target_partition_size - Target byte size for IO tasks. - - Returns - ------- - A wrapped function that processes spillable DataFrames. 
- """ - - def wrapper(*args: Any) -> Any: - # Make headroom before executing the task - headroom = 0 - probable_io_task = True - for arg in args: - if isinstance(arg, SpillableWrapper): - if arg.mem_type() == MemoryType.HOST: - headroom += sizeof(arg._on_host) - probable_io_task = False - if probable_io_task: - # Likely an IO task - Assume we need target_partition_size - headroom = target_partition_size - if headroom > 128_000_000: # Don't waste time on smaller data - ctx = get_worker_context(get_worker()) - with ctx.lock: - ctx.br.spill_manager.spill_to_make_headroom(headroom=headroom) - - ret: Any = func(*(unwrap_arg(arg) for arg in args)) - if make_func_output_spillable: - ret = wrap_arg(ret) - return ret - - return wrapper - - -def wrap_dataframe_in_spillable( - graph: MutableMapping[Any, Any], - ignore_key: str | tuple[str, int], - config_options: ConfigOptions[StreamingExecutor], -) -> MutableMapping[Any, Any]: - """ - Wraps functions within a task graph to handle spillable DataFrames. - - Only supports flat task graphs where each DataFrame can be found in the - outermost level. Currently, this is true for all cudf-polars task graphs. - - Parameters - ---------- - graph - Task graph. - ignore_key - The key to ignore when wrapping function, typically the key of the - output node. - config_options - GPUEngine configuration options. - - Returns - ------- - A new task graph with wrapped functions. 
- """ - target_partition_size = config_options.executor.target_partition_size - - ret = {} - for key, task in graph.items(): - assert isinstance(task, tuple) - ret[key] = tuple( - wrap_func_spillable( - a, - make_func_output_spillable=key != ignore_key, - target_partition_size=target_partition_size, - ) - if callable(a) - else a - for a in task - ) - return ret diff --git a/python/cudf_polars/cudf_polars/utils/config.py b/python/cudf_polars/cudf_polars/utils/config.py index 84418e2dd06..a6bbd73929b 100644 --- a/python/cudf_polars/cudf_polars/utils/config.py +++ b/python/cudf_polars/cudf_polars/utils/config.py @@ -121,15 +121,6 @@ def rapidsmpf_single_available() -> bool: # pragma: no cover return False -@functools.cache -def rapidsmpf_distributed_available() -> bool: # pragma: no cover - """Query whether rapidsmpf is available as a distributed shuffle method.""" - try: - return importlib.util.find_spec("rapidsmpf.integrations.dask") is not None - except (ImportError, ValueError): - return False - - class StreamingFallbackMode(enum.StrEnum): """ How the streaming executor handles operations that don't support multiple partitions. @@ -165,15 +156,17 @@ class Cluster(enum.StrEnum): """ The cluster configuration for the streaming executor. - * ``Cluster.SINGLE`` : Single-GPU execution. Currently uses a zero-dependency, + * ``Cluster.SINGLE`` : Single-GPU execution. Uses a zero-dependency, synchronous, single-threaded task scheduler. - * ``Cluster.DISTRIBUTED`` : Multi-GPU distributed execution. Currently - uses a Dask-based distributed scheduler and requires an - active Dask cluster. + * ``Cluster.SPMD`` : Multi-GPU SPMD execution via the rapidsmpf streaming + runtime. + * ``Cluster.RAY`` : Multi-GPU execution via Ray actors and the rapidsmpf + streaming runtime. + * ``Cluster.DASK`` : Multi-GPU execution via Dask workers and the rapidsmpf + streaming runtime. 
""" SINGLE = "single" - DISTRIBUTED = "distributed" SPMD = "spmd" RAY = "ray" DASK = "dask" @@ -188,8 +181,7 @@ class ShuffleMethod(enum.StrEnum): * ``ShuffleMethod._RAPIDSMPF_SINGLE`` : Use the single-process rapidsmpf shuffler. With :class:`cudf_polars.utils.config.StreamingExecutor`, the default of ``None`` - will attempt to use ``ShuffleMethod.RAPIDSMPF`` for a distributed cluster, - but will fall back to ``ShuffleMethod.TASKS`` if rapidsmpf is not installed. + resolves to ``ShuffleMethod.TASKS``. The user should **not** specify ``ShuffleMethod._RAPIDSMPF_SINGLE`` directly. A setting of ``ShuffleMethod.RAPIDSMPF`` will be converted to the single-process @@ -357,16 +349,14 @@ def default_broadcast_join_limit(cluster: str, runtime: str) -> int: # default_target_partition_size is used to set the # target partition size (i.e. 5x the 2.5% default). return min(5, int(max(1, (device_size * 0.125) // 1e9))) - elif ( - cluster == "single" - and _env_get_int("POLARS_GPU_ENABLE_CUDA_MANAGED_MEMORY", default=1) == 1 - ): - # We can lean on UVM to support most broadcast joins. + elif _env_get_int("POLARS_GPU_ENABLE_CUDA_MANAGED_MEMORY", default=1) == 1: + # The "tasks" runtime always runs single-GPU; we can lean on UVM + # to support most broadcast joins. return 32 else: - # Extra-conservative default for the "tasks" runtime. - # We cannot spill outside a rapidsmpf shuffle within - # this runtime. So, shuffling is usually preferred. + # Extra-conservative default for the "tasks" runtime without UVM. + # We cannot spill outside a rapidsmpf shuffle within this runtime, + # so shuffling is usually preferred. return 2 @@ -616,11 +606,10 @@ class StreamingExecutor: The cluster configuration for the streaming executor. ``Cluster.SINGLE`` by default. 
- This setting applies to both task-based and rapidsmpf execution modes: - * ``Cluster.SINGLE``: Single-GPU execution - * ``Cluster.DISTRIBUTED``: Multi-GPU distributed execution (requires - an active Dask cluster) + * ``Cluster.SPMD``: Multi-GPU SPMD execution (rapidsmpf runtime) + * ``Cluster.RAY``: Multi-GPU Ray execution (rapidsmpf runtime) + * ``Cluster.DASK``: Multi-GPU Dask execution (rapidsmpf runtime) fallback_mode How to handle errors when the GPU engine fails to execute a query. @@ -653,7 +642,7 @@ class StreamingExecutor: By default, cudf-polars uses a target partition size that's a fraction of the device memory, where the fraction depends on the cluster and runtime: - - distributed cluster or rapidsmpf runtime: 1/40th of the device memory + - rapidsmpf runtime: 1/40th of the device memory - single cluster and tasks runtime: 1/16th of the device memory The pynvml library is used to query the total device memory on the first @@ -677,20 +666,16 @@ class StreamingExecutor: on the cluster and runtime. shuffle_method The method to use for shuffling data between workers. Defaults to - 'rapidsmpf' for distributed cluster if available (otherwise 'tasks'), - and 'tasks' for single-GPU cluster. - rapidsmpf_spill - Whether to wrap task arguments and output in objects that are - spillable by 'rapidsmpf'. + 'tasks' for the single-GPU cluster. client_device_threshold Threshold for spilling data from device memory in rapidsmpf. Default is 50% of device memory on the client process. This argument is only used by the "rapidsmpf" runtime. sink_to_directory Whether multi-partition sink operations write to a directory rather - than a single file. For the distributed, spmd, ray, and dask clusters - this is always True; setting it to False raises a ValueError. - Defaults to False for the single-GPU cluster. + than a single file. For the spmd, ray, and dask clusters this is + always True; setting it to False raises a ValueError. Defaults to + False for the single-GPU cluster. 
dynamic_planning Options controlling dynamic shuffle planning. See :class:`~cudf_polars.utils.config.DynamicPlanningOptions` for more. @@ -709,9 +694,7 @@ class StreamingExecutor: Notes ----- The streaming executor does not currently support profiling a query via - the ``.profile()`` method. We recommend using nsys to profile queries - with single-GPU execution and Dask's built-in profiling tools - with distributed execution. + the ``.profile()`` method. We recommend using nsys to profile queries. """ _env_prefix = "CUDF_POLARS__EXECUTOR" @@ -770,11 +753,6 @@ class StreamingExecutor: default=ShuffleMethod.TASKS, ) ) - rapidsmpf_spill: bool = dataclasses.field( - default_factory=_make_default_factory( - f"{_env_prefix}__RAPIDSMPF_SPILL", _bool_converter, default=False - ) - ) client_device_threshold: float = dataclasses.field( default_factory=_make_default_factory( f"{_env_prefix}__CLIENT_DEVICE_THRESHOLD", float, default=0.5 @@ -820,23 +798,14 @@ def __post_init__(self) -> None: # noqa: D105 # Handle shuffle_method defaults for streaming executor if self.shuffle_method is None: - if self.cluster == "distributed" and rapidsmpf_distributed_available(): - # For distributed cluster, prefer rapidsmpf if available - object.__setattr__(self, "shuffle_method", "rapidsmpf") - else: - # Otherwise, use task-based shuffle for now. - # TODO: Evaluate single-process shuffle by default. - object.__setattr__(self, "shuffle_method", "tasks") + # Use task-based shuffle by default. + # TODO: Evaluate single-process shuffle by default. + object.__setattr__(self, "shuffle_method", "tasks") elif self.shuffle_method == "rapidsmpf-single": # The user should NOT specify "rapidsmpf-single" directly. 
raise ValueError("rapidsmpf-single is not a supported shuffle method.") elif self.shuffle_method == "rapidsmpf": - # Check that we have rapidsmpf installed - if self.cluster == "distributed" and not rapidsmpf_distributed_available(): - raise ValueError( - "rapidsmpf shuffle method requested, but rapidsmpf.integrations.dask is not installed." - ) - elif self.cluster == "single" and not rapidsmpf_single_available(): + if self.cluster == "single" and not rapidsmpf_single_available(): raise ValueError( "rapidsmpf shuffle method requested, but rapidsmpf is not installed." ) @@ -872,7 +841,7 @@ def __post_init__(self) -> None: # noqa: D105 DynamicPlanningOptions(**self.dynamic_planning), ) - if self.cluster in ("distributed", "spmd", "ray", "dask"): + if self.cluster in ("spmd", "ray", "dask"): if self.sink_to_directory is False: raise ValueError( f"The {self.cluster} cluster requires sink_to_directory=True" @@ -892,8 +861,6 @@ def __post_init__(self) -> None: # noqa: D105 raise TypeError("groupby_n_ary must be an int") if not isinstance(self.broadcast_join_limit, int): raise TypeError("broadcast_join_limit must be an int") - if not isinstance(self.rapidsmpf_spill, bool): - raise TypeError("rapidsmpf_spill must be bool") if not isinstance(self.sink_to_directory, bool): raise TypeError("sink_to_directory must be bool") if not isinstance(self.client_device_threshold, float): @@ -905,14 +872,6 @@ def __post_init__(self) -> None: # noqa: D105 if not isinstance(self.num_py_executors, int): raise TypeError("num_py_executors must be an int") - # RapidsMPF spill is only supported for distributed clusters for now. - # This is because the spilling API is still within the RMPF-Dask integration. - # (See https://github.com/rapidsai/rapidsmpf/issues/439) - if self.cluster == "single" and self.rapidsmpf_spill: # pragma: no cover - raise ValueError( - "rapidsmpf_spill is not supported for single-GPU execution." 
- ) - def __hash__(self) -> int: # noqa: D105 # cardinality factory, a dict, isn't natively hashable. We'll dump it # to json and hash that. diff --git a/python/cudf_polars/tests/conftest.py b/python/cudf_polars/tests/conftest.py index e62d4ce6f86..7ad45c06605 100644 --- a/python/cudf_polars/tests/conftest.py +++ b/python/cudf_polars/tests/conftest.py @@ -236,7 +236,7 @@ def pytest_addoption(parser): "--cluster", action="store", default="single", - choices=("single", "distributed"), + choices=("single",), help="Cluster to use for 'streaming' executor.", ) @@ -262,12 +262,6 @@ def pytest_configure(config): # apply globally rather than per-module. config.addinivalue_line("filterwarnings", "ignore::ResourceWarning") - if ( - config.getoption("--cluster") == "distributed" - and config.getoption("--executor") != "streaming" - ): - raise pytest.UsageError("Distributed cluster requires --executor='streaming'") - if config.getoption("--runtime") == "rapidsmpf": if config.getoption("--executor") == "in-memory": raise pytest.UsageError("Rapidsmpf runtime requires --executor='streaming'") diff --git a/python/cudf_polars/tests/experimental/legacy/__init__.py b/python/cudf_polars/tests/experimental/legacy/__init__.py deleted file mode 100644 index 0b14ab351bc..00000000000 --- a/python/cudf_polars/tests/experimental/legacy/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. -# SPDX-License-Identifier: Apache-2.0 - -"""Legacy testing.""" - -from __future__ import annotations - -__all__: list[str] = [] diff --git a/python/cudf_polars/tests/experimental/legacy/conftest.py b/python/cudf_polars/tests/experimental/legacy/conftest.py deleted file mode 100644 index 6139eaef0af..00000000000 --- a/python/cudf_polars/tests/experimental/legacy/conftest.py +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. 
-# SPDX-License-Identifier: Apache-2.0 -from __future__ import annotations - -import os - -import pytest - - -# scope="session" is important to not cause singificant slowdowns in CI -# https://github.com/rapidsai/cudf/pull/20137 -@pytest.fixture(autouse=True, scope="session") -def dask_cluster(pytestconfig, worker_id): - if ( - pytestconfig.getoption("--cluster") == "distributed" - and pytestconfig.getoption("--executor") == "streaming" - ): - worker_count = int(os.environ.get("PYTEST_XDIST_WORKER_COUNT", "0")) - from dask import config - from dask_cuda import LocalCUDACluster - - # Avoid "Sending large graph of size ..." warnings - # (We expect these for tests using literal/random arrays) - config.set({"distributed.admin.large-graph-warning-threshold": "20MB"}) - if worker_count > 0: - # Avoid port conflicts with multiple test runners - worker_index = int(worker_id.removeprefix("gw")) - scheduler_port = 8800 + worker_index - dashboard_address = 8900 + worker_index - else: - scheduler_port = None - dashboard_address = None - - n_workers = int(os.environ.get("CUDF_POLARS_NUM_WORKERS", "1")) - - with ( - LocalCUDACluster( - n_workers=n_workers, - scheduler_port=scheduler_port, - dashboard_address=dashboard_address, - ) as cluster, - cluster.get_client(), - ): - yield - else: - yield diff --git a/python/cudf_polars/tests/experimental/legacy/test_distributed.py b/python/cudf_polars/tests/experimental/legacy/test_distributed.py deleted file mode 100644 index dd8aedeca1f..00000000000 --- a/python/cudf_polars/tests/experimental/legacy/test_distributed.py +++ /dev/null @@ -1,67 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. 
-# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -import pytest - -import polars as pl - -from cudf_polars.testing.asserts import DEFAULT_RUNTIME -from cudf_polars.testing.io import make_lazy_frame - - -@pytest.mark.parametrize("source_format", ["frame", "parquet", "csv"]) -def test_simple_query_with_distributed_support(tmp_path, source_format) -> None: - # Test a trivial query that works for both the - # "tasks" and "rapidsmpf" runtimes in distributed mode. - - # Check that we have a distributed cluster running. - # This tests must be run with: --cluster='distributed' - distributed = pytest.importorskip("distributed") - try: - client = distributed.get_client() - except ValueError: - pytest.skip(reason="Requires distributed execution.") - - # check that we have a rapidsmpf cluster running - pytest.importorskip("rapidsmpf") - try: - from rapidsmpf.integrations.dask import bootstrap_dask_cluster - - bootstrap_dask_cluster(client) - except ValueError: - pytest.skip(reason="Requires a rapidsmpf-bootstrapped cluster.") - - # Setup the GPUEngine config - engine = pl.GPUEngine( - raise_on_fail=True, - executor="streaming", - executor_options={ - "max_rows_per_partition": 2, - "cluster": "distributed", - "runtime": DEFAULT_RUNTIME, - }, - ) - - # Create a simple DataFrame - df = pl.DataFrame( - { - "a": [1, 2, 3, 4, 5], - "b": [10, 20, 30, 40, 50], - } - ) - - # Create LazyFrame based on source format - if source_format == "frame": - lf = make_lazy_frame(df, fmt="frame") - else: - lf = make_lazy_frame(df, fmt=source_format, path=tmp_path, n_files=2) - - # Simple query: select and filter - q = lf.select("a", "b").filter(pl.col("a") > 2) - result = q.collect(engine=engine) - - # Check the result is correct - expected = df.lazy().select("a", "b").filter(pl.col("a") > 2).collect() - assert result.equals(expected) diff --git a/python/cudf_polars/tests/experimental/legacy/test_explain.py b/python/cudf_polars/tests/experimental/legacy/test_explain.py 
deleted file mode 100644 index 9d74d8d800a..00000000000 --- a/python/cudf_polars/tests/experimental/legacy/test_explain.py +++ /dev/null @@ -1,89 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. -# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -import pytest - -import polars as pl - -from cudf_polars.experimental.explain import explain_query, serialize_query -from cudf_polars.testing.asserts import DEFAULT_CLUSTER, DEFAULT_RUNTIME -from cudf_polars.testing.io import make_partitioned_source - - -@pytest.fixture(scope="module") -def df(): - return pl.DataFrame( - { - "x": range(25_000), - "y": ["cat", "dog"] * 12_500, - "z": [1.0, 2.0] * 12_500, - } - ) - - -def test_explain_physical_plan(tmp_path, df): - make_partitioned_source(df, tmp_path, fmt="parquet", n_files=5) - - q = ( - pl.scan_parquet(tmp_path) - .filter((pl.col("x") < 40_000) & (pl.col("z") > 1.0)) - .with_columns((pl.col("x") + pl.col("z")).alias("sum")) - .select(["sum", "y"]) - ) - - engine = pl.GPUEngine( - executor="streaming", - raise_on_fail=True, - executor_options={ - "target_partition_size": 10_000, - "cluster": DEFAULT_CLUSTER, - "runtime": DEFAULT_RUNTIME, - }, - ) - - plan = explain_query(q, engine) - - if DEFAULT_RUNTIME == "tasks": - # rapidsmpf runtime does not split Scan nodes at lowering time - assert "UNION" in plan - assert "SPLITSCAN" in plan - assert "SELECT ('sum', 'y')" in plan or "PROJECTION ('sum', 'y')" in plan - - -def test_shuffle_properties(): - # Join with broadcast_join_limit=1 forces shuffle-based join, producing - # Shuffle nodes in the lowered plan. 
- left = pl.LazyFrame({"a": ["x", "y", "x"], "b": [1, 2, 3]}) - right = pl.LazyFrame({"a": ["x", "y", "z"], "c": [4, 5, 6]}) - q = left.join(right, on="a", how="inner") - engine = pl.GPUEngine( - executor="streaming", - raise_on_fail=True, - executor_options={ - "max_rows_per_partition": 1, - "cluster": DEFAULT_CLUSTER, - "runtime": DEFAULT_RUNTIME, - "shuffle_method": DEFAULT_RUNTIME, - "broadcast_join_limit": 1, - "dynamic_planning": None, # Requires static planning - }, - ) - dag = serialize_query(q, engine) - - shuffle_nodes = [n for n in dag.nodes.values() if n.type == "Shuffle"] - assert len(shuffle_nodes) >= 1, "Expected at least one Shuffle node in lowered plan" - node = shuffle_nodes[0] - - if DEFAULT_RUNTIME == "tasks": - shuffle_method = "tasks" - elif DEFAULT_CLUSTER == "single": - shuffle_method = "rapidsmpf-single" - else: - shuffle_method = "rapidsmpf" - - assert node.properties == { - "keys": ["a"], - "shuffle_method": shuffle_method, - } diff --git a/python/cudf_polars/tests/experimental/legacy/test_parallel.py b/python/cudf_polars/tests/experimental/legacy/test_parallel.py deleted file mode 100644 index b097262958a..00000000000 --- a/python/cudf_polars/tests/experimental/legacy/test_parallel.py +++ /dev/null @@ -1,127 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. 
-# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -import pickle - -import pytest - -import polars as pl -from polars.testing import assert_frame_equal - -from cudf_polars import Translator -from cudf_polars.experimental.parallel import ( - get_scheduler, - lower_ir_graph, - task_graph, -) -from cudf_polars.experimental.statistics import collect_statistics -from cudf_polars.testing.asserts import ( - DEFAULT_CLUSTER, - DEFAULT_RUNTIME, -) -from cudf_polars.utils.config import ConfigOptions - - -@pytest.fixture(scope="module") -def engine(): - return pl.GPUEngine( - raise_on_fail=True, - executor="streaming", - executor_options={ - "max_rows_per_partition": 2, - "cluster": DEFAULT_CLUSTER, - "runtime": DEFAULT_RUNTIME, - }, - ) - - -@pytest.mark.skipif( - DEFAULT_RUNTIME == "rapidsmpf", - reason="Uses explicit task graph.", -) -def test_single_cluster(): - # Test that the single cluster clears - # the cache as tasks are executed. - engine = pl.GPUEngine( - raise_on_fail=True, - executor="streaming", - executor_options={ - "max_rows_per_partition": 4, - "cluster": "single", - "runtime": DEFAULT_RUNTIME, - }, - ) - left = pl.LazyFrame( - { - "x": range(15), - "y": [1, 2, 3] * 5, - "z": [1.0, 2.0, 3.0, 4.0, 5.0] * 3, - } - ) - right = pl.LazyFrame( - { - "xx": range(6), - "y": [2, 4, 3] * 2, - "zz": [1, 2] * 3, - } - ) - q = left.join(right, on="y").group_by("y").agg(pl.col("zz").mean()).sort(by="y") - - config_options = ConfigOptions.from_polars_engine(engine) - ir = Translator(q._ldf.visit(), engine).translate_ir() - ir, partition_info = lower_ir_graph( - ir, config_options, collect_statistics(ir, config_options) - ) - graph, key = task_graph( - ir, - partition_info, - config_options, - ) - scheduler = get_scheduler(config_options) - cache = {} - result = scheduler(graph, key, cache=cache) - assert_frame_equal(result.to_polars(), q.collect()) - - # The cache should only contain the final result - assert set(cache) == {key} - - 
-@pytest.mark.skipif( - DEFAULT_RUNTIME == "rapidsmpf", - reason="Uses explicit task graph.", -) -def test_task_graph_is_pickle_serializable(engine): - # Dask will fall back to using cloudpickle to serialize the task graph if - # necessary. We'd like to avoid that, since cloudpickle serialization / - # deserialization is typically slower than pickle. - - left = pl.LazyFrame( - { - "a": [1, 2, 3, 1, None], - "b": [1, 2, 3, 4, 5], - "c": [2, 3, 4, 5, 6], - } - ) - right = pl.LazyFrame( - { - "a": [1, 4, 3, 7, None, None, 1], - "c": [2, 3, 4, 5, 6, 7, 8], - "d": [6, None, 7, 8, -1, 2, 4], - } - ) - q = left.join(right, on="a").group_by("a").agg(pl.col("c").sum()) - - config_options = ConfigOptions.from_polars_engine(engine) - ir = Translator(q._ldf.visit(), engine).translate_ir() - ir, partition_info = lower_ir_graph( - ir, config_options, collect_statistics(ir, config_options) - ) - graph, _ = task_graph( - ir, - partition_info, - config_options, - ) - - pickle.loads(pickle.dumps(graph)) # no exception diff --git a/python/cudf_polars/tests/experimental/legacy/test_shuffle.py b/python/cudf_polars/tests/experimental/legacy/test_shuffle.py deleted file mode 100644 index f3c92c6450e..00000000000 --- a/python/cudf_polars/tests/experimental/legacy/test_shuffle.py +++ /dev/null @@ -1,101 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. 
-# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -from typing import Literal, cast - -import pytest - -import polars as pl -from polars.testing import assert_frame_equal - -from cudf_polars import Translator -from cudf_polars.dsl.expr import Col, NamedExpr -from cudf_polars.experimental.parallel import evaluate_streaming, lower_ir_graph -from cudf_polars.experimental.shuffle import Shuffle -from cudf_polars.experimental.statistics import collect_statistics -from cudf_polars.testing.asserts import DEFAULT_CLUSTER, DEFAULT_RUNTIME -from cudf_polars.utils.config import ConfigOptions - -SHUFFLE_METHODS = ["tasks", None] if DEFAULT_RUNTIME == "tasks" else [None] - - -@pytest.fixture(scope="module", params=SHUFFLE_METHODS) -def engine(request): - return pl.GPUEngine( - raise_on_fail=True, - executor="streaming", - executor_options={ - "max_rows_per_partition": 4, - "cluster": DEFAULT_CLUSTER, - "runtime": DEFAULT_RUNTIME, - "shuffle_method": request.param, - }, - ) - - -@pytest.fixture(scope="module") -def df(): - return pl.LazyFrame( - { - "x": [1, 2, 3, 4, 5, 6, 7], - "y": [1, 1, 1, 1, 1, 1, 1], - "z": ["a", "b", "c", "d", "e", "f", "g"], - } - ) - - -def test_hash_shuffle(df: pl.LazyFrame, engine: pl.GPUEngine) -> None: - # Extract translated IR - qir = Translator(df._ldf.visit(), engine).translate_ir() - - # Add first Shuffle node - keys = (NamedExpr("x", Col(qir.schema["x"], "x")),) - options = ConfigOptions.from_polars_engine(engine) - assert options.executor.name == "streaming" - qir1 = Shuffle( - qir.schema, - keys, - options.executor.shuffle_method, - qir, - ) - - # Add second Shuffle node (on the same keys) - qir2 = Shuffle( - qir.schema, - keys, - options.executor.shuffle_method, - qir1, - ) - - # Check that sequential shuffles on the same keys - # are replaced with a single shuffle node - partition_info = lower_ir_graph(qir2, options, collect_statistics(qir2, options))[1] - assert len([node for node in partition_info if 
isinstance(node, Shuffle)]) == 1 - - # Add second Shuffle node (on different keys) - keys2 = (NamedExpr("z", Col(qir.schema["z"], "z")),) - qir3 = Shuffle( - qir2.schema, - keys2, - options.executor.shuffle_method, - qir2, - ) - - # Check that we have an additional shuffle - # node after shuffling on different keys - partition_info = lower_ir_graph(qir3, options, collect_statistics(qir3, options))[1] - assert len([node for node in partition_info if isinstance(node, Shuffle)]) == 2 - - # Check that streaming evaluation works - result = evaluate_streaming( - qir3, - options, - ) - # Cast needed because polars' EngineType "cpu" isn't publicly exported. - # https://github.com/pola-rs/polars/issues/17420 - expect = df.collect( - engine=cast(Literal["auto", "in-memory", "streaming", "gpu"], "cpu") - ) - assert_frame_equal(result, expect, check_row_order=False) diff --git a/python/cudf_polars/tests/experimental/legacy/test_shuffler.py b/python/cudf_polars/tests/experimental/legacy/test_shuffler.py deleted file mode 100644 index 04b6e5f2405..00000000000 --- a/python/cudf_polars/tests/experimental/legacy/test_shuffler.py +++ /dev/null @@ -1,79 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. -# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -import pytest - -import polars as pl - -from cudf_polars.testing.asserts import ( - DEFAULT_CLUSTER, - DEFAULT_RUNTIME, - assert_gpu_result_equal, -) - -REQUIRE_TASKS_RUNTIME = pytest.mark.skipif( - DEFAULT_RUNTIME != "tasks", reason="Requires 'tasks' runtime." 
-) - - -@REQUIRE_TASKS_RUNTIME -@pytest.mark.parametrize("max_rows_per_partition", [1, 5]) -def test_join_rapidsmpf_single(max_rows_per_partition: int) -> None: - engine = pl.GPUEngine( - raise_on_fail=True, - executor="streaming", - executor_options={ - "max_rows_per_partition": max_rows_per_partition, - "broadcast_join_limit": 2, - "shuffle_method": "rapidsmpf", - "cluster": "single", - "runtime": DEFAULT_RUNTIME, - }, - ) - - left = pl.LazyFrame( - { - "x": range(15), - "y": [1, 2, 3] * 5, - "z": [1.0, 2.0, 3.0, 4.0, 5.0] * 3, - } - ) - right = pl.LazyFrame( - { - "xx": range(6), - "y": [2, 4, 3] * 2, - "zz": [1, 2] * 3, - } - ) - q = left.join(right, on="y", how="inner") - - assert_gpu_result_equal(q, engine=engine, check_row_order=False) - - -@REQUIRE_TASKS_RUNTIME -def test_sort_stable_rapidsmpf_warns(): - engine = pl.GPUEngine( - raise_on_fail=True, - executor="streaming", - executor_options={ - "max_rows_per_partition": 3, - "cluster": DEFAULT_CLUSTER, - "runtime": DEFAULT_RUNTIME, - "shuffle_method": "rapidsmpf", - "fallback_mode": "warn", - }, - ) - - df = pl.LazyFrame( - { - "x": range(15), - "y": [1, 2, 3] * 5, - "z": [1.0, 2.0, 3.0, 4.0, 5.0] * 3, - } - ) - - q = df.sort(by=["y", "z"], maintain_order=True) - with pytest.warns(UserWarning, match="Falling back to shuffle_method"): - assert_gpu_result_equal(q, engine=engine, check_row_order=True) diff --git a/python/cudf_polars/tests/experimental/legacy/test_sort.py b/python/cudf_polars/tests/experimental/legacy/test_sort.py deleted file mode 100644 index ce148879548..00000000000 --- a/python/cudf_polars/tests/experimental/legacy/test_sort.py +++ /dev/null @@ -1,152 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. 
-# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -import pytest - -import polars as pl - -from cudf_polars.testing.asserts import ( - DEFAULT_CLUSTER, - DEFAULT_RUNTIME, - assert_gpu_result_equal, -) - - -@pytest.fixture(scope="module") -def engine(): - return pl.GPUEngine( - raise_on_fail=True, - executor="streaming", - executor_options={ - "max_rows_per_partition": 3, - "cluster": DEFAULT_CLUSTER, - "runtime": DEFAULT_RUNTIME, - "shuffle_method": "tasks", - "fallback_mode": "raise", - }, - ) - - -@pytest.fixture(scope="module") -def engine_large(): - return pl.GPUEngine( - raise_on_fail=True, - executor="streaming", - executor_options={ - "max_rows_per_partition": 2_100, - "cluster": DEFAULT_CLUSTER, - "runtime": DEFAULT_RUNTIME, - "shuffle_method": "tasks", - "fallback_mode": "raise", - }, - ) - - -@pytest.fixture(scope="module") -def df(): - return pl.LazyFrame( - { - "x": [1, 2, 3, 4, 5, 6, 7], - "y": [1, 6, 7, 2, 5, 4, 3], - "z": ["e", "c", "b", "g", "a", "f", "d"], - } - ) - - -def large_frames(): - x = [1.0] * 10_000 - x[-1] = float("nan") - y = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] * 1000 - - yield pytest.param( - pl.LazyFrame( - { - "x": x, - } - ), - ["x"], - False, - id="all_equal_one_nan", - ) - - yield pytest.param( - pl.LazyFrame( - { - "x": x, - "y": y, - } - ), - ["x", "y"], - False, - id="two_cols", - ) - - idx = list(range(10_000)) - yield pytest.param( - pl.LazyFrame( - { - "x": x, - "y": y, - "idx": idx, - } - ), - ["x", "y"], - True, - id="two_col_stable", - ) - - -def test_sort(df, engine): - q = df.sort(by=["y", "z"]) - assert_gpu_result_equal(q, engine=engine) - - -@pytest.mark.parametrize("large_df,by,stable", list(large_frames())) -@pytest.mark.parametrize( - "nulls_last,descending", [(True, False), (True, True), (False, True)] -) -def test_large_sort(large_df, by, engine_large, stable, nulls_last, descending): - q = large_df.sort( - by=by, nulls_last=nulls_last, maintain_order=stable, descending=descending - ) - 
assert_gpu_result_equal(q, engine=engine_large) - - -def test_sort_head(df, engine): - q = df.sort(by=["y", "z"]).head(2) - assert_gpu_result_equal(q, engine=engine) - - -def test_sort_tail(df, engine): - q = df.sort(by=["y", "z"]).tail(2) - assert_gpu_result_equal(q, engine=engine) - - -@pytest.mark.parametrize("offset", [1, -4]) -def test_sort_slice(df, engine, offset): - # Slice in the middle, which distributed sorts need to be careful with - q = df.sort(by=["y", "z"]).slice(offset, 2) - with pytest.raises( - NotImplementedError, - match="This slice not supported for multiple partitions.", - ): - assert_gpu_result_equal(q, engine=engine) - - -def test_sort_after_sparse_join(): - engine = pl.GPUEngine( - raise_on_fail=True, - executor="streaming", - executor_options={ - "cluster": DEFAULT_CLUSTER, - "runtime": DEFAULT_RUNTIME, - "max_rows_per_partition": 4, - }, - ) - - left = pl.LazyFrame({"foo": list(range(5)), "bar": list(range(5))}) - right = pl.LazyFrame({"foo": list(range(1))}) - q = left.join(right, on="foo", how="inner").sort(by=["foo"]) - assert_gpu_result_equal(q, engine=engine) diff --git a/python/cudf_polars/tests/experimental/test_dask_serialize.py b/python/cudf_polars/tests/experimental/test_dask_serialize.py deleted file mode 100644 index 0d7c981bc52..00000000000 --- a/python/cudf_polars/tests/experimental/test_dask_serialize.py +++ /dev/null @@ -1,129 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. 
-# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -import pytest -from distributed.protocol import deserialize, serialize - -import polars as pl -from polars.testing.asserts import assert_frame_equal - -import rmm -from rmm.pylibrmm.stream import DEFAULT_STREAM - -from cudf_polars.containers import DataFrame -from cudf_polars.experimental.dask_registers import register -from cudf_polars.utils.cuda_stream import get_dask_cuda_stream - -# Must register serializers before running tests -register() - - -def convert_to_rmm(frame): - """Convert frame to RMM to simulate Dask UCX transfers.""" - if hasattr(frame, "__cuda_array_interface__"): - buf = rmm.DeviceBuffer(size=frame.nbytes) - buf.copy_from_device(frame) - return buf - else: - return frame - - -@pytest.mark.filterwarnings( - # If exceptions in threads aren't handled, they get raised as a warning by - # Pytest. The warnings raised by this test correspond to unhandled - # `ResourceWarning`s in `distributed.node` - # - # Since Pytest 8, these warnings get elevated to errors and exit the test - # suite, so we selectively filter them here if the unraisable exception - # concerns `socket.socket` - "ignore:.*socket.socket.*:pytest.PytestUnraisableExceptionWarning" -) -@pytest.mark.parametrize( - "polars_tbl", - [ - pl.DataFrame(), - pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}), - pl.DataFrame({"a": [1, 2, 3]}), - pl.DataFrame({"a": [1], "b": [2], "c": [3]}), - pl.DataFrame({"a": ["a", "bb", "ccc"]}), - pl.DataFrame({"a": [1, 2, None], "b": [None, 3, 4]}), - pl.DataFrame({"a": range(int(1e7))}), - ], -) -@pytest.mark.parametrize("protocol", ["cuda", "cuda_rmm", "dask"]) -@pytest.mark.parametrize( - "context", - [ - None, - {}, - { - "stream": DEFAULT_STREAM, - "device_mr": rmm.mr.get_current_device_resource(), - "staging_device_buffer": rmm.DeviceBuffer(size=2**20), - }, - ], -) -def test_dask_serialization_roundtrip(polars_tbl, protocol, context): - stream = 
get_dask_cuda_stream() - df = DataFrame.from_polars(polars_tbl, stream=stream) - - cuda_rmm = protocol == "cuda_rmm" - protocol = "cuda" if protocol == "cuda_rmm" else protocol - - header, frames = serialize( - df, on_error="raise", serializers=[protocol], context=context - ) - if cuda_rmm: - # Simulate Dask UCX transfers - frames = [convert_to_rmm(f) for f in frames] - res = deserialize(header, frames, deserializers=[protocol]) - - assert_frame_equal(df.to_polars(), res.to_polars()) - - # Check that we can serialize individual columns - for column in df.columns: - expect = DataFrame([column], stream=df.stream) - - header, frames = serialize( - column, on_error="raise", serializers=[protocol], context=context - ) - if cuda_rmm: - # Simulate Dask UCX transfers - frames = [convert_to_rmm(f) for f in frames] - res = deserialize(header, frames, deserializers=[protocol]) - - assert_frame_equal( - expect.to_polars(), DataFrame([res], stream=df.stream).to_polars() - ) - - -def test_dask_serialization_error(): - df = DataFrame.from_polars( - pl.DataFrame({"a": [1, 2, 3]}), stream=get_dask_cuda_stream() - ) - - header, frames = serialize( - df, - on_error="message", - serializers=["dask"], - context={ - "device_mr": rmm.mr.get_current_device_resource(), - "staging_device_buffer": rmm.DeviceBuffer(size=2**20), - }, - ) - assert header == {"serializer": "error"} - assert "ValueError: " in str(frames) - - header, frames = serialize( - df, - on_error="message", - serializers=["dask"], - context={ - "stream": df.stream, - "staging_device_buffer": rmm.DeviceBuffer(size=2**20), - }, - ) - assert header == {"serializer": "error"} - assert "ValueError: " in str(frames) diff --git a/python/cudf_polars/tests/experimental/test_dask_sizeof.py b/python/cudf_polars/tests/experimental/test_dask_sizeof.py deleted file mode 100644 index d4f2d16039f..00000000000 --- a/python/cudf_polars/tests/experimental/test_dask_sizeof.py +++ /dev/null @@ -1,32 +0,0 @@ -# SPDX-FileCopyrightText: Copyright 
(c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. -# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -import pytest -from dask.sizeof import sizeof - -import polars as pl - -from cudf_polars.containers import DataFrame -from cudf_polars.experimental.dask_registers import register -from cudf_polars.utils.cuda_stream import get_dask_cuda_stream - -# Must register sizeof dispatch before running tests -register() - - -@pytest.mark.parametrize( - "polars_tbl, size", - [ - (pl.DataFrame(), 0), - (pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}), 9 * 8), - (pl.DataFrame({"a": [1, 2, 3]}), 3 * 8), - (pl.DataFrame({"a": ["a"], "b": ["bc"]}), 2 * 8 + 3), - (pl.DataFrame({"a": [1, 2, None]}), 88), - ], -) -def test_dask_sizeof(polars_tbl, size): - df = DataFrame.from_polars(polars_tbl, stream=get_dask_cuda_stream()) - assert sizeof(df) == size - assert sum(sizeof(c) for c in df.columns) == size diff --git a/python/cudf_polars/tests/experimental/test_dask_tokenize.py b/python/cudf_polars/tests/experimental/test_dask_tokenize.py deleted file mode 100644 index 287c3a4c379..00000000000 --- a/python/cudf_polars/tests/experimental/test_dask_tokenize.py +++ /dev/null @@ -1,32 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. 
-# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -import dask.tokenize -import pytest - -import polars as pl - -from cudf_polars.containers import DataType -from cudf_polars.dsl.expressions.base import Col, NamedExpr -from cudf_polars.experimental.dask_registers import register - -# Must register sizeof dispatch before running tests -register() - - -@pytest.mark.parametrize( - "value", - [ - NamedExpr("a", Col(DataType(pl.Int64()), "a")), - DataType(pl.Int64()), - ], - ids=["named_expr", "data_type"], -) -def test_tokenize(value: DataType | NamedExpr) -> None: - normalizer = dask.tokenize.normalize_token.dispatch(type(value)) - package = normalizer.__module__.split(".")[0] - assert package == "cudf_polars" - - dask.tokenize.tokenize(value) diff --git a/python/cudf_polars/tests/experimental/test_io_multirank.py b/python/cudf_polars/tests/experimental/test_io_multirank.py index e1265602304..631f12fd85c 100644 --- a/python/cudf_polars/tests/experimental/test_io_multirank.py +++ b/python/cudf_polars/tests/experimental/test_io_multirank.py @@ -104,7 +104,7 @@ def test_sink_parquet_empty_rank(engine: StreamingEngine, tmp_path: Path) -> Non @pytest.mark.parametrize( "cluster", - [Cluster.SPMD, Cluster.RAY, Cluster.DASK, Cluster.DISTRIBUTED], + [Cluster.SPMD, Cluster.RAY, Cluster.DASK], ) def test_sink_to_directory_false_raises(cluster: Cluster) -> None: """Explicit ``sink_to_directory=False`` is rejected for every multi-rank cluster.""" diff --git a/python/cudf_polars/tests/experimental/test_sink.py b/python/cudf_polars/tests/experimental/test_sink.py index 4b28830f287..9b0573d2cb4 100644 --- a/python/cudf_polars/tests/experimental/test_sink.py +++ b/python/cudf_polars/tests/experimental/test_sink.py @@ -92,19 +92,6 @@ def test_sink_parquet_directory( assert len(list(check_path.iterdir())) == expected_file_count -def test_sink_parquet_raises_distributed() -> None: - engine = pl.GPUEngine( - raise_on_fail=True, - executor="streaming", - 
executor_options={ - "cluster": "distributed", - "sink_to_directory": False, - }, - ) - with pytest.raises(ValueError, match="distributed cluster"): - ConfigOptions.from_polars_engine(engine) - - def test_sink_parquet_raises_spmd(spmd_comm): from cudf_polars.experimental.rapidsmpf.frontend.spmd import SPMDEngine diff --git a/python/cudf_polars/tests/test_config.py b/python/cudf_polars/tests/test_config.py index 71f6c946184..3cd66bc527d 100644 --- a/python/cudf_polars/tests/test_config.py +++ b/python/cudf_polars/tests/test_config.py @@ -51,16 +51,6 @@ def rapidsmpf_single_available(request, monkeypatch): return request.param -@pytest.fixture(params=[False, True], ids=["norapidsmpf.dask", "rapidsmpf.dask"]) -def rapidsmpf_distributed_available(request, monkeypatch): - monkeypatch.setattr( - cudf_polars.utils.config, - "rapidsmpf_distributed_available", - lambda: request.param, - ) - return request.param - - def test_polars_verbose_warns(monkeypatch): def raise_unimplemented(self, *args): raise NotImplementedError("We don't support this") @@ -243,7 +233,7 @@ def test_parquet_options_from_none() -> None: def test_validate_streaming_executor_shuffle_method( - *, rapidsmpf_distributed_available: bool, rapidsmpf_single_available: bool + *, rapidsmpf_single_available: bool ) -> None: config = ConfigOptions.from_polars_engine( pl.GPUEngine( @@ -254,21 +244,6 @@ def test_validate_streaming_executor_shuffle_method( assert config.executor.name == "streaming" assert config.executor.shuffle_method == "tasks" - # rapidsmpf with distributed cluster - engine = pl.GPUEngine( - executor="streaming", - executor_options={"shuffle_method": "rapidsmpf", "cluster": "distributed"}, - ) - if rapidsmpf_distributed_available: - config = ConfigOptions.from_polars_engine(engine) - assert config.executor.name == "streaming" - assert config.executor.shuffle_method == "rapidsmpf" - else: - with pytest.raises( - ValueError, match="rapidsmpf.integrations.dask is not installed" - ): - 
ConfigOptions.from_polars_engine(engine) - # rapidsmpf with single cluster engine = pl.GPUEngine( executor="streaming", @@ -344,10 +319,7 @@ def test_validate_cluster() -> None: ) -def test_validate_shuffle_method_defaults( - *, - rapidsmpf_distributed_available: bool, -) -> None: +def test_validate_shuffle_method_defaults() -> None: config = ConfigOptions.from_polars_engine( pl.GPUEngine( executor="streaming", @@ -356,20 +328,6 @@ def test_validate_shuffle_method_defaults( assert config.executor.name == "streaming" assert config.executor.shuffle_method == "tasks" # Default for single cluster - # Test default for distributed cluster depends on rapidsmpf availability - config = ConfigOptions.from_polars_engine( - pl.GPUEngine( - executor="streaming", - executor_options={"cluster": "distributed"}, - ) - ) - assert config.executor.name == "streaming" - if rapidsmpf_distributed_available: - # Should be "rapidsmpf" if available, otherwise "tasks" - assert config.executor.shuffle_method == "rapidsmpf" - else: - assert config.executor.shuffle_method == "tasks" - with pytest.raises(ValueError, match="'foo' is not a valid ShuffleMethod"): ConfigOptions.from_polars_engine( pl.GPUEngine( @@ -387,7 +345,6 @@ def test_validate_shuffle_method_defaults( "target_partition_size", "groupby_n_ary", "broadcast_join_limit", - "rapidsmpf_spill", "sink_to_directory", "client_device_threshold", "max_io_threads", @@ -447,45 +404,31 @@ def test_parquet_options_from_env(monkeypatch: pytest.MonkeyPatch) -> None: ConfigOptions.from_polars_engine(engine) -def test_config_option_from_env( - monkeypatch: pytest.MonkeyPatch, *, rapidsmpf_distributed_available: bool -) -> None: +def test_config_option_from_env(monkeypatch: pytest.MonkeyPatch) -> None: with monkeypatch.context() as m: - m.setenv("CUDF_POLARS__EXECUTOR__CLUSTER", "distributed") + m.setenv("CUDF_POLARS__EXECUTOR__CLUSTER", "single") m.setenv("CUDF_POLARS__EXECUTOR__FALLBACK_MODE", "silent") 
m.setenv("CUDF_POLARS__EXECUTOR__MAX_ROWS_PER_PARTITION", "42") m.setenv("CUDF_POLARS__EXECUTOR__UNIQUE_FRACTION", '{"a": 0.5}') m.setenv("CUDF_POLARS__EXECUTOR__TARGET_PARTITION_SIZE", "100") m.setenv("CUDF_POLARS__EXECUTOR__GROUPBY_N_ARY", "43") m.setenv("CUDF_POLARS__EXECUTOR__BROADCAST_JOIN_LIMIT", "44") - m.setenv("CUDF_POLARS__EXECUTOR__RAPIDSMPF_SPILL", "1") - m.setenv("CUDF_POLARS__EXECUTOR__SINK_TO_DIRECTORY", "1") + m.setenv("CUDF_POLARS__EXECUTOR__SHUFFLE_METHOD", "tasks") m.setenv("CUDF_POLARS__CUDA_STREAM_POLICY", "default") - if rapidsmpf_distributed_available: - m.setenv("CUDF_POLARS__EXECUTOR__SHUFFLE_METHOD", "rapidsmpf") - else: - m.setenv("CUDF_POLARS__EXECUTOR__SHUFFLE_METHOD", "tasks") - engine = pl.GPUEngine() config = ConfigOptions.from_polars_engine(engine) assert config.executor.name == "streaming" - assert config.executor.cluster == "distributed" + assert config.executor.cluster == "single" assert config.executor.fallback_mode == "silent" assert config.executor.max_rows_per_partition == 42 assert config.executor.unique_fraction == {"a": 0.5} assert config.executor.target_partition_size == 100 assert config.executor.groupby_n_ary == 43 assert config.executor.broadcast_join_limit == 44 - assert config.executor.rapidsmpf_spill is True - assert config.executor.sink_to_directory is True + assert config.executor.shuffle_method == "tasks" assert config.cuda_stream_policy is None - if rapidsmpf_distributed_available: - assert config.executor.shuffle_method == "rapidsmpf" - else: - assert config.executor.shuffle_method == "tasks" - def test_target_partition_from_env( monkeypatch: pytest.MonkeyPatch, recwarn: pytest.WarningsRecorder @@ -955,11 +898,11 @@ def test_num_py_executors_from_env( assert config.executor.num_py_executors == 8 -def test_distributed_sink_to_directory_false_raises() -> None: +def test_dask_sink_to_directory_false_raises() -> None: with pytest.raises( - ValueError, match="The distributed cluster requires sink_to_directory=True" 
+ ValueError, match="The dask cluster requires sink_to_directory=True" ): - StreamingExecutor(cluster=Cluster.DISTRIBUTED, sink_to_directory=False) + StreamingExecutor(cluster=Cluster.DASK, sink_to_directory=False) def test_get_dask_cuda_stream() -> None: diff --git a/python/cudf_polars/tests/test_profile.py b/python/cudf_polars/tests/test_profile.py index c0d5de759ad..cf48dc933d7 100644 --- a/python/cudf_polars/tests/test_profile.py +++ b/python/cudf_polars/tests/test_profile.py @@ -26,11 +26,10 @@ def test_profile_basic() -> None: assert_frame_equal(result, q.collect(engine="in-memory"), check_row_order=False) -@pytest.mark.parametrize("cluster", ["single", "distributed"]) -def test_profile_streaming_raises(cluster: str) -> None: +def test_profile_streaming_raises() -> None: df = pl.LazyFrame({"a": [1, 2, 3, 4]}) q = df.sort("a").group_by("a").len() - engine = pl.GPUEngine(executor="streaming", executor_options={"cluster": cluster}) + engine = pl.GPUEngine(executor="streaming", executor_options={"cluster": "single"}) with pytest.raises( NotImplementedError, match=r"profile\(\) is not supported with the streaming executor.", From 899508569e3ab17c60df230514745f4a0ec7cd75 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Mon, 4 May 2026 13:28:50 -0500 Subject: [PATCH 05/36] Fix StatsCollector.serialize to use value equality instead of object identity (#22366) Uses node directly as the dict key instead of `id(node)`, so nodes reconstructed on workers (introduced in #22287) are found correctly by value rather than failing with a `KeyError`. Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Tom Augspurger (https://github.com/TomAugspurger) - Mads R. B. 
Kristensen (https://github.com/madsbk) URL: https://github.com/rapidsai/cudf/pull/22366 --- .../cudf_polars/experimental/base.py | 4 ++-- .../tests/experimental/test_stats.py | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/python/cudf_polars/cudf_polars/experimental/base.py b/python/cudf_polars/cudf_polars/experimental/base.py index be3eb6aeb53..73ed9b3dbe1 100644 --- a/python/cudf_polars/cudf_polars/experimental/base.py +++ b/python/cudf_polars/cudf_polars/experimental/base.py @@ -118,9 +118,9 @@ def serialize(self, ir: IR) -> list[SerializedStatsEntry]: traversal of *ir* so that the result is independent of object identity. """ - node_to_idx = {id(node): i for i, node in enumerate(traversal([ir]))} + node_to_idx = {node: i for i, node in enumerate(traversal([ir]))} return [ - {"index": node_to_idx[id(node)], "info": info.serialize()} + {"index": node_to_idx[node], "info": info.serialize()} for node, info in self.scan_stats.items() ] diff --git a/python/cudf_polars/tests/experimental/test_stats.py b/python/cudf_polars/tests/experimental/test_stats.py index 2b08a23b862..7d0d5dc01e4 100644 --- a/python/cudf_polars/tests/experimental/test_stats.py +++ b/python/cudf_polars/tests/experimental/test_stats.py @@ -12,6 +12,8 @@ import polars as pl from cudf_polars import Translator +from cudf_polars.containers import DataType +from cudf_polars.dsl.ir import Empty, Projection from cudf_polars.experimental.base import SerializedDataSourceInfo, StatsCollector from cudf_polars.experimental.io import ( DataFrameSourceInfo, @@ -266,3 +268,20 @@ def test_serialize_stats_roundtrip_parquet( assert rt.row_count == info.row_count for col in ("x", "y", "z"): assert rt.column_storage_size(col) == info.column_storage_size(col) + + +def test_serialize_uses_value_equality() -> None: + schema = {"x": DataType(pl.Int64())} + scan_x = Empty(schema) + scan_y = Empty(schema) + assert scan_x == scan_y + assert scan_x is not scan_y + + root = 
Projection(schema, scan_y) + + stats = StatsCollector() + stats.scan_stats[scan_x] = DataFrameSourceInfo(100) + + result = stats.serialize(root) + assert len(result) == 1 + assert result[0]["index"] >= 0 From 5255d5118ed3ce606bf56478711421a641bd72e0 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Mon, 4 May 2026 12:02:30 -0700 Subject: [PATCH 06/36] Pass managed pool MR explicitly in NDSH parquet data generation (#22344) Pass the managed-pool MR directly into each `cudf::datagen::generate_*` call instead of swapping it in as the current device resource and restoring on exit. Also fixes forwarding of the mr parameter down the datagen stack. There are still a few tiny allocations (KBs) that use the default mr because switching would require a copy. These should not cause OOM errors. Authors: - Vukasin Milovanovic (https://github.com/vuule) Approvers: - Bradley Dice (https://github.com/bdice) - Tianyu Liu (https://github.com/kingcrimsontianyu) URL: https://github.com/rapidsai/cudf/pull/22344 --- .../ndsh_data_generator.cpp | 19 ++++++------ .../ndsh_data_generator/table_helpers.cpp | 13 ++++---- cpp/benchmarks/ndsh/utilities.cpp | 30 +++++++------------ 3 files changed, 27 insertions(+), 35 deletions(-) diff --git a/cpp/benchmarks/common/ndsh_data_generator/ndsh_data_generator.cpp b/cpp/benchmarks/common/ndsh_data_generator/ndsh_data_generator.cpp index 6bdd3a0d87e..587758d84bb 100644 --- a/cpp/benchmarks/common/ndsh_data_generator/ndsh_data_generator.cpp +++ b/cpp/benchmarks/common/ndsh_data_generator/ndsh_data_generator.cpp @@ -379,7 +379,8 @@ std::unique_ptr generate_lineitem_partial(cudf::table_view const& o auto const pred = cudf::ast::operation(cudf::ast::ast_operator::GREATER, col_ref, current_date_literal); auto mask = cudf::compute_column(cudf::table_view({l_shipdate_ts->view()}), pred, stream, mr); - auto mask_index_type = cudf::cast(mask->view(), cudf::data_type{cudf::type_id::INT8}); + auto mask_index_type = + cudf::cast(mask->view(), 
cudf::data_type{cudf::type_id::INT8}, stream, mr); auto const indices = cudf::test::fixed_width_column_wrapper({0, 1}).release(); auto const keys = cudf::test::strings_column_wrapper({"O", "F"}).release(); auto const gather_map = cudf::table_view({indices->view(), keys->view()}); @@ -465,7 +466,7 @@ std::unique_ptr generate_orders_dependent(cudf::table_view const& l requests[1].values = l_linestatus_mask; // Perform the aggregations - auto agg_result = gb.aggregate(requests); + auto agg_result = gb.aggregate(requests, stream, mr); // Create a `table_view` out of the `l_orderkey`, `count`, and `sum` columns auto const count = std::move(agg_result.second[0].results[0]); @@ -484,9 +485,9 @@ std::unique_ptr generate_orders_dependent(cudf::table_view const& l auto const count_ref = cudf::ast::column_reference(1); auto const sum_ref = cudf::ast::column_reference(2); auto const expr_a = cudf::ast::operation(cudf::ast::ast_operator::EQUAL, sum_ref, count_ref); - auto const mask_a = cudf::compute_column(table, expr_a); - auto const o_orderstatus_intermediate = - cudf::copy_if_else(cudf::string_scalar("O"), cudf::string_scalar("F"), mask_a->view()); + auto const mask_a = cudf::compute_column(table, expr_a, stream, mr); + auto const o_orderstatus_intermediate = cudf::copy_if_else( + cudf::string_scalar("O"), cudf::string_scalar("F"), mask_a->view(), stream, mr); // Then, we evaluate an expression `sum == 0` and generate a boolean mask auto zero_scalar = cudf::numeric_scalar(0); @@ -497,9 +498,9 @@ std::unique_ptr generate_orders_dependent(cudf::table_view const& l cudf::ast::operation(cudf::ast::ast_operator::NOT_EQUAL, sum_ref, zero_literal); auto const expr_b = cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_AND, expr_b_left, expr_b_right); - auto const mask_b = cudf::compute_column(table, expr_b); + auto const mask_b = cudf::compute_column(table, expr_b, stream, mr); return cudf::copy_if_else( - cudf::string_scalar("P"), o_orderstatus_intermediate->view(), 
mask_b->view()); + cudf::string_scalar("P"), o_orderstatus_intermediate->view(), mask_b->view(), stream, mr); }(); orders_dependent_columns.push_back(std::move(o_orderstatus)); @@ -514,7 +515,7 @@ std::unique_ptr generate_orders_dependent(cudf::table_view const& l requests.push_back(cudf::groupby::aggregation_request()); requests[0].aggregations.push_back(cudf::make_sum_aggregation()); requests[0].values = l_charge->view(); - auto agg_result = gb.aggregate(requests); + auto agg_result = gb.aggregate(requests, stream, mr); return std::move(agg_result.second[0].results[0]); }(); orders_dependent_columns.push_back(std::move(o_totalprice)); @@ -726,7 +727,7 @@ generate_orders_lineitem_part(double scale_factor, auto joined_table_columns = joined_table->release(); auto const l_quantity = std::move(joined_table_columns[1]); auto const l_quantity_fp = - cudf::cast(l_quantity->view(), cudf::data_type{cudf::type_id::FLOAT64}); + cudf::cast(l_quantity->view(), cudf::data_type{cudf::type_id::FLOAT64}, stream, mr); auto const p_retailprice = std::move(joined_table_columns[3]); return cudf::binary_operation(l_quantity_fp->view(), p_retailprice->view(), diff --git a/cpp/benchmarks/common/ndsh_data_generator/table_helpers.cpp b/cpp/benchmarks/common/ndsh_data_generator/table_helpers.cpp index ef6cc4971e6..8510e9d6c62 100644 --- a/cpp/benchmarks/common/ndsh_data_generator/table_helpers.cpp +++ b/cpp/benchmarks/common/ndsh_data_generator/table_helpers.cpp @@ -45,8 +45,9 @@ std::unique_ptr add_calendrical_days(cudf::column_view const& time rmm::device_async_resource_ref mr) { CUDF_BENCHMARK_RANGE(); - auto const days_duration_type = cudf::cast(days, cudf::data_type{cudf::type_id::DURATION_DAYS}); - auto const data_type = cudf::data_type{cudf::type_id::TIMESTAMP_DAYS}; + auto const days_duration_type = + cudf::cast(days, cudf::data_type{cudf::type_id::DURATION_DAYS}, stream, mr); + auto const data_type = cudf::data_type{cudf::type_id::TIMESTAMP_DAYS}; return cudf::binary_operation( 
timestamp_days, days_duration_type->view(), cudf::binary_operator::ADD, data_type, stream, mr); } @@ -358,13 +359,13 @@ std::unique_ptr perform_left_join(cudf::table_view const& left_inpu { CUDF_BENCHMARK_RANGE(); auto const part_a = cudf::strings::from_integers( - generate_random_numeric_column(10, 34, num_rows, stream, mr)->view()); + generate_random_numeric_column(10, 34, num_rows, stream, mr)->view(), stream, mr); auto const part_b = cudf::strings::from_integers( - generate_random_numeric_column(100, 999, num_rows, stream, mr)->view()); + generate_random_numeric_column(100, 999, num_rows, stream, mr)->view(), stream, mr); auto const part_c = cudf::strings::from_integers( - generate_random_numeric_column(100, 999, num_rows, stream, mr)->view()); + generate_random_numeric_column(100, 999, num_rows, stream, mr)->view(), stream, mr); auto const part_d = cudf::strings::from_integers( - generate_random_numeric_column(1000, 9999, num_rows, stream, mr)->view()); + generate_random_numeric_column(1000, 9999, num_rows, stream, mr)->view(), stream, mr); auto const phone_parts_table = cudf::table_view({part_a->view(), part_b->view(), part_c->view(), part_d->view()}); return cudf::strings::concatenate(phone_parts_table, diff --git a/cpp/benchmarks/ndsh/utilities.cpp b/cpp/benchmarks/ndsh/utilities.cpp index 61c9c9d32e9..89315bacec1 100644 --- a/cpp/benchmarks/ndsh/utilities.cpp +++ b/cpp/benchmarks/ndsh/utilities.cpp @@ -393,13 +393,9 @@ void generate_parquet_data_sources(double scale_factor, { CUDF_BENCHMARK_RANGE(); - // Set the memory resource to the managed pool - auto old_mr = cudf::get_current_device_resource_ref(); - // TODO: if old_mr is already managed pool or managed, don't create new one. + // Use a managed pool for parquet generation. 
rmm::mr::pool_memory_resource managed_pool_mr{rmm::mr::managed_memory_resource{}, rmm::percent_of_free_device_memory(50)}; - cudf::set_current_device_resource(managed_pool_mr); - // drawback: if already pool takes 50% of free memory, we are left with 50% of 50% of free memory std::unordered_set const requested_table_names = [&table_names]() { if (table_names.empty()) { @@ -414,9 +410,11 @@ void generate_parquet_data_sources(double scale_factor, }); std::unordered_map> tables; + auto const stream = cudf::get_default_stream(); + if (sources.count("orders") or sources.count("lineitem") or sources.count("part")) { - auto [orders, lineitem, part] = cudf::datagen::generate_orders_lineitem_part( - scale_factor, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); + auto [orders, lineitem, part] = + cudf::datagen::generate_orders_lineitem_part(scale_factor, stream, managed_pool_mr); if (sources.count("orders")) { write_to_parquet_device_buffer(orders, SCHEMAS.at("orders"), sources.at("orders")); orders = {}; @@ -432,35 +430,27 @@ void generate_parquet_data_sources(double scale_factor, } if (sources.count("partsupp")) { - auto partsupp = cudf::datagen::generate_partsupp( - scale_factor, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); + auto partsupp = cudf::datagen::generate_partsupp(scale_factor, stream, managed_pool_mr); write_to_parquet_device_buffer(partsupp, SCHEMAS.at("partsupp"), sources.at("partsupp")); } if (sources.count("supplier")) { - auto supplier = cudf::datagen::generate_supplier( - scale_factor, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); + auto supplier = cudf::datagen::generate_supplier(scale_factor, stream, managed_pool_mr); write_to_parquet_device_buffer(supplier, SCHEMAS.at("supplier"), sources.at("supplier")); } if (sources.count("customer")) { - auto customer = cudf::datagen::generate_customer( - scale_factor, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); + auto 
customer = cudf::datagen::generate_customer(scale_factor, stream, managed_pool_mr); write_to_parquet_device_buffer(customer, SCHEMAS.at("customer"), sources.at("customer")); } if (sources.count("nation")) { - auto nation = cudf::datagen::generate_nation(cudf::get_default_stream(), - cudf::get_current_device_resource_ref()); + auto nation = cudf::datagen::generate_nation(stream, managed_pool_mr); write_to_parquet_device_buffer(nation, SCHEMAS.at("nation"), sources.at("nation")); } if (sources.count("region")) { - auto region = cudf::datagen::generate_region(cudf::get_default_stream(), - cudf::get_current_device_resource_ref()); + auto region = cudf::datagen::generate_region(stream, managed_pool_mr); write_to_parquet_device_buffer(region, SCHEMAS.at("region"), sources.at("region")); } - - // Restore the original memory resource - cudf::set_current_device_resource(old_mr); } From 9407fd6686805881559702388a43503bc4726735 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Mon, 4 May 2026 21:31:53 +0000 Subject: [PATCH 07/36] Fix compile warnings in libcudf examples (#22335) Fixes some compile warnings in the libcudf examples. These are deprecation warnings about the missing alignment parameter for the custom allocators in the `hybrid_scan_io` and `parquet_io` examples. ``` /cudf/cpp/examples/parquet_io/io_source.hpp:61:66: warning: 'void cuda::mr::__4::__ibasic_async_resource< >::deallocate(cuda::__4::stream_ref, void*, size_t) [with = {cuda::__4::__ireference, cuda::mr::__4::__ibasic_resource<>, cuda::mr::__4::__with_property::__iproperty<>, cuda::mr::__4::__with_property::__iproperty<>, cuda::__4::__icopyable<>, cuda::__4::__iequality_comparable<> > >}; size_t = long unsigned int]' is deprecated: Specify an explicit alignment argument. The default alignment will be removed in a future release. 
[-Wdeprecated-declarations] 61 | void deallocate(T* ptr, std::size_t n) noexcept { mr.deallocate(stream, ptr, n * sizeof(T)); } ``` Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Yunsong Wang (https://github.com/PointKernel) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/22335 --- cpp/examples/hybrid_scan_io/io_source.hpp | 7 +++++-- cpp/examples/parquet_io/io_source.hpp | 9 ++++++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/cpp/examples/hybrid_scan_io/io_source.hpp b/cpp/examples/hybrid_scan_io/io_source.hpp index 12670d2208d..500e9476f50 100644 --- a/cpp/examples/hybrid_scan_io/io_source.hpp +++ b/cpp/examples/hybrid_scan_io/io_source.hpp @@ -53,12 +53,15 @@ struct pinned_allocator : public std::allocator { T* allocate(std::size_t n) { - auto ptr = mr.allocate(stream, n * sizeof(T)); + auto ptr = mr.allocate(stream, n * sizeof(T), alignof(T)); stream.synchronize(); return static_cast(ptr); } - void deallocate(T* ptr, std::size_t n) noexcept { mr.deallocate(stream, ptr, n * sizeof(T)); } + void deallocate(T* ptr, std::size_t n) noexcept + { + mr.deallocate(stream, ptr, n * sizeof(T), alignof(T)); + } private: rmm::host_async_resource_ref mr; diff --git a/cpp/examples/parquet_io/io_source.hpp b/cpp/examples/parquet_io/io_source.hpp index 6862b326ca9..713d35d8044 100644 --- a/cpp/examples/parquet_io/io_source.hpp +++ b/cpp/examples/parquet_io/io_source.hpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ @@ -53,12 +53,15 @@ struct pinned_allocator : public std::allocator { T* allocate(std::size_t n) { - auto ptr = mr.allocate(stream, n * sizeof(T)); + auto ptr = mr.allocate(stream, n * sizeof(T), alignof(T)); stream.synchronize(); return static_cast(ptr); } - void deallocate(T* ptr, std::size_t n) noexcept { mr.deallocate(stream, ptr, n * sizeof(T)); } + void deallocate(T* ptr, std::size_t n) noexcept + { + mr.deallocate(stream, ptr, n * sizeof(T), alignof(T)); + } private: rmm::host_async_resource_ref mr; From 0e82b62836340a503f8cfdcd8a875524a6654f03 Mon Sep 17 00:00:00 2001 From: Yunsong Wang <12716979+PointKernel@users.noreply.github.com> Date: Mon, 4 May 2026 15:52:42 -0700 Subject: [PATCH 08/36] Add skip axis to all join benchmarks (#22241) This PR updates the join benchmarks to include a skip axis, allowing users to optionally include large table sizes, which is not possible in the current setup due to its unconditional skip of those sizes. 
Authors: - Yunsong Wang (https://github.com/PointKernel) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - Shruti Shivakumar (https://github.com/shrshi) URL: https://github.com/rapidsai/cudf/pull/22241 --- cpp/benchmarks/join/conditional_join.cu | 8 ++-- cpp/benchmarks/join/distinct_join.cu | 6 ++- cpp/benchmarks/join/filter_join_indices.cu | 14 ++++-- cpp/benchmarks/join/join.cu | 12 +++-- cpp/benchmarks/join/join_common.hpp | 33 +++++++++---- cpp/benchmarks/join/left_join.cu | 56 +++++++++------------- cpp/benchmarks/join/mixed_join.cu | 20 +++++--- cpp/benchmarks/join/multiplicity_join.cu | 9 ++-- cpp/benchmarks/join/sort_merge_join.cu | 8 ++-- 9 files changed, 96 insertions(+), 70 deletions(-) diff --git a/cpp/benchmarks/join/conditional_join.cu b/cpp/benchmarks/join/conditional_join.cu index dacd1c7a648..98d7c49b20d 100644 --- a/cpp/benchmarks/join/conditional_join.cu +++ b/cpp/benchmarks/join/conditional_join.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ @@ -53,7 +53,8 @@ NVBENCH_BENCH_TYPES(nvbench_conditional_inner_join, .set_name("conditional_inner_join") .set_type_axes_names({"Nullable", "NullEquality", "DataType"}) .add_int64_axis("left_size", CONDITIONAL_JOIN_SIZE_RANGE) - .add_int64_axis("right_size", CONDITIONAL_JOIN_SIZE_RANGE); + .add_int64_axis("right_size", CONDITIONAL_JOIN_SIZE_RANGE) + .add_int64_axis("skip_large_sizes", {1}); NVBENCH_BENCH_TYPES(nvbench_conditional_left_join, NVBENCH_TYPE_AXES(JOIN_NULLABLE_RANGE, @@ -62,4 +63,5 @@ NVBENCH_BENCH_TYPES(nvbench_conditional_left_join, .set_name("conditional_left_join") .set_type_axes_names({"Nullable", "NullEquality", "DataType"}) .add_int64_axis("left_size", CONDITIONAL_JOIN_SIZE_RANGE) - .add_int64_axis("right_size", CONDITIONAL_JOIN_SIZE_RANGE); + .add_int64_axis("right_size", CONDITIONAL_JOIN_SIZE_RANGE) + .add_int64_axis("skip_large_sizes", {1}); diff --git a/cpp/benchmarks/join/distinct_join.cu b/cpp/benchmarks/join/distinct_join.cu index ea64a26bbbd..6fe8928128c 100644 --- a/cpp/benchmarks/join/distinct_join.cu +++ b/cpp/benchmarks/join/distinct_join.cu @@ -53,7 +53,8 @@ NVBENCH_BENCH_TYPES(nvbench_distinct_inner_join, .set_name("distinct_inner_join") .set_type_axes_names({"Nullable", "NullEquality", "DataType"}) .add_int64_axis("left_size", JOIN_SIZE_RANGE) - .add_int64_axis("right_size", JOIN_SIZE_RANGE); + .add_int64_axis("right_size", JOIN_SIZE_RANGE) + .add_int64_axis("skip_large_sizes", {1}); NVBENCH_BENCH_TYPES(nvbench_distinct_left_join, NVBENCH_TYPE_AXES(JOIN_NULLABLE_RANGE, @@ -62,4 +63,5 @@ NVBENCH_BENCH_TYPES(nvbench_distinct_left_join, .set_name("distinct_left_join") .set_type_axes_names({"Nullable", "NullEquality", "DataType"}) .add_int64_axis("left_size", JOIN_SIZE_RANGE) - .add_int64_axis("right_size", JOIN_SIZE_RANGE); + .add_int64_axis("right_size", JOIN_SIZE_RANGE) + .add_int64_axis("skip_large_sizes", {1}); diff --git a/cpp/benchmarks/join/filter_join_indices.cu 
b/cpp/benchmarks/join/filter_join_indices.cu index 5f6cb43afdc..1cc66d475c7 100644 --- a/cpp/benchmarks/join/filter_join_indices.cu +++ b/cpp/benchmarks/join/filter_join_indices.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -131,7 +131,8 @@ NVBENCH_BENCH_TYPES(nvbench_filter_join_indices_inner_join, .set_name("filter_join_indices_inner_join") .set_type_axes_names({"Nullable", "NullEquality", "DataType"}) .add_int64_axis("left_size", JOIN_SIZE_RANGE) - .add_int64_axis("right_size", JOIN_SIZE_RANGE); + .add_int64_axis("right_size", JOIN_SIZE_RANGE) + .add_int64_axis("skip_large_sizes", {1}); NVBENCH_BENCH_TYPES(nvbench_filter_join_indices_inner_join_complex_ast, NVBENCH_TYPE_AXES(JOIN_NULLABLE_RANGE, @@ -141,7 +142,8 @@ NVBENCH_BENCH_TYPES(nvbench_filter_join_indices_inner_join_complex_ast, .set_type_axes_names({"Nullable", "NullEquality", "DataType"}) .add_int64_axis("left_size", JOIN_SIZE_RANGE) .add_int64_axis("right_size", JOIN_SIZE_RANGE) - .add_int64_axis("ast_levels", {1, 5, 10}); + .add_int64_axis("ast_levels", {1, 5, 10}) + .add_int64_axis("skip_large_sizes", {1}); NVBENCH_BENCH_TYPES(nvbench_filter_join_indices_left_join, NVBENCH_TYPE_AXES(JOIN_NULLABLE_RANGE, @@ -150,7 +152,8 @@ NVBENCH_BENCH_TYPES(nvbench_filter_join_indices_left_join, .set_name("filter_join_indices_left_join") .set_type_axes_names({"Nullable", "NullEquality", "DataType"}) .add_int64_axis("left_size", JOIN_SIZE_RANGE) - .add_int64_axis("right_size", JOIN_SIZE_RANGE); + .add_int64_axis("right_size", JOIN_SIZE_RANGE) + .add_int64_axis("skip_large_sizes", {1}); NVBENCH_BENCH_TYPES(nvbench_filter_join_indices_full_join, NVBENCH_TYPE_AXES(JOIN_NULLABLE_RANGE, @@ -159,4 +162,5 @@ NVBENCH_BENCH_TYPES(nvbench_filter_join_indices_full_join, .set_name("filter_join_indices_full_join") .set_type_axes_names({"Nullable", "NullEquality", 
"DataType"}) .add_int64_axis("left_size", JOIN_SIZE_RANGE) - .add_int64_axis("right_size", JOIN_SIZE_RANGE); + .add_int64_axis("right_size", JOIN_SIZE_RANGE) + .add_int64_axis("skip_large_sizes", {1}); diff --git a/cpp/benchmarks/join/join.cu b/cpp/benchmarks/join/join.cu index d5404a99611..88d2cf22dcd 100644 --- a/cpp/benchmarks/join/join.cu +++ b/cpp/benchmarks/join/join.cu @@ -91,7 +91,8 @@ NVBENCH_BENCH_TYPES(nvbench_inner_join, .set_type_axes_names({"Nullable", "NullEquality", "DataType"}) .add_int64_axis("num_keys", nvbench::range(1, 5, 1)) .add_int64_axis("left_size", JOIN_SIZE_RANGE) - .add_int64_axis("right_size", JOIN_SIZE_RANGE); + .add_int64_axis("right_size", JOIN_SIZE_RANGE) + .add_int64_axis("skip_large_sizes", {1}); NVBENCH_BENCH_TYPES(nvbench_left_join, NVBENCH_TYPE_AXES(JOIN_NULLABLE_RANGE, @@ -101,7 +102,8 @@ NVBENCH_BENCH_TYPES(nvbench_left_join, .set_type_axes_names({"Nullable", "NullEquality", "DataType"}) .add_int64_axis("num_keys", nvbench::range(1, 5, 1)) .add_int64_axis("left_size", JOIN_SIZE_RANGE) - .add_int64_axis("right_size", JOIN_SIZE_RANGE); + .add_int64_axis("right_size", JOIN_SIZE_RANGE) + .add_int64_axis("skip_large_sizes", {1}); NVBENCH_BENCH_TYPES(nvbench_full_join, NVBENCH_TYPE_AXES(JOIN_NULLABLE_RANGE, @@ -111,7 +113,8 @@ NVBENCH_BENCH_TYPES(nvbench_full_join, .set_type_axes_names({"Nullable", "NullEquality", "DataType"}) .add_int64_axis("num_keys", nvbench::range(1, 5, 1)) .add_int64_axis("left_size", JOIN_SIZE_RANGE) - .add_int64_axis("right_size", JOIN_SIZE_RANGE); + .add_int64_axis("right_size", JOIN_SIZE_RANGE) + .add_int64_axis("skip_large_sizes", {1}); NVBENCH_BENCH_TYPES(nvbench_inner_join_selectivity, NVBENCH_TYPE_AXES(DEFAULT_JOIN_NULL_EQUALITY, SELECTIVITY_JOIN_DATATYPES)) @@ -120,4 +123,5 @@ NVBENCH_BENCH_TYPES(nvbench_inner_join_selectivity, .add_int64_axis("left_size", {100'000'000}) .add_int64_axis("right_size", {100'000}) .add_int64_axis("num_probes", {4}) - .add_float64_axis("selectivity", 
JOIN_SELECTIVITY_RANGE); + .add_float64_axis("selectivity", JOIN_SELECTIVITY_RANGE) + .add_int64_axis("skip_large_sizes", {1}); diff --git a/cpp/benchmarks/join/join_common.hpp b/cpp/benchmarks/join/join_common.hpp index 807335d0336..aa594a3c5b6 100644 --- a/cpp/benchmarks/join/join_common.hpp +++ b/cpp/benchmarks/join/join_common.hpp @@ -60,6 +60,26 @@ inline void create_complex_ast_expression(cudf::ast::tree& tree, cudf::size_type } } +// Returns true (and marks `state` as skipped) when the `skip_large_sizes` axis is enabled and +// the build side of this benchmark is larger than the probe side. `build_is_right` selects which +// side is the build side: true for hash-style benches that preprocess the right table, false for +// benches like `mark_join` that preprocess the left. +inline bool should_skip_large_sizes(nvbench::state& state, bool build_is_right = true) +{ + if (state.get_int64("skip_large_sizes") == 0) { return false; } + auto const left_size = state.get_int64("left_size"); + auto const right_size = state.get_int64("right_size"); + if (build_is_right && right_size > left_size) { + state.skip("build (right) should be smaller than probe (left)"); + return true; + } + if (!build_is_right && left_size > right_size) { + state.skip("build (left) should be smaller than probe (right)"); + return true; + } + return false; +} + template & key_types, Join JoinFunc, - int multiplicity = 1, - double selectivity = 0.3, - bool skip_large_right_tbl = true) + int multiplicity = 1, + double selectivity = 0.3, + bool build_is_right = true) { + if (should_skip_large_sizes(state, build_is_right)) { return; } + auto const right_size = static_cast(state.get_int64("right_size")); auto const left_size = static_cast(state.get_int64("left_size")); - if (skip_large_right_tbl && right_size > left_size) { - state.skip("Skip large right table"); - return; - } - auto const num_keys = key_types.size(); auto const num_payload_cols = 2; auto [build_table, probe_table] = 
generate_input_tables( diff --git a/cpp/benchmarks/join/left_join.cu b/cpp/benchmarks/join/left_join.cu index 37cf4d6ae8c..3f567fc0371 100644 --- a/cpp/benchmarks/join/left_join.cu +++ b/cpp/benchmarks/join/left_join.cu @@ -18,18 +18,10 @@ void nvbench_left_anti_join(nvbench::state& state, nvbench::enum_type>) { auto const num_probes = static_cast(state.get_int64("num_probes")); - auto const left_size = state.get_int64("left_size"); - auto const right_size = state.get_int64("right_size"); auto const selectivity = state.get_float64("selectivity"); auto const join_type = state.get_string("join_type"); - if (join_type == "mark_join" && left_size > right_size) { - state.skip("mark_join: build (left) should be smaller than probe (right)"); - return; - } - if (join_type == "filtered_join" && right_size > left_size) { - state.skip("filtered_join: build (right) should be smaller than probe (left)"); - return; - } + // filtered_join builds on the right side; mark_join builds on the left side. + auto const build_is_right = (join_type == "filtered_join"); auto dtypes = cycle_dtypes(get_type_or_group(static_cast(DataType)), num_keys); auto join = [num_probes, &join_type](cudf::table_view const& left, @@ -51,9 +43,8 @@ void nvbench_left_anti_join(nvbench::state& state, } }; - auto const skip_large_right = (join_type == "filtered_join"); BM_join( - state, dtypes, join, 1, selectivity, skip_large_right); + state, dtypes, join, 1, selectivity, build_is_right); } template @@ -63,18 +54,10 @@ void nvbench_left_semi_join(nvbench::state& state, nvbench::enum_type>) { auto const num_probes = static_cast(state.get_int64("num_probes")); - auto const left_size = state.get_int64("left_size"); - auto const right_size = state.get_int64("right_size"); auto const selectivity = state.get_float64("selectivity"); auto const join_type = state.get_string("join_type"); - if (join_type == "mark_join" && left_size > right_size) { - state.skip("mark_join: build (left) should be smaller than probe 
(right)"); - return; - } - if (join_type == "filtered_join" && right_size > left_size) { - state.skip("filtered_join: build (right) should be smaller than probe (left)"); - return; - } + // filtered_join builds on the right side; mark_join builds on the left side. + auto const build_is_right = (join_type == "filtered_join"); auto dtypes = cycle_dtypes(get_type_or_group(static_cast(DataType)), num_keys); auto join = [num_probes, &join_type](cudf::table_view const& left, @@ -95,9 +78,8 @@ void nvbench_left_semi_join(nvbench::state& state, return obj.semi_join(left); } }; - auto const skip_large_right = (join_type == "filtered_join"); BM_join( - state, dtypes, join, 1, selectivity, skip_large_right); + state, dtypes, join, 1, selectivity, build_is_right); } template @@ -151,12 +133,14 @@ void nvbench_mark_left_semi_join_selectivity( { auto const num_probes = static_cast(state.get_int64("num_probes")); auto const selectivity = state.get_float64("selectivity"); + auto const prefilter = + state.get_int64("use_prefilter") != 0 ? 
cudf::join_prefilter::YES : cudf::join_prefilter::NO; auto dtypes = cycle_dtypes(get_type_or_group(static_cast(DataType)), num_keys); - auto join = [num_probes](cudf::table_view const& left, - cudf::table_view const& right, - cudf::null_equality compare_nulls) { - cudf::mark_join obj(left, compare_nulls, cudf::join_prefilter::YES, cudf::get_default_stream()); + auto join = [num_probes, prefilter](cudf::table_view const& left, + cudf::table_view const& right, + cudf::null_equality compare_nulls) { + cudf::mark_join obj(left, compare_nulls, prefilter, cudf::get_default_stream()); for (auto i = 0; i < num_probes - 1; i++) { [[maybe_unused]] auto result = obj.semi_join(right); } @@ -176,7 +160,8 @@ NVBENCH_BENCH_TYPES(nvbench_left_anti_join, .add_int64_axis("right_size", JOIN_SIZE_RANGE) .add_int64_axis("num_probes", {4}) .add_float64_axis("selectivity", {0.3}) - .add_string_axis("join_type", {"mark_join", "filtered_join"}); + .add_string_axis("join_type", {"mark_join", "filtered_join"}) + .add_int64_axis("skip_large_sizes", {1}); NVBENCH_BENCH_TYPES(nvbench_left_semi_join, NVBENCH_TYPE_AXES(JOIN_NULLABLE_RANGE, @@ -188,7 +173,8 @@ NVBENCH_BENCH_TYPES(nvbench_left_semi_join, .add_int64_axis("right_size", JOIN_SIZE_RANGE) .add_int64_axis("num_probes", {4}) .add_float64_axis("selectivity", {0.3}) - .add_string_axis("join_type", {"mark_join", "filtered_join"}); + .add_string_axis("join_type", {"mark_join", "filtered_join"}) + .add_int64_axis("skip_large_sizes", {1}); NVBENCH_BENCH_TYPES(nvbench_filtered_left_anti_join_selectivity, NVBENCH_TYPE_AXES(DEFAULT_JOIN_NULL_EQUALITY, SELECTIVITY_JOIN_DATATYPES)) @@ -197,7 +183,8 @@ NVBENCH_BENCH_TYPES(nvbench_filtered_left_anti_join_selectivity, .add_int64_axis("left_size", {100'000'000}) .add_int64_axis("right_size", {100'000}) .add_int64_axis("num_probes", {4}) - .add_float64_axis("selectivity", JOIN_SELECTIVITY_RANGE); + .add_float64_axis("selectivity", JOIN_SELECTIVITY_RANGE) + .add_int64_axis("skip_large_sizes", {1}); 
NVBENCH_BENCH_TYPES(nvbench_filtered_left_semi_join_selectivity, NVBENCH_TYPE_AXES(DEFAULT_JOIN_NULL_EQUALITY, SELECTIVITY_JOIN_DATATYPES)) @@ -206,7 +193,8 @@ NVBENCH_BENCH_TYPES(nvbench_filtered_left_semi_join_selectivity, .add_int64_axis("left_size", {100'000'000}) .add_int64_axis("right_size", {100'000}) .add_int64_axis("num_probes", {4}) - .add_float64_axis("selectivity", JOIN_SELECTIVITY_RANGE); + .add_float64_axis("selectivity", JOIN_SELECTIVITY_RANGE) + .add_int64_axis("skip_large_sizes", {1}); NVBENCH_BENCH_TYPES(nvbench_mark_left_semi_join_selectivity, NVBENCH_TYPE_AXES(DEFAULT_JOIN_NULL_EQUALITY, SELECTIVITY_JOIN_DATATYPES)) @@ -215,4 +203,6 @@ NVBENCH_BENCH_TYPES(nvbench_mark_left_semi_join_selectivity, .add_int64_axis("left_size", {100'000}) .add_int64_axis("right_size", {100'000'000}) .add_int64_axis("num_probes", {4}) - .add_float64_axis("selectivity", JOIN_SELECTIVITY_RANGE); + .add_float64_axis("selectivity", JOIN_SELECTIVITY_RANGE) + .add_int64_axis("use_prefilter", {1}) + .add_int64_axis("skip_large_sizes", {1}); diff --git a/cpp/benchmarks/join/mixed_join.cu b/cpp/benchmarks/join/mixed_join.cu index 1dcc9544101..f05ed2f3b39 100644 --- a/cpp/benchmarks/join/mixed_join.cu +++ b/cpp/benchmarks/join/mixed_join.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ @@ -166,7 +166,8 @@ NVBENCH_BENCH_TYPES(nvbench_mixed_inner_join, .set_name("mixed_inner_join") .set_type_axes_names({"Nullable", "NullEquality", "DataType"}) .add_int64_axis("left_size", JOIN_SIZE_RANGE) - .add_int64_axis("right_size", JOIN_SIZE_RANGE); + .add_int64_axis("right_size", JOIN_SIZE_RANGE) + .add_int64_axis("skip_large_sizes", {1}); NVBENCH_BENCH_TYPES(nvbench_mixed_inner_join_complex_ast, NVBENCH_TYPE_AXES(JOIN_NULLABLE_RANGE, @@ -176,7 +177,8 @@ NVBENCH_BENCH_TYPES(nvbench_mixed_inner_join_complex_ast, .set_type_axes_names({"Nullable", "NullEquality", "DataType"}) .add_int64_axis("left_size", JOIN_SIZE_RANGE) .add_int64_axis("right_size", JOIN_SIZE_RANGE) - .add_int64_axis("ast_levels", {1, 5, 10}); + .add_int64_axis("ast_levels", {1, 5, 10}) + .add_int64_axis("skip_large_sizes", {1}); NVBENCH_BENCH_TYPES(nvbench_mixed_left_join, NVBENCH_TYPE_AXES(JOIN_NULLABLE_RANGE, @@ -185,7 +187,8 @@ NVBENCH_BENCH_TYPES(nvbench_mixed_left_join, .set_name("mixed_left_join") .set_type_axes_names({"Nullable", "NullEquality", "DataType"}) .add_int64_axis("left_size", JOIN_SIZE_RANGE) - .add_int64_axis("right_size", JOIN_SIZE_RANGE); + .add_int64_axis("right_size", JOIN_SIZE_RANGE) + .add_int64_axis("skip_large_sizes", {1}); NVBENCH_BENCH_TYPES(nvbench_mixed_full_join, NVBENCH_TYPE_AXES(JOIN_NULLABLE_RANGE, @@ -194,7 +197,8 @@ NVBENCH_BENCH_TYPES(nvbench_mixed_full_join, .set_name("mixed_full_join") .set_type_axes_names({"Nullable", "NullEquality", "DataType"}) .add_int64_axis("left_size", JOIN_SIZE_RANGE) - .add_int64_axis("right_size", JOIN_SIZE_RANGE); + .add_int64_axis("right_size", JOIN_SIZE_RANGE) + .add_int64_axis("skip_large_sizes", {1}); NVBENCH_BENCH_TYPES(nvbench_mixed_left_semi_join, NVBENCH_TYPE_AXES(JOIN_NULLABLE_RANGE, @@ -203,7 +207,8 @@ NVBENCH_BENCH_TYPES(nvbench_mixed_left_semi_join, .set_name("mixed_left_semi_join") .set_type_axes_names({"Nullable", "NullEquality", "DataType"}) .add_int64_axis("left_size", 
JOIN_SIZE_RANGE) - .add_int64_axis("right_size", JOIN_SIZE_RANGE); + .add_int64_axis("right_size", JOIN_SIZE_RANGE) + .add_int64_axis("skip_large_sizes", {1}); NVBENCH_BENCH_TYPES(nvbench_mixed_left_anti_join, NVBENCH_TYPE_AXES(JOIN_NULLABLE_RANGE, @@ -212,4 +217,5 @@ NVBENCH_BENCH_TYPES(nvbench_mixed_left_anti_join, .set_name("mixed_left_anti_join") .set_type_axes_names({"Nullable", "NullEquality", "DataType"}) .add_int64_axis("left_size", JOIN_SIZE_RANGE) - .add_int64_axis("right_size", JOIN_SIZE_RANGE); + .add_int64_axis("right_size", JOIN_SIZE_RANGE) + .add_int64_axis("skip_large_sizes", {1}); diff --git a/cpp/benchmarks/join/multiplicity_join.cu b/cpp/benchmarks/join/multiplicity_join.cu index a1aff0e1ef3..e9d8878db06 100644 --- a/cpp/benchmarks/join/multiplicity_join.cu +++ b/cpp/benchmarks/join/multiplicity_join.cu @@ -115,7 +115,8 @@ NVBENCH_BENCH_TYPES(nvbench_hm_inner_join, .add_int64_axis("num_keys", nvbench::range(1, 3, 1)) .add_int64_axis("left_size", JOIN_SIZE_RANGE) .add_int64_axis("right_size", JOIN_SIZE_RANGE) - .add_int64_axis("multiplicity", {100, 1'000, 10'000, 50'000}); + .add_int64_axis("multiplicity", {100, 1'000, 10'000, 50'000}) + .add_int64_axis("skip_large_sizes", {1}); NVBENCH_BENCH_TYPES(nvbench_hm_left_join, NVBENCH_TYPE_AXES(JOIN_NULLABLE_RANGE, @@ -127,7 +128,8 @@ NVBENCH_BENCH_TYPES(nvbench_hm_left_join, .add_int64_axis("num_keys", nvbench::range(1, 3, 1)) .add_int64_axis("left_size", JOIN_SIZE_RANGE) .add_int64_axis("right_size", JOIN_SIZE_RANGE) - .add_int64_axis("multiplicity", {100, 1'000, 10'000, 50'000}); + .add_int64_axis("multiplicity", {100, 1'000, 10'000, 50'000}) + .add_int64_axis("skip_large_sizes", {1}); NVBENCH_BENCH_TYPES(nvbench_hm_full_join, NVBENCH_TYPE_AXES(JOIN_NULLABLE_RANGE, @@ -139,4 +141,5 @@ NVBENCH_BENCH_TYPES(nvbench_hm_full_join, .add_int64_axis("num_keys", nvbench::range(1, 3, 1)) .add_int64_axis("left_size", JOIN_SIZE_RANGE) .add_int64_axis("right_size", JOIN_SIZE_RANGE) - 
.add_int64_axis("multiplicity", {100, 1'000, 10'000, 50'000}); + .add_int64_axis("multiplicity", {100, 1'000, 10'000, 50'000}) + .add_int64_axis("skip_large_sizes", {1}); diff --git a/cpp/benchmarks/join/sort_merge_join.cu b/cpp/benchmarks/join/sort_merge_join.cu index 0dcdc21791d..461960e1c2a 100644 --- a/cpp/benchmarks/join/sort_merge_join.cu +++ b/cpp/benchmarks/join/sort_merge_join.cu @@ -40,10 +40,7 @@ void nvbench_sort_merge_inner_join(nvbench::state& state, auto const multiplicity = 1; auto const selectivity = 0.3; - if (right_size > left_size) { - state.skip("Skip large right table"); - return; - } + if (should_skip_large_sizes(state)) { return; } auto dtypes = cycle_dtypes(get_type_or_group(static_cast(DataType)), num_keys); @@ -116,4 +113,5 @@ NVBENCH_BENCH_TYPES(nvbench_sort_merge_inner_join, .add_int64_axis("num_keys", nvbench::range(1, 3, 1)) .add_int64_axis("left_size", {10'000, 100'000}) .add_int64_axis("right_size", {10'000, 100'000}) - .add_int64_axis("use_key_remap", {0, 1}); + .add_int64_axis("use_key_remap", {0, 1}) + .add_int64_axis("skip_large_sizes", {1}); From c2f583c7a4ec2503ac41ebbc75a98cb3e89c58b9 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Tue, 5 May 2026 05:50:21 -0500 Subject: [PATCH 09/36] Expose __from_arrow__ on masked extension dtype proxies (#22373) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Pandas' `BaseMaskedDtype` defines `__from_arrow__` for converting a `pyarrow.Array` (including `NullArray`/`ChunkedArray` of nulls) into the matching `BaseMaskedArray`. The cudf.pandas final proxy types for `BooleanDtype`, `Int{8,16,32,64}Dtype`, `UInt{8,16,32,64}Dtype`, and `Float{32,64}Dtype` did not list `__from_arrow__` in their `additional_attributes`, so the proxy `__getattr__` raised `AttributeError` even though the slow object has it. 
## Change Add `"__from_arrow__": _FastSlowAttribute("__from_arrow__")` to all eleven masked dtype proxy declarations in `python/cudf/cudf/pandas/_wrappers/pandas.py`, mirroring the existing pattern on `ArrowDtype`. ## Tests / Conftest Removes 25 entries from `conftest-patch.py` that were xfailed only because of the missing attribute: - 22 parametrizations of `tests/arrays/masked/test_arrow_compat.py::test_from_arrow_null` (all eleven masked dtypes × two arrow array shapes). - `tests/arrays/masked/test_arrow_compat.py::test_arrow_from_arrow_uint`. - `tests/arrays/masked/test_arrow_compat.py::test_dataframe_from_arrow_types_mapper`. - `tests/indexes/multi/test_constructors.py::test_from_frame_missing_values_multiIndex`. All 22 `test_from_arrow_null` cases pass, the full `test_arrow_compat.py` file passes (69 passed, 22 unrelated xfails), and the cudf-side `cudf_pandas_tests/` suite is clean (435 passed). Co-authored-by: Claude Opus 4.7 (1M context) --- python/cudf/cudf/pandas/_wrappers/pandas.py | 11 ++++++++ .../cudf/pandas/scripts/conftest-patch.py | 25 ------------------- 2 files changed, 11 insertions(+), 25 deletions(-) diff --git a/python/cudf/cudf/pandas/_wrappers/pandas.py b/python/cudf/cudf/pandas/_wrappers/pandas.py index 006473953cd..0bdc05205f5 100644 --- a/python/cudf/cudf/pandas/_wrappers/pandas.py +++ b/python/cudf/cudf/pandas/_wrappers/pandas.py @@ -882,6 +882,7 @@ def Index__setattr__(self, name, value): slow_to_fast=_Unusable(), bases=(pd.api.extensions.ExtensionDtype,), additional_attributes={ + "__from_arrow__": _FastSlowAttribute("__from_arrow__"), "__hash__": _FastSlowAttribute("__hash__"), }, ) @@ -908,6 +909,7 @@ def Index__setattr__(self, name, value): slow_to_fast=_Unusable(), bases=(pd.api.extensions.ExtensionDtype,), additional_attributes={ + "__from_arrow__": _FastSlowAttribute("__from_arrow__"), "__hash__": _FastSlowAttribute("__hash__"), }, ) @@ -921,6 +923,7 @@ def Index__setattr__(self, name, value): slow_to_fast=_Unusable(),
bases=(pd.api.extensions.ExtensionDtype,), additional_attributes={ + "__from_arrow__": _FastSlowAttribute("__from_arrow__"), "__hash__": _FastSlowAttribute("__hash__"), }, ) @@ -933,6 +936,7 @@ def Index__setattr__(self, name, value): slow_to_fast=_Unusable(), bases=(pd.api.extensions.ExtensionDtype,), additional_attributes={ + "__from_arrow__": _FastSlowAttribute("__from_arrow__"), "__hash__": _FastSlowAttribute("__hash__"), }, ) @@ -945,6 +949,7 @@ def Index__setattr__(self, name, value): slow_to_fast=_Unusable(), bases=(pd.api.extensions.ExtensionDtype,), additional_attributes={ + "__from_arrow__": _FastSlowAttribute("__from_arrow__"), "__hash__": _FastSlowAttribute("__hash__"), }, ) @@ -957,6 +962,7 @@ def Index__setattr__(self, name, value): slow_to_fast=_Unusable(), bases=(pd.api.extensions.ExtensionDtype,), additional_attributes={ + "__from_arrow__": _FastSlowAttribute("__from_arrow__"), "__hash__": _FastSlowAttribute("__hash__"), }, ) @@ -969,6 +975,7 @@ def Index__setattr__(self, name, value): slow_to_fast=_Unusable(), bases=(pd.api.extensions.ExtensionDtype,), additional_attributes={ + "__from_arrow__": _FastSlowAttribute("__from_arrow__"), "__hash__": _FastSlowAttribute("__hash__"), }, ) @@ -981,6 +988,7 @@ def Index__setattr__(self, name, value): slow_to_fast=_Unusable(), bases=(pd.api.extensions.ExtensionDtype,), additional_attributes={ + "__from_arrow__": _FastSlowAttribute("__from_arrow__"), "__hash__": _FastSlowAttribute("__hash__"), }, ) @@ -993,6 +1001,7 @@ def Index__setattr__(self, name, value): slow_to_fast=_Unusable(), bases=(pd.api.extensions.ExtensionDtype,), additional_attributes={ + "__from_arrow__": _FastSlowAttribute("__from_arrow__"), "__hash__": _FastSlowAttribute("__hash__"), }, ) @@ -1074,6 +1083,7 @@ def Index__setattr__(self, name, value): slow_to_fast=_Unusable(), bases=(pd.api.extensions.ExtensionDtype,), additional_attributes={ + "__from_arrow__": _FastSlowAttribute("__from_arrow__"), "__hash__": _FastSlowAttribute("__hash__"), 
}, ) @@ -1086,6 +1096,7 @@ def Index__setattr__(self, name, value): slow_to_fast=_Unusable(), bases=(pd.api.extensions.ExtensionDtype,), additional_attributes={ + "__from_arrow__": _FastSlowAttribute("__from_arrow__"), "__hash__": _FastSlowAttribute("__hash__"), }, ) diff --git a/python/cudf/cudf/pandas/scripts/conftest-patch.py b/python/cudf/cudf/pandas/scripts/conftest-patch.py index 8e65569d557..83202033fa0 100644 --- a/python/cudf/cudf/pandas/scripts/conftest-patch.py +++ b/python/cudf/cudf/pandas/scripts/conftest-patch.py @@ -537,30 +537,6 @@ def pytest_unconfigure(config): "tests/arrays/masked/test_arrow_compat.py::test_arrow_array[UInt64]": "TODO: Add a reason for failure", "tests/arrays/masked/test_arrow_compat.py::test_arrow_array[UInt8]": "TODO: Add a reason for failure", "tests/arrays/masked/test_arrow_compat.py::test_arrow_array[boolean]": "TODO: Add a reason for failure", - "tests/arrays/masked/test_arrow_compat.py::test_arrow_from_arrow_uint": "AttributeError: 'UInt32Dtype' object has no attribute '__from_arrow__'", - "tests/arrays/masked/test_arrow_compat.py::test_dataframe_from_arrow_types_mapper": "ValueError: This column does not support to be converted to a pandas ExtensionArray", - "tests/arrays/masked/test_arrow_compat.py::test_from_arrow_null[Float32-arr0]": "AttributeError: 'Float32Dtype' object has no attribute '__from_arrow__'", - "tests/arrays/masked/test_arrow_compat.py::test_from_arrow_null[Float32-arr1]": "AttributeError: 'Float32Dtype' object has no attribute '__from_arrow__'", - "tests/arrays/masked/test_arrow_compat.py::test_from_arrow_null[Float64-arr0]": "AttributeError: 'Float64Dtype' object has no attribute '__from_arrow__'", - "tests/arrays/masked/test_arrow_compat.py::test_from_arrow_null[Float64-arr1]": "AttributeError: 'Float64Dtype' object has no attribute '__from_arrow__'", - "tests/arrays/masked/test_arrow_compat.py::test_from_arrow_null[Int16-arr0]": "AttributeError: 'Int16Dtype' object has no attribute '__from_arrow__'", 
- "tests/arrays/masked/test_arrow_compat.py::test_from_arrow_null[Int16-arr1]": "AttributeError: 'Int16Dtype' object has no attribute '__from_arrow__'", - "tests/arrays/masked/test_arrow_compat.py::test_from_arrow_null[Int32-arr0]": "AttributeError: 'Int32Dtype' object has no attribute '__from_arrow__'", - "tests/arrays/masked/test_arrow_compat.py::test_from_arrow_null[Int32-arr1]": "AttributeError: 'Int32Dtype' object has no attribute '__from_arrow__'", - "tests/arrays/masked/test_arrow_compat.py::test_from_arrow_null[Int64-arr0]": "AttributeError: 'Int64Dtype' object has no attribute '__from_arrow__'", - "tests/arrays/masked/test_arrow_compat.py::test_from_arrow_null[Int64-arr1]": "AttributeError: 'Int64Dtype' object has no attribute '__from_arrow__'", - "tests/arrays/masked/test_arrow_compat.py::test_from_arrow_null[Int8-arr0]": "AttributeError: 'Int8Dtype' object has no attribute '__from_arrow__'", - "tests/arrays/masked/test_arrow_compat.py::test_from_arrow_null[Int8-arr1]": "AttributeError: 'Int8Dtype' object has no attribute '__from_arrow__'", - "tests/arrays/masked/test_arrow_compat.py::test_from_arrow_null[UInt16-arr0]": "AttributeError: 'UInt16Dtype' object has no attribute '__from_arrow__'", - "tests/arrays/masked/test_arrow_compat.py::test_from_arrow_null[UInt16-arr1]": "AttributeError: 'UInt16Dtype' object has no attribute '__from_arrow__'", - "tests/arrays/masked/test_arrow_compat.py::test_from_arrow_null[UInt32-arr0]": "AttributeError: 'UInt32Dtype' object has no attribute '__from_arrow__'", - "tests/arrays/masked/test_arrow_compat.py::test_from_arrow_null[UInt32-arr1]": "AttributeError: 'UInt32Dtype' object has no attribute '__from_arrow__'", - "tests/arrays/masked/test_arrow_compat.py::test_from_arrow_null[UInt64-arr0]": "AttributeError: 'UInt64Dtype' object has no attribute '__from_arrow__'", - "tests/arrays/masked/test_arrow_compat.py::test_from_arrow_null[UInt64-arr1]": "AttributeError: 'UInt64Dtype' object has no attribute '__from_arrow__'", - 
"tests/arrays/masked/test_arrow_compat.py::test_from_arrow_null[UInt8-arr0]": "AttributeError: 'UInt8Dtype' object has no attribute '__from_arrow__'", - "tests/arrays/masked/test_arrow_compat.py::test_from_arrow_null[UInt8-arr1]": "AttributeError: 'UInt8Dtype' object has no attribute '__from_arrow__'", - "tests/arrays/masked/test_arrow_compat.py::test_from_arrow_null[boolean-arr0]": "AttributeError: 'BooleanDtype' object has no attribute '__from_arrow__'", - "tests/arrays/masked/test_arrow_compat.py::test_from_arrow_null[boolean-arr1]": "AttributeError: 'BooleanDtype' object has no attribute '__from_arrow__'", "tests/arrays/masked/test_arrow_compat.py::test_from_arrow_type_error[Float32]": "TODO: Add a reason for failure", "tests/arrays/masked/test_arrow_compat.py::test_from_arrow_type_error[Float64]": "TODO: Add a reason for failure", "tests/arrays/masked/test_arrow_compat.py::test_from_arrow_type_error[Int16]": "TODO: Add a reason for failure", @@ -4310,7 +4286,6 @@ def pytest_unconfigure(config): "tests/indexes/multi/test_constructors.py::test_create_index_existing_name": "TODO: Add a reason for failure", "tests/indexes/multi/test_constructors.py::test_from_arrays_respects_none_names": "TODO: Add a reason for failure", "tests/indexes/multi/test_constructors.py::test_from_frame_dtype_fidelity": "TODO: Add a reason for failure", - "tests/indexes/multi/test_constructors.py::test_from_frame_missing_values_multiIndex": "AttributeError: 'Float64Dtype' object has no attribute '__from_arrow__'", "tests/indexes/multi/test_constructors.py::test_multiindex_inference_consistency": "TODO: Add a reason for failure", "tests/indexes/multi/test_conversion.py::test_to_frame_column_rangeindex": "AssertionError: Index are different", "tests/indexes/multi/test_conversion.py::test_to_frame_dtype_fidelity": "TODO: Add a reason for failure", From 5c4c21abd4a5786d9d4b1aeab15b0fc24841b7ea Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Tue, 5 May 2026 05:53:49 -0500 Subject: [PATCH 
10/36] Fix datetime resolution for empty data inputs (#22363) ## Description In pandas empty datetime inputs default to `s` resolution, this PR fixes that inconsistency and matches `cudf` with `pandas`. This PR also fixes `freq` preservation in `Groupby.size` ## Checklist - [x] I am familiar with the [Contributing Guidelines](https://github.com/rapidsai/cudf/blob/HEAD/CONTRIBUTING.md). - [x] New or existing tests cover these changes. - [x] The documentation is up to date with these changes. --- python/cudf/cudf/core/index.py | 29 ++- python/cudf/cudf/core/resample.py | 44 ++--- python/cudf/cudf/core/tools/datetimes.py | 55 +++--- .../cudf/pandas/scripts/conftest-patch.py | 170 ------------------ .../general_functions/test_to_datetime.py | 99 ++++++++++ .../cudf/cudf/tests/groupby/test_resample.py | 75 ++++++-- .../datetimeindex/methods/test_to_pandas.py | 27 +++ .../indexes/datetimeindex/test_constructor.py | 43 ++++- 8 files changed, 307 insertions(+), 235 deletions(-) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 41be251acce..54ffdf33352 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -3206,7 +3206,15 @@ def __init__( data = data.astype(dtype) elif data.dtype.kind != "M": if is_dtype_obj_string(data.dtype): - data = data.astype(np.dtype("datetime64[us]")) + # Pandas's array_to_datetime falls back to [s] when no + # concrete (non-NaT) datetime is observed — empty input or + # an all-NaT/None array (pandas-dev/pandas#55901). Otherwise + # parsed strings land on [us]. 
+ if len(data) == 0 or data.null_count == len(data): + target_unit = "s" + else: + target_unit = "us" + data = data.astype(np.dtype(f"datetime64[{target_unit}]")) else: data = data.astype(np.dtype("datetime64[ns]")) @@ -4030,11 +4038,20 @@ def to_pandas( ) -> pd.DatetimeIndex: result = super().to_pandas(nullable=nullable, arrow_type=arrow_type) if not arrow_type and self._freq is not None: - # Re-infer from the result's values rather than trusting the cached - # self._freq, which (e.g. via deserialization or external assignment) - # may not conform. Pandas validates on assignment and raises when - # values don't match, so inferring keeps the proxy round-trip robust. - result.freq = result.inferred_freq + # Prefer pandas's inferred_freq because the cached self._freq may + # not conform (e.g. after deserialization or external assignment) + # and pandas validates the assignment against the index values. + # Fall back to the cached freq when inference is impossible + # (empty / single-element indexes), so resample round-trips + # preserve `freq` to match pandas. + inferred = result.inferred_freq + if inferred is None: + try: + result.freq = self._freq._maybe_as_fast_pandas_offset() + except ValueError: + pass + else: + result.freq = inferred return result @_performance_tracking diff --git a/python/cudf/cudf/core/resample.py b/python/cudf/cudf/core/resample.py index 3008ba19d08..9bef8000e2b 100644 --- a/python/cudf/cudf/core/resample.py +++ b/python/cudf/cudf/core/resample.py @@ -4,7 +4,6 @@ from typing import TYPE_CHECKING -import numpy as np import pandas as pd import cudf @@ -70,6 +69,25 @@ def asfreq(self): ) ) + def size(self): + # GroupBy.size bypasses _Resampler.agg and so doesn't pick up the + # bin-label freq. Re-align to the full set of bins (filling empty + # buckets with 0, since size is non-null in pandas) and re-attach + # the freq. 
+ result = super().size() + if len(self.grouping.bin_labels) != len(result): + index = cudf.Index( + self.grouping.bin_labels, name=self.grouping.names[0] + ) + result = ( + result._align_to_index( + index, how="right", sort=False, allow_non_unique=True + ) + .fillna(0) + .astype(result.dtype) + ) + return self._restore_freq(result.sort_index()) + def _scan_fill( self, method: plc.replace.ReplacePolicy, limit: int | None ) -> DataFrameOrSeries: @@ -260,26 +278,12 @@ def _handle_frequency_grouper(self, by): freq=freq, ) - # We want the (resampled) column of timestamps in the result - # to have a resolution closest to the resampling - # frequency. For example, if resampling from '1T' to '1s', we - # want the resulting timestamp column to by of dtype - # 'datetime64[s]'. libcudf requires the bin labels and key - # column to have the same dtype, so we compute a `result_type` + # Pandas resample preserves the input column's resolution, so the + # resulting timestamp column should match `key_column.dtype` rather + # than be derived from the offset. libcudf requires the bin labels + # and key column to share a dtype, so we compute a `result_type` # and cast them both to that type. - if offset.rule_code.lower() in {"d", "h"}: - # unsupported resolution (we don't support resolutions >s) - result_type = np.dtype("datetime64[s]") - else: - try: - result_type = np.dtype(f"datetime64[{offset.rule_code}]") - # TODO: Ideally, we can avoid one cast by having `date_range` - # generate timestamps of a given dtype. 
Currently, it can - # only generate timestamps with 'ns' precision - except TypeError: - # unsupported resolution (we don't support resolutions >s) - # fall back to using datetime64[s] - result_type = np.dtype("datetime64[s]") + result_type = key_column.dtype cast_key_column = key_column.astype(result_type) cast_bin_labels = bin_labels.astype(result_type) diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py index e56c29e68d4..c83610287df 100644 --- a/python/cudf/cudf/core/tools/datetimes.py +++ b/python/cudf/cudf/core/tools/datetimes.py @@ -133,7 +133,7 @@ def to_datetime( >>> cudf.to_datetime(df) 0 2015-02-04 1 2016-03-05 - dtype: datetime64[s] + dtype: datetime64[us] >>> cudf.to_datetime(1490195805, unit='s') Timestamp('2017-03-22 15:16:45') >>> cudf.to_datetime(1490195805433502912, unit='ns') @@ -216,32 +216,31 @@ def to_datetime( .str.zfill(2) ) format = "%Y-%m-%d" + target_unit = "us" for u in ["h", "m", "s", "ms", "us", "ns"]: value = unit_rev.get(u) if value is not None and value in arg: arg_col = arg._data[value] if arg_col.dtype.kind == "f": - col = new_series._column.strptime( - np.dtype("datetime64[ns]"), format=format - ) + target_unit = "ns" break elif arg_col.dtype.kind == "O": string_col = cast("StringColumn", arg_col) if not string_col.is_all_integer(): - col = new_series._column.strptime( - np.dtype("datetime64[ns]"), format=format - ) + target_unit = "ns" break - else: - col = new_series._column.strptime( - np.dtype("datetime64[s]"), format=format - ) + elif u == "ns": + # An explicit nanosecond field forces ns precision + # (pandas widens to [ns] when ns is present). 
+ target_unit = "ns" + col = new_series._column.strptime( + np.dtype(f"datetime64[{target_unit}]"), format=format + ) times_column = None - factor_denominator = ( - unit_to_nanoseconds_conversion["s"] - if np.datetime_data(col.dtype)[0] == "s" - else 1 + col_unit = np.datetime_data(col.dtype)[0] + factor_denominator = unit_to_nanoseconds_conversion.get( + col_unit, 1 ) for u in ["h", "m", "s", "ms", "us", "ns"]: value = unit_rev.get(u) @@ -259,9 +258,12 @@ def to_datetime( np.dtype(np.float64) ) - factor = ( - unit_to_nanoseconds_conversion[u] / factor_denominator - ) + factor_numerator = unit_to_nanoseconds_conversion[u] + factor: int | float + if factor_numerator % factor_denominator == 0: + factor = factor_numerator // factor_denominator + else: + factor = factor_numerator / factor_denominator if times_column is None: times_column = current_col * factor @@ -280,9 +282,6 @@ def to_datetime( ) return Series._from_column(col, index=arg.index) else: - if unit is None and is_scalar(arg): - unit = "ns" - col = _process_col( col=as_column(arg), unit=unit, @@ -333,13 +332,13 @@ def _process_col( # int column out of it to parse against `format`. # Instead we directly cast to int and perform # parsing against `format`. + # Pandas 3 defaults parsed datetimes to `datetime64[us]` + # regardless of format precision. col = ( col.astype(np.dtype(np.int64)) .astype(DEFAULT_STRING_DTYPE) .strptime( - dtype=np.dtype("datetime64[us]") - if "%f" in format - else np.dtype("datetime64[s]"), + dtype=np.dtype("datetime64[us]"), format=format, ) ) @@ -373,7 +372,13 @@ def _process_col( ) elif col.dtype.kind == "O": - if unit not in (None, "ns") or col.is_all_null: + if col.is_all_null: + # Pandas converts all-null inputs to NaT at second precision + # regardless of `unit`/`format`; mirror that here without + # routing through the int/float path (which would land on + # the [ns]/[us] defaults). 
+ return col.astype(np.dtype("datetime64[s]")) + if unit not in (None, "ns"): try: col = col.astype(np.dtype(np.int64)) except ValueError: diff --git a/python/cudf/cudf/pandas/scripts/conftest-patch.py b/python/cudf/cudf/pandas/scripts/conftest-patch.py index 83202033fa0..373e0bf1670 100644 --- a/python/cudf/cudf/pandas/scripts/conftest-patch.py +++ b/python/cudf/cudf/pandas/scripts/conftest-patch.py @@ -212,50 +212,34 @@ def pytest_unconfigure(config): "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_dti_cmp_str['US/Eastern']": "AssertionError: numpy array are different", "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[ge-'Asia/Tokyo'-DataFrame-other0]": "Failed: DID NOT RAISE ", "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[ge-'Asia/Tokyo'-DataFrame-other2]": "Failed: DID NOT RAISE ", - "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[ge-'Asia/Tokyo'-Index-other0]": "Failed: DID NOT RAISE ", - "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[ge-'Asia/Tokyo'-Index-other2]": "Failed: DID NOT RAISE ", "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[ge-'Asia/Tokyo'-Series-other0]": "Failed: DID NOT RAISE ", "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[ge-'Asia/Tokyo'-Series-other2]": "Failed: DID NOT RAISE ", "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[ge-'US/Eastern'-DataFrame-other0]": "Failed: DID NOT RAISE ", "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[ge-'US/Eastern'-DataFrame-other2]": "Failed: DID NOT RAISE ", - 
"tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[ge-'US/Eastern'-Index-other0]": "Failed: DID NOT RAISE ", - "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[ge-'US/Eastern'-Index-other2]": "Failed: DID NOT RAISE ", "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[ge-'US/Eastern'-Series-other0]": "Failed: DID NOT RAISE ", "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[ge-'US/Eastern'-Series-other2]": "Failed: DID NOT RAISE ", "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[gt-'Asia/Tokyo'-DataFrame-other0]": "Failed: DID NOT RAISE ", "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[gt-'Asia/Tokyo'-DataFrame-other2]": "Failed: DID NOT RAISE ", - "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[gt-'Asia/Tokyo'-Index-other0]": "Failed: DID NOT RAISE ", - "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[gt-'Asia/Tokyo'-Index-other2]": "Failed: DID NOT RAISE ", "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[gt-'Asia/Tokyo'-Series-other0]": "Failed: DID NOT RAISE ", "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[gt-'Asia/Tokyo'-Series-other2]": "Failed: DID NOT RAISE ", "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[gt-'US/Eastern'-DataFrame-other0]": "Failed: DID NOT RAISE ", "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[gt-'US/Eastern'-DataFrame-other2]": "Failed: DID NOT RAISE ", - 
"tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[gt-'US/Eastern'-Index-other0]": "Failed: DID NOT RAISE ", - "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[gt-'US/Eastern'-Index-other2]": "Failed: DID NOT RAISE ", "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[gt-'US/Eastern'-Series-other0]": "Failed: DID NOT RAISE ", "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[gt-'US/Eastern'-Series-other2]": "Failed: DID NOT RAISE ", "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[le-'Asia/Tokyo'-DataFrame-other0]": "Failed: DID NOT RAISE ", "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[le-'Asia/Tokyo'-DataFrame-other2]": "Failed: DID NOT RAISE ", - "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[le-'Asia/Tokyo'-Index-other0]": "Failed: DID NOT RAISE ", - "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[le-'Asia/Tokyo'-Index-other2]": "Failed: DID NOT RAISE ", "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[le-'Asia/Tokyo'-Series-other0]": "Failed: DID NOT RAISE ", "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[le-'Asia/Tokyo'-Series-other2]": "Failed: DID NOT RAISE ", "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[le-'US/Eastern'-DataFrame-other0]": "Failed: DID NOT RAISE ", "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[le-'US/Eastern'-DataFrame-other2]": "Failed: DID NOT RAISE ", - 
"tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[le-'US/Eastern'-Index-other0]": "Failed: DID NOT RAISE ", - "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[le-'US/Eastern'-Index-other2]": "Failed: DID NOT RAISE ", "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[le-'US/Eastern'-Series-other0]": "Failed: DID NOT RAISE ", "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[le-'US/Eastern'-Series-other2]": "Failed: DID NOT RAISE ", "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[lt-'Asia/Tokyo'-DataFrame-other0]": "Failed: DID NOT RAISE ", "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[lt-'Asia/Tokyo'-DataFrame-other2]": "Failed: DID NOT RAISE ", - "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[lt-'Asia/Tokyo'-Index-other0]": "Failed: DID NOT RAISE ", - "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[lt-'Asia/Tokyo'-Index-other2]": "Failed: DID NOT RAISE ", "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[lt-'Asia/Tokyo'-Series-other0]": "Failed: DID NOT RAISE ", "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[lt-'Asia/Tokyo'-Series-other2]": "Failed: DID NOT RAISE ", "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[lt-'US/Eastern'-DataFrame-other0]": "Failed: DID NOT RAISE ", "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[lt-'US/Eastern'-DataFrame-other2]": "Failed: DID NOT RAISE ", - 
"tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[lt-'US/Eastern'-Index-other0]": "Failed: DID NOT RAISE ", - "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[lt-'US/Eastern'-Index-other2]": "Failed: DID NOT RAISE ", "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[lt-'US/Eastern'-Series-other0]": "Failed: DID NOT RAISE ", "tests/arithmetic/test_datetime64.py::TestDatetimeIndexComparisons::test_scalar_comparison_tzawareness[lt-'US/Eastern'-Series-other2]": "Failed: DID NOT RAISE ", "tests/arithmetic/test_datetime64.py::TestTimestampSeriesArithmetic::test_operators_datetimelike_with_timezones": "AssertionError: Attributes of Series are different", @@ -1128,7 +1112,6 @@ def pytest_unconfigure(config): "tests/copy_view/test_methods.py::test_transpose": "AssertionError: assert False", "tests/copy_view/test_methods.py::test_truncate[kwargs0]": "TODO: Add a reason for failure", "tests/copy_view/test_methods.py::test_truncate[kwargs1]": "TODO: Add a reason for failure", - "tests/copy_view/test_methods.py::test_tz_convert_localize[tz_convert-Europe/Berlin]": "assert False", "tests/copy_view/test_methods.py::test_tz_convert_localize[tz_localize-None]": "assert False", "tests/copy_view/test_methods.py::test_update_chained_assignment": "TODO: Add a reason for failure", "tests/copy_view/test_methods.py::test_update_frame": "TODO: Add a reason for failure", @@ -2475,7 +2458,6 @@ def pytest_unconfigure(config): "tests/frame/methods/test_to_numpy.py::TestToNumpy::test_to_numpy_copy": "TODO: Add a reason for failure", "tests/frame/methods/test_to_numpy.py::TestToNumpy::test_to_numpy_mixed_dtype_to_str": "TODO: Add a reason for failure", "tests/frame/methods/test_to_records.py::TestDataFrameToRecords::test_to_records_dt64tz_column": "TODO: Add a reason for failure", - 
"tests/frame/methods/test_to_timestamp.py::TestToTimestamp::test_to_timestamp_columns": "AssertionError: Index are different", "tests/frame/methods/test_transpose.py::TestTranspose::test_transpose_get_view_dt64tzget_view": "assert 3 == 1", "tests/frame/methods/test_truncate.py::TestDataFrameTruncate::test_truncate_multiindex[DataFrame]": "TODO: Add a reason for failure", "tests/frame/methods/test_value_counts.py::test_data_frame_value_counts_dropna_false[NAType]": "AssertionError: Series.index level [0] are different", @@ -3048,7 +3030,6 @@ def pytest_unconfigure(config): "tests/frame/test_ufunc.py::test_binary_input_aligns_index[int64]": "TODO: Add a reason for failure", "tests/frame/test_unary.py::TestDataFrameUnaryOperators::test_pos_object_raises": "Failed: DID NOT RAISE ", "tests/frame/test_unary.py::TestDataFrameUnaryOperators::test_pos_raises": "Failed: DID NOT RAISE ", - "tests/generic/test_finalize.py::test_finalize_called[to_period]": "ValueError: You must pass a freq argument as current index has none.", "tests/generic/test_frame.py::TestDataFrame::test_metadata_propagation_indiv": "TODO: Add a reason for failure", "tests/generic/test_generic.py::TestGeneric::test_stdlib_copy_shallow_copies[DataFrame]": "assert False", "tests/generic/test_generic.py::TestGeneric::test_stdlib_copy_shallow_copies[Series]": "assert False", @@ -3446,8 +3427,6 @@ def pytest_unconfigure(config): "tests/groupby/test_counting.py::test_count": "AssertionError: DataFrame are different", "tests/groupby/test_counting.py::test_count_arrow_string_array[string=string[pyarrow]]": "TODO: Add a reason for failure", "tests/groupby/test_counting.py::test_count_arrow_string_array[string=string[python]]": "TODO: Add a reason for failure", - "tests/groupby/test_cumulative.py::test_cummax_datetime": "AssertionError: Attributes of Series are different", - "tests/groupby/test_cumulative.py::test_cummin_datetime": "AssertionError: Attributes of Series are different", 
"tests/groupby/test_cumulative.py::test_cython_api2[False]": "AssertionError: DataFrame are different", "tests/groupby/test_cumulative.py::test_groupby_cumprod_nan_influences_other_columns": "TODO: Add a reason for failure", "tests/groupby/test_cumulative.py::test_numpy_compat[cumprod]": "TODO: Add a reason for failure", @@ -3456,7 +3435,6 @@ def pytest_unconfigure(config): "tests/groupby/test_groupby.py::test_dont_clobber_name_column": "AssertionError: DataFrame are different", "tests/groupby/test_groupby.py::test_group_name_available_in_inference_pass": "TODO: Add a reason for failure", "tests/groupby/test_groupby.py::test_group_on_two_row_multiindex_returns_one_tuple_key": "TODO: Add a reason for failure", - "tests/groupby/test_groupby.py::test_groupby_agg_ohlc_non_first": "AssertionError: (, None)", "tests/groupby/test_groupby.py::test_groupby_all_nan_groups_drop": "TODO: Add a reason for failure", "tests/groupby/test_groupby.py::test_groupby_column_index_name_lost[shift]": "TODO: Add a reason for failure", "tests/groupby/test_groupby.py::test_groupby_column_index_name_lost[sum]": "TODO: Add a reason for failure", @@ -4585,7 +4563,6 @@ def pytest_unconfigure(config): "tests/indexes/test_datetimelike.py::TestDatetimeLike::test_argsort_matches_array[simple_index1]": "TODO: Add a reason for failure", "tests/indexes/test_datetimelike.py::TestDatetimeLike::test_argsort_matches_array[simple_index2]": "TODO: Add a reason for failure", "tests/indexes/test_index_new.py::TestIndexConstructorInference::test_constructor_datetimes_mixed_tzs": "AssertionError: Index are different", - "tests/indexes/test_index_new.py::TestIndexConstructorInference::test_infer_nat[nan1]": "AssertionError: assert (dtype(', None)", - "tests/resample/test_base.py::test_resample_empty_sum_string[string=str[pyarrow]-1]": "AssertionError: (<20 * Seconds>, None)", - "tests/resample/test_base.py::test_resample_empty_sum_string[string=str[python]-0]": "AssertionError: (<20 * Seconds>, None)", - 
"tests/resample/test_base.py::test_resample_empty_sum_string[string=str[python]-1]": "AssertionError: (<20 * Seconds>, None)", - "tests/resample/test_base.py::test_resample_empty_sum_string[string=string[pyarrow]-0]": "AssertionError: (<20 * Seconds>, None)", - "tests/resample/test_base.py::test_resample_empty_sum_string[string=string[pyarrow]-1]": "AssertionError: (<20 * Seconds>, None)", - "tests/resample/test_base.py::test_resample_empty_sum_string[string=string[python]-0]": "AssertionError: (<20 * Seconds>, None)", - "tests/resample/test_base.py::test_resample_empty_sum_string[string=string[python]-1]": "AssertionError: (<20 * Seconds>, None)", - "tests/resample/test_base.py::test_resample_size_empty_dataframe[D-index0]": "AssertionError: Series.index are different", - "tests/resample/test_base.py::test_resample_size_empty_dataframe[h-index0]": "AssertionError: Series.index are different", "tests/resample/test_datetime_index.py::test_resample_anchored_intraday2[ms]": "TODO: Add a reason for failure", "tests/resample/test_datetime_index.py::test_resample_anchored_intraday2[ns]": "TODO: Add a reason for failure", "tests/resample/test_datetime_index.py::test_resample_anchored_intraday2[s]": "TODO: Add a reason for failure", @@ -5874,37 +5795,15 @@ def pytest_unconfigure(config): "tests/resample/test_datetime_index.py::test_resample_origin_epoch_with_tz_day_vs_24h[ns]": "TODO: Add a reason for failure", "tests/resample/test_datetime_index.py::test_resample_origin_epoch_with_tz_day_vs_24h[s]": "TODO: Add a reason for failure", "tests/resample/test_datetime_index.py::test_resample_origin_epoch_with_tz_day_vs_24h[us]": "TODO: Add a reason for failure", - "tests/resample/test_datetime_index.py::test_resample_rounding[ms]": "TODO: Add a reason for failure", - "tests/resample/test_datetime_index.py::test_resample_rounding[ns]": "TODO: Add a reason for failure", - "tests/resample/test_datetime_index.py::test_resample_rounding[us]": "TODO: Add a reason for failure", - 
"tests/resample/test_datetime_index.py::test_resample_size[ms]": "TODO: Add a reason for failure", - "tests/resample/test_datetime_index.py::test_resample_size[ns]": "TODO: Add a reason for failure", - "tests/resample/test_datetime_index.py::test_resample_size[s]": "TODO: Add a reason for failure", - "tests/resample/test_datetime_index.py::test_resample_size[us]": "TODO: Add a reason for failure", - "tests/resample/test_datetime_index.py::test_resample_upsampling_picked_but_not_correct[ms]": "TODO: Add a reason for failure", - "tests/resample/test_datetime_index.py::test_resample_upsampling_picked_but_not_correct[ns]": "TODO: Add a reason for failure", - "tests/resample/test_datetime_index.py::test_resample_upsampling_picked_but_not_correct[us]": "TODO: Add a reason for failure", - "tests/resample/test_period_index.py::TestPeriodIndex::test_evenly_divisible_with_no_extra_bins": "AssertionError: DataFrame.index are different", "tests/resample/test_period_index.py::TestPeriodIndex::test_evenly_divisible_with_no_extra_bins2": "TODO: Add a reason for failure", - "tests/resample/test_period_index.py::TestPeriodIndex::test_resample_tz_localized2": "AssertionError: (, None)", "tests/resample/test_period_index.py::TestPeriodIndex::test_resample_tz_localized[ms]": "TODO: Add a reason for failure", "tests/resample/test_period_index.py::TestPeriodIndex::test_resample_tz_localized[ns]": "TODO: Add a reason for failure", "tests/resample/test_period_index.py::TestPeriodIndex::test_resample_tz_localized[s]": "TODO: Add a reason for failure", "tests/resample/test_period_index.py::TestPeriodIndex::test_resample_tz_localized[us]": "TODO: Add a reason for failure", "tests/resample/test_period_index.py::TestPeriodIndex::test_with_local_timezone[tz0]": "TODO: Add a reason for failure", "tests/resample/test_period_index.py::TestPeriodIndex::test_with_local_timezone[tz1]": "TODO: Add a reason for failure", - 
"tests/resample/test_resample_api.py::test_agg_both_mean_std_named_result[df_col_resample-agg0]": "TODO: Add a reason for failure", - "tests/resample/test_resample_api.py::test_agg_both_mean_std_named_result[df_col_resample-agg1]": "TODO: Add a reason for failure", - "tests/resample/test_resample_api.py::test_agg_both_mean_std_named_result[df_col_resample-agg2]": "TODO: Add a reason for failure", - "tests/resample/test_resample_api.py::test_agg_both_mean_std_named_result[df_grouper_resample-agg0]": "TODO: Add a reason for failure", "tests/resample/test_resample_api.py::test_agg_both_mean_std_named_result[df_grouper_resample-agg1]": "TODO: Add a reason for failure", "tests/resample/test_resample_api.py::test_agg_both_mean_std_named_result[df_grouper_resample-agg2]": "TODO: Add a reason for failure", - "tests/resample/test_resample_api.py::test_agg_both_mean_std_named_result[df_mult_resample-agg0]": "TODO: Add a reason for failure", - "tests/resample/test_resample_api.py::test_agg_both_mean_std_named_result[df_mult_resample-agg1]": "TODO: Add a reason for failure", - "tests/resample/test_resample_api.py::test_agg_both_mean_std_named_result[df_mult_resample-agg2]": "TODO: Add a reason for failure", - "tests/resample/test_resample_api.py::test_agg_both_mean_std_named_result[df_resample-agg0]": "TODO: Add a reason for failure", - "tests/resample/test_resample_api.py::test_agg_both_mean_std_named_result[df_resample-agg1]": "TODO: Add a reason for failure", - "tests/resample/test_resample_api.py::test_agg_both_mean_std_named_result[df_resample-agg2]": "TODO: Add a reason for failure", "tests/resample/test_resample_api.py::test_agg_both_mean_sum[df_col_resample-agg1]": "TODO: Add a reason for failure", "tests/resample/test_resample_api.py::test_agg_both_mean_sum[df_grouper_resample-agg1]": "TODO: Add a reason for failure", "tests/resample/test_resample_api.py::test_agg_both_mean_sum[df_mult_resample-agg1]": "TODO: Add a reason for failure", @@ -5916,40 +5815,15 @@ def 
pytest_unconfigure(config): "tests/resample/test_resample_api.py::test_agg_specificationerror_series[df_grouper_resample-agg0]": "TODO: Add a reason for failure", "tests/resample/test_resample_api.py::test_agg_specificationerror_series[df_mult_resample-agg0]": "TODO: Add a reason for failure", "tests/resample/test_resample_api.py::test_agg_specificationerror_series[df_resample-agg0]": "TODO: Add a reason for failure", - "tests/resample/test_resample_api.py::test_agg_with_lambda[df_col_resample-agg0]": "TODO: Add a reason for failure", - "tests/resample/test_resample_api.py::test_agg_with_lambda[df_col_resample-agg1]": "TODO: Add a reason for failure", - "tests/resample/test_resample_api.py::test_agg_with_lambda[df_col_resample-agg2]": "TODO: Add a reason for failure", - "tests/resample/test_resample_api.py::test_agg_with_lambda[df_grouper_resample-agg0]": "TODO: Add a reason for failure", "tests/resample/test_resample_api.py::test_agg_with_lambda[df_grouper_resample-agg1]": "TODO: Add a reason for failure", "tests/resample/test_resample_api.py::test_agg_with_lambda[df_grouper_resample-agg2]": "TODO: Add a reason for failure", - "tests/resample/test_resample_api.py::test_agg_with_lambda[df_mult_resample-agg0]": "TODO: Add a reason for failure", - "tests/resample/test_resample_api.py::test_agg_with_lambda[df_mult_resample-agg1]": "TODO: Add a reason for failure", - "tests/resample/test_resample_api.py::test_agg_with_lambda[df_mult_resample-agg2]": "TODO: Add a reason for failure", - "tests/resample/test_resample_api.py::test_agg_with_lambda[df_resample-agg0]": "TODO: Add a reason for failure", - "tests/resample/test_resample_api.py::test_agg_with_lambda[df_resample-agg1]": "TODO: Add a reason for failure", - "tests/resample/test_resample_api.py::test_agg_with_lambda[df_resample-agg2]": "TODO: Add a reason for failure", "tests/resample/test_resample_api.py::test_groupby_resample_api": "TODO: Add a reason for failure", 
"tests/resample/test_resample_api.py::test_transform_frame[None]": "TODO: Add a reason for failure", "tests/resample/test_resample_api.py::test_transform_frame[date]": "TODO: Add a reason for failure", - "tests/resample/test_resampler_grouper.py::test_apply_columns_multilevel": "AssertionError: (, None)", "tests/resample/test_resampler_grouper.py::test_deferred_with_groupby": "TODO: Add a reason for failure", "tests/resample/test_resampler_grouper.py::test_getitem": "AssertionError: Series.index are different", - "tests/resample/test_time_grouper.py::test_aggregate_normal[count]": "TODO: Add a reason for failure", - "tests/resample/test_time_grouper.py::test_aggregate_normal[first]": "TODO: Add a reason for failure", - "tests/resample/test_time_grouper.py::test_aggregate_normal[last]": "TODO: Add a reason for failure", - "tests/resample/test_time_grouper.py::test_aggregate_normal[max]": "TODO: Add a reason for failure", - "tests/resample/test_time_grouper.py::test_aggregate_normal[mean]": "TODO: Add a reason for failure", - "tests/resample/test_time_grouper.py::test_aggregate_normal[median]": "TODO: Add a reason for failure", - "tests/resample/test_time_grouper.py::test_aggregate_normal[min]": "TODO: Add a reason for failure", - "tests/resample/test_time_grouper.py::test_aggregate_normal[nunique]": "TODO: Add a reason for failure", "tests/resample/test_time_grouper.py::test_aggregate_normal[ohlc]": "TODO: Add a reason for failure", - "tests/resample/test_time_grouper.py::test_aggregate_normal[prod]": "TODO: Add a reason for failure", - "tests/resample/test_time_grouper.py::test_aggregate_normal[quantile]": "TODO: Add a reason for failure", "tests/resample/test_time_grouper.py::test_aggregate_normal[sem]": "TODO: Add a reason for failure", - "tests/resample/test_time_grouper.py::test_aggregate_normal[size]": "TODO: Add a reason for failure", - "tests/resample/test_time_grouper.py::test_aggregate_normal[std]": "TODO: Add a reason for failure", - 
"tests/resample/test_time_grouper.py::test_aggregate_normal[sum]": "TODO: Add a reason for failure", - "tests/resample/test_time_grouper.py::test_aggregate_normal[var]": "TODO: Add a reason for failure", "tests/resample/test_time_grouper.py::test_apply_iteration": "TODO: Add a reason for failure", "tests/resample/test_time_grouper.py::test_groupby_resample_interpolate_with_apply_syntax": "AssertionError: DataFrame.index level [1] are different", "tests/resample/test_time_grouper.py::test_groupby_resample_interpolate_with_apply_syntax_off_grid": "AssertionError: DataFrame.index level [1] are different", @@ -6236,7 +6110,6 @@ def pytest_unconfigure(config): "tests/reshape/test_cut.py::test_cut_pass_series_name_to_factor": "TODO: Add a reason for failure", "tests/reshape/test_cut.py::test_cut_unordered_labels[data0-bins0-labels0-expected_codes0-expected_labels0]": "TODO: Add a reason for failure", "tests/reshape/test_cut.py::test_cut_unordered_labels[data1-bins1-labels1-expected_codes1-expected_labels1]": "TODO: Add a reason for failure", - "tests/reshape/test_cut.py::test_datetime_bin[1]": "AssertionError: Series category.categories are different", "tests/reshape/test_cut.py::test_datetime_nan_error": "TODO: Add a reason for failure", "tests/reshape/test_cut.py::test_label_precision": "TODO: Add a reason for failure", "tests/reshape/test_cut.py::test_no_right": "TODO: Add a reason for failure", @@ -7286,7 +7159,6 @@ def pytest_unconfigure(config): "tests/strings/test_split_partition.py::test_split_nan_expand[string=object]": "AssertionError: DataFrame.iloc[:, 0] (column name='0') are different", "tests/strings/test_strings.py::test_index_str_accessor_multiindex_raises": "TODO: Add a reason for failure", "tests/strings/test_strings.py::test_split_join_roundtrip[string=string[pyarrow]]": "AssertionError: Series are different", - "tests/strings/test_strings.py::test_split_join_roundtrip[string=string[python]]": "AssertionError: Series are different", 
"tests/strings/test_strings.py::test_string_slice_out_of_bounds[string=object]": "AssertionError: Series are different", "tests/test_algos.py::TestFactorize::test_basic": "TODO: Add a reason for failure", "tests/test_algos.py::TestFactorize::test_datetime64_factorize[False]": "TODO: Add a reason for failure", @@ -7449,10 +7321,6 @@ def pytest_unconfigure(config): "tests/tools/test_to_datetime.py::TestOrigin::test_julian": "AssertionError: Attributes of Series are different", "tests/tools/test_to_datetime.py::TestOrigin::test_to_datetime_out_of_bounds_with_format_arg[%Y-%d-%m %H:%M:%S-None]": "TODO: Add a reason for failure", "tests/tools/test_to_datetime.py::TestOrigin::test_to_datetime_out_of_bounds_with_format_arg[%Y-%m-%d %H:%M:%S-None]": "TODO: Add a reason for failure", - "tests/tools/test_to_datetime.py::TestOrigin::test_to_datetime_out_of_bounds_with_format_arg[None-UserWarning]": "AssertionError: assert 1833 == 2417", - "tests/tools/test_to_datetime.py::TestTimeConversionFormats::test_to_datetime_format_YYYYMMDD_with_none[input_s4]": "AssertionError: Attributes of Series are different", - "tests/tools/test_to_datetime.py::TestTimeConversionFormats::test_to_datetime_format_YYYYMMDD_with_none[input_s5]": "AssertionError: Attributes of Series are different", - "tests/tools/test_to_datetime.py::TestTimeConversionFormats::test_to_datetime_format_YYYYMMDD_with_none[input_s6]": "AssertionError: Attributes of Series are different", "tests/tools/test_to_datetime.py::TestTimeConversionFormats::test_to_datetime_format_weeks[False-2013020-%Y%U%w-2013-01-13]": "AssertionError: assert Timestamp('2013-01-19 00:00:00') == Timestamp('2013-01-13 00:00:00')", "tests/tools/test_to_datetime.py::TestTimeConversionFormats::test_to_datetime_format_weeks[True-2013020-%Y%U%w-2013-01-13]": "AssertionError: assert Timestamp('2013-01-19 00:00:00') == Timestamp('2013-01-13 00:00:00')", "tests/tools/test_to_datetime.py::TestToDatetime::test_datetime_bool[False-False]": "TODO: Add a 
reason for failure", @@ -7464,7 +7332,6 @@ def pytest_unconfigure(config): "tests/tools/test_to_datetime.py::TestToDatetime::test_datetime_invalid_scalar[None-00:01:99]": "TODO: Add a reason for failure", "tests/tools/test_to_datetime.py::TestToDatetime::test_datetime_invalid_scalar[None-a]": "TODO: Add a reason for failure", "tests/tools/test_to_datetime.py::TestToDatetime::test_datetime_outofbounds_scalar[%H:%M:%S-3000/12/11 00:00:00]": "TODO: Add a reason for failure", - "tests/tools/test_to_datetime.py::TestToDatetime::test_datetime_outofbounds_scalar[None-3000/12/11 00:00:00]": "TODO: Add a reason for failure", "tests/tools/test_to_datetime.py::TestToDatetime::test_error_iso_week_year[coerce-Day of the year directive '%j' is not compatible with ISO year directive '%G'. Use '%Y' instead.-1999 50-%G %j]": "TODO: Add a reason for failure", "tests/tools/test_to_datetime.py::TestToDatetime::test_error_iso_week_year[coerce-Day of the year directive '%j' is not compatible with ISO year directive '%G'. Use '%Y' instead.-1999 51 6 256-%G %V %u %j]": "TODO: Add a reason for failure", "tests/tools/test_to_datetime.py::TestToDatetime::test_error_iso_week_year[coerce-ISO week directive '%V' is incompatible with the year directive '%Y'. 
Use the ISO year '%G' instead.-1999 50-%Y %V]": "TODO: Add a reason for failure", @@ -7487,10 +7354,6 @@ def pytest_unconfigure(config): "tests/tools/test_to_datetime.py::TestToDatetime::test_mixed_offsets_with_native_datetime_utc_false_raises": "assert False", "tests/tools/test_to_datetime.py::TestToDatetime::test_to_datetime_arrow[index-None-False]": "AssertionError: assert DatetimeIndex([1965-04-03 00:00:00, 1965-04-17 00:00:00, 1965-05-01 00:00:00,\n 1965-05-...", "tests/tools/test_to_datetime.py::TestToDatetime::test_to_datetime_arrow[index-US/Central-False]": "AssertionError: assert Index([1965-04-03 00:00:00-06:00, 1965-04-17 00:00:00-06:00,\n 1965-05-01 00:00:00-05:00...", - "tests/tools/test_to_datetime.py::TestToDatetime::test_to_datetime_converts_null_like_to_nat[False-input2]": "AssertionError: Attributes of Series are different", - "tests/tools/test_to_datetime.py::TestToDatetime::test_to_datetime_converts_null_like_to_nat[False-input3]": "AssertionError: Attributes of Series are different", - "tests/tools/test_to_datetime.py::TestToDatetime::test_to_datetime_converts_null_like_to_nat[True-input2]": "AssertionError: Attributes of Series are different", - "tests/tools/test_to_datetime.py::TestToDatetime::test_to_datetime_converts_null_like_to_nat[True-input3]": "AssertionError: Attributes of Series are different", "tests/tools/test_to_datetime.py::TestToDatetime::test_to_datetime_dt64d_out_of_bounds[False]": "TODO: Add a reason for failure", "tests/tools/test_to_datetime.py::TestToDatetime::test_to_datetime_dt64d_out_of_bounds[True]": "TODO: Add a reason for failure", "tests/tools/test_to_datetime.py::TestToDatetime::test_to_datetime_dt64s_out_of_ns_bounds[False-coerce-dt0]": "TODO: Add a reason for failure", @@ -7504,24 +7367,10 @@ def pytest_unconfigure(config): "tests/tools/test_to_datetime.py::TestToDatetime::test_to_datetime_dtarr[None]": "TODO: Add a reason for failure", 
"tests/tools/test_to_datetime.py::TestToDatetime::test_to_datetime_dtarr[US/Central]": "TODO: Add a reason for failure", "tests/tools/test_to_datetime.py::TestToDatetime::test_to_datetime_overflow": "TODO: Add a reason for failure", - "tests/tools/test_to_datetime.py::TestToDatetime::test_to_datetime_with_format_out_of_bounds[00010101]": "TODO: Add a reason for failure", - "tests/tools/test_to_datetime.py::TestToDatetime::test_to_datetime_with_format_out_of_bounds[13000101]": "TODO: Add a reason for failure", - "tests/tools/test_to_datetime.py::TestToDatetime::test_to_datetime_with_format_out_of_bounds[30000101]": "TODO: Add a reason for failure", - "tests/tools/test_to_datetime.py::TestToDatetime::test_to_datetime_with_format_out_of_bounds[99990101]": "TODO: Add a reason for failure", "tests/tools/test_to_datetime.py::TestToDatetime::test_week_without_day_and_calendar_year[2017-20-%Y-%W]": "TODO: Add a reason for failure", "tests/tools/test_to_datetime.py::TestToDatetime::test_week_without_day_and_calendar_year[2017-21-%Y-%U]": "TODO: Add a reason for failure", - "tests/tools/test_to_datetime.py::TestToDatetimeDataFrame::test_dataframe_field_aliases[False]": "AssertionError: Attributes of Series are different", - "tests/tools/test_to_datetime.py::TestToDatetimeDataFrame::test_dataframe_field_aliases[True]": "AssertionError: Attributes of Series are different", - "tests/tools/test_to_datetime.py::TestToDatetimeDataFrame::test_dataframe_field_aliases_column_subset[False-unit1]": "AssertionError: Attributes of Series are different", - "tests/tools/test_to_datetime.py::TestToDatetimeDataFrame::test_dataframe_field_aliases_column_subset[True-unit1]": "AssertionError: Attributes of Series are different", - "tests/tools/test_to_datetime.py::TestToDatetimeDataFrame::test_dataframe_int16[False]": "AssertionError: Attributes of Series are different", - "tests/tools/test_to_datetime.py::TestToDatetimeDataFrame::test_dataframe_int16[True]": "AssertionError: Attributes of 
Series are different", - "tests/tools/test_to_datetime.py::TestToDatetimeDataFrame::test_dataframe_mixed[False]": "AssertionError: Attributes of Series are different", - "tests/tools/test_to_datetime.py::TestToDatetimeDataFrame::test_dataframe_mixed[True]": "AssertionError: Attributes of Series are different", "tests/tools/test_to_datetime.py::TestToDatetimeDataFrame::test_dataframe_str_dtype[False]": "TODO: Add a reason for failure", "tests/tools/test_to_datetime.py::TestToDatetimeDataFrame::test_dataframe_str_dtype[True]": "TODO: Add a reason for failure", - "tests/tools/test_to_datetime.py::TestToDatetimeMisc::test_string_na_nat_conversion_with_name[False]": "AssertionError: Attributes of Series are different", - "tests/tools/test_to_datetime.py::TestToDatetimeMisc::test_string_na_nat_conversion_with_name[True]": "AssertionError: Attributes of Series are different", "tests/tools/test_to_datetime.py::TestToDatetimeMisc::test_to_datetime_barely_out_of_bounds": "TODO: Add a reason for failure", "tests/tools/test_to_datetime.py::TestToDatetimeMisc::test_to_datetime_iso8601_exact_fails[2012-01-01 10-%Y-%m-%d]": "TODO: Add a reason for failure", "tests/tools/test_to_datetime.py::TestToDatetimeMisc::test_to_datetime_iso8601_exact_fails[2012-01-01 10:00-%Y-%m-%d %H]": "TODO: Add a reason for failure", @@ -7741,31 +7590,12 @@ def pytest_unconfigure(config): "tests/window/test_rolling.py::test_closed_fixed_binary_col[False-5]": "TODO: Add a reason for failure", "tests/window/test_rolling.py::test_closed_fixed_binary_col[True-10]": "TODO: Add a reason for failure", "tests/window/test_rolling.py::test_closed_fixed_binary_col[True-5]": "TODO: Add a reason for failure", - "tests/window/test_rolling.py::test_iter_rolling_datetime[expected0-expected_index0-1D]": "AssertionError: (, None)", - "tests/window/test_rolling.py::test_iter_rolling_datetime[expected1-expected_index1-2D]": "AssertionError: (, None)", - 
"tests/window/test_rolling.py::test_iter_rolling_datetime[expected2-expected_index2-3D]": "AssertionError: (, None)", "tests/window/test_rolling.py::test_rolling_non_monotonic[mean-expected1]": "TODO: Add a reason for failure", "tests/window/test_rolling.py::test_rolling_non_monotonic[sum-expected2]": "TODO: Add a reason for failure", "tests/window/test_rolling.py::test_rolling_non_monotonic[var-expected0]": "TODO: Add a reason for failure", "tests/window/test_rolling.py::test_rolling_var_same_value_count_logic[values0-3-1-expected0]": "TODO: Add a reason for failure", "tests/window/test_rolling.py::test_variable_window_nonunique[DataFrame-right-expected2]": "TODO: Add a reason for failure", "tests/window/test_rolling.py::test_variable_window_nonunique[Series-right-expected2]": "TODO: Add a reason for failure", - "tests/window/test_rolling_functions.py::test_rolling_max_gh6297[10]": "TODO: Add a reason for failure", - "tests/window/test_rolling_functions.py::test_rolling_max_gh6297[1]": "TODO: Add a reason for failure", - "tests/window/test_rolling_functions.py::test_rolling_max_gh6297[2]": "TODO: Add a reason for failure", - "tests/window/test_rolling_functions.py::test_rolling_max_gh6297[5]": "TODO: Add a reason for failure", - "tests/window/test_rolling_functions.py::test_rolling_max_gh6297[None]": "TODO: Add a reason for failure", - "tests/window/test_rolling_functions.py::test_rolling_max_resample[10]": "TODO: Add a reason for failure", - "tests/window/test_rolling_functions.py::test_rolling_max_resample[1]": "TODO: Add a reason for failure", - "tests/window/test_rolling_functions.py::test_rolling_max_resample[2]": "TODO: Add a reason for failure", - "tests/window/test_rolling_functions.py::test_rolling_max_resample[5]": "TODO: Add a reason for failure", - "tests/window/test_rolling_functions.py::test_rolling_max_resample[None]": "TODO: Add a reason for failure", - "tests/window/test_rolling_functions.py::test_rolling_median_resample": "TODO: Add a reason for 
failure", - "tests/window/test_rolling_functions.py::test_rolling_min_resample[10]": "TODO: Add a reason for failure", - "tests/window/test_rolling_functions.py::test_rolling_min_resample[1]": "TODO: Add a reason for failure", - "tests/window/test_rolling_functions.py::test_rolling_min_resample[2]": "TODO: Add a reason for failure", - "tests/window/test_rolling_functions.py::test_rolling_min_resample[5]": "TODO: Add a reason for failure", - "tests/window/test_rolling_functions.py::test_rolling_min_resample[None]": "TODO: Add a reason for failure", "tests/window/test_timeseries_window.py::TestRollingTS::test_invalid_minp[1.0]": "TODO: Add a reason for failure", "tests/window/test_timeseries_window.py::TestRollingTS::test_invalid_minp[foo]": "TODO: Add a reason for failure", "tests/window/test_timeseries_window.py::TestRollingTS::test_invalid_minp[minp2]": "TODO: Add a reason for failure", diff --git a/python/cudf/cudf/tests/general_functions/test_to_datetime.py b/python/cudf/cudf/tests/general_functions/test_to_datetime.py index 72a159f148c..dc47cf1ea86 100644 --- a/python/cudf/cudf/tests/general_functions/test_to_datetime.py +++ b/python/cudf/cudf/tests/general_functions/test_to_datetime.py @@ -308,3 +308,102 @@ def test_to_datetime_errors_non_scalar_not_implemented(errors): def test_to_datetime_errors_ignore_deprecated(): with pytest.warns(FutureWarning): cudf.to_datetime("2001-01-01 00:04:45", errors="ignore") + + +@pytest.mark.parametrize( + "data", + [ + # all-None object inputs land on [s] in pandas 3 — see + # pandas-dev/pandas#55901 (NPY_FR_GENERIC fallback to "s"). 
+ pd.Series([None, None, None]), + pd.Series([None] * 5), + pd.Series([], dtype="object"), + ], +) +def test_to_datetime_all_null_object_returns_seconds(data): + expected = pd.to_datetime(data) + actual = cudf.to_datetime(cudf.from_pandas(data)) + assert actual.dtype == expected.dtype + assert_eq(actual, expected) + + +@pytest.mark.parametrize( + "scalar", + ["1/1/2000", "2020-01-01", "2020-01-01 12:34:56"], +) +def test_to_datetime_scalar_string_returns_us(scalar): + # Scalar string parsing should land on [us] (pandas 3 default). + expected = pd.to_datetime(scalar) + actual = cudf.to_datetime(scalar) + assert actual.unit == expected.unit + assert actual == expected + + +@pytest.mark.parametrize( + "values", + [ + [19801222, 20010112, None], + [19801222, 20010112, np.nan], + [19801222, 20010112], + ], +) +def test_to_datetime_int_with_format_us(values): + # Float-with-format path (triggered when a None/nan widens int -> float) + # must land on [us] regardless of whether the format contains "%f". + expected = pd.to_datetime(values, format="%Y%m%d") + actual = cudf.to_datetime(values, format="%Y%m%d") + assert actual.dtype == expected.dtype + assert_eq(actual, expected, check_exact=False) + + +@pytest.mark.parametrize( + "df", + [ + pd.DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]}), + pd.DataFrame( + { + "year": [2015, 2016], + "month": [2, 3], + "day": [4, 5], + } + ).astype("int16"), + pd.DataFrame( + { + "year": [2015, 2016], + "month": [2, 3], + "day": [4, 5], + "hour": [6, 7], + "minute": [58, 59], + "second": [10, 11], + } + ), + ], +) +def test_to_datetime_dataframe_default_us(df): + # DataFrame -> datetime defaults to [us] in pandas 3 (was [s] in cuDF). 
+ expected = pd.to_datetime(df) + actual = cudf.to_datetime(cudf.from_pandas(df)) + assert actual.dtype == expected.dtype + assert_eq(actual, expected) + + +def test_to_datetime_dataframe_with_ns_field_widens_to_ns(): + # When a ns field is explicitly present, the result must widen to [ns] + # (and the integer factor arithmetic must not lose the trailing ns). + df = pd.DataFrame( + { + "year": [2015, 2016], + "month": [2, 3], + "day": [4, 5], + "hour": [6, 7], + "minute": [58, 59], + "second": [10, 11], + "ms": [1, 1], + "us": [2, 2], + "ns": [3, 3], + } + ) + expected = pd.to_datetime(df) + actual = cudf.to_datetime(cudf.from_pandas(df)) + assert actual.dtype == expected.dtype + assert_eq(actual, expected) diff --git a/python/cudf/cudf/tests/groupby/test_resample.py b/python/cudf/cudf/tests/groupby/test_resample.py index bdd82dc5baa..2c3b9f5aa60 100644 --- a/python/cudf/cudf/tests/groupby/test_resample.py +++ b/python/cudf/cudf/tests/groupby/test_resample.py @@ -156,22 +156,22 @@ def test_dataframe_resample_level(): @pytest.mark.parametrize( - "in_freq, sampling_freq, out_freq", + "in_freq, sampling_freq", [ - ("1ns", "1us", "us"), - ("1us", "10us", "us"), - ("ms", "100us", "us"), - ("ms", "1s", "s"), - ("s", "1min", "s"), - ("1min", "30s", "s"), - ("1D", "10D", "s"), - ("10D", "1D", "s"), + ("1ns", "1us"), + ("1us", "10us"), + ("ms", "100us"), + ("ms", "1s"), + ("s", "1min"), + ("1min", "30s"), + ("1D", "10D"), + ("10D", "1D"), ], ) -def test_resampling_frequency_conversion(in_freq, sampling_freq, out_freq): +def test_resampling_frequency_conversion(in_freq, sampling_freq): rng = np.random.default_rng(seed=0) - # test that we cast to the appropriate frequency - # when resampling: + # Pandas resample preserves the input column's unit; verify cuDF + # matches that behavior across sampling frequencies. 
pdf = pd.DataFrame( { "x": rng.standard_normal(size=100), @@ -183,7 +183,7 @@ def test_resampling_frequency_conversion(in_freq, sampling_freq, out_freq): got = gdf.resample(sampling_freq, on="y").mean() assert_resample_results_equal(expect, got) - assert got.index.dtype == np.dtype(f"datetime64[{out_freq}]") + assert got.index.dtype == pdf["y"].dtype def test_resampling_downsampling_ms(): @@ -198,3 +198,52 @@ def test_resampling_downsampling_ms(): result = gdf.resample("10ms", on="time").mean() result.index = result.index.astype("datetime64[ns]") assert_eq(result, expected, check_freq=False) + + +@pytest.mark.parametrize("input_unit", ["s", "ms", "us", "ns"]) +@pytest.mark.parametrize("freq", ["D", "h", "30min"]) +@pytest.mark.parametrize( + "agg", ["mean", "sum", "min", "max", "first", "last", "count", "var"] +) +def test_resample_empty_preserves_input_unit_and_freq(input_unit, freq, agg): + # Resample on an empty datetime index must preserve the input column's + # unit (pandas behavior; cuDF previously collapsed to [s] for D/h offsets) + # and must keep the offset attached to the result index. + idx = pd.DatetimeIndex([], dtype=f"datetime64[{input_unit}]", name="t") + pser = pd.Series([], index=idx, dtype=float) + gser = cudf.from_pandas(pser) + + expected = getattr(pser.resample(freq), agg)() + actual = getattr(gser.resample(freq), agg)() + + assert actual.index.dtype == expected.index.dtype + assert_eq(actual, expected, check_dtype=False, check_index_type=False) + + +def test_resample_size_matches_pandas_with_empty_buckets(): + # GroupBy.size bypasses _Resampler.agg, so the freq has to be re-attached + # by the size override; empty buckets must come back as 0 (not NaN), and + # the result must be sorted by bin label like pandas. 
+ idx = pd.date_range("2020-01-01", periods=4, freq="1h") + pser = pd.Series(range(4), index=idx) + gser = cudf.from_pandas(pser) + + expected = pser.resample("30min").size() + actual = gser.resample("30min").size() + + assert_eq(actual, expected, check_dtype=False, check_index_type=False) + + # Empty case + idx_empty = pd.DatetimeIndex([], dtype="datetime64[us]", name="t") + pser_empty = pd.Series([], index=idx_empty, dtype=float) + gser_empty = cudf.from_pandas(pser_empty) + + expected_empty = pser_empty.resample("h").size() + actual_empty = gser_empty.resample("h").size() + + assert_eq( + actual_empty, + expected_empty, + check_dtype=False, + check_index_type=False, + ) diff --git a/python/cudf/cudf/tests/indexes/datetimeindex/methods/test_to_pandas.py b/python/cudf/cudf/tests/indexes/datetimeindex/methods/test_to_pandas.py index b2567237ec0..f5331769793 100644 --- a/python/cudf/cudf/tests/indexes/datetimeindex/methods/test_to_pandas.py +++ b/python/cudf/cudf/tests/indexes/datetimeindex/methods/test_to_pandas.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import pandas as pd +import pytest import cudf from cudf.testing import assert_eq @@ -57,3 +58,29 @@ def test_to_pandas_externally_set_stale_freq_matches_pandas_inferred(): assert actual.freq == pd.tseries.frequencies.to_offset(expected_freq) assert_eq(actual.values, pidx.values) + + +@pytest.mark.parametrize("freq", ["D", "h", "30min", "2D", "ME"]) +def test_to_pandas_empty_with_freq_falls_back_to_cached(freq): + # Empty indexes have nothing to infer from, so to_pandas() must fall back + # to the cached freq mapped through DateOffset._maybe_as_fast_pandas_offset + # (matches pandas, which keeps the offset on empty resample/asfreq output). 
+ pidx = pd.DatetimeIndex([], dtype="datetime64[us]", freq=freq, name="t") + gidx = cudf.from_pandas(pidx) + + actual = gidx.to_pandas() + assert actual.freq == pidx.freq + assert actual.dtype == pidx.dtype + assert_eq(actual, pidx) + + +@pytest.mark.parametrize("freq", ["D", "h", "30min"]) +def test_to_pandas_single_element_with_freq_falls_back_to_cached(freq): + # Single-element indexes can't infer freq (pandas inferred_freq is None), + # but the cached freq is still authoritative — preserve it on round-trip. + pidx = pd.DatetimeIndex(["2020-01-01"], freq=freq, name="t") + gidx = cudf.from_pandas(pidx) + + actual = gidx.to_pandas() + assert actual.freq == pidx.freq + assert_eq(actual, pidx) diff --git a/python/cudf/cudf/tests/indexes/datetimeindex/test_constructor.py b/python/cudf/cudf/tests/indexes/datetimeindex/test_constructor.py index 795455aeebe..1e91ffda8b8 100644 --- a/python/cudf/cudf/tests/indexes/datetimeindex/test_constructor.py +++ b/python/cudf/cudf/tests/indexes/datetimeindex/test_constructor.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 @@ -98,3 +98,44 @@ def test_from_pandas_datetimeindex_freq(): actual = cudf.Index(expected) assert_eq(expected, actual) assert actual.freq is not None + + +@pytest.mark.parametrize( + "data", + [ + [], + (), + [None], + [None, None], + pd.Series([], dtype="object"), + ], +) +def test_datetime_index_empty_object_default_dtype(data): + # Pandas's array_to_datetime falls back to "s" precision when no + # concrete (non-NaT) datetime is observed (pandas-dev/pandas#55901). + # cuDF should match so cudf.pandas-wrapped pd.DatetimeIndex([]) doesn't + # diverge from plain pandas. Inputs to this test deliberately carry no + # explicit dtype so cuDF's default-unit logic is what's exercised. 
+ pd_data = data + gd_data = cudf.from_pandas(data) if isinstance(data, pd.Series) else data + expected = pd.DatetimeIndex(pd_data) + actual = cudf.DatetimeIndex(gd_data) + assert actual.dtype == expected.dtype + assert_eq(actual, expected) + + +@pytest.mark.parametrize( + "values", + [ + ["2020-01-01"], + ["2020-01-01", "2020-01-02"], + ["1970-01-01 00:00:00.000000"], + ], +) +def test_datetime_index_string_input_us_default(values): + # Non-empty string parsing should still resolve to [us] (pandas 3 default + # when no nanosecond-precision component is present). + expected = pd.DatetimeIndex(values) + actual = cudf.DatetimeIndex(values) + assert actual.dtype == expected.dtype + assert_eq(actual, expected) From 31732df7d02d13ea53555d5f2a275c85abcf2331 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Tue, 5 May 2026 16:57:45 -0500 Subject: [PATCH 11/36] Expose additional proxy attributes for IntervalArray, Styler, and _MethodProxy (#22374) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Three pandas-tests xfail entries surfaced `AttributeError` failures that were just missing entries in the proxy `additional_attributes` (or on `_MethodProxy` itself). ## Changes - **`IntervalArray` proxy** now exposes `_left` and `_right` (private), matching the existing `_data`/`_mask` plumbing. Fixes `test_series_from_temporary_intervalindex_readonly_data`. - **`Styler` proxy** now exposes `_compute`, `_display_funcs_column_names`, and `_display_funcs_index_names` (all private). Fixes `test_format_index_names_clear[_display_funcs_column_names-kwargs1]` and `[_display_funcs_index_names-kwargs0]`. - **`_MethodProxy`** now exposes `__func__` (forwarded to the slow underlying method), mirroring the existing `__name__` and `__doc__` properties. This is required for callers that introspect classmethod descriptors via `type(x).method.__func__`. ## Conftest Removed three `NODEIDS_THAT_FAIL` entries whose underlying tests now pass. 
## Notes on remaining `AttributeError` xfails Audited the remaining 17 `AttributeError` xfail entries; they fall into a few buckets that need deeper changes (out of scope for this PR): - **Slow-side `isinstance` failures** (`Styler._compute`, `'DataFrame'/'SubclassedDataFrame' object has no attribute 'dtype'`): the slow-side function's `__globals__` was bound at import time before the proxy classes were installed, so `isinstance(proxy_df, real_DataFrame)` is `False` inside the slow module. Needs a different mechanism than `additional_attributes`. - **Mixed-type Series limitations** (`quantile_box`, `quantile_box_nat`, `quantile_date_range`, `quantile_ea_scalar`): cuDF documents that it returns a `DataFrame` instead of a `Series` when the result would be mixed-type — the proxy preserves that type, breaking downstream `assert_series_equal`. - **`.values` returning ndarray for nullable dtypes** (`test_construct_from_dict_ea_series`): pure pandas returns `IntegerArray`; cuDF returns `ndarray`. - **Other one-offs** (`SparseArray.reshape`, abstract `_from_sequence_of_strings`, custom accessor `xyz`, loc setitem datetime parsing, `_fsproxy_slow` proxy-conversion failure): each needs its own targeted fix. 
Co-authored-by: Claude Opus 4.7 (1M context) --- python/cudf/cudf/pandas/_wrappers/pandas.py | 9 +++++++++ python/cudf/cudf/pandas/fast_slow_proxy.py | 4 ++++ python/cudf/cudf/pandas/scripts/conftest-patch.py | 3 --- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/pandas/_wrappers/pandas.py b/python/cudf/cudf/pandas/_wrappers/pandas.py index 0bdc05205f5..f0b0158d87b 100644 --- a/python/cudf/cudf/pandas/_wrappers/pandas.py +++ b/python/cudf/cudf/pandas/_wrappers/pandas.py @@ -1035,6 +1035,8 @@ def Index__setattr__(self, name, value): "__array_ufunc__": _FastSlowAttribute("__array_ufunc__"), "_data": _FastSlowAttribute("_data", private=True), "_mask": _FastSlowAttribute("_mask", private=True), + "_left": _FastSlowAttribute("_left", private=True), + "_right": _FastSlowAttribute("_right", private=True), }, ) @@ -1363,6 +1365,7 @@ def Index__setattr__(self, name, value): "_display_funcs": _FastSlowAttribute( "_display_funcs", private=True ), + "_compute": _FastSlowAttribute("_compute", private=True), "table_styles": _FastSlowAttribute("table_styles"), "columns": _FastSlowAttribute("columns"), "caption": _FastSlowAttribute("caption"), @@ -1372,6 +1375,12 @@ def Index__setattr__(self, name, value): "_display_funcs_index": _FastSlowAttribute( "_display_funcs_index", private=True ), + "_display_funcs_column_names": _FastSlowAttribute( + "_display_funcs_column_names", private=True + ), + "_display_funcs_index_names": _FastSlowAttribute( + "_display_funcs_index_names", private=True + ), "uuid": _FastSlowAttribute("uuid"), "hide_index_": _FastSlowAttribute("hide_index_"), "hide_index_names": _FastSlowAttribute("hide_index_names"), diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index 91119a5519d..985cce7c644 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -1125,6 +1125,10 @@ def __dir__(self): def __doc__(self): return 
self._fsproxy_slow.__doc__ + @property + def __func__(self): + return self._fsproxy_slow.__func__ + @property def __name__(self): return self._fsproxy_slow.__name__ diff --git a/python/cudf/cudf/pandas/scripts/conftest-patch.py b/python/cudf/cudf/pandas/scripts/conftest-patch.py index 373e0bf1670..7b1e709496d 100644 --- a/python/cudf/cudf/pandas/scripts/conftest-patch.py +++ b/python/cudf/cudf/pandas/scripts/conftest-patch.py @@ -766,7 +766,6 @@ def pytest_unconfigure(config): "tests/copy_view/index/test_index.py::test_index_to_frame": "TODO: Add a reason for failure", "tests/copy_view/index/test_index.py::test_index_values": "TODO: Add a reason for failure", "tests/copy_view/index/test_index.py::test_set_index_series": "TODO: Add a reason for failure", - "tests/copy_view/index/test_intervalindex.py::test_series_from_temporary_intervalindex_readonly_data": "AttributeError: 'IntervalArray' object has no attribute '_left'. Did you mean: 'left'?", "tests/copy_view/test_array.py::test_dataframe_array_ea_dtypes[np.array]": "AssertionError: assert False", "tests/copy_view/test_array.py::test_dataframe_array_ea_dtypes[np.asarray-dtype]": "AssertionError: assert False", "tests/copy_view/test_array.py::test_dataframe_array_ea_dtypes[np.asarray]": "AssertionError: assert False", @@ -5152,8 +5151,6 @@ def pytest_unconfigure(config): "tests/io/excel/test_writers.py::TestExcelWriterEngineTests::test_register_writer": "TODO: Add a reason for failure", "tests/io/excel/test_writers.py::TestFSPath::test_excelfile_fspath": "TODO: Add a reason for failure", "tests/io/formats/style/test_format.py::test_format_clear[format_index-_display_funcs_columns-kwargs2]": "TODO: Add a reason for failure", - "tests/io/formats/style/test_format.py::test_format_index_names_clear[_display_funcs_column_names-kwargs1]": "AttributeError: 'Styler' object has no attribute '_display_funcs_column_names'", - 
"tests/io/formats/style/test_format.py::test_format_index_names_clear[_display_funcs_index_names-kwargs0]": "AttributeError: 'Styler' object has no attribute '_display_funcs_index_names'", "tests/io/formats/style/test_html.py::test_from_custom_template_style": "TODO: Add a reason for failure", "tests/io/formats/style/test_html.py::test_from_custom_template_table": "TODO: Add a reason for failure", "tests/io/formats/style/test_html.py::test_replaced_css_class_names": "TODO: Add a reason for failure", From 4aa57e5091b766221ac6f54f4ecfede71c40b254 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Tue, 5 May 2026 23:25:55 +0200 Subject: [PATCH 12/36] Multi-rank fixes for cudf-polars streaming (#22361) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix bugs that appear when running with `num_ranks > 1`, where client-side `pl.concat(per_rank_outputs)` exposes assumptions that do not hold under single-rank execution. These were all discovered while working on multi-rank tests. **NB:** Please take a close look during review, as I’m still a bit unfamiliar with the IR part of cudf-polars. Authors: - Mads R. B. 
Kristensen (https://github.com/madsbk) Approvers: - Matthew Murray (https://github.com/Matt711) - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/cudf/pull/22361 --- .../experimental/rapidsmpf/join.py | 12 ++++- .../experimental/rapidsmpf/union.py | 46 ++++++++++++------- .../cudf_polars/experimental/select.py | 8 +++- .../tests/experimental/test_dataframescan.py | 9 ++++ python/cudf_polars/tests/test_groupby.py | 9 +++- 5 files changed, 62 insertions(+), 22 deletions(-) diff --git a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/join.py b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/join.py index abb2e7082f0..b36b07342ce 100644 --- a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/join.py +++ b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/join.py @@ -205,9 +205,19 @@ async def _collect_small_side_for_broadcast( for s_id in range(len(chunks)): inserter.insert(s_id, chunks.pop(0)) stream = ir_context.get_cuda_stream() + gathered = await allgather.extract_concatenated(stream) + # When every rank inserted zero chunks, the AllGather has no schema + # to infer and returns a 0-column table. Substitute a properly typed + # empty table for the small side so downstream joins still match the + # expected schema. 
+ table = ( + empty_table_chunk(ir, context, stream).table_view() + if gathered.num_columns() == 0 and len(ir.schema) > 0 + else gathered + ) dfs = [ DataFrame.from_table( - await allgather.extract_concatenated(stream), + table, list(ir.schema.keys()), list(ir.schema.values()), stream, diff --git a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/union.py b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/union.py index b4cb6a922b9..2484620234d 100644 --- a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/union.py +++ b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/union.py @@ -24,6 +24,7 @@ ) if TYPE_CHECKING: + from rapidsmpf.communicator.communicator import Communicator from rapidsmpf.streaming.core.channel import Channel from rapidsmpf.streaming.core.context import Context @@ -34,6 +35,7 @@ @define_actor() async def union_node( context: Context, + comm: Communicator, ir: Union, ir_context: IRExecutionContext, ch_out: Channel[TableChunk], @@ -46,6 +48,9 @@ async def union_node( ---------- context The rapidsmpf context. + comm + The communicator. Used to suppress duplicated children's chunks on + non-root ranks so they aren't emitted twice cluster-wide. ir The Union IR node. ir_context @@ -61,14 +66,19 @@ async def union_node( # Merge and forward metadata. # Union loses partitioning/ordering info since sources may differ. # TODO: Warn users that Union does NOT preserve order? - total_local_count = 0 - duplicated = True metadata = await gather_in_task_group( *(recv_metadata(ch, context) for ch in chs_in) ) - for meta in metadata: - total_local_count += meta.local_count - duplicated = duplicated and meta.duplicated + # When a child has duplicated=True, every rank has produced the same + # data and only rank 0 should forward it -- otherwise the downstream + # client-side concat would over-count by `nranks - 1` for each + # duplicated chunk. 
+ skip = tuple(meta.duplicated and comm.rank != 0 for meta in metadata) + total_local_count = sum( + 0 if drop else meta.local_count + for meta, drop in zip(metadata, skip, strict=True) + ) + duplicated = all(meta.duplicated for meta in metadata) await send_metadata( ch_out, context, @@ -79,21 +89,22 @@ async def union_node( ) seq_num_offset = 0 - for ch_in in chs_in: + for ch_in, drop in zip(chs_in, skip, strict=True): num_ch_chunks = 0 while (msg := await ch_in.recv(context)) is not None: - num_ch_chunks += 1 - await ch_out.send( - context, - Message( - msg.sequence_number + seq_num_offset, - TableChunk.from_message( - msg, br=context.br() - ).make_available_and_spill( - context.br(), allow_overbooking=True + if not drop: + await ch_out.send( + context, + Message( + msg.sequence_number + seq_num_offset, + TableChunk.from_message( + msg, br=context.br() + ).make_available_and_spill( + context.br(), allow_overbooking=True + ), ), - ), - ) + ) + num_ch_chunks += 1 seq_num_offset += num_ch_chunks await ch_out.drain(context) @@ -116,6 +127,7 @@ def _( nodes[ir] = [ union_node( rec.state["context"], + rec.state["comm"], ir, rec.state["ir_context"], channels[ir].reserve_input_slot(), diff --git a/python/cudf_polars/cudf_polars/experimental/select.py b/python/cudf_polars/cudf_polars/experimental/select.py index 25d0189fdf6..9ab30f9be13 100644 --- a/python/cudf_polars/cudf_polars/experimental/select.py +++ b/python/cudf_polars/cudf_polars/experimental/select.py @@ -431,13 +431,17 @@ def _( ) named_expr = expr.NamedExpr(ir.exprs[0].name or "len", lit_expr) + # Use Empty as the input so the streaming network's metadata flows + # `duplicated=True` end-to-end. Without that, every rank emits the + # literal once and the client concatenates N copies. 
+ input_ir: IR = Empty({}) new_node = Select( {named_expr.name: named_expr.value.dtype}, [named_expr], should_broadcast=True, - df=child, + df=input_ir, ) - partition_info[new_node] = PartitionInfo(count=1) + partition_info[input_ir] = partition_info[new_node] = PartitionInfo(count=1) return new_node, partition_info if not any( diff --git a/python/cudf_polars/tests/experimental/test_dataframescan.py b/python/cudf_polars/tests/experimental/test_dataframescan.py index 57684734fea..dbf22848824 100644 --- a/python/cudf_polars/tests/experimental/test_dataframescan.py +++ b/python/cudf_polars/tests/experimental/test_dataframescan.py @@ -60,6 +60,15 @@ def test_parallel_dataframescan(df, streaming_engine_factory, max_rows_per_parti assert count == 1 +@pytest.mark.xfail( + reason=( + "Multi-rank Union interleaves child outputs across ranks: client " + "receives [rank0_A, rank0_B, rank1_A, rank1_B] instead of the " + "polars-CPU [A, B]. Tracked in " + "https://github.com/rapidsai/cudf/issues/22376." + ), + strict=False, +) def test_dataframescan_concat(df, streaming_engine_factory): streaming_engine = streaming_engine_factory( StreamingOptions(max_rows_per_partition=1_000), diff --git a/python/cudf_polars/tests/test_groupby.py b/python/cudf_polars/tests/test_groupby.py index a14177b9f0c..f14160a1043 100644 --- a/python/cudf_polars/tests/test_groupby.py +++ b/python/cudf_polars/tests/test_groupby.py @@ -501,8 +501,13 @@ def test_groupby_sum_decimal_null_group(engine: pl.GPUEngine) -> None: @pytest.mark.xfail( - raises=AssertionError, - reason="https://github.com/rapidsai/cudf/issues/19610", + raises=(AssertionError, pl.exceptions.SchemaError), + reason=( + "https://github.com/rapidsai/cudf/issues/19610 — in-memory engine " + "fails with AssertionError (wrong values); multi-rank streaming " + "fails earlier with SchemaError (literal agg yields a divergent " + "schema after cross-rank concat)." 
+ ), ) def test_groupby_literal_agg(engine: pl.GPUEngine): df = pl.LazyFrame({"c0": [True, False]}) From aa23377bbb79b99accbd986ffd72d5a864dbc225 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Tue, 5 May 2026 15:40:28 -0700 Subject: [PATCH 13/36] Fix reading of large CSV files (>64MB) (#22375) Fixes a regression in #22237 where reading a CSV larger than the internal 64 MiB chunk size dropped all rows past the first chunk. Root cause is a misuse of a clamped value to determine the EOF state. This PR fixes the EOF transition so it only happens in the last chunk. Also added a large test - all previous CSV tests were below the chunk threshold. Authors: - Vukasin Milovanovic (https://github.com/vuule) Approvers: - Yunsong Wang (https://github.com/PointKernel) - Basit Ayantunde (https://github.com/lamarrr) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/22375 --- cpp/src/io/csv/csv_gpu.cu | 3 ++- cpp/tests/io/csv_test.cpp | 46 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/cpp/src/io/csv/csv_gpu.cu b/cpp/src/io/csv/csv_gpu.cu index 3a407e03d99..90a112a9a43 100644 --- a/cpp/src/io/csv/csv_gpu.cu +++ b/cpp/src/io/csv/csv_gpu.cu @@ -736,7 +736,8 @@ CUDF_KERNEL void __launch_bounds__(rowofs_block_dim) ctx = make_char_context(ROW_CTX_NONE, ROW_CTX_QUOTE, ROW_CTX_NONE); } } else { - if (cur <= end && cur == data_end) { + bool const is_last_chunk = data_end_off <= data.size(); + if (is_last_chunk && cur <= end && cur == data_end) { // Add a newline at data end (need the extra row offset to infer length of previous row) ctx = make_char_context(ROW_CTX_EOF, ROW_CTX_EOF, ROW_CTX_EOF, 1, 1, 1); } else { diff --git a/cpp/tests/io/csv_test.cpp b/cpp/tests/io/csv_test.cpp index 7be01e80f4f..2cb96215ab6 100644 --- a/cpp/tests/io/csv_test.cpp +++ b/cpp/tests/io/csv_test.cpp @@ -12,9 +12,12 @@ #include #include +#include #include #include #include +#include +#include #include #include 
#include @@ -1307,6 +1310,49 @@ TEST_F(CsvReaderTest, TypeInferenceEmptyDelimitedFields) expect_column_data_equal(std::vector{3, 6}, result_view.column(2)); } +TEST_F(CsvReaderTest, MultiChunkRowCount) +{ + // TODO: add reader option to set chunk size and use it here + constexpr size_t chunk_threshold = 64ull * 1024 * 1024; + std::string const row = "123,456,789\n"; + size_t const num_rows = (chunk_threshold / row.size()) + 1024; + + std::string buffer; + buffer.reserve(num_rows * row.size()); + for (size_t i = 0; i < num_rows; ++i) { + buffer.append(row); + } + + cudf::io::csv_reader_options const in_opts = + cudf::io::csv_reader_options::builder( + cudf::io::source_info{cudf::host_span{ + reinterpret_cast(buffer.data()), buffer.size()}}) + .header(-1); + auto const result = cudf::io::read_csv(in_opts); + auto const result_view = result.tbl->view(); + + ASSERT_EQ(result_view.num_columns(), 3); + EXPECT_EQ(static_cast(result_view.num_rows()), num_rows); + EXPECT_EQ(result_view.column(0).type().id(), type_id::INT64); + EXPECT_EQ(result_view.column(1).type().id(), type_id::INT64); + EXPECT_EQ(result_view.column(2).type().id(), type_id::INT64); + + // All rows are identical, so verifying min == max == expected + auto const i64 = cudf::data_type{cudf::type_id::INT64}; + auto const min_agg = cudf::make_min_aggregation(); + auto const max_agg = cudf::make_max_aggregation(); + auto const all_equal = [&](cudf::column_view const& col, int64_t expected) { + using scalar_t = cudf::numeric_scalar; + auto const min = cudf::reduce(col, *min_agg, i64); + auto const max = cudf::reduce(col, *max_agg, i64); + return static_cast(*min).value() == expected && + static_cast(*max).value() == expected; + }; + EXPECT_TRUE(all_equal(result_view.column(0), 123)); + EXPECT_TRUE(all_equal(result_view.column(1), 456)); + EXPECT_TRUE(all_equal(result_view.column(2), 789)); +} + TEST_F(CsvReaderTest, TypeInferenceThousands) { std::string buffer = "1`400,123,1`234.56\n123`456,123456,12.34"; From 
4aa13f159c0998c1ea80deea4bb98510a4a509da Mon Sep 17 00:00:00 2001 From: Yunsong Wang <12716979+PointKernel@users.noreply.github.com> Date: Tue, 5 May 2026 15:44:29 -0700 Subject: [PATCH 14/36] Add decimal128 to groupby_max_cardinality benchmark (#22162) Closes #22154 This PR adds decimal128 values to the groupby_max_cardinality benchmark. Authors: - Yunsong Wang (https://github.com/PointKernel) Approvers: - Muhammad Haseeb (https://github.com/mhaseeb123) - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/22162 --- cpp/benchmarks/groupby/group_max.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/cpp/benchmarks/groupby/group_max.cpp b/cpp/benchmarks/groupby/group_max.cpp index 29d5645d220..d837cfac665 100644 --- a/cpp/benchmarks/groupby/group_max.cpp +++ b/cpp/benchmarks/groupby/group_max.cpp @@ -1,15 +1,18 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ #include #include +#include #include #include +NVBENCH_DECLARE_TYPE_STRINGS(numeric::decimal128, "decimal128", "decimal128"); + template void groupby_max_helper(nvbench::state& state, cudf::size_type num_rows, @@ -26,8 +29,13 @@ void groupby_max_helper(nvbench::state& state, }(); auto const make_values = [&]() { - auto builder = data_profile_builder().cardinality(0).distribution( - cudf::type_to_id(), distribution_id::UNIFORM, 0, num_rows); + auto builder = data_profile_builder().cardinality(0); + if constexpr (cudf::is_fixed_point()) { + builder.distribution( + cudf::type_to_id(), distribution_id::UNIFORM, 0, num_rows, numeric::scale_type{0}); + } else { + builder.distribution(cudf::type_to_id(), distribution_id::UNIFORM, 0, num_rows); + } if (null_probability > 0) { builder.null_probability(null_probability); } else { @@ -91,7 +99,8 @@ NVBENCH_BENCH_TYPES(bench_groupby_max, .add_float64_axis("null_probability", {0, 0.1, 0.9}) .add_int64_axis("num_aggregations", {1, 2, 4, 8, 16, 32}); -NVBENCH_BENCH_TYPES(bench_groupby_max_cardinality, NVBENCH_TYPE_AXES(nvbench::type_list)) +NVBENCH_BENCH_TYPES(bench_groupby_max_cardinality, + NVBENCH_TYPE_AXES(nvbench::type_list)) .set_name("groupby_max_cardinality") .add_int64_axis("num_aggregations", {1, 2, 3, 4, 5, 6, 7, 8}) .add_int64_axis("cardinality", {20, 50, 100, 1'000, 10'000, 100'000, 1'000'000}); From c5cb03bedb3e8c8f6dfe8ac7cc53fa5441cedb9d Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Wed, 6 May 2026 00:52:34 +0200 Subject: [PATCH 15/36] `StreamingEngine._reset()` (#22364) Generalizes `RayEngine._reset()` to `SPMDEngine` and `DaskEngine`. All three engines now swap the rapidsmpf `Context` and the Polars `GPUEngine` configuration in place, while keeping the communicator and RMM resource alive. The test suite is refactored to take advantage of this. A session-scoped `streaming_engines` fixture bootstraps the SPMD communicator and constructs a shared `SPMDEngine`. 
Per-test `spmd_engine` and `streaming_engine_factory` fixtures call `_reset` on the cached engine instead of constructing a new one. This pattern will be extended to `RayEngine` and `DaskEngine` tests in the future as they are incorporated into the test matrix. Authors: - Mads R. B. Kristensen (https://github.com/madsbk) Approvers: - Peter Andreas Entschev (https://github.com/pentschev) URL: https://github.com/rapidsai/cudf/pull/22364 --- .../experimental/rapidsmpf/frontend/core.py | 94 ++++++ .../experimental/rapidsmpf/frontend/dask.py | 104 ++++++- .../experimental/rapidsmpf/frontend/ray.py | 95 +----- .../experimental/rapidsmpf/frontend/spmd.py | 104 ++++++- .../cudf_polars/testing/engine_utils.py | 46 +-- python/cudf_polars/tests/conftest.py | 104 ++++--- .../experimental/test_all_gather_host_data.py | 16 +- .../tests/experimental/test_allgather.py | 11 +- .../tests/experimental/test_dask.py | 94 ++++++ .../tests/experimental/test_io_multirank.py | 6 +- .../tests/experimental/test_ray.py | 6 +- .../tests/experimental/test_sink.py | 4 +- .../tests/experimental/test_spilling.py | 198 ++++++------ .../tests/experimental/test_spmd.py | 293 +++++++++++------- .../tests/experimental/test_statistics.py | 6 +- 15 files changed, 781 insertions(+), 400 deletions(-) diff --git a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/core.py b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/core.py index 69e6a36dca2..7bc8dabddec 100644 --- a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/core.py +++ b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/core.py @@ -13,6 +13,7 @@ import cuda.core from rapidsmpf.coll import AllGather +from rapidsmpf.config import Options, get_environment_variables from rapidsmpf.memory.packed_data import PackedData from rapidsmpf.statistics import Statistics from rapidsmpf.streaming.core.actor import run_actor_network @@ -50,6 +51,39 @@ T = TypeVar("T") +def 
resolve_rapidsmpf_options(rapidsmpf_options: Options | None) -> Options: + """ + Resolve ``rapidsmpf_options`` and apply cross-frontend defaults. + + If ``None`` is passed, constructs an ``Options`` instance from + environment variables. Then applies defaults that should be consistent + across SPMD, Ray, and Dask. Defaults are set via + ``Options.insert_if_absent``, so explicit values or environment + variables always take precedence. + + Defaults applied: + + - ``num_streaming_threads=4``: moderate worker count for the rapidsmpf + streaming runtime, shared across frontends. + + Parameters + ---------- + rapidsmpf_options + Existing options to resolve, or ``None`` to construct from environment + variables. + + Returns + ------- + Options + Resolved options with cross-frontend defaults applied. + """ + if rapidsmpf_options is None: + rapidsmpf_options = Options(get_environment_variables()) + + rapidsmpf_options.insert_if_absent({"num_streaming_threads": "4"}) + return rapidsmpf_options + + @dataclasses.dataclass(frozen=True) class ClusterInfo: """ @@ -201,6 +235,66 @@ def global_statistics(self, *, clear: bool = False) -> Statistics: """ return Statistics.merge(self.gather_statistics(clear=clear)) + def _reset( + self, + *, + rapidsmpf_options: Options | None = None, + executor_options: dict[str, Any] | None = None, + engine_options: dict[str, Any] | None = None, + ) -> None: + """ + Reset the engine with new options, keeping cluster resources alive. + + The following inputs are fixed at construction time and cannot change: + - ``num_ranks`` + - ``num_py_executors`` (in ``executor_options``) + - ``hardware_binding`` (in ``engine_options``) + - ``memory_resource_config`` (in ``engine_options``) + + Subclasses must override this method. The override should: + 1. Raise :class:`RuntimeError` if the engine is already shut down. + 2. Call ``super()._reset(...)`` to apply the universal option validation below. + 3. Perform the backend-specific rebuild. 
+ + Parameters + ---------- + rapidsmpf_options + New :class:`Options` for each rank's :class:`Context`. + ``None`` is treated as an empty dict. + executor_options + New executor options for the polars ``GPUEngine`` layer. + ``None`` is treated as an empty dict. + engine_options + New engine options for the polars ``GPUEngine`` layer. + ``None`` is treated as an empty dict. + + Raises + ------ + ValueError + If ``executor_options`` or ``engine_options`` contains a + construction-time-only key (see list above), or if a + reserved key is set (via :func:`check_reserved_keys`). + """ + executor_options = executor_options or {} + engine_options = engine_options or {} + check_reserved_keys(executor_options, engine_options) + + _disallowed_exec = {"num_py_executors"} & executor_options.keys() + if _disallowed_exec: + raise ValueError( + f"executor_options keys {sorted(_disallowed_exec)} cannot be " + "changed via _reset(). Construct a fresh engine instead." + ) + _disallowed_engine = { + "hardware_binding", + "memory_resource_config", + } & engine_options.keys() + if _disallowed_engine: + raise ValueError( + f"engine_options keys {sorted(_disallowed_engine)} cannot be " + "changed via _reset(). Construct a fresh engine instead." + ) + def shutdown(self) -> None: """ Shut down engine and release all owned resources. 
diff --git a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/dask.py b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/dask.py index eb32abcf375..49810e998fd 100644 --- a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/dask.py +++ b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/dask.py @@ -19,10 +19,7 @@ import ucxx._lib.libucxx as ucx_api from rapidsmpf import bootstrap from rapidsmpf.communicator.ucxx import barrier, get_root_ucxx_address, new_communicator -from rapidsmpf.config import ( - Options, - get_environment_variables, -) +from rapidsmpf.config import Options from rapidsmpf.progress_thread import ProgressThread from rapidsmpf.rmm_resource_adaptor import RmmResourceAdaptor from rapidsmpf.streaming.core.context import Context @@ -36,6 +33,7 @@ StreamingEngine, check_reserved_keys, evaluate_on_rank, + resolve_rapidsmpf_options, ) from cudf_polars.experimental.rapidsmpf.frontend.hardware_binding import ( HardwareBindingPolicy, @@ -294,6 +292,47 @@ def _teardown_worker( delattr(dask_worker, attr) +def _reset_worker( + rapidsmpf_options_as_bytes: bytes, + *, + uid: str, + dask_worker: distributed.Worker | None = None, +) -> None: + """ + Rebuild the streaming Context with new options. + + Must be called collectively on all workers. A barrier ensures no + worker tears down its Context while peers may still be using it. + + Parameters + ---------- + rapidsmpf_options_as_bytes + Serialized :class:`Options` to install. + uid + Cluster instance identifier used to look up the per-worker context. + dask_worker + Injected by ``distributed`` when called via :meth:`distributed.Client.run`. 
+ """ + assert dask_worker is not None + attr = f"_cudf_polars_mp_context_{uid}" + mp_ctx: _WorkerContext | None = getattr(dask_worker, attr, None) + if mp_ctx is None: + raise RuntimeError(f"_reset_worker called before _setup_worker for uid={uid}") + assert mp_ctx.comm is not None + assert mp_ctx.ctx is not None + # Collective: all ranks idle before any rank tears down its Context. + if mp_ctx.comm.nranks > 1: + barrier(mp_ctx.comm) + # Explicit shutdown is thread-affine. ``distributed.worker.run`` + # dispatches sync work onto the worker's event-loop thread, which is + # the same thread that built the Context in ``_setup_worker``. + mp_ctx.ctx.shutdown() + mp_ctx.ctx = None + options = Options.deserialize(rapidsmpf_options_as_bytes) + mp_ctx.ctx = Context.from_options(mp_ctx.comm.logger, mp_ctx.mr, options) + rmm.mr.set_current_device_resource(mp_ctx.ctx.br().device_mr) + + def _get_statistics( *, clear: bool, uid: str, dask_worker: distributed.Worker | None = None ) -> tuple[int, Statistics]: @@ -563,13 +602,9 @@ def __init__( "memory_resource_config", None ) - rapidsmpf_options = ( + rapidsmpf_options_as_bytes = resolve_rapidsmpf_options( rapidsmpf_options - if rapidsmpf_options is not None - else Options(get_environment_variables()) - ) - rapidsmpf_options.insert_if_absent({"num_streaming_threads": "4"}) - rapidsmpf_options_as_bytes = rapidsmpf_options.serialize() + ).serialize() # Unique identifier for this cluster instance; namespaces the per-worker # attribute so multiple DaskEngine contexts can coexist on the same workers. 
@@ -660,6 +695,55 @@ def __init__( engine_options={**engine_options, "memory_resource": None}, ) + def _reset( + self, + *, + rapidsmpf_options: Options | None = None, + executor_options: dict[str, Any] | None = None, + engine_options: dict[str, Any] | None = None, + ) -> None: + """Reset the engine; see :meth:`StreamingEngine._reset` for the contract.""" + if self._dask_context is None: + raise RuntimeError("Cannot reset a shut-down engine") + super()._reset( + rapidsmpf_options=rapidsmpf_options, + executor_options=executor_options, + engine_options=engine_options, + ) + executor_options = executor_options or {} + engine_options = engine_options or {} + + rapidsmpf_options_as_bytes = resolve_rapidsmpf_options( + rapidsmpf_options + ).serialize() + + ctx = self._dask_context + # Reset all worker Contexts collectively. ``client.run`` blocks + # until every worker's reset returns; the per-worker barrier + # inside :func:`_reset_worker` synchronizes the teardown across + # workers. + ctx.client.run( + functools.partial(_reset_worker, uid=ctx.rapidsmpf_id), + rapidsmpf_options_as_bytes, + ) + + # Re-run ``StreamingEngine.__init__`` on the existing instance to + # reconfigure the polars ``GPUEngine`` layer (``self.config``, + # ``self.device``, etc.) with the new options. Pass the existing + # ``self._exit_stack`` so any registered callbacks survive. 
+ StreamingEngine.__init__( + self, + nranks=self._nranks, + executor_options={ + **executor_options, + "runtime": "rapidsmpf", + "cluster": "dask", + "dask_context": ctx, + }, + engine_options={**engine_options, "memory_resource": None}, + exit_stack=self._exit_stack, + ) + @classmethod def from_options( cls, diff --git a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/ray.py b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/ray.py index 47c88249123..1ba92de3e49 100644 --- a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/ray.py +++ b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/ray.py @@ -14,10 +14,7 @@ import ucxx._lib.libucxx as ucx_api from rapidsmpf import bootstrap from rapidsmpf.communicator.ucxx import barrier, get_root_ucxx_address, new_communicator -from rapidsmpf.config import ( - Options, - get_environment_variables, -) +from rapidsmpf.config import Options from rapidsmpf.progress_thread import ProgressThread from rapidsmpf.rmm_resource_adaptor import RmmResourceAdaptor from rapidsmpf.streaming.core.context import Context @@ -31,6 +28,7 @@ StreamingEngine, check_reserved_keys, evaluate_on_rank, + resolve_rapidsmpf_options, ) from cudf_polars.experimental.rapidsmpf.frontend.hardware_binding import ( HardwareBindingPolicy, @@ -256,18 +254,6 @@ def reset(self, *, rapidsmpf_options_as_bytes: bytes) -> None: """ Rebuild the streaming Context with new options. - Keeps the UCXX communicator, the :class:`RmmResourceAdaptor`, - and the Python thread-pool executor alive — only the rapidsmpf - :class:`Context` is replaced. Used by :meth:`RayEngine._reset` - to amortize actor startup and UCX bootstrap costs across engines - that differ only in streaming options. - - The RMM resource is *not* rebuilt: UCX maps CUDA IPC buffers - against it (notably for pool memory resources) and never - releases those mappings during the application lifetime, so a - rebuilt MR would silently leak pool memory. 
Construct a fresh - :class:`RayEngine` if you need to swap the memory resource. - Must be called collectively on all actors. A barrier ensures no rank tears down its Context while peers may still be using it. @@ -280,7 +266,8 @@ def reset(self, *, rapidsmpf_options_as_bytes: bytes) -> None: raise RuntimeError("reset() requires setup_worker() to have run") assert self._comm is not None # Collective: all ranks idle before any rank tears down its Context. - barrier(self._comm) + if self._comm.nranks > 1: + barrier(self._comm) self._ctx.shutdown() self._ctx = None self._rapidsmpf_options = Options.deserialize(rapidsmpf_options_as_bytes) @@ -544,13 +531,9 @@ def __init__( "memory_resource_config", None ) - rapidsmpf_options = ( + rapidsmpf_options_as_bytes = resolve_rapidsmpf_options( rapidsmpf_options - if rapidsmpf_options is not None - else Options(get_environment_variables()) - ) - rapidsmpf_options.insert_if_absent({"num_streaming_threads": "4"}) - rapidsmpf_options_as_bytes = rapidsmpf_options.serialize() + ).serialize() exit_stack = contextlib.ExitStack() if not ray.is_initialized(): @@ -621,73 +604,23 @@ def _reset( executor_options: dict[str, Any] | None = None, engine_options: dict[str, Any] | None = None, ) -> None: - """ - Reset the engine with new options. - - Fast path for consecutive ``RayEngine`` uses that differ only in - streaming options. Avoids Ray actor startup and UCX bootstrap. - - Replaces engine state in full, similar to :meth:`__init__`. - ``StreamingEngine`` revalidates invariants on each reset, so callers - must pass required options (for example, ``allow_gpu_sharing=True`` - when ``num_ranks > 1``). 
- - The following inputs are fixed at construction time and cannot change: - - ``num_ranks`` - - ``num_py_executors`` (in ``executor_options``) - - ``hardware_binding`` (in ``engine_options``) - - ``memory_resource_config`` (in ``engine_options``) - - ``ray_init_options`` - - Parameters - ---------- - rapidsmpf_options - New :class:`Options` for each actor's ``Context``. Defaults to - ``Options(get_environment_variables())`` if ``None``. - executor_options - Polars ``GPUEngine`` executor options. ``None`` is treated as - an empty dict. - engine_options - Polars ``GPUEngine`` options. ``None`` is treated as an empty - dict. - """ + """Reset the engine; see :meth:`StreamingEngine._reset` for the contract.""" if self._rank_actors is None: raise RuntimeError("Cannot reset a shut-down engine") - + super()._reset( + rapidsmpf_options=rapidsmpf_options, + executor_options=executor_options, + engine_options=engine_options, + ) executor_options = executor_options or {} engine_options = engine_options or {} - check_reserved_keys(executor_options, engine_options) - - # Reject keys that cannot be changed. - _disallowed_exec = {"num_py_executors"} & executor_options.keys() - if _disallowed_exec: - raise ValueError( - f"executor_options keys {sorted(_disallowed_exec)} cannot be " - "changed via _reset(). Construct a fresh RayEngine instead." - ) - _disallowed_engine = { - "hardware_binding", - "memory_resource_config", - } & engine_options.keys() - if _disallowed_engine: - raise ValueError( - f"engine_options keys {sorted(_disallowed_engine)} cannot be " - "changed via _reset(). Construct a fresh RayEngine instead." 
- ) - - rapidsmpf_options = ( + rapidsmpf_options_as_bytes = resolve_rapidsmpf_options( rapidsmpf_options - if rapidsmpf_options is not None - else Options(get_environment_variables()) - ) - rapidsmpf_options.insert_if_absent({"num_streaming_threads": "4"}) - rapidsmpf_options_as_bytes = rapidsmpf_options.serialize() + ).serialize() # Reset all actor Contexts collectively. ``ray.get`` blocks until # every actor's reset returns; the per-actor barrier inside # :meth:`RankActor.reset` synchronizes the teardown across ranks. - # The per-actor RMM resource is kept alive across resets — see - # :meth:`RankActor.reset`. ray.get( [ rank.reset.remote( diff --git a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/spmd.py b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/spmd.py index 0f52f83c1a1..65e3eb8b1e7 100644 --- a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/spmd.py +++ b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/spmd.py @@ -15,7 +15,7 @@ from rapidsmpf.communicator.single import ( new_communicator as single_communicator, ) -from rapidsmpf.config import Options, get_environment_variables +from rapidsmpf.communicator.ucxx import barrier from rapidsmpf.integrations.cudf.partition import unpack_and_concat from rapidsmpf.memory.packed_data import PackedData from rapidsmpf.progress_thread import ProgressThread @@ -36,6 +36,7 @@ all_gather_host_data, check_reserved_keys, evaluate_on_rank, + resolve_rapidsmpf_options, ) from cudf_polars.experimental.rapidsmpf.frontend.hardware_binding import ( HardwareBindingPolicy, @@ -49,6 +50,7 @@ from collections.abc import Callable from rapidsmpf.communicator.communicator import Communicator + from rapidsmpf.config import Options from rapidsmpf.streaming.cudf.channel_metadata import ChannelMetadata from cudf_polars.dsl.ir import IR @@ -341,11 +343,7 @@ def __init__( ) bind_to_gpu(hw_binding) - rapidsmpf_options = ( - rapidsmpf_options - if rapidsmpf_options is not None 
- else Options(get_environment_variables()) - ) + rapidsmpf_options = resolve_rapidsmpf_options(rapidsmpf_options) mr_config: MemoryResourceConfig | None = engine_options.get( "memory_resource_config", None ) @@ -369,17 +367,22 @@ def __init__( ) # else: caller-provided comm; the caller retains ownership - py_executor = ThreadPoolExecutor( + self._py_executor: ThreadPoolExecutor = ThreadPoolExecutor( max_workers=cast(int, executor_options.get("num_py_executors", 8)), thread_name_prefix="spmd-executor", ) + self._mr: RmmResourceAdaptor = mr exit_stack = contextlib.ExitStack() try: - exit_stack.callback(py_executor.shutdown, wait=False) + exit_stack.callback(self._py_executor.shutdown, wait=False) exit_stack.enter_context(set_memory_resource(mr)) - ctx = exit_stack.enter_context( - Context.from_options(comm.logger, mr, rapidsmpf_options) - ) + # ``Context`` is *not* registered as a context manager so that + # :meth:`_reset` can swap it mid-life without leaving the + # exit-stack holding a stale reference. ``_cleanup_ctx`` is + # registered instead — it shuts down whatever ``self._ctx`` is + # at engine-shutdown time (i.e. the latest reset's Context). + ctx = Context.from_options(comm.logger, mr, rapidsmpf_options) + exit_stack.callback(self._cleanup_ctx) self._comm: Communicator | None = comm self._ctx: Context | None = ctx super().__init__( @@ -389,7 +392,7 @@ def __init__( "runtime": "rapidsmpf", "cluster": "spmd", "spmd_context": SPMDContext( - comm=comm, context=ctx, py_executor=py_executor + comm=comm, context=ctx, py_executor=self._py_executor ), }, engine_options={ @@ -402,6 +405,17 @@ def __init__( exit_stack.close() raise + def _cleanup_ctx(self) -> None: + """ + Shut down the current ``self._ctx`` if any; called from exit-stack. + + ``Context.shutdown()`` is idempotent on the rapidsmpf C++ side, so this is + safe even if a prior ``_reset`` already shut down a now-replaced Context. 
+ """ + if self._ctx is not None: + self._ctx.shutdown() + self._ctx = None + @classmethod def from_options(cls, options: StreamingOptions) -> SPMDEngine: """ @@ -436,6 +450,65 @@ def from_options(cls, options: StreamingOptions) -> SPMDEngine: engine_options=options.to_engine_options(), ) + def _reset( + self, + *, + rapidsmpf_options: Options | None = None, + executor_options: dict[str, Any] | None = None, + engine_options: dict[str, Any] | None = None, + ) -> None: + """ + Reset the engine; see :meth:`StreamingEngine._reset` for the contract. + + Must be called collectively on all ranks. A barrier ensures no + rank tears down its Context while peers may still be using it. + """ + if self._ctx is None: + raise RuntimeError("Cannot reset a shut-down engine") + assert self._comm is not None + super()._reset( + rapidsmpf_options=rapidsmpf_options, + executor_options=executor_options, + engine_options=engine_options, + ) + executor_options = executor_options or {} + engine_options = engine_options or {} + rapidsmpf_options = resolve_rapidsmpf_options(rapidsmpf_options) + + # Collective: synchronize all ranks before tearing down the Context. + if self._comm.nranks > 1: + barrier(self._comm) + # Same-thread shutdown, _reset runs on the thread that built the + # Context (the test driver's main thread). The per-engine RMM + # resource is kept alive across resets, see :meth:`_cleanup_ctx`. + self._ctx.shutdown() + self._ctx = Context.from_options(self._comm.logger, self._mr, rapidsmpf_options) + + # Re-run ``StreamingEngine.__init__`` on the existing instance to + # reconfigure the polars ``GPUEngine`` layer (``self.config``, + # ``self.device``, etc.) with the new options. Pass the existing + # ``self._exit_stack`` so any registered callbacks (notably + # ``_cleanup_ctx`` and ``set_memory_resource``) survive. 
+ StreamingEngine.__init__( + self, + nranks=self._comm.nranks, + executor_options={ + **executor_options, + "runtime": "rapidsmpf", + "cluster": "spmd", + "spmd_context": SPMDContext( + comm=self._comm, + context=self._ctx, + py_executor=self._py_executor, + ), + }, + engine_options={ + **engine_options, + "memory_resource": self._ctx.br().device_mr, + }, + exit_stack=self._exit_stack, + ) + @property def rank(self) -> int: """ @@ -536,9 +609,14 @@ def shutdown(self) -> None: """ if self._ctx is None: return # already shut down + + # Order matters: ``super().shutdown()`` closes ``self._exit_stack``, + # which invokes ``self._cleanup_ctx``. That requires ``self._ctx`` to + # still be set so the rapidsmpf Context can be shut down correctly. + # Clear the references only after shutdown completes. + super().shutdown() self._comm = None self._ctx = None - super().shutdown() def _run(self, func: Callable[..., T], *args: Any, **kwargs: Any) -> list[T]: data = json.dumps(func(*args, **kwargs)).encode() diff --git a/python/cudf_polars/cudf_polars/testing/engine_utils.py b/python/cudf_polars/cudf_polars/testing/engine_utils.py index ec216dc6d88..c36bcf2ed27 100644 --- a/python/cudf_polars/cudf_polars/testing/engine_utils.py +++ b/python/cudf_polars/cudf_polars/testing/engine_utils.py @@ -10,7 +10,7 @@ from typing import TYPE_CHECKING, Any, Literal if TYPE_CHECKING: - from rapidsmpf.communicator.communicator import Communicator + from collections.abc import Mapping import polars as pl @@ -112,39 +112,49 @@ def create_streaming_options( def build_streaming_engine( param: EngineFixtureParam, - spmd_comm: Communicator, + engines: Mapping[str, StreamingEngine], options: StreamingOptions | None = None, ) -> StreamingEngine: """ - Build a :class:`StreamingEngine` from an engine fixture parameter. + Return ``engines``'s entry for ``param``, ``_reset``-ed. 
+ + ``engines`` must already contain a slot for ``param.engine_name`` — + seeded by the ``streaming_engines`` session-scoped fixture. The + fixture owns mutation; this function only reads and ``_reset``-s. Parameters ---------- param Decoded engine fixture parameter describing the backend and block size mode. - spmd_comm - Communicator used when constructing an :class:`SPMDEngine`. + engines + Streaming-engine collection keyed by backend name. Provided by + the ``streaming_engines`` test fixture. options Optional streaming options to merge on top of the baseline selected by ``param.blocksize_mode``. Returns ------- - A streaming engine matching ``param``. - """ - from cudf_polars.experimental.rapidsmpf.frontend.spmd import SPMDEngine + The shared :class:`StreamingEngine`, ``_reset`` to the requested options. + Raises + ------ + RuntimeError + If ``engines`` has no slot for ``param.engine_name``. + """ streaming_options = create_streaming_options(param.blocksize_mode, options) - match param.engine_name: - case "spmd": - return SPMDEngine( - comm=spmd_comm, - rapidsmpf_options=streaming_options.to_rapidsmpf_options(), - executor_options=streaming_options.to_executor_options(), - engine_options=streaming_options.to_engine_options(), - ) - case _: # pragma: no cover - raise AssertionError(f"Unknown streaming backend: {param.engine_name!r}") + engine = engines.get(param.engine_name) + if engine is None: # pragma: no cover + raise RuntimeError( + f"No streaming engine for {param.engine_name!r}. The corresponding " + "session-scoped fixture must populate the collection before tests run." 
+ ) + engine._reset( + rapidsmpf_options=streaming_options.to_rapidsmpf_options(), + executor_options=streaming_options.to_executor_options(), + engine_options=streaming_options.to_engine_options(), + ) + return engine def get_blocksize_mode(obj: pl.GPUEngine) -> Literal["medium", "small"]: diff --git a/python/cudf_polars/tests/conftest.py b/python/cudf_polars/tests/conftest.py index 7ad45c06605..7f00684638f 100644 --- a/python/cudf_polars/tests/conftest.py +++ b/python/cudf_polars/tests/conftest.py @@ -18,12 +18,18 @@ ) if TYPE_CHECKING: - from collections.abc import Callable, Generator - - from rapidsmpf.communicator.communicator import Communicator + from collections.abc import Callable, Generator, Mapping + from typing import TypeAlias from cudf_polars.experimental.rapidsmpf.frontend.core import StreamingEngine from cudf_polars.experimental.rapidsmpf.frontend.options import StreamingOptions + from cudf_polars.experimental.rapidsmpf.frontend.spmd import SPMDEngine + + # Read-only view over the per-backend streaming engines owned by the + # ``streaming_engines`` session fixture. Only that fixture mutates the + # underlying dict; consumers (``spmd_engine``, ``streaming_engine_factory``, + # ``engine``) only look up by backend name. + StreamingEngines: TypeAlias = Mapping[str, StreamingEngine] @pytest.fixture(params=[False, True], ids=["no_nulls", "nulls"], scope="session") @@ -66,12 +72,12 @@ def _skip_unless_spmd(request: pytest.FixtureRequest) -> None: @pytest.fixture(scope="session") -def spmd_comm() -> Communicator: - """Session-scoped communicator — bootstrapped once and shared across all tests. +def streaming_engines() -> Generator[StreamingEngines, None, None]: + """Return a session-scoped mapping of engine name to engine instance. 
- Sharing a single communicator avoids the file-based bootstrap race that can - cause hangs when ``create_ucxx_comm()`` is called repeatedly in the same - ``rrun`` session (stale barrier files / stale ``ucxx_root_address`` KV entry). + The returned :class:`StreamingEngines` is a dict that maps each engine + name to a single shared engine instance, which is reused across the entire + test session. """ pytest.importorskip("rapidsmpf") from rapidsmpf import bootstrap @@ -79,12 +85,36 @@ def spmd_comm() -> Communicator: from rapidsmpf.config import Options, get_environment_variables from rapidsmpf.progress_thread import ProgressThread + from cudf_polars.experimental.rapidsmpf.frontend.spmd import SPMDEngine + if bootstrap.is_running_with_rrun(): - return bootstrap.create_ucxx_comm( + comm = bootstrap.create_ucxx_comm( progress_thread=ProgressThread(), type=bootstrap.BackendType.AUTO, ) - return single_communicator(Options(get_environment_variables()), ProgressThread()) + else: + comm = single_communicator( + Options(get_environment_variables()), ProgressThread() + ) + + engines: dict[str, StreamingEngine] = {"spmd": SPMDEngine(comm=comm)} + try: + yield engines + finally: + while engines: + _, engine = engines.popitem() + engine.shutdown() + + +@pytest.fixture +def spmd_engine(streaming_engines: StreamingEngines) -> SPMDEngine: + """Return the shared :class:`SPMDEngine` reset to default options.""" + from cudf_polars.experimental.rapidsmpf.frontend.spmd import SPMDEngine + + engine = streaming_engines["spmd"] + assert isinstance(engine, SPMDEngine) + engine._reset() + return engine @pytest.fixture(params=STREAMING_ENGINE_FIXTURE_PARAMS) @@ -102,38 +132,29 @@ def _all_engine_param(request: pytest.FixtureRequest) -> EngineFixtureParam: @pytest.fixture def streaming_engine_factory( _streaming_engine_param: EngineFixtureParam, - spmd_comm: Communicator, -) -> Generator[Callable[..., StreamingEngine], None, None]: + streaming_engines: StreamingEngines, +) -> Callable[..., 
StreamingEngine]: """ - Yield a factory that constructs :class:`StreamingEngine` instances for tests. - - The fixture is parametrized over :data:`STREAMING_ENGINE_FIXTURE_PARAMS`. - Created engines are tracked and automatically shut down after the test. + Return a factory that yields a shared :class:`StreamingEngine`. Parameters ---------- _streaming_engine_param Parametrized engine descriptor controlling backend and block size mode. - spmd_comm - Communicator used when constructing SPMD-based engines. - - Yields - ------ - Factory function that creates :class:`StreamingEngine` instances. The - factory accepts optional :class:`StreamingOptions`, which are merged on - top of the parametrized blocksize baseline. + streaming_engines + Session-scoped engine collection to look up the shared engine in. + + Returns + ------- + Factory function that returns the shared :class:`StreamingEngine`. """ - engines: list[StreamingEngine] = [] def factory(options: StreamingOptions | None = None) -> StreamingEngine: - engine = build_streaming_engine(_streaming_engine_param, spmd_comm, options) - engines.append(engine) - return engine - - yield factory + return build_streaming_engine( + _streaming_engine_param, streaming_engines, options + ) - for engine in reversed(engines): - engine.shutdown() + return factory @pytest.fixture @@ -164,9 +185,9 @@ def streaming_engine( def engine( request: pytest.FixtureRequest, _all_engine_param: EngineFixtureParam, -) -> Generator[pl.GPUEngine, None, None]: +) -> pl.GPUEngine: """ - Yield a :class:`polars.GPUEngine` for each engine variant under test. + Return a :class:`polars.GPUEngine` for each engine variant under test. Parameters ---------- @@ -176,8 +197,8 @@ def engine( Parametrized engine descriptor covering both in-memory and streaming variants. - Yields - ------ + Returns + ------- Engine instance matching the parametrized variant. Notes @@ -186,15 +207,10 @@ def engine( :func:`streaming_engine` fixture instead. 
""" if _all_engine_param.engine_name == "in-memory": - yield pl.GPUEngine(executor="in-memory", raise_on_fail=True) - return + return pl.GPUEngine(executor="in-memory", raise_on_fail=True) - spmd_comm: Communicator = request.getfixturevalue("spmd_comm") - engine = build_streaming_engine(_all_engine_param, spmd_comm) - try: - yield engine - finally: - engine.shutdown() + engines: StreamingEngines = request.getfixturevalue("streaming_engines") + return build_streaming_engine(_all_engine_param, engines) @pytest.fixture diff --git a/python/cudf_polars/tests/experimental/test_all_gather_host_data.py b/python/cudf_polars/tests/experimental/test_all_gather_host_data.py index aad7b341676..8f09a82c4bd 100644 --- a/python/cudf_polars/tests/experimental/test_all_gather_host_data.py +++ b/python/cudf_polars/tests/experimental/test_all_gather_host_data.py @@ -14,7 +14,6 @@ all_gather_host_data, ) from cudf_polars.experimental.rapidsmpf.frontend.options import StreamingOptions -from cudf_polars.experimental.rapidsmpf.frontend.spmd import SPMDEngine pytestmark = pytest.mark.spmd @@ -36,15 +35,14 @@ def _struct(rank: int) -> bytes: @pytest.mark.parametrize("make_data", [_empty, _text, _bytearray, _struct]) -def test_all_gather_host_data(spmd_comm, make_data) -> None: +def test_all_gather_host_data(spmd_engine, make_data) -> None: """Each rank sends rank-specific data; results are correct and ordered.""" - with SPMDEngine(comm=spmd_comm) as spmd_engine: - comm = spmd_engine.comm - br = spmd_engine.context.br() - result = all_gather_host_data(comm, br, op_id=0, data=make_data(comm.rank)) - assert len(result) == comm.nranks - for i, item in enumerate(result): - assert item == bytes(make_data(i)) + comm = spmd_engine.comm + br = spmd_engine.context.br() + result = all_gather_host_data(comm, br, op_id=0, data=make_data(comm.rank)) + assert len(result) == comm.nranks + for i, item in enumerate(result): + assert item == bytes(make_data(i)) def test_gather_cluster_info(streaming_engine) 
-> None: diff --git a/python/cudf_polars/tests/experimental/test_allgather.py b/python/cudf_polars/tests/experimental/test_allgather.py index 514276c6647..52c353044eb 100644 --- a/python/cudf_polars/tests/experimental/test_allgather.py +++ b/python/cudf_polars/tests/experimental/test_allgather.py @@ -13,7 +13,6 @@ import pylibcudf as plc from cudf_polars.experimental.rapidsmpf.collectives.allgather import AllGatherManager -from cudf_polars.experimental.rapidsmpf.frontend.spmd import SPMDEngine from cudf_polars.experimental.rapidsmpf.utils import allgather_reduce @@ -53,9 +52,8 @@ async def _test_allgather(engine) -> None: assert col.type().id().value == plc.types.TypeId.INT32.value -def test_allgather(spmd_comm) -> None: - with SPMDEngine(comm=spmd_comm) as engine: - asyncio.run(_test_allgather(engine)) +def test_allgather(spmd_engine) -> None: + asyncio.run(_test_allgather(spmd_engine)) async def _test_allgather_reduce(engine) -> None: @@ -72,6 +70,5 @@ async def _test_allgather_reduce(engine) -> None: assert results == (10, 20, 30) # Single rank, so sums are just the local values -def test_allgather_reduce(spmd_comm) -> None: - with SPMDEngine(comm=spmd_comm) as engine: - asyncio.run(_test_allgather_reduce(engine)) +def test_allgather_reduce(spmd_engine) -> None: + asyncio.run(_test_allgather_reduce(spmd_engine)) diff --git a/python/cudf_polars/tests/experimental/test_dask.py b/python/cudf_polars/tests/experimental/test_dask.py index d923edd37cf..5ccdde864ef 100644 --- a/python/cudf_polars/tests/experimental/test_dask.py +++ b/python/cudf_polars/tests/experimental/test_dask.py @@ -153,3 +153,97 @@ def test_empty_dataframe(engine: DaskEngine) -> None: def test_run(engine: DaskEngine) -> None: result = engine._run(os.getpid) assert len(set(result)) == engine.nranks + + +@pytest.fixture(scope="module") +def reset_engine() -> Iterator[DaskEngine]: + """Module-scoped engine for reset tests — independent of ``engine``. 
+ + These tests exercise :meth:`DaskEngine._reset` (which mutates the + engine in-place). A dedicated fixture keeps those mutations from + leaking into the other tests. + """ + with DaskEngine( + executor_options={"max_rows_per_partition": 10}, + ) as e: + yield e + + +def test_reset_keeps_workers_alive(reset_engine: DaskEngine) -> None: + """``_reset`` must not respawn dask workers.""" + workers_before = sorted( + reset_engine._dask_ctx.client.scheduler_info(n_workers=-1)["workers"] + ) + pids_before = sorted(reset_engine._run(os.getpid)) + + reset_engine._reset(executor_options={"max_rows_per_partition": 7}) + + workers_after = sorted( + reset_engine._dask_ctx.client.scheduler_info(n_workers=-1)["workers"] + ) + pids_after = sorted(reset_engine._run(os.getpid)) + + # Same worker addresses … + assert workers_before == workers_after + # … and the workers are running in the same OS processes. + assert pids_before == pids_after + + +def test_reset_updates_executor_options(reset_engine: DaskEngine) -> None: + """``_reset`` updates the polars-layer config to the new options.""" + reset_engine._reset(executor_options={"max_rows_per_partition": 42}) + + opts = reset_engine.config["executor_options"] + assert opts["max_rows_per_partition"] == 42 + # Reserved keys are still injected by ``_reset``. 
+ assert opts["runtime"] == "rapidsmpf" + assert opts["cluster"] == "dask" + assert isinstance(opts["dask_context"], DaskContext) + + +def test_reset_collects_after_options_change(reset_engine: DaskEngine) -> None: + """The engine still drives a real query after ``_reset``.""" + reset_engine._reset(executor_options={"max_rows_per_partition": 3}) + assert_gpu_result_equal( + pl.LazyFrame({"a": [1, 2, 3, 4, 5]}), + engine=reset_engine, + check_row_order=False, + ) + + +def test_reset_after_shutdown_raises() -> None: + """``shutdown`` is idempotent; ``_reset`` after shutdown raises every time.""" + engine = DaskEngine(executor_options={"max_rows_per_partition": 10}) + engine.shutdown() + engine.shutdown() # idempotent + with pytest.raises(RuntimeError, match="shut-down"): + engine._reset() + with pytest.raises(RuntimeError, match="shut-down"): + engine._reset() # still raises on a second attempt + engine.shutdown() # still safe after a failed _reset + + +def test_reset_rejects_construction_time_executor_options( + reset_engine: DaskEngine, +) -> None: + """``_reset`` rejects ``executor_options`` keys read at worker setup.""" + with pytest.raises(ValueError, match="num_py_executors"): + reset_engine._reset(executor_options={"num_py_executors": 4}) + + +def test_reset_rejects_construction_time_engine_options( + reset_engine: DaskEngine, +) -> None: + """``_reset`` rejects ``engine_options`` keys read at worker setup.""" + from cudf_polars.experimental.rapidsmpf.frontend.hardware_binding import ( + HardwareBindingPolicy, + ) + + with pytest.raises(ValueError, match="hardware_binding"): + reset_engine._reset( + engine_options={ + "hardware_binding": HardwareBindingPolicy(enabled=False), + }, + ) + with pytest.raises(ValueError, match="memory_resource_config"): + reset_engine._reset(engine_options={"memory_resource_config": None}) diff --git a/python/cudf_polars/tests/experimental/test_io_multirank.py b/python/cudf_polars/tests/experimental/test_io_multirank.py index 
631f12fd85c..2208cc67316 100644 --- a/python/cudf_polars/tests/experimental/test_io_multirank.py +++ b/python/cudf_polars/tests/experimental/test_io_multirank.py @@ -19,8 +19,6 @@ from collections.abc import Iterator from pathlib import Path - from rapidsmpf.communicator.communicator import Communicator - from cudf_polars.experimental.rapidsmpf.frontend.core import StreamingEngine # Runs the spmd variant even under rrun with nranks > 1. The ray/dask @@ -44,7 +42,7 @@ def df() -> pl.LazyFrame: @pytest.fixture(params=["spmd", "ray", "dask"]) def engine( request: pytest.FixtureRequest, - spmd_comm: Communicator, + spmd_engine: SPMDEngine, ) -> Iterator[StreamingEngine]: """Yield each supported streaming engine.""" backend = request.param @@ -52,7 +50,7 @@ def engine( if backend == "spmd": with SPMDEngine( - comm=spmd_comm, + comm=spmd_engine.comm, executor_options=executor_options, ) as eng: yield eng diff --git a/python/cudf_polars/tests/experimental/test_ray.py b/python/cudf_polars/tests/experimental/test_ray.py index 7365be733b3..ded4903c594 100644 --- a/python/cudf_polars/tests/experimental/test_ray.py +++ b/python/cudf_polars/tests/experimental/test_ray.py @@ -275,7 +275,7 @@ def test_reset_collects_after_options_change(reset_engine: RayEngine) -> None: def test_reset_after_shutdown_raises() -> None: - """``_reset`` after ``shutdown`` raises ``RuntimeError``.""" + """``shutdown`` is idempotent; ``_reset`` after shutdown raises every time.""" engine = RayEngine( executor_options={"max_rows_per_partition": 10}, engine_options={"allow_gpu_sharing": True}, @@ -283,8 +283,12 @@ def test_reset_after_shutdown_raises() -> None: ray_init_options={"include_dashboard": False}, ) engine.shutdown() + engine.shutdown() # idempotent with pytest.raises(RuntimeError, match="shut-down"): engine._reset() + with pytest.raises(RuntimeError, match="shut-down"): + engine._reset() # still raises on a second attempt + engine.shutdown() # still safe after a failed _reset def 
test_reset_rejects_construction_time_executor_options( diff --git a/python/cudf_polars/tests/experimental/test_sink.py b/python/cudf_polars/tests/experimental/test_sink.py index 9b0573d2cb4..df68b7c199a 100644 --- a/python/cudf_polars/tests/experimental/test_sink.py +++ b/python/cudf_polars/tests/experimental/test_sink.py @@ -92,7 +92,7 @@ def test_sink_parquet_directory( assert len(list(check_path.iterdir())) == expected_file_count -def test_sink_parquet_raises_spmd(spmd_comm): +def test_sink_parquet_raises_spmd(spmd_engine): from cudf_polars.experimental.rapidsmpf.frontend.spmd import SPMDEngine with ( @@ -100,7 +100,7 @@ def test_sink_parquet_raises_spmd(spmd_comm): ValueError, match="The spmd cluster requires sink_to_directory=True" ), SPMDEngine( - comm=spmd_comm, executor_options={"sink_to_directory": False} + comm=spmd_engine.comm, executor_options={"sink_to_directory": False} ) as engine, ): ConfigOptions.from_polars_engine(engine) diff --git a/python/cudf_polars/tests/experimental/test_spilling.py b/python/cudf_polars/tests/experimental/test_spilling.py index 799d19402e6..6aa11801132 100644 --- a/python/cudf_polars/tests/experimental/test_spilling.py +++ b/python/cudf_polars/tests/experimental/test_spilling.py @@ -9,7 +9,6 @@ import numpy as np import pytest -from rapidsmpf.config import Options from rapidsmpf.memory.buffer import MemoryType from rapidsmpf.memory.pinned_memory_resource import is_pinned_memory_resources_supported from rapidsmpf.streaming.core.message import Message @@ -18,7 +17,7 @@ import pylibcudf as plc -from cudf_polars.experimental.rapidsmpf.frontend.spmd import SPMDEngine +from cudf_polars.experimental.rapidsmpf.frontend.options import StreamingOptions from cudf_polars.experimental.rapidsmpf.utils import ( make_spill_function, ) @@ -51,109 +50,104 @@ def create_test_table(nbytes: int, stream: Stream) -> plc.Table: ], ) def test_make_spill_function( - spmd_comm, + streaming_engine_factory, *, pinned_memory: bool, spilled_host_mem_type: 
MemoryType, ) -> None: """Test that spilling prioritizes longest queues and newest messages.""" - with SPMDEngine( - comm=spmd_comm, - rapidsmpf_options=Options({"pinned_memory": str(pinned_memory).lower()}), - ) as spmd_engine: - context = spmd_engine.context - - if spilled_host_mem_type == MemoryType.PINNED_HOST: - assert spmd_engine.context.br().pinned_mr is not None - other_host_mem_type = MemoryType.HOST - else: - assert spmd_engine.context.br().pinned_mr is None - other_host_mem_type = MemoryType.PINNED_HOST - - # Create 3 spillable message containers simulating fanout buffers - # Buffer 0: Fast consumer (2 messages) - # Buffer 1: Slow consumer (5 messages) <- should spill from here first - # Buffer 2: Medium consumer (3 messages) - buffers = [SpillableMessages(context.br()) for _ in range(3)] - messages_per_buffer = [2, 5, 3] - - # Track message IDs for each buffer - message_ids: dict[int, list[int]] = {} - - # Populate buffers with messages - stream = context.get_stream_from_pool() - for buffer_idx, (sm, count) in enumerate( - zip(buffers, messages_per_buffer, strict=False) - ): - message_ids[buffer_idx] = [] - for msg_idx in range(count): - # Create 1MB messages - table = create_test_table(1024 * 1024, stream) - chunk = TableChunk.from_pylibcudf_table( - table, stream, exclusive_view=True, br=context.br() - ) - msg = Message(msg_idx, chunk) - mid = sm.insert(msg) - message_ids[buffer_idx].append(mid) - - # Register spill function - spill_func = make_spill_function(buffers, context) - func_id = context.br().spill_manager.add_spill_function(spill_func, priority=0) - - try: - # Manually trigger spilling of 3MB - # Expected: Buffer 1 (longest) should spill newest messages first - amount_to_spill = 3 * 1024 * 1024 - actual_spilled = context.br().spill_manager.spill(amount_to_spill) - - # Allow some tolerance - assert actual_spilled >= amount_to_spill * 0.95 - - # Verify Buffer 1 (longest queue): newest 3 messages should be spilled - buffer_1_descs = 
buffers[1].get_content_descriptions() - for i in range(3, 5): # Messages 3, 4 (newest) - mid = message_ids[1][i] - desc = buffer_1_descs[mid] - # Should be in HOST memory (spilled) - assert desc.content_sizes[spilled_host_mem_type] > 0 - assert desc.content_sizes[other_host_mem_type] == 0 - assert desc.content_sizes[MemoryType.DEVICE] == 0 - - # Buffer 1: oldest messages should still be in device - for i in range(2): # Messages 0, 1 (oldest) - mid = message_ids[1][i] - desc = buffer_1_descs[mid] - # Should still be in DEVICE memory - assert desc.content_sizes[MemoryType.DEVICE] > 0 - assert desc.content_sizes[spilled_host_mem_type] == 0 - assert desc.content_sizes[other_host_mem_type] == 0 - - # Buffer 0 (shortest queue): all messages should still be on device - buffer_0_descs = buffers[0].get_content_descriptions() - for mid in message_ids[0]: - desc = buffer_0_descs[mid] - assert desc.content_sizes[MemoryType.DEVICE] > 0 - assert desc.content_sizes[spilled_host_mem_type] == 0 - assert desc.content_sizes[other_host_mem_type] == 0 - - # Verify we can extract and make available a spilled message - spilled_mid = message_ids[1][4] # Newest message from longest queue - spilled_msg = buffers[1].extract(mid=spilled_mid) - - chunk = TableChunk.from_message(spilled_msg, br=context.br()) - assert not chunk.is_available() # Should be on host - - # Make it available should bring it back to device - cost = chunk.make_available_cost() - assert cost > 0 - res, _ = context.br().reserve( - MemoryType.DEVICE, cost, allow_overbooking=True + engine = streaming_engine_factory(StreamingOptions(pinned_memory=pinned_memory)) + context = engine.context + + if spilled_host_mem_type == MemoryType.PINNED_HOST: + assert engine.context.br().pinned_mr is not None + other_host_mem_type = MemoryType.HOST + else: + assert engine.context.br().pinned_mr is None + other_host_mem_type = MemoryType.PINNED_HOST + + # Create 3 spillable message containers simulating fanout buffers + # Buffer 0: Fast 
consumer (2 messages) + # Buffer 1: Slow consumer (5 messages) <- should spill from here first + # Buffer 2: Medium consumer (3 messages) + buffers = [SpillableMessages(context.br()) for _ in range(3)] + messages_per_buffer = [2, 5, 3] + + # Track message IDs for each buffer + message_ids: dict[int, list[int]] = {} + + # Populate buffers with messages + stream = context.get_stream_from_pool() + for buffer_idx, (sm, count) in enumerate( + zip(buffers, messages_per_buffer, strict=False) + ): + message_ids[buffer_idx] = [] + for msg_idx in range(count): + # Create 1MB messages + table = create_test_table(1024 * 1024, stream) + chunk = TableChunk.from_pylibcudf_table( + table, stream, exclusive_view=True, br=context.br() ) - chunk_available = chunk.make_available(res) - - assert chunk_available.is_available() - # Verify we got a valid table back - assert chunk_available.table_view().num_rows() > 0 - - finally: - context.br().spill_manager.remove_spill_function(func_id) + msg = Message(msg_idx, chunk) + mid = sm.insert(msg) + message_ids[buffer_idx].append(mid) + + # Register spill function + spill_func = make_spill_function(buffers, context) + func_id = context.br().spill_manager.add_spill_function(spill_func, priority=0) + + try: + # Manually trigger spilling of 3MB + # Expected: Buffer 1 (longest) should spill newest messages first + amount_to_spill = 3 * 1024 * 1024 + actual_spilled = context.br().spill_manager.spill(amount_to_spill) + + # Allow some tolerance + assert actual_spilled >= amount_to_spill * 0.95 + + # Verify Buffer 1 (longest queue): newest 3 messages should be spilled + buffer_1_descs = buffers[1].get_content_descriptions() + for i in range(3, 5): # Messages 3, 4 (newest) + mid = message_ids[1][i] + desc = buffer_1_descs[mid] + # Should be in HOST memory (spilled) + assert desc.content_sizes[spilled_host_mem_type] > 0 + assert desc.content_sizes[other_host_mem_type] == 0 + assert desc.content_sizes[MemoryType.DEVICE] == 0 + + # Buffer 1: oldest 
messages should still be in device + for i in range(2): # Messages 0, 1 (oldest) + mid = message_ids[1][i] + desc = buffer_1_descs[mid] + # Should still be in DEVICE memory + assert desc.content_sizes[MemoryType.DEVICE] > 0 + assert desc.content_sizes[spilled_host_mem_type] == 0 + assert desc.content_sizes[other_host_mem_type] == 0 + + # Buffer 0 (shortest queue): all messages should still be on device + buffer_0_descs = buffers[0].get_content_descriptions() + for mid in message_ids[0]: + desc = buffer_0_descs[mid] + assert desc.content_sizes[MemoryType.DEVICE] > 0 + assert desc.content_sizes[spilled_host_mem_type] == 0 + assert desc.content_sizes[other_host_mem_type] == 0 + + # Verify we can extract and make available a spilled message + spilled_mid = message_ids[1][4] # Newest message from longest queue + spilled_msg = buffers[1].extract(mid=spilled_mid) + + chunk = TableChunk.from_message(spilled_msg, br=context.br()) + assert not chunk.is_available() # Should be on host + + # Make it available should bring it back to device + cost = chunk.make_available_cost() + assert cost > 0 + res, _ = context.br().reserve(MemoryType.DEVICE, cost, allow_overbooking=True) + chunk_available = chunk.make_available(res) + + assert chunk_available.is_available() + # Verify we got a valid table back + assert chunk_available.table_view().num_rows() > 0 + + finally: + context.br().spill_manager.remove_spill_function(func_id) diff --git a/python/cudf_polars/tests/experimental/test_spmd.py b/python/cudf_polars/tests/experimental/test_spmd.py index a1970c8e92f..9fef0e00350 100644 --- a/python/cudf_polars/tests/experimental/test_spmd.py +++ b/python/cudf_polars/tests/experimental/test_spmd.py @@ -30,12 +30,22 @@ pytestmark = pytest.mark.spmd -def test_yields_context_and_engine(spmd_comm: Communicator) -> None: +@pytest.fixture +def comm(spmd_engine: SPMDEngine) -> Communicator: + """Communicator from the shared :class:`SPMDEngine` for local construction. 
+ + Most tests in this module need to construct their own + :class:`SPMDEngine` to exercise lifecycle, construction-time + options, MR-state semantics, or :meth:`SPMDEngine._reset`. + """ + return spmd_engine.comm + + +def test_yields_context_and_engine(spmd_engine: SPMDEngine) -> None: """SPMDEngine has comm and context properties.""" - with SPMDEngine(comm=spmd_comm) as engine: - assert engine.comm is not None - assert engine.context is not None - assert isinstance(engine, pl.GPUEngine) + assert spmd_engine.comm is not None + assert spmd_engine.context is not None + assert isinstance(spmd_engine, pl.GPUEngine) def test_from_options() -> None: @@ -74,31 +84,29 @@ def test_engine_options_reserved_keys() -> None: pass -def test_engine_options_parquet_options(spmd_comm: Communicator) -> None: +def test_engine_options_parquet_options(comm: Communicator) -> None: """engine_options forwards parquet_options to GPUEngine without error.""" - with SPMDEngine(comm=spmd_comm, engine_options={"parquet_options": {}}) as engine: + with SPMDEngine(comm=comm, engine_options={"parquet_options": {}}) as engine: assert isinstance(engine, pl.GPUEngine) -def test_scan(spmd_comm: Communicator) -> None: +def test_scan(spmd_engine: SPMDEngine) -> None: """Each rank scans its own single-row LazyFrame and gets that row back.""" - with SPMDEngine(comm=spmd_comm) as engine: - lf = pl.LazyFrame({"a": [engine.rank], "b": [engine.rank * 10]}) - result = lf.collect(engine=engine) - assert result.shape == (1, 2) - assert result["a"].to_list() == [engine.rank] - assert result["b"].to_list() == [engine.rank * 10] + lf = pl.LazyFrame({"a": [spmd_engine.rank], "b": [spmd_engine.rank * 10]}) + result = lf.collect(engine=spmd_engine) + assert result.shape == (1, 2) + assert result["a"].to_list() == [spmd_engine.rank] + assert result["b"].to_list() == [spmd_engine.rank * 10] -def test_basic_query(spmd_comm: Communicator) -> None: +def test_basic_query(spmd_engine: SPMDEngine) -> None: """A simple 
in-memory LazyFrame can be collected.""" - with SPMDEngine(comm=spmd_comm) as engine: - result = pl.LazyFrame({"a": [1, 2, 3], "b": [4, 5, 6]}).collect(engine=engine) + result = pl.LazyFrame({"a": [1, 2, 3], "b": [4, 5, 6]}).collect(engine=spmd_engine) assert result.shape == (3, 2) assert result["a"].to_list() == [1, 2, 3] -def test_collect_then_lazy_equivalent(spmd_comm: Communicator) -> None: +def test_collect_then_lazy_equivalent(spmd_engine: SPMDEngine) -> None: """collect().lazy() preserves SPMD semantics: an intermediate materialize is a no-op. In SPMD mode a DataFrame is always rank-local. When it is wrapped back @@ -106,111 +114,105 @@ def test_collect_then_lazy_equivalent(spmd_comm: Communicator) -> None: re-slicing it across ranks. So ``lf.collect().lazy().op.collect()`` must produce the same result as ``lf.op.collect()``. """ - with SPMDEngine(comm=spmd_comm) as engine: - lf = pl.LazyFrame( - {"a": [engine.rank, engine.rank + 1, engine.rank + 2], "b": [0, 1, 2]} - ) + rank = spmd_engine.rank + lf = pl.LazyFrame({"a": [rank, rank + 1, rank + 2], "b": [0, 1, 2]}) - # One-step - one_step = lf.filter(pl.col("b") >= 1).collect(engine=engine) + # One-step + one_step = lf.filter(pl.col("b") >= 1).collect(engine=spmd_engine) - # Two-step: materialize then re-wrap - intermediate = lf.collect(engine=engine) - two_step = intermediate.lazy().filter(pl.col("b") >= 1).collect(engine=engine) + # Two-step: materialize then re-wrap + intermediate = lf.collect(engine=spmd_engine) + two_step = intermediate.lazy().filter(pl.col("b") >= 1).collect(engine=spmd_engine) assert one_step.sort("a").equals(two_step.sort("a")) -def test_group_by(spmd_comm: Communicator) -> None: +def test_group_by(spmd_engine: SPMDEngine) -> None: """Group-by on rank-local data, then allgather to verify the global result.""" - with SPMDEngine(comm=spmd_comm) as engine: - lf = pl.LazyFrame({"a": [engine.rank], "b": [engine.rank * 10]}) - local_result = 
lf.group_by("a").agg(pl.col("b").sum()).collect(engine=engine) - with reserve_op_id() as op_id: - global_result = allgather_polars_dataframe( - engine=engine, local_df=local_result, op_id=op_id - ) - assert global_result.shape == (engine.nranks, 2) - assert global_result.sort("a")["a"].to_list() == list(range(engine.nranks)) - assert global_result.sort("a")["b"].to_list() == [ - r * 10 for r in range(engine.nranks) - ] + lf = pl.LazyFrame({"a": [spmd_engine.rank], "b": [spmd_engine.rank * 10]}) + local_result = lf.group_by("a").agg(pl.col("b").sum()).collect(engine=spmd_engine) + with reserve_op_id() as op_id: + global_result = allgather_polars_dataframe( + engine=spmd_engine, local_df=local_result, op_id=op_id + ) + assert global_result.shape == (spmd_engine.nranks, 2) + assert global_result.sort("a")["a"].to_list() == list(range(spmd_engine.nranks)) + assert global_result.sort("a")["b"].to_list() == [ + r * 10 for r in range(spmd_engine.nranks) + ] -def test_allgather_polars_dataframe(spmd_comm: Communicator) -> None: +def test_allgather_polars_dataframe(spmd_engine: SPMDEngine) -> None: """allgather_polars_dataframe collects every rank's contribution in rank order.""" - with SPMDEngine(comm=spmd_comm) as engine: - local = pl.DataFrame({"rank": [engine.rank], "val": [engine.rank * 2]}) - with reserve_op_id() as op_id: - result = allgather_polars_dataframe( - engine=engine, local_df=local, op_id=op_id - ) - assert result.shape == (engine.nranks, 2) - assert result["rank"].to_list() == list(range(engine.nranks)) - assert result["val"].to_list() == [r * 2 for r in range(engine.nranks)] + local = pl.DataFrame({"rank": [spmd_engine.rank], "val": [spmd_engine.rank * 2]}) + with reserve_op_id() as op_id: + result = allgather_polars_dataframe( + engine=spmd_engine, local_df=local, op_id=op_id + ) + assert result.shape == (spmd_engine.nranks, 2) + assert result["rank"].to_list() == list(range(spmd_engine.nranks)) + assert result["val"].to_list() == [r * 2 for r in 
range(spmd_engine.nranks)] -def test_num_py_executors(spmd_comm: Communicator) -> None: +def test_num_py_executors(comm: Communicator) -> None: """executor_options forwards num_py_executors to the thread pool.""" with SPMDEngine( - comm=spmd_comm, + comm=comm, executor_options={"num_py_executors": 2}, ) as engine: result = pl.LazyFrame({"a": [1, 2, 3]}).collect(engine=engine) assert result.shape == (3, 1) -def test_allgather_polars_dataframe_empty(spmd_comm: Communicator) -> None: +def test_allgather_polars_dataframe_empty(spmd_engine: SPMDEngine) -> None: """allgather handles an empty (zero-row) local DataFrame on every rank.""" - with SPMDEngine(comm=spmd_comm) as engine: - local = pl.DataFrame( - {"a": pl.Series([], dtype=pl.Int32), "b": pl.Series([], dtype=pl.Float64)} + local = pl.DataFrame( + {"a": pl.Series([], dtype=pl.Int32), "b": pl.Series([], dtype=pl.Float64)} + ) + with reserve_op_id() as op_id: + result = allgather_polars_dataframe( + engine=spmd_engine, local_df=local, op_id=op_id ) - with reserve_op_id() as op_id: - result = allgather_polars_dataframe( - engine=engine, local_df=local, op_id=op_id - ) assert result.shape == (0, 2) assert result.columns == ["a", "b"] assert result.dtypes == [pl.Int32, pl.Float64] -def test_mr_wrapped_as_current_inside_context(spmd_comm: Communicator) -> None: +def test_mr_wrapped_as_current_inside_context(comm: Communicator) -> None: """Inside SPMDEngine the current device resource is RmmResourceAdaptor.""" - with SPMDEngine(comm=spmd_comm): + with SPMDEngine(comm=comm): assert isinstance(rmm.mr.get_current_device_resource(), RmmResourceAdaptor) -def test_mr_restored_after_context(spmd_comm: Communicator) -> None: +def test_mr_restored_after_context(comm: Communicator) -> None: """After SPMDEngine exits the original device resource is restored.""" original = rmm.mr.get_current_device_resource() - with SPMDEngine(comm=spmd_comm): + with SPMDEngine(comm=comm): pass assert rmm.mr.get_current_device_resource() is original 
-def test_allgather_polars_dataframe_multi_column(spmd_comm: Communicator) -> None: +def test_allgather_polars_dataframe_multi_column(spmd_engine: SPMDEngine) -> None: """allgather preserves column names, count, and dtypes for multi-column DataFrames.""" - with SPMDEngine(comm=spmd_comm) as engine: - local = pl.DataFrame( - { - "rank": [engine.rank], - "x": [float(engine.rank)], - "label": [f"r{engine.rank}"], - } + local = pl.DataFrame( + { + "rank": [spmd_engine.rank], + "x": [float(spmd_engine.rank)], + "label": [f"r{spmd_engine.rank}"], + } + ) + with reserve_op_id() as op_id: + result = allgather_polars_dataframe( + engine=spmd_engine, local_df=local, op_id=op_id ) - with reserve_op_id() as op_id: - result = allgather_polars_dataframe( - engine=engine, local_df=local, op_id=op_id - ) - assert result.shape == (engine.nranks, 3) - assert result.columns == ["rank", "x", "label"] - sorted_result = result.sort("rank") - assert sorted_result["rank"].to_list() == list(range(engine.nranks)) - assert sorted_result["x"].to_list() == [float(r) for r in range(engine.nranks)] - assert sorted_result["label"].to_list() == [ - f"r{r}" for r in range(engine.nranks) - ] + assert result.shape == (spmd_engine.nranks, 3) + assert result.columns == ["rank", "x", "label"] + sorted_result = result.sort("rank") + assert sorted_result["rank"].to_list() == list(range(spmd_engine.nranks)) + assert sorted_result["x"].to_list() == [float(r) for r in range(spmd_engine.nranks)] + assert sorted_result["label"].to_list() == [ + f"r{r}" for r in range(spmd_engine.nranks) + ] # --------------------------------------------------------------------------- @@ -218,44 +220,44 @@ def test_allgather_polars_dataframe_multi_column(spmd_comm: Communicator) -> Non # --------------------------------------------------------------------------- -def test_comm_argument_reuses_communicator(spmd_comm: Communicator) -> None: +def test_comm_argument_reuses_communicator(comm: Communicator) -> None: """Passing comm= 
reuses the communicator across two engine lifetimes.""" - with SPMDEngine(comm=spmd_comm) as engine1: + with SPMDEngine(comm=comm) as engine1: nranks = engine1.nranks rank = engine1.rank - # engine1 is shut down; spmd_comm is still alive - with SPMDEngine(comm=spmd_comm) as engine2: + # engine1 is shut down; the shared comm is still alive + with SPMDEngine(comm=comm) as engine2: assert engine2.nranks == nranks assert engine2.rank == rank -def test_comm_not_closed_after_engine_shutdown(spmd_comm: Communicator) -> None: +def test_comm_not_closed_after_engine_shutdown(comm: Communicator) -> None: """The caller-provided comm survives engine.shutdown().""" - with SPMDEngine(comm=spmd_comm): + with SPMDEngine(comm=comm): pass # engine.shutdown() is called on __exit__ - # spmd_comm must still be accessible — not destroyed by engine teardown - assert spmd_comm.rank >= 0 + # comm must still be accessible — not destroyed by engine teardown + assert comm.rank >= 0 -def test_comm_argument_mr_still_wrapped(spmd_comm: Communicator) -> None: +def test_comm_argument_mr_still_wrapped(comm: Communicator) -> None: """MR wrapping still happens even when comm is provided externally.""" - with SPMDEngine(comm=spmd_comm): + with SPMDEngine(comm=comm): assert isinstance(rmm.mr.get_current_device_resource(), RmmResourceAdaptor) -def test_comm_sequential_queries(spmd_comm: Communicator) -> None: +def test_comm_sequential_queries(comm: Communicator) -> None: """Two engines sharing a comm can each execute a query without interference.""" - with SPMDEngine(comm=spmd_comm) as engine: + with SPMDEngine(comm=comm) as engine: r1 = pl.LazyFrame({"a": [1, 2]}).collect(engine=engine) - with SPMDEngine(comm=spmd_comm) as engine: + with SPMDEngine(comm=comm) as engine: r2 = pl.LazyFrame({"a": [3, 4]}).collect(engine=engine) assert r1["a"].to_list() == [1, 2] assert r2["a"].to_list() == [3, 4] -def test_shutdown_idempotent(spmd_comm: Communicator) -> None: +def test_shutdown_idempotent(comm: 
Communicator) -> None: """Calling shutdown() twice does not raise.""" - engine = SPMDEngine(comm=spmd_comm) + engine = SPMDEngine(comm=comm) engine.shutdown() engine.shutdown() @@ -277,9 +279,9 @@ def test_memory_resource_config() -> None: mock_create.assert_called_once() -def test_comm_and_context_unavailable_after_shutdown(spmd_comm: Communicator) -> None: +def test_comm_and_context_unavailable_after_shutdown(comm: Communicator) -> None: """Accessing comm or context after shutdown raises RuntimeError.""" - engine = SPMDEngine(comm=spmd_comm) + engine = SPMDEngine(comm=comm) engine.shutdown() with pytest.raises(RuntimeError, match="shutdown"): _ = engine.comm @@ -287,8 +289,89 @@ def test_comm_and_context_unavailable_after_shutdown(spmd_comm: Communicator) -> _ = engine.context -def test_run(spmd_comm): - with SPMDEngine(comm=spmd_comm) as engine: - result = engine._run(os.getpid) - +def test_run(spmd_engine: SPMDEngine) -> None: + result = spmd_engine._run(os.getpid) assert result == [os.getpid()] + + +def test_reset_keeps_comm_alive(comm: Communicator) -> None: + """``_reset`` must not rebuild the communicator.""" + with SPMDEngine( + comm=comm, executor_options={"max_rows_per_partition": 10} + ) as engine: + comm_before = engine.comm + engine._reset(executor_options={"max_rows_per_partition": 7}) + # Same Communicator instance — caller-provided comm is preserved. + assert engine.comm is comm_before + # Engine still drives a real query. 
+ result = pl.LazyFrame({"a": [1, 2, 3]}).collect(engine=engine) + assert sorted(result["a"].to_list()) == [1, 2, 3] + + +def test_reset_updates_executor_options(comm: Communicator) -> None: + """``_reset`` updates the polars-layer config to the new options.""" + from cudf_polars.utils.config import SPMDContext + + with SPMDEngine( + comm=comm, executor_options={"max_rows_per_partition": 10} + ) as engine: + engine._reset(executor_options={"max_rows_per_partition": 42}) + + opts = engine.config["executor_options"] + assert opts["max_rows_per_partition"] == 42 + # Reserved keys are still injected by ``_reset``. + assert opts["runtime"] == "rapidsmpf" + assert opts["cluster"] == "spmd" + assert isinstance(opts["spmd_context"], SPMDContext) + + +def test_reset_collects_after_options_change(comm: Communicator) -> None: + """The engine still drives a real query after ``_reset``.""" + with SPMDEngine( + comm=comm, executor_options={"max_rows_per_partition": 10} + ) as engine: + engine._reset(executor_options={"max_rows_per_partition": 3}) + result = pl.LazyFrame({"a": [1, 2, 3, 4, 5]}).collect(engine=engine) + assert sorted(result["a"].to_list()) == [1, 2, 3, 4, 5] + + +def test_reset_after_shutdown_raises(comm: Communicator) -> None: + """``shutdown`` is idempotent; ``_reset`` after shutdown raises every time.""" + engine = SPMDEngine(comm=comm) + engine.shutdown() + engine.shutdown() # idempotent + with pytest.raises(RuntimeError, match="shut-down"): + engine._reset() + with pytest.raises(RuntimeError, match="shut-down"): + engine._reset() # still raises on a second attempt + engine.shutdown() # still safe after a failed _reset + + +def test_reset_rejects_construction_time_executor_options( + comm: Communicator, +) -> None: + """``_reset`` rejects ``executor_options`` keys read at engine construction.""" + with ( + SPMDEngine(comm=comm) as engine, + pytest.raises(ValueError, match="num_py_executors"), + ): + engine._reset(executor_options={"num_py_executors": 4}) + + 
+def test_reset_rejects_construction_time_engine_options( + comm: Communicator, +) -> None: + """``_reset`` rejects ``engine_options`` keys read at engine construction.""" + from cudf_polars.experimental.rapidsmpf.frontend.hardware_binding import ( + HardwareBindingPolicy, + ) + + with SPMDEngine(comm=comm) as engine: + with pytest.raises(ValueError, match="hardware_binding"): + engine._reset( + engine_options={ + "hardware_binding": HardwareBindingPolicy(enabled=False), + }, + ) + with pytest.raises(ValueError, match="memory_resource_config"): + engine._reset(engine_options={"memory_resource_config": None}) diff --git a/python/cudf_polars/tests/experimental/test_statistics.py b/python/cudf_polars/tests/experimental/test_statistics.py index 965449b80f0..82c121d5830 100644 --- a/python/cudf_polars/tests/experimental/test_statistics.py +++ b/python/cudf_polars/tests/experimental/test_statistics.py @@ -16,8 +16,6 @@ if TYPE_CHECKING: from collections.abc import Iterator - from rapidsmpf.communicator.communicator import Communicator - from cudf_polars.experimental.rapidsmpf.frontend.core import StreamingEngine # Runs the spmd variant even under rrun with nranks > 1. The ray/dask @@ -30,7 +28,7 @@ @pytest.fixture(params=["spmd", "ray", "dask"]) def engine( request: pytest.FixtureRequest, - spmd_comm: Communicator, + spmd_engine: SPMDEngine, ) -> Iterator[StreamingEngine]: """Yield each supported streaming engine with statistics enabled.""" backend = request.param @@ -39,7 +37,7 @@ def engine( if backend == "spmd": with SPMDEngine( - comm=spmd_comm, + comm=spmd_engine.comm, rapidsmpf_options=rapidsmpf_options, executor_options=executor_options, ) as engine: From 8bdabe74cfd6e7b59ee6ff48b3c48ac3095df71d Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Wed, 6 May 2026 05:30:14 -0500 Subject: [PATCH 16/36] Validate PDS-DS Q1 (#22389) Workaround for sum of nulls discrepancy between SQL and Polars. 
- Closes https://github.com/rapidsai/cudf/issues/22123 Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/cudf/pull/22389 --- .../experimental/benchmarks/pdsds_queries/q1.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q1.py b/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q1.py index c4b8b7ec740..81fd42ea30e 100644 --- a/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q1.py +++ b/python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q1.py @@ -74,7 +74,14 @@ def polars_impl(run_config: RunConfig) -> QueryResult: ) .filter(pl.col("d_year") == year) .group_by(["sr_customer_sk", "sr_store_sk"]) - .agg(pl.col("sr_return_amt").sum().alias("ctr_total_return")) + .agg( + # Polars sum() returns 0 for all-null groups; SQL returns NULL. + # See https://github.com/rapidsai/cudf/issues/19560. + pl.when(pl.col("sr_return_amt").count() > 0) + .then(pl.col("sr_return_amt").sum()) + .otherwise(None) + .alias("ctr_total_return") + ) .rename( { "sr_customer_sk": "ctr_customer_sk", From e304ffdd535ca2f5772ed25c5f80f46ca4f31d01 Mon Sep 17 00:00:00 2001 From: "Richard (Rick) Zamora" Date: Wed, 6 May 2026 07:39:44 -0500 Subject: [PATCH 17/36] Improve hstack lowering (#22353) - This is a follow-up to https://github.com/rapidsai/cudf/pull/21796 - This (hopefully) simplifies some code in https://github.com/rapidsai/cudf/pull/22191 **Problem statement**: We currently translate `HStack` nodes with non-pointwise expressions to the equivalent `Select` node at lowering time. This is because all our non-pointwise `Expr`-decomposition logic is specific to `Select`. Before this PR, this translation was skipped whenever the underlying `HStack` was completely overwriting its original columns.
The problem with this case is that we lose "anchor" columns that tell the `Select` how to broadcast scalar-aggregation results. **Proposed solution**: We add a temporary "anchor" column to the translated `HStack` so that broadcasting works correctly in the `Select` node. **Motivation**: - We can handle all `over()` expression decomposition within `Select` if we know **all** non-pointwise HStack operations are lowered to `Select` anyway. - We don't "fall back" for other non-`over` `HStack` corner cases either. Authors: - Richard (Rick) Zamora (https://github.com/rjzamora) Approvers: - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/22353 --- .../cudf_polars/experimental/parallel.py | 58 ++++++++++++++----- .../cudf_polars/testing/inject_gpu_engine.py | 11 ++++ .../tests/experimental/test_hstack.py | 2 +- 3 files changed, 54 insertions(+), 17 deletions(-) diff --git a/python/cudf_polars/cudf_polars/experimental/parallel.py b/python/cudf_polars/cudf_polars/experimental/parallel.py index 108d7822d60..f77e923bce0 100644 --- a/python/cudf_polars/cudf_polars/experimental/parallel.py +++ b/python/cudf_polars/cudf_polars/experimental/parallel.py @@ -9,6 +9,8 @@ from functools import partial, reduce from typing import TYPE_CHECKING, Any +import polars as pl + import cudf_polars.experimental.distinct import cudf_polars.experimental.groupby import cudf_polars.experimental.io @@ -16,7 +18,8 @@ import cudf_polars.experimental.select import cudf_polars.experimental.shuffle import cudf_polars.experimental.sort # noqa: F401 -from cudf_polars.dsl.expr import Col, NamedExpr +from cudf_polars.containers import DataType +from cudf_polars.dsl.expr import Col, Literal, NamedExpr from cudf_polars.dsl.ir import ( IR, Cache, @@ -25,11 +28,13 @@ HStack, IRExecutionContext, MapFunction, + Projection, Select, Slice, Union, ) from cudf_polars.dsl.traversal import CachingVisitor, traversal +from cudf_polars.dsl.utils.naming import unique_names from 
cudf_polars.experimental.base import PartitionInfo, get_key_name from cudf_polars.experimental.dispatch import ( generate_ir_tasks, @@ -49,8 +54,6 @@ from collections.abc import MutableMapping from typing import Any - import polars as pl - from cudf_polars.experimental.base import StatsCollector from cudf_polars.experimental.dispatch import LowerIRTransformer, State from cudf_polars.utils.config import ConfigOptions, StreamingExecutor @@ -397,6 +400,20 @@ def _( ) +def _add_anchor_column(ir: HStack) -> tuple[HStack, str, DataType]: + """Add temporary anchor column to preserve row count.""" + anchor_name = next(unique_names((*ir.schema, *ir.children[0].schema))) + anchor_dtype = DataType(pl.datatypes.Int8()) + anchor_named_expr = NamedExpr(anchor_name, Literal(anchor_dtype, 0)) + new_ir = HStack( + ir.children[0].schema | {anchor_name: anchor_dtype}, + (anchor_named_expr,), + True, # noqa: FBT003 + ir.children[0], + ) + return new_ir, anchor_name, anchor_dtype + + @lower_ir_node.register(HStack) def _( ir: HStack, rec: LowerIRTransformer @@ -404,20 +421,29 @@ def _( if not all(e.is_pointwise for e in traversal([ne.value for ne in ir.columns])): # Redirect non-pointwise HStack to Select so the Select handler can # attempt decomposition (or fall back gracefully via decompose_select). + child: IR = ir.children[0] + anchor_name: str | None = None col_map = {ne.name: ne for ne in ir.columns} - has_passthrough = any(name not in col_map for name in ir.schema) - if has_passthrough or not ir.should_broadcast: - exprs = tuple( - col_map[name] if name in col_map else NamedExpr(name, Col(dtype, name)) - for name, dtype in ir.schema.items() - ) - return lower_ir_node( - Select(ir.schema, exprs, ir.should_broadcast, ir.children[0]), - rec, - ) - # All output columns are aggregations: no N-row passthrough to anchor - # broadcast. Fall back so HStack.do_evaluate uses target_length=child.num_rows. 
- return _lower_ir_fallback(ir, rec) + schema = ir.schema + if ir.should_broadcast and all(name in col_map for name in ir.schema): + # We need to add a temporary anchor column to preserve row count. + child, anchor_name, anchor_dtype = _add_anchor_column(ir) + + schema = ir.schema | {anchor_name: anchor_dtype} + exprs = tuple( + col_map[name] if name in col_map else NamedExpr(name, Col(dtype, name)) + for name, dtype in schema.items() + ) + new_ir: Select | Projection = Select(schema, exprs, ir.should_broadcast, child) + if anchor_name is not None: + # Need to drop the temporary anchor column. + schema = { + name: dtype + for name, dtype in new_ir.schema.items() + if name != anchor_name + } + new_ir = Projection(schema, new_ir) + return lower_ir_node(new_ir, rec) child, partition_info = rec(ir.children[0]) new_node = ir.reconstruct([child]) diff --git a/python/cudf_polars/cudf_polars/testing/inject_gpu_engine.py b/python/cudf_polars/cudf_polars/testing/inject_gpu_engine.py index 1cad7acec22..6fe2de4d154 100644 --- a/python/cudf_polars/cudf_polars/testing/inject_gpu_engine.py +++ b/python/cudf_polars/cudf_polars/testing/inject_gpu_engine.py @@ -334,9 +334,20 @@ def pytest_report_header(config: pytest.Config) -> str: "tests/unit/io/test_scan.py::test_scan_with_row_index_projected_out[glob-parquet-sync]": "Too slow with --inject-gpu-engine-blocksize=small", "tests/unit/io/test_scan.py::test_scan_with_row_index_projected_out[single-parquet-async]": "Too slow with --inject-gpu-engine-blocksize=small", "tests/unit/io/test_scan.py::test_scan_with_row_index_projected_out[single-parquet-sync]": "Too slow with --inject-gpu-engine-blocksize=small", + "tests/unit/lazyframe/test_order_observability.py::test_with_columns_sensitivity[exprs0-True-None]": "Too slow with --inject-gpu-engine-blocksize=small", + "tests/unit/lazyframe/test_order_observability.py::test_with_columns_sensitivity[exprs1-True-None]": "Too slow with --inject-gpu-engine-blocksize=small", 
"tests/unit/lazyframe/test_order_observability.py::test_with_columns_sensitivity[exprs2-True-unordered_columns2]": "Too slow with --inject-gpu-engine-blocksize=small", "tests/unit/lazyframe/test_order_observability.py::test_with_columns_sensitivity[exprs3-True-None]": "Too slow with --inject-gpu-engine-blocksize=small", + "tests/unit/lazyframe/test_order_observability.py::test_with_columns_sensitivity[exprs4-True-None]": "Too slow with --inject-gpu-engine-blocksize=small", + "tests/unit/lazyframe/test_order_observability.py::test_with_columns_sensitivity[exprs5-True-unordered_columns5]": "Too slow with --inject-gpu-engine-blocksize=small", + "tests/unit/lazyframe/test_order_observability.py::test_with_columns_sensitivity[exprs6-False-unordered_columns6]": "Too slow with --inject-gpu-engine-blocksize=small", + "tests/unit/lazyframe/test_order_observability.py::test_with_columns_sensitivity[exprs7-False-None]": "Too slow with --inject-gpu-engine-blocksize=small", + "tests/unit/lazyframe/test_order_observability.py::test_with_columns_sensitivity[exprs8-False-None]": "Too slow with --inject-gpu-engine-blocksize=small", "tests/unit/lazyframe/test_order_observability.py::test_with_columns_sensitivity[exprs9-True-unordered_columns9]": "Too slow with --inject-gpu-engine-blocksize=small", + "tests/unit/lazyframe/test_order_observability.py::test_with_columns_sensitivity[exprs10-True-unordered_columns10]": "Too slow with --inject-gpu-engine-blocksize=small", + "tests/unit/lazyframe/test_order_observability.py::test_with_columns_sensitivity[exprs11-False-unordered_columns11]": "Too slow with --inject-gpu-engine-blocksize=small", + "tests/unit/lazyframe/test_order_observability.py::test_with_columns_sensitivity[exprs12-False-None]": "Too slow with --inject-gpu-engine-blocksize=small", + "tests/unit/lazyframe/test_order_observability.py::test_with_columns_sensitivity[exprs13-False-None]": "Too slow with --inject-gpu-engine-blocksize=small", 
"tests/unit/lazyframe/test_optimizations.py::test_collapse_joins_combinations": "Too slow for CI", "tests/unit/operations/test_slice.py::test_slice_slice_pushdown": "Too slow with --inject-gpu-engine-blocksize=small", "tests/unit/operations/test_group_by.py::test_group_by_first_last_big[Int32-10432-False]": "Too slow with --inject-gpu-engine-blocksize=small", diff --git a/python/cudf_polars/tests/experimental/test_hstack.py b/python/cudf_polars/tests/experimental/test_hstack.py index 17dede9dddc..9bbb4b7aa33 100644 --- a/python/cudf_polars/tests/experimental/test_hstack.py +++ b/python/cudf_polars/tests/experimental/test_hstack.py @@ -105,7 +105,7 @@ def test_hstack_non_pointwise_redirect_covers_parallel_hstack_handler(engine): def test_with_columns_scalar_upstream_20981(engine): # Based on upstream-Polars unit test. - lf = pl.LazyFrame({"a": [1.0, 2.0, 3.0]}) + lf = pl.LazyFrame({"a": [1.0, 2.0, 3.0, 4.0, 5.0]}) q = lf.with_columns(pl.col.a.mean()) assert_gpu_result_equal(q, engine=engine) From 9edc7dcaa118e151fcaebdb17010d446d53f99b3 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Wed, 6 May 2026 09:12:57 -0400 Subject: [PATCH 18/36] Replace `LD_PRELOAD` hack with compute-sanitizer (#22290) We were previously swapping out cudart symbols using `LD_PRELOAD` and `dlsym()`. Adopt a more robust approach that uses the compute-sanitizer library (https://docs.nvidia.com/compute-sanitizer/) instead. This will also allow us to switch to static cudart, contributing to https://github.com/rapidsai/build-planning/issues/235. This also vendors `FindCUDAToolkit.cmake` from CMake as of a83b2de6. 
Authors: - Kyle Edwards (https://github.com/KyleFromNVIDIA) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/22290 --- .github/workflows/pr.yaml | 50 - .github/workflows/test.yaml | 45 - .pre-commit-config.yaml | 12 +- ci/build_streams.sh | 50 - ci/test_streams.sh | 35 - conda/recipes/libcudf/recipe.yaml | 15 +- cpp/CMakeLists.txt | 16 +- cpp/cmake/Modules/FindCUDAToolkit.cmake | 1567 +++++++++++++++++ cpp/tests/utilities/identify_stream_usage.cpp | 340 ++-- dependencies.yaml | 8 +- python/pylibcudf/tests/conftest.py | 15 +- .../pylibcudf/tests/test_column_from_array.py | 23 +- python/pylibcudf/tests/test_interop.py | 5 +- python/pylibcudf/tests/test_reshape.py | 9 +- 14 files changed, 1752 insertions(+), 438 deletions(-) delete mode 100755 ci/build_streams.sh delete mode 100755 ci/test_streams.sh create mode 100644 cpp/cmake/Modules/FindCUDAToolkit.cmake diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 4705a1e10c7..c20f7f7ea79 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -42,10 +42,6 @@ jobs: # - narwhals-tests - telemetry-setup - third-party-integration-tests-cudf-pandas - - streams-build-matrix - - streams-build - - streams-test-matrix - - streams-test secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@main if: always() @@ -614,52 +610,6 @@ jobs: needs: changed-files uses: ./.github/workflows/spark-rapids-jni.yaml if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_java - streams-build-matrix: - needs: [checks, changed-files] - uses: rapidsai/shared-workflows/.github/workflows/compute-matrix.yaml@main - if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp - with: - build_type: pull-request - matrix_name: conda-cpp-build - matrix_filter: map(select(.ARCH == "amd64")) - streams-build: - needs: streams-build-matrix - uses: 
rapidsai/shared-workflows/.github/workflows/custom-job.yaml@main - strategy: - fail-fast: false - matrix: ${{ fromJSON(needs.streams-build-matrix.outputs.matrix) }} - with: - build_type: pull-request - branch: ${{ inputs.branch }} - date: ${{ inputs.date }} - sha: ${{ inputs.sha }} - node_type: cpu8 - container_image: "rapidsai/ci-conda:26.06-cuda${{ matrix.CUDA_VER }}-${{ matrix.LINUX_VER }}-py${{ matrix.PY_VER }}" - script: ci/build_streams.sh - artifact-name: stream_tests_${{ matrix.CUDA_VER }} - file_to_upload: cpp/install - streams-test-matrix: - needs: streams-build - uses: rapidsai/shared-workflows/.github/workflows/compute-matrix.yaml@main - with: - build_type: pull-request - matrix_name: conda-cpp-tests - # This selects "ARCH=amd64 + the latest supported Python + CUDA". - matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) - streams-test: - needs: streams-test-matrix - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@main - strategy: - fail-fast: false - matrix: ${{ fromJSON(needs.streams-test-matrix.outputs.matrix) }} - with: - build_type: pull-request - branch: ${{ inputs.branch }} - date: ${{ inputs.date }} - sha: ${{ inputs.sha }} - node_type: gpu-l4-latest-1 - container_image: "rapidsai/ci-conda:26.06-cuda${{ matrix.CUDA_VER }}-${{ matrix.LINUX_VER }}-py${{ matrix.PY_VER }}" - script: ci/test_streams.sh stream_tests_${{ matrix.CUDA_VER }} telemetry-summarize: # This job must use a self-hosted runner to record telemetry traces. 
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 385acf09227..95439e65744 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -186,48 +186,3 @@ jobs: node_type: "gpu-l4-latest-1" container_image: "rapidsai/ci-conda:26.06-latest" script: ci/test_narwhals.sh - streams-build-matrix: - uses: rapidsai/shared-workflows/.github/workflows/compute-matrix.yaml@main - with: - build_type: ${{ inputs.build_type || 'branch' }} - matrix_name: conda-cpp-build - matrix_filter: map(select(.ARCH == "amd64")) - streams-build: - needs: streams-build-matrix - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@main - strategy: - fail-fast: false - matrix: ${{ fromJSON(needs.streams-build-matrix.outputs.matrix) }} - with: - build_type: ${{ inputs.build_type || 'branch' }} - branch: ${{ inputs.branch }} - date: ${{ inputs.date }} - sha: ${{ inputs.sha }} - node_type: cpu8 - container_image: "rapidsai/ci-conda:26.06-cuda${{ matrix.CUDA_VER }}-${{ matrix.LINUX_VER }}-py${{ matrix.PY_VER }}" - script: ci/build_streams.sh - artifact-name: stream_tests_${{ matrix.CUDA_VER }} - file_to_upload: cpp/install - streams-test-matrix: - needs: streams-build - uses: rapidsai/shared-workflows/.github/workflows/compute-matrix.yaml@main - if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp - with: - build_type: pull-request - matrix_name: conda-cpp-tests - # This selects "ARCH=amd64 + the latest supported Python + CUDA". 
- matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) - streams-test: - needs: streams-test-matrix - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@main - strategy: - fail-fast: false - matrix: ${{ fromJSON(needs.streams-test-matrix.outputs.matrix) }} - with: - build_type: ${{ inputs.build_type || 'branch' }} - branch: ${{ inputs.branch }} - date: ${{ inputs.date }} - sha: ${{ inputs.sha }} - node_type: gpu-l4-latest-1 - container_image: "rapidsai/ci-conda:26.06-cuda${{ matrix.CUDA_VER }}-${{ matrix.LINUX_VER }}-py${{ matrix.PY_VER }}" - script: RAPIDS_BUILD_WORKFLOW_NAME=test.yaml ci/test_streams.sh stream_tests_${{ matrix.CUDA_VER }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a420fde44b0..1fb05425bd3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -146,6 +146,10 @@ repos: entry: ./cpp/scripts/run-cmake-format.sh cmake-format language: python types: [cmake] + # TODO: Remove FindCUDAToolkit once we require CMake 4.4 + exclude: | + (?x) + ^cpp/cmake/Modules/FindCUDAToolkit[.]cmake$ # Note that pre-commit autoupdate does not update the versions # of dependencies, so we'll have to update this manually. additional_dependencies: @@ -157,6 +161,10 @@ repos: entry: ./cpp/scripts/run-cmake-format.sh cmake-lint language: python types: [cmake] + # TODO: Remove FindCUDAToolkit once we require CMake 4.4 + exclude: | + (?x) + ^cpp/cmake/Modules/FindCUDAToolkit[.]cmake$ # Note that pre-commit autoupdate does not update the versions # of dependencies, so we'll have to update this manually. 
additional_dependencies: @@ -213,6 +221,7 @@ repos: pytest[.]ini$| ^[.]pre-commit-config[.]yaml$| Makefile$ + # TODO: Remove FindCUDAToolkit once we require CMake 4.4 exclude: | (?x)^( cpp/include/cudf_test/cxxopts[.]hpp$| @@ -226,7 +235,8 @@ repos: cpp/src/io/comp/unbz2[.]hpp$| cpp/src/io/comp/gpuinflate[.]cu$| cpp/src/io/utilities/base64_utilities[.]cpp$| - cpp/src/io/utilities/base64_utilities[.]hpp$ + cpp/src/io/utilities/base64_utilities[.]hpp$| + cpp/cmake/Modules/FindCUDAToolkit[.]cmake$ ) - id: verify-copyright name: verify-copyright-brotli diff --git a/ci/build_streams.sh b/ci/build_streams.sh deleted file mode 100755 index 46eaf2f1385..00000000000 --- a/ci/build_streams.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. -# SPDX-License-Identifier: Apache-2.0 - -set -euo pipefail - -rapids-logger "Create test conda environment" -. /opt/conda/etc/profile.d/conda.sh - -rapids-logger "Generate C++ testing dependencies" - -ENV_YAML_DIR="$(mktemp -d)" - -rapids-dependency-file-generator \ - --output conda \ - --file-key stream_tests \ - --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch)" | tee "${ENV_YAML_DIR}/env.yaml" - -rapids-mamba-retry env create --yes -f "${ENV_YAML_DIR}/env.yaml" -n stream_tests - -# Temporarily allow unbound variables for conda activation. 
-set +u -conda activate stream_tests -set -u - -RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" -export RAPIDS_CUDA_MAJOR - -source rapids-configure-sccache - -SCCACHE_S3_KEY_PREFIX="cudf-streams/$(arch)/cuda${RAPIDS_CUDA_MAJOR}/objects-cache" -SCCACHE_S3_PREPROCESSOR_CACHE_KEY_PREFIX="cudf-streams/$(arch)/cuda${RAPIDS_CUDA_MAJOR}/preprocessor-cache" -SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE=true -export SCCACHE_S3_KEY_PREFIX SCCACHE_S3_PREPROCESSOR_CACHE_KEY_PREFIX SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE - -rapids-print-env - -rapids-logger "Run C++ build" - -cmake -S cpp -B cpp/build -GNinja \ - -DCUDA_STATIC_RUNTIME=OFF \ - -DCUDF_BUILD_STREAMS_TEST_UTIL=ON \ - -DBUILD_SHARED_LIBS=ON -mkdir cpp/install -cmake --build cpp/build "-j${PARALLEL_LEVEL}" -cmake --install cpp/build --prefix cpp/install -cmake --install cpp/build --prefix cpp/install --component testing - -sccache --show-adv-stats -sccache --stop-server >/dev/null 2>&1 || true diff --git a/ci/test_streams.sh b/ci/test_streams.sh deleted file mode 100755 index e325dd1d049..00000000000 --- a/ci/test_streams.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. -# SPDX-License-Identifier: Apache-2.0 - -set -euo pipefail - -readonly artifact_name="$1" - -rapids-logger "Create test conda environment" -. /opt/conda/etc/profile.d/conda.sh - -rapids-logger "Download stream test artifacts" -STREAM_TESTS="$(rapids-download-from-github "$artifact_name")" - -rapids-logger "Generate C++ testing dependencies" - -ENV_YAML_DIR="$(mktemp -d)" - -rapids-dependency-file-generator \ - --output conda \ - --file-key stream_tests \ - --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch)" | tee "${ENV_YAML_DIR}/env.yaml" - -rapids-mamba-retry env create --yes -f "${ENV_YAML_DIR}/env.yaml" -n stream_tests - -# Temporarily allow unbound variables for conda activation. 
-set +u -conda activate stream_tests -set -u - -rapids-print-env - -rapids-logger "Run C++ tests" - -ctest --test-dir "${STREAM_TESTS}/bin/gtests/libcudf" --output-on-failure diff --git a/conda/recipes/libcudf/recipe.yaml b/conda/recipes/libcudf/recipe.yaml index 67f921d9a67..c2e3fff245b 100644 --- a/conda/recipes/libcudf/recipe.yaml +++ b/conda/recipes/libcudf/recipe.yaml @@ -32,10 +32,15 @@ cache: cudf_ROOT="$(realpath ./cpp/build)" export cudf_ROOT + cmake_args= + for arg in $CMAKE_ARGS; do + cmake_args="$cmake_args \"$arg\"" + done + ./build.sh -n -v \ libcudf libcudf_kafka benchmarks tests \ --build_metrics --incl_cache_stats --allgpuarch \ - --cmake-args=\"-DCUDF_ENABLE_ARROW_S3=ON\" + --cmake-args="\"-DCUDF_ENABLE_ARROW_S3=ON\" $cmake_args" secrets: - AWS_ACCESS_KEY_ID - AWS_SECRET_ACCESS_KEY @@ -79,8 +84,10 @@ cache: host: - librmm =${{ minor_version }} - libkvikio =${{ minor_version }} + - cuda-cudart-dev - cuda-nvrtc-dev - cuda-nvtx-dev + - cuda-sanitizer-api - libcurand-dev - libnvjitlink-dev - if: linux and x86_64 @@ -119,6 +126,7 @@ outputs: - ${{ compiler("c") }} host: - cuda-version =${{ cuda_version }} + - cuda-sanitizer-api - libkvikio =${{ minor_version }} - librmm =${{ minor_version }} - libnvcomp-dev ${{ nvcomp_version }} @@ -129,6 +137,7 @@ outputs: run: - ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }} - cuda-nvrtc + - cuda-sanitizer-api - if: linux and x86_64 then: - libcufile @@ -148,6 +157,7 @@ outputs: - cuda-cudart - cuda-nvrtc - cuda-nvtx + - cuda-sanitizer-api - cuda-version - flatbuffers - libcufile @@ -306,11 +316,13 @@ outputs: - cuda-version =${{ cuda_version }} - libcurand-dev - cuda-cudart-dev + - cuda-sanitizer-api run: - ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }} - ${{ pin_subpackage("libcudf", exact=True) }} - ${{ pin_subpackage("libcudf_kafka", exact=True) }} - libcurand + - cuda-sanitizer-api ignore_run_exports: from_package: - libcurand-dev @@ -318,6 +330,7 @@ outputs: - 
cuda-cudart - cuda-nvrtc - cuda-nvtx + - cuda-sanitizer-api - cuda-version - flatbuffers - libcudf diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 5a0b2f95e83..c2485171c71 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -63,7 +63,7 @@ option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF) set(DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL ON) -if(CUDA_STATIC_RUNTIME OR NOT BUILD_SHARED_LIBS) +if(NOT BUILD_SHARED_LIBS) set(DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL OFF) endif() @@ -1137,13 +1137,6 @@ endif() # * build cudf_identify_stream_usage -------------------------------------------------------------- if(CUDF_BUILD_STREAMS_TEST_UTIL) - if(CUDA_STATIC_RUNTIME) - message( - FATAL_ERROR - "Stream identification cannot be used with a static CUDA runtime. Please set CUDA_STATIC_RUNTIME=OFF or CUDF_BUILD_STREAMS_TEST_UTIL=OFF." - ) - endif() - # Libraries for stream-related testing. We build the library twice, one with STREAM_MODE_TESTING # on and one with it set to off. Each test will then be configured to use the appropriate library # depending via ctest and whether it has been updated to expose public stream APIs. 
@@ -1157,6 +1150,9 @@ if(CUDF_BUILD_STREAMS_TEST_UTIL) ) endif() + set(sanitizer_relative_genex + "$,$>" + ) set_target_properties( ${_tgt} PROPERTIES # set target compile options @@ -1164,13 +1160,13 @@ if(CUDF_BUILD_STREAMS_TEST_UTIL) CXX_STANDARD_REQUIRED ON POSITION_INDEPENDENT_CODE ON BUILD_RPATH "\$ORIGIN" - INSTALL_RPATH "\$ORIGIN" + INSTALL_RPATH "\$ORIGIN;\$ORIGIN/${sanitizer_relative_genex}" ) target_compile_options( ${_tgt} PRIVATE "$:${CUDF_CXX_FLAGS}>>" ) target_include_directories(${_tgt} PRIVATE "$") - target_link_libraries(${_tgt} PUBLIC CUDA::cudart rmm::rmm) + target_link_libraries(${_tgt} PUBLIC CUDA::cudart rmm::rmm CUDA::sanitizer) rapids_cuda_set_runtime(${_tgt} USE_STATIC ${CUDA_STATIC_RUNTIME}) add_library(cudf::${_tgt} ALIAS ${_tgt}) diff --git a/cpp/cmake/Modules/FindCUDAToolkit.cmake b/cpp/cmake/Modules/FindCUDAToolkit.cmake new file mode 100644 index 00000000000..63b9baad90a --- /dev/null +++ b/cpp/cmake/Modules/FindCUDAToolkit.cmake @@ -0,0 +1,1567 @@ +# SPDX-FileCopyrightText: Copyright 2000-2026 Kitware, Inc. and Contributors +# SPDX-License-Identifier: BSD-3-Clause + +# Distributed under the OSI-approved BSD 3-Clause License. See accompanying +# file LICENSE.rst or https://cmake.org/licensing for details. + +#[=======================================================================[.rst: +FindCUDAToolkit +--------------- + +.. versionadded:: 3.17 + +Finds the NVIDIA CUDA toolkit and the associated libraries, but does not +require the ``CUDA`` language be enabled for a given project: + +.. code-block:: cmake + + find_package(CUDAToolkit [] [QUIET] [REQUIRED] [EXACT] [...]) + +This module does not search for the NVIDIA CUDA Samples. + +.. versionadded:: 3.19 + QNX support. + +Search Behavior +^^^^^^^^^^^^^^^ + +The CUDA Toolkit search behavior uses the following order: + +1. If the ``CUDA`` language has been enabled we will use the directory + containing the compiler as the first search location for ``nvcc``. + +2. 
If the variable :variable:`CMAKE_CUDA_COMPILER _COMPILER>` or + the environment variable :envvar:`CUDACXX` is defined, it will be used + as the path to the ``nvcc`` executable. + +3. If the ``CUDAToolkit_ROOT`` cmake configuration variable (e.g., + ``-DCUDAToolkit_ROOT=/some/path``) *or* environment variable is defined, it + will be searched. If both an environment variable **and** a + configuration variable are specified, the *configuration* variable takes + precedence. + + The directory specified here must be such that the executable ``nvcc`` or + the appropriate ``version.txt`` or ``version.json`` file can be found + underneath the specified directory. + +4. If the CUDA_PATH environment variable is defined, it will be searched + for ``nvcc``. + +5. The user's path is searched for ``nvcc`` using :command:`find_program`. If + this is found, no subsequent search attempts are performed. Users are + responsible for ensuring that the first ``nvcc`` to show up in the path is + the desired path in the event that multiple CUDA Toolkits are installed. + +6. On Unix systems, if the symbolic link ``/usr/local/cuda`` exists, this is + used. No subsequent search attempts are performed. No default symbolic link + location exists for the Windows platform. + +7. The platform specific default install locations are searched. If exactly one + candidate is found, this is used. 
The default CUDA Toolkit install locations + searched are: + + +-------------+-------------------------------------------------------------+ + | Platform | Search Pattern | + +=============+=============================================================+ + | macOS | ``/Developer/NVIDIA/CUDA-X.Y`` | + +-------------+-------------------------------------------------------------+ + | Other Unix | ``/usr/local/cuda-X.Y`` | + +-------------+-------------------------------------------------------------+ + | Windows | ``C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\vX.Y`` | + +-------------+-------------------------------------------------------------+ + + Where ``X.Y`` would be a specific version of the CUDA Toolkit, such as + ``/usr/local/cuda-9.0`` or + ``C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0`` + + .. note:: + + When multiple CUDA Toolkits are installed in the default location of a + system (e.g., both ``/usr/local/cuda-9.0`` and ``/usr/local/cuda-10.0`` + exist but the ``/usr/local/cuda`` symbolic link does **not** exist), this + package is marked as **not** found. + + There are too many factors involved in making an automatic decision in + the presence of multiple CUDA Toolkits being installed. In this + situation, users are encouraged to either (1) set ``CUDAToolkit_ROOT`` or + (2) ensure that the correct ``nvcc`` executable shows up in ``$PATH`` for + :command:`find_program` to find. + +Arguments +^^^^^^^^^ + +``[]`` + The ``[]`` argument requests a version with which the package found + should be compatible. See :ref:`find_package version format ` + for more details. + +Options +^^^^^^^ + +``REQUIRED`` + If specified, configuration will error if a suitable CUDA Toolkit is not + found. + +``QUIET`` + If specified, the search for a suitable CUDA Toolkit will not produce any + messages. + +``EXACT`` + If specified, the CUDA Toolkit is considered found only if the exact + ``VERSION`` specified is recovered. 
+ +Imported Targets +^^^^^^^^^^^^^^^^ + +An :ref:`imported target ` named ``CUDA::toolkit`` is provided. + +This module provides :ref:`Imported Targets` for each +of the following libraries that are part of the CUDAToolkit: + +- `CUDA Runtime Library`_ +- `CUDA Driver Library`_ +- `cuBLAS`_ +- `cuDLA`_ +- `cuFile`_ +- `cuFFT`_ +- `cuRAND`_ +- `cuSOLVER`_ +- `cuSPARSE`_ +- `cuPTI`_ +- `NPP`_ +- `nvBLAS`_ +- `nvGRAPH`_ +- `nvJPEG`_ +- `nvidia-ML`_ +- `nvPTX Compiler`_ +- `nvRTC`_ +- `nvJitLink`_ +- `nvFatBin`_ +- `nvToolsExt`_ +- `nvtx3`_ +- `OpenCL`_ +- `cuLIBOS`_ + +CUDA Runtime Library +"""""""""""""""""""" + +The CUDA Runtime library (cudart) are what most applications will typically +need to link against to make any calls such as `cudaMalloc`, and `cudaFree`. + +Targets Created: + +- ``CUDA::cudart`` +- ``CUDA::cudart_static`` + +CUDA Driver Library +"""""""""""""""""""" + +The CUDA Driver library (cuda) are used by applications that use calls +such as `cuMemAlloc`, and `cuMemFree`. + +Targets Created: + +- ``CUDA::cuda_driver`` + +cuBLAS +"""""" + +The `CUDA Basic Linear Algebra Subroutine`_ library. + +Targets Created: + +- ``CUDA::cublas`` +- ``CUDA::cublas_static`` +- ``CUDA::cublasLt`` starting in CUDA 10.1 +- ``CUDA::cublasLt_static`` starting in CUDA 10.1 + +.. _`CUDA Basic Linear Algebra Subroutine`: https://docs.nvidia.com/cuda/cublas + +cuDLA +"""""" + +.. versionadded:: 3.27 + +The `NVIDIA Tegra Deep Learning Accelerator`_ library. + +Targets Created: + +- ``CUDA::cudla`` starting in CUDA 11.6 + +.. _`NVIDIA Tegra Deep Learning Accelerator`: https://docs.nvidia.com/cuda/cuda-for-tegra-appnote#cudla + +cuFile +"""""" + +.. versionadded:: 3.25 + +The `NVIDIA GPUDirect Storage cuFile`_ library. + +Targets Created: + +- ``CUDA::cuFile`` starting in CUDA 11.4 +- ``CUDA::cuFile_static`` starting in CUDA 11.4 +- ``CUDA::cuFile_rdma`` starting in CUDA 11.4 +- ``CUDA::cuFile_rdma_static`` starting in CUDA 11.4 + +.. 
_`NVIDIA GPUDirect Storage cuFile`: https://docs.nvidia.com/gpudirect-storage/api-reference-guide + +cuFFT +""""" + +The `CUDA Fast Fourier Transform`_ library. + +Targets Created: + +- ``CUDA::cufft`` +- ``CUDA::cufftw`` +- ``CUDA::cufft_static`` +- ``CUDA::cufft_static_nocallback`` starting in CUDA 9.2, requires CMake 3.23+ +- ``CUDA::cufftw_static`` + +.. _`CUDA Fast Fourier Transform`: https://docs.nvidia.com/cuda/cufft + +cuRAND +"""""" + +The `CUDA random number generation`_ library. + +Targets Created: + +- ``CUDA::curand`` +- ``CUDA::curand_static`` + +.. _`CUDA random number generation`: https://docs.nvidia.com/cuda/curand + +cuSOLVER +"""""""" + +A `GPU accelerated linear system solver`_ library. + +Targets Created: + +- ``CUDA::cusolver`` +- ``CUDA::cusolver_static`` + +.. _`GPU accelerated linear system solver`: https://docs.nvidia.com/cuda/cusolver + +cuSPARSE +"""""""" + +The `CUDA sparse matrix`_ library. + +Targets Created: + +- ``CUDA::cusparse`` +- ``CUDA::cusparse_static`` + +.. _`CUDA sparse matrix`: https://docs.nvidia.com/cuda/cusparse + +cupti +""""" + +The `NVIDIA CUDA Profiling Tools Interface`_. + +Targets Created: + +- ``CUDA::cupti`` +- ``CUDA::cupti_static`` + +.. versionadded:: 3.27 + + - ``CUDA::nvperf_host`` starting in CUDA 10.2 + - ``CUDA::nvperf_host_static`` starting in CUDA 10.2 + - ``CUDA::nvperf_target`` starting in CUDA 10.2 + - ``CUDA::pcsamplingutil`` starting in CUDA 11.3 + +.. _`NVIDIA CUDA Profiling Tools Interface`: https://developer.nvidia.com/cupti + +NPP +""" + +The `NVIDIA 2D Image and Signal Processing Performance Primitives`_ libraries. 
+ +Targets Created: + +- `nppc`: + + - ``CUDA::nppc`` + - ``CUDA::nppc_static`` + +- `nppial`: Arithmetic and logical operation functions in `nppi_arithmetic_and_logical_operations.h` + + - ``CUDA::nppial`` + - ``CUDA::nppial_static`` + +- `nppicc`: Color conversion and sampling functions in `nppi_color_conversion.h` + + - ``CUDA::nppicc`` + - ``CUDA::nppicc_static`` + +- `nppicom`: JPEG compression and decompression functions in `nppi_compression_functions.h` + Removed starting in CUDA 11.0, use `nvJPEG`_ instead. + + - ``CUDA::nppicom`` + - ``CUDA::nppicom_static`` + +- `nppidei`: Data exchange and initialization functions in `nppi_data_exchange_and_initialization.h` + + - ``CUDA::nppidei`` + - ``CUDA::nppidei_static`` + +- `nppif`: Filtering and computer vision functions in `nppi_filter_functions.h` + + - ``CUDA::nppif`` + - ``CUDA::nppif_static`` + +- `nppig`: Geometry transformation functions found in `nppi_geometry_transforms.h` + + - ``CUDA::nppig`` + - ``CUDA::nppig_static`` + +- `nppim`: Morphological operation functions found in `nppi_morphological_operations.h` + + - ``CUDA::nppim`` + - ``CUDA::nppim_static`` + +- `nppist`: Statistics and linear transform in `nppi_statistics_functions.h` and `nppi_linear_transforms.h` + + - ``CUDA::nppist`` + - ``CUDA::nppist_static`` + +- `nppisu`: Memory support functions in `nppi_support_functions.h` + + - ``CUDA::nppisu`` + - ``CUDA::nppisu_static`` + +- `nppitc`: Threshold and compare operation functions in `nppi_threshold_and_compare_operations.h` + + - ``CUDA::nppitc`` + - ``CUDA::nppitc_static`` + +- `npps`: + + - ``CUDA::npps`` + - ``CUDA::npps_static`` + +.. _`NVIDIA 2D Image and Signal Processing Performance Primitives`: https://docs.nvidia.com/cuda/npp + +nvBLAS +"""""" + +The `GPU-accelerated drop-in BLAS`_ library. +This is a shared library only. + +Targets Created: + +- ``CUDA::nvblas`` + +.. 
_`GPU-accelerated drop-in BLAS`: https://docs.nvidia.com/cuda/nvblas + +nvGRAPH +""""""" + +A `GPU-accelerated graph analytics`_ library. +Removed starting in CUDA 11.0 + +Targets Created: + +- ``CUDA::nvgraph`` +- ``CUDA::nvgraph_static`` + +.. _`GPU-accelerated graph analytics`: https://docs.nvidia.com/cuda/archive/10.0/nvgraph + +nvJPEG +"""""" + +A `GPU-accelerated JPEG codec`_ library. +Introduced in CUDA 10. + +Targets Created: + +- ``CUDA::nvjpeg`` +- ``CUDA::nvjpeg_static`` + +.. _`GPU-accelerated JPEG codec`: https://docs.nvidia.com/cuda/nvjpeg + +nvPTX Compiler +"""""""""""""" + +.. versionadded:: 3.25 + +The `PTX Compiler APIs`_. +These are a set of APIs which can be used to compile a PTX program into GPU assembly code. +Introduced in CUDA 11.1 +This is a static library only. + +Targets Created: + +- ``CUDA::nvptxcompiler_static`` starting in CUDA 11.1 + +.. _`PTX Compiler APIs`: https://docs.nvidia.com/cuda/ptx-compiler-api + +nvRTC +""""" + +A `runtime compilation library for CUDA`_. + +Targets Created: + +- ``CUDA::nvrtc`` + +.. versionadded:: 3.26 + + - ``CUDA::nvrtc_builtins`` + - ``CUDA::nvrtc_static`` starting in CUDA 11.5 + - ``CUDA::nvrtc_builtins_static`` starting in CUDA 11.5 + +.. _`runtime compilation library for CUDA`: https://docs.nvidia.com/cuda/nvrtc + +nvJitLink +""""""""" + +The `JIT Link APIs`_. + +Targets Created: + +- ``CUDA::nvJitLink`` starting in CUDA 12.0 +- ``CUDA::nvJitLink_static`` starting in CUDA 12.0 + +.. _`JIT Link APIs`: https://docs.nvidia.com/cuda/nvjitlink + +nvFatBin +""""""""" + +.. versionadded:: 3.30 + +The `Fatbin Creator APIs`_. + +Targets Created: + +- ``CUDA::nvfatbin`` starting in CUDA 12.4 +- ``CUDA::nvfatbin_static`` starting in CUDA 12.4 + +.. _`Fatbin Creator APIs`: https://docs.nvidia.com/cuda/nvfatbin + +nvidia-ML +""""""""" + +The `NVIDIA Management Library`_. + +Targets Created: + +- ``CUDA::nvml`` +- ``CUDA::nvml_static`` starting in CUDA 12.4 + +.. versionadded:: 3.31 + Added ``CUDA::nvml_static``. 
+ +.. _`NVIDIA Management Library`: https://developer.nvidia.com/management-library-nvml + +.. _`FindCUDAToolkit_nvToolsExt`: + +nvToolsExt +"""""""""" + +.. deprecated:: 3.25 + + With CUDA 10.0+, use `nvtx3`_. + Starting in CUDA 12.9 the `nvToolsExt` library no longer exists + +The `legacy NVIDIA Tools Extension`_. +This is a shared library only. + +Targets Created: + +- ``CUDA::nvToolsExt`` + +.. _`legacy NVIDIA Tools Extension`: https://docs.nvidia.com/cuda/archive/9.0/profiler-users-guide#nvtx + +.. _`FindCUDAToolkit_nvtx3`: + +nvtx3 +""""" + +.. versionadded:: 3.25 + +The header-only `NVIDIA Tools Extension`_ library. +Introduced in CUDA 10.0. + +Targets created: + +- ``CUDA::nvtx3`` + + +- ``CUDA::nvtx3_interop`` + + .. versionadded:: 4.1 + + This is provided by CUDA 12.9 and above for use by languages that + cannot consume C++ header-only libraries, such as ``Fortran``. + +.. _`NVIDIA Tools Extension`: https://nvidia.github.io/NVTX/doxygen + +OpenCL +"""""" + +The `NVIDIA Open Computing Language`_ library. +This is a shared library only. + +Targets Created: + +- ``CUDA::OpenCL`` + +.. _`NVIDIA Open Computing Language`: https://developer.nvidia.com/opencl + +cuLIBOS +""""""" + +The cuLIBOS library is a backend thread abstraction layer library which is +static only. The ``CUDA::cublas_static``, ``CUDA::cusparse_static``, +``CUDA::cufft_static``, ``CUDA::curand_static``, and (when implemented) NPP +libraries all automatically have this dependency linked. + +Target Created: + +- ``CUDA::culibos`` + +**Note**: direct usage of this target by consumers should not be necessary. + +.. _`FindCUDAToolkit_bin2c`: + +bin2c +""""" + +.. versionadded:: 4.3 + +A utility that converts binary files to C files containing byte arrays. + +Target Created: + +- ``CUDA::bin2c`` + +.. _`FindCUDAToolkit_sanitizer`: + +compute-sanitizer +""""""""""""""""" + +.. versionadded:: 4.4 + +The `NVIDIA Compute Sanitizer`_ library, which allows the tracing of CUDA +runtime and driver calls. 
+ +Target Created: + +- ``CUDA::sanitizer`` + +.. _`NVIDIA Compute Sanitizer`: https://docs.nvidia.com/compute-sanitizer + +Result Variables +^^^^^^^^^^^^^^^^ + +This module defines the following variables: + +``CUDAToolkit_FOUND`` + A boolean specifying whether or not the CUDA Toolkit was found. + +``CUDAToolkit_VERSION`` + The exact version of the CUDA Toolkit found (as reported by + ``nvcc --version``, ``version.txt``, or ``version.json``). + +``CUDAToolkit_VERSION_MAJOR`` + The major version of the CUDA Toolkit. + +``CUDAToolkit_VERSION_MINOR`` + The minor version of the CUDA Toolkit. + +``CUDAToolkit_VERSION_PATCH`` + The patch version of the CUDA Toolkit. + +``CUDAToolkit_BIN_DIR`` + The path to the CUDA Toolkit library directory that contains the CUDA + executable ``nvcc``. + +``CUDAToolkit_INCLUDE_DIRS`` + List of paths to all the CUDA Toolkit folders containing header files + required to compile a project linking against CUDA. + +``CUDAToolkit_LIBRARY_DIR`` + The path to the CUDA Toolkit library directory that contains the CUDA + Runtime library ``cudart``. + +``CUDAToolkit_LIBRARY_ROOT`` + .. versionadded:: 3.18 + + The path to the CUDA Toolkit directory containing the nvvm directory and + either version.txt or version.json. + +``CUDAToolkit_TARGET_DIR`` + The path to the CUDA Toolkit directory including the target architecture + when cross-compiling. When not cross-compiling this will be equivalent to + the parent directory of ``CUDAToolkit_BIN_DIR``. + +``CUDAToolkit_NVCC_EXECUTABLE`` + The path to the NVIDIA CUDA compiler ``nvcc``. Note that this path may + **not** be the same as + :variable:`CMAKE_CUDA_COMPILER _COMPILER>`. ``nvcc`` must be + found to determine the CUDA Toolkit version as well as determining other + features of the Toolkit. This variable is set for the convenience of + modules that depend on this one. 
+#]=======================================================================] + +# NOTE: much of this was simply extracted from FindCUDA.cmake. + +# James Bigler, NVIDIA Corp (nvidia.com - jbigler) +# Abe Stephens, SCI Institute -- http://www.sci.utah.edu/~abe/FindCuda.html +# +# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved. +# +# Copyright (c) 2007-2009 +# Scientific Computing and Imaging Institute, University of Utah +# +# This code is licensed under the MIT License. See the FindCUDA.cmake script +# for the text of the license. + +# The MIT License +# +# License for the specific language governing rights and limitations under +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+# +############################################################################### + +function(_CUDAToolkit_build_include_dirs result_variable default_paths_variable) + set(content "${${default_paths_variable}}") + set(${result_variable} "${content}" PARENT_SCOPE) +endfunction() + +function(_CUDAToolkit_build_library_dirs result_variable default_paths_variable) + set(content "${${default_paths_variable}}") + set(${result_variable} "${content}" PARENT_SCOPE) +endfunction() + +# The toolkit is located during compiler detection for CUDA and stored in CMakeCUDACompiler.cmake as +# - CMAKE_CUDA_COMPILER_TOOLKIT_ROOT +# - CMAKE_CUDA_COMPILER_LIBRARY_ROOT +# - CMAKE_CUDA_COMPILER_LIBRARY_DIRECTORIES_FROM_IMPLICIT_LIBRARIES +# - CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES +# We compute the rest based on those here to avoid re-searching and to avoid finding a possibly +# different installation. +if(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT) + set(CUDAToolkit_ROOT_DIR "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}") + set(CUDAToolkit_LIBRARY_ROOT "${CMAKE_CUDA_COMPILER_LIBRARY_ROOT}") + _CUDAToolkit_build_library_dirs(CUDAToolkit_IMPLICIT_LIBRARY_DIRECTORIES CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES) + _CUDAToolkit_build_include_dirs(CUDAToolkit_INCLUDE_DIRECTORIES CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES) + set(CUDAToolkit_BIN_DIR "${CUDAToolkit_ROOT_DIR}/bin") + set(CUDAToolkit_NVCC_EXECUTABLE "${CUDAToolkit_BIN_DIR}/nvcc${CMAKE_EXECUTABLE_SUFFIX}") + set(CUDAToolkit_VERSION "${CMAKE_CUDA_COMPILER_TOOLKIT_VERSION}") + + if(CUDAToolkit_VERSION MATCHES [=[([0-9]+)\.([0-9]+)\.([0-9]+)]=]) + set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}") + set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}") + set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}") + endif() +else() + function(_CUDAToolkit_find_root_dir ) + cmake_parse_arguments(arg "COMPILER_PATHS" "" "SEARCH_PATHS;FIND_FLAGS" ${ARGN}) + + if(NOT CUDAToolkit_BIN_DIR) + if(arg_COMPILER_PATHS) + # need to find parent dir, since this could clang and not nvcc 
+ if(EXISTS "${CMAKE_CUDA_COMPILER}") + get_filename_component(possible_nvcc_path "${CMAKE_CUDA_COMPILER}" PROGRAM PROGRAM_ARGS CUDAToolkit_compiler_args) + get_filename_component(possible_nvcc_path "${possible_nvcc_path}" DIRECTORY) + elseif(EXISTS "$ENV{CUDACXX}") + get_filename_component(possible_nvcc_path "$ENV{CUDACXX}" PROGRAM PROGRAM_ARGS CUDAToolkit_compiler_args) + get_filename_component(possible_nvcc_path "${possible_nvcc_path}" DIRECTORY) + endif() + if(possible_nvcc_path) + find_program(CUDAToolkit_NVCC_EXECUTABLE + NAMES nvcc nvcc.exe + NO_DEFAULT_PATH + PATHS ${possible_nvcc_path} + ) + endif() + else() + if(NOT CUDAToolkit_SENTINEL_FILE) + find_program(CUDAToolkit_NVCC_EXECUTABLE + NAMES nvcc nvcc.exe + PATHS ${arg_SEARCH_PATHS} + ${arg_FIND_FLAGS} + ) + endif() + + if(NOT CUDAToolkit_NVCC_EXECUTABLE) + find_file(CUDAToolkit_SENTINEL_FILE + NAMES version.txt version.json + PATHS ${arg_SEARCH_PATHS} + NO_DEFAULT_PATH + ) + endif() + endif() + + if(EXISTS "${CUDAToolkit_NVCC_EXECUTABLE}") + # If NVCC exists then invoke it to find the toolkit location. + # This allows us to support wrapper scripts (e.g. ccache or colornvcc), CUDA Toolkit, + # NVIDIA HPC SDK, and distro's splayed layouts + + + #Allow the user to specify a host compiler except for Visual Studio + if(NOT $ENV{CUDAHOSTCXX} STREQUAL "") + get_filename_component(CUDAToolkit_CUDA_HOST_COMPILER $ENV{CUDAHOSTCXX} PROGRAM) + if(NOT EXISTS ${CUDAToolkit_CUDA_HOST_COMPILER}) + message(FATAL_ERROR "Could not find the compiler specified in the environment variable CUDAHOSTCXX:\n$ENV{CUDAHOSTCXX}.\n${CUDAToolkit_CUDA_HOST_COMPILER}") + endif() + elseif(CUDAToolkit_CUDA_HOST_COMPILER) + # We get here if CUDAToolkit_CUDA_HOST_COMPILER was specified by the user or toolchain file. + if(IS_ABSOLUTE "${CUDAToolkit_CUDA_HOST_COMPILER}") + # Convert to forward slashes. 
+ cmake_path(CONVERT "${CUDAToolkit_CUDA_HOST_COMPILER}" TO_CMAKE_PATH_LIST CUDAToolkit_CUDA_HOST_COMPILER NORMALIZE) + else() + # Convert to absolute path so changes in `PATH` do not impact CUDA compilation. + find_program(_CUDAToolkit_CUDA_HOST_COMPILER_PATH NO_CACHE NAMES "${CUDAToolkit_CUDA_HOST_COMPILER}") + if(_CUDAToolkit_CUDA_HOST_COMPILER_PATH) + set(CUDAToolkit_CUDA_HOST_COMPILER "${_CUDAToolkit_CUDA_HOST_COMPILER_PATH}") + endif() + unset(_CUDAToolkit_CUDA_HOST_COMPILER_PATH) + endif() + if(NOT EXISTS "${CUDAToolkit_CUDA_HOST_COMPILER}") + message(FATAL_ERROR "Could not find the compiler specified in the variable CUDAToolkit_CUDA_HOST_COMPILER:\n ${CUDAToolkit_CUDA_HOST_COMPILER}") + endif() + # If the value was cached, update the cache entry with our modifications. + get_property(_CUDAToolkit_CUDA_HOST_COMPILER_CACHED CACHE CUDAToolkit_CUDA_HOST_COMPILER PROPERTY TYPE) + if(_CUDAToolkit_CUDA_HOST_COMPILER_CACHED) + set_property(CACHE CUDAToolkit_CUDA_HOST_COMPILER PROPERTY VALUE "${CUDAToolkit_CUDA_HOST_COMPILER}") + mark_as_advanced(CUDAToolkit_CUDA_HOST_COMPILER) + endif() + unset(_CUDAToolkit_CUDA_HOST_COMPILER_CACHED) + endif() + + if(CUDAToolkit_CUDA_HOST_COMPILER) + set(nvcc_ccbin_flag "-ccbin=${CUDAToolkit_CUDA_HOST_COMPILER}") + endif() + execute_process(COMMAND ${CUDAToolkit_NVCC_EXECUTABLE} "${nvcc_ccbin_flag}" "-v" "__cmake_determine_cuda" + OUTPUT_VARIABLE _CUDA_NVCC_OUT ERROR_VARIABLE _CUDA_NVCC_OUT) + message(CONFIGURE_LOG + "Executed nvcc to extract CUDAToolkit information:\n${_CUDA_NVCC_OUT}\n\n") + if(_CUDA_NVCC_OUT MATCHES "\\#\\$ TOP=([^\r\n]*)") + get_filename_component(CUDAToolkit_BIN_DIR "${CMAKE_MATCH_1}/bin" ABSOLUTE) + message(CONFIGURE_LOG + "Parsed CUDAToolkit nvcc location:\n${CUDAToolkit_BIN_DIR}\n\n") + else() + get_filename_component(CUDAToolkit_BIN_DIR "${CUDAToolkit_NVCC_EXECUTABLE}" DIRECTORY) + endif() + if(_CUDA_NVCC_OUT MATCHES "\\#\\$ INCLUDES=([^\r\n]*)") + separate_arguments(_nvcc_output NATIVE_COMMAND 
"${CMAKE_MATCH_1}") + foreach(line IN LISTS _nvcc_output) + string(REGEX REPLACE "^-I" "" line "${line}") + get_filename_component(line "${line}" ABSOLUTE) + list(APPEND _cmake_CUDAToolkit_include_directories "${line}") + endforeach() + endif() + if(_CUDA_NVCC_OUT MATCHES "\\#\\$ SYSTEM_INCLUDES=([^\r\n]*)") + unset(_nvcc_output) + separate_arguments(_nvcc_output NATIVE_COMMAND "${CMAKE_MATCH_1}") + foreach(line IN LISTS _nvcc_output) + string(REGEX REPLACE "^-isystem" "" line "${line}") + if(line) + get_filename_component(line "${line}" ABSOLUTE) + list(APPEND _cmake_CUDAToolkit_include_directories "${line}") + endif() + endforeach() + endif() + if(DEFINED _cmake_CUDAToolkit_include_directories) + message(CONFIGURE_LOG + "Parsed CUDAToolkit nvcc implicit include information:\n${_cmake_CUDAToolkit_include_directories}\n\n") + set(_cmake_CUDAToolkit_include_directories "${_cmake_CUDAToolkit_include_directories}" CACHE INTERNAL "CUDAToolkit internal list of include directories") + endif() + if(_CUDA_NVCC_OUT MATCHES "\\#\\$ LIBRARIES=([^\r\n]*)") + include(${CMAKE_ROOT}/Modules/CMakeParseImplicitLinkInfo.cmake) + set(_nvcc_link_line "cuda-fake-ld ${CMAKE_MATCH_1}") + CMAKE_PARSE_IMPLICIT_LINK_INFO("${_nvcc_link_line}" + _cmake_CUDAToolkit_implicit_link_libs + _cmake_CUDAToolkit_implicit_link_directories + _cmake_CUDAToolkit_implicit_frameworks + _nvcc_log + "${CMAKE_CUDA_IMPLICIT_OBJECT_REGEX}" + LANGUAGE CUDA) + message(CONFIGURE_LOG + "Parsed CUDAToolkit nvcc implicit link information:\n${_nvcc_log}\n${_cmake_CUDAToolkit_implicit_link_directories}\n\n") + unset(_nvcc_link_line) + unset(_cmake_CUDAToolkit_implicit_link_libs) + unset(_cmake_CUDAToolkit_implicit_frameworks) + + set(_cmake_CUDAToolkit_implicit_link_directories "${_cmake_CUDAToolkit_implicit_link_directories}" CACHE INTERNAL "CUDAToolkit internal list of implicit link directories") + endif() + unset(_CUDA_NVCC_OUT) + + set(CUDAToolkit_BIN_DIR "${CUDAToolkit_BIN_DIR}" CACHE PATH "" FORCE) + 
mark_as_advanced(CUDAToolkit_BIN_DIR) + endif() + + if(CUDAToolkit_SENTINEL_FILE) + get_filename_component(CUDAToolkit_BIN_DIR ${CUDAToolkit_SENTINEL_FILE} DIRECTORY ABSOLUTE) + set(CUDAToolkit_BIN_DIR "${CUDAToolkit_BIN_DIR}/bin") + + set(CUDAToolkit_BIN_DIR "${CUDAToolkit_BIN_DIR}" CACHE PATH "" FORCE) + mark_as_advanced(CUDAToolkit_BIN_DIR) + endif() + endif() + + if(DEFINED _cmake_CUDAToolkit_include_directories) + _CUDAToolkit_build_include_dirs(_cmake_CUDAToolkit_contents _cmake_CUDAToolkit_include_directories) + set(CUDAToolkit_INCLUDE_DIRECTORIES "${_cmake_CUDAToolkit_contents}" PARENT_SCOPE) + endif() + if(DEFINED _cmake_CUDAToolkit_implicit_link_directories) + _CUDAToolkit_build_library_dirs(_cmake_CUDAToolkit_contents _cmake_CUDAToolkit_implicit_link_directories) + set(CUDAToolkit_IMPLICIT_LIBRARY_DIRECTORIES "${_cmake_CUDAToolkit_contents}" PARENT_SCOPE) + endif() + + if(CUDAToolkit_BIN_DIR) + get_filename_component(CUDAToolkit_ROOT_DIR ${CUDAToolkit_BIN_DIR} DIRECTORY ABSOLUTE) + set(CUDAToolkit_ROOT_DIR "${CUDAToolkit_ROOT_DIR}" PARENT_SCOPE) + endif() + + endfunction() + + function(_CUDAToolkit_guess_root_dir) + # CUDAToolkit_ROOT cmake / env variable not specified, try platform defaults. + # + # - Linux: /usr/local/cuda-X.Y + # - macOS: /Developer/NVIDIA/CUDA-X.Y + # - Windows: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\vX.Y + # + # We will also search the default symlink location /usr/local/cuda first since + # if CUDAToolkit_ROOT is not specified, it is assumed that the symlinked + # directory is the desired location. + if(UNIX) + if(NOT APPLE) + set(platform_base "/usr/local/cuda-") + else() + set(platform_base "/Developer/NVIDIA/CUDA-") + endif() + else() + set(platform_base "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v") + endif() + + # Build out a descending list of possible cuda installations, e.g. + file(GLOB possible_paths "${platform_base}*") + # Iterate the glob results and create a descending list. 
+ set(versions) + foreach(p ${possible_paths}) + # Extract version number from end of string + string(REGEX MATCH "[0-9][0-9]?\\.[0-9]$" p_version ${p}) + if(IS_DIRECTORY ${p} AND p_version) + list(APPEND versions ${p_version}) + endif() + endforeach() + + # Sort numerically in descending order, so we try the newest versions first. + list(SORT versions COMPARE NATURAL ORDER DESCENDING) + + # With a descending list of versions, populate possible paths to search. + set(search_paths) + foreach(v ${versions}) + list(APPEND search_paths "${platform_base}${v}") + endforeach() + + # Force the global default /usr/local/cuda to the front on Unix. + if(UNIX) + list(INSERT search_paths 0 "/usr/local/cuda") + endif() + + # Now search for the toolkit again using the platform default search paths. + _CUDAToolkit_find_root_dir(SEARCH_PATHS "${search_paths}" FIND_FLAGS PATH_SUFFIXES bin) + if(CUDAToolkit_ROOT_DIR) + set(CUDAToolkit_ROOT_DIR "${CUDAToolkit_ROOT_DIR}" PARENT_SCOPE) + endif() + + # We are done with these variables now, cleanup for caller. + unset(platform_base) + unset(possible_paths) + unset(versions) + unset(search_paths) + endfunction() + + function(_CUDAToolkit_find_version_file result_variable) + # We first check for a non-scattered installation to prefer it over a scattered installation. 
+ set(version_files version.txt version.json) + foreach(vf IN LISTS version_files) + if(CUDAToolkit_ROOT AND EXISTS "${CUDAToolkit_ROOT}/${vf}") + set(${result_variable} "${CUDAToolkit_ROOT}/${vf}" PARENT_SCOPE) + break() + elseif(CUDAToolkit_ROOT_DIR AND EXISTS "${CUDAToolkit_ROOT_DIR}/${vf}") + set(${result_variable} "${CUDAToolkit_ROOT_DIR}/${vf}" PARENT_SCOPE) + break() + elseif(CMAKE_SYSROOT_LINK AND EXISTS "${CMAKE_SYSROOT_LINK}/usr/lib/cuda/${vf}") + set(${result_variable} "${CMAKE_SYSROOT_LINK}/usr/lib/cuda/${vf}" PARENT_SCOPE) + break() + elseif(EXISTS "${CMAKE_SYSROOT}/usr/lib/cuda/${vf}") + set(${result_variable} "${CMAKE_SYSROOT}/usr/lib/cuda/${vf}" PARENT_SCOPE) + break() + endif() + endforeach() + endfunction() + + function(_CUDAToolkit_parse_version_file version_file) + if(version_file) + file(READ "${version_file}" file_contents) + cmake_path(GET version_file EXTENSION LAST_ONLY version_ext) + if(version_ext STREQUAL ".json") + string(JSON cuda_version_info GET "${file_contents}" "cuda" "version") + set(cuda_version_match_regex [=[([0-9]+)\.([0-9]+)\.([0-9]+)]=]) + elseif(version_ext STREQUAL ".txt") + set(cuda_version_info "${file_contents}") + set(cuda_version_match_regex [=[CUDA Version ([0-9]+)\.([0-9]+)\.([0-9]+)]=]) + endif() + + if(cuda_version_info MATCHES "${cuda_version_match_regex}") + set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}" PARENT_SCOPE) + set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}" PARENT_SCOPE) + set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}" PARENT_SCOPE) + set(CUDAToolkit_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}" PARENT_SCOPE) + endif() + endif() + endfunction() + + macro(_CUDAToolkit_find_failure_message _CUDAToolkit_fail_mode) + # Declare error messages now, print later depending on find_package args. 
+ if("${_CUDAToolkit_fail_mode}" STREQUAL "GUESS") + set(_CUDAToolkit_fail_message "Could not find `nvcc` executable in any searched paths, please set CUDAToolkit_ROOT") + elseif("${_CUDAToolkit_fail_mode}" STREQUAL "VARIABLE") + set(_CUDAToolkit_fail_message "Could not find `nvcc` executable in path specified by variable CUDAToolkit_ROOT=${CUDAToolkit_ROOT}") + else() + set(_CUDAToolkit_fail_message "Could not find `nvcc` executable in path specified by environment variable CUDAToolkit_ROOT=$ENV{CUDAToolkit_ROOT}") + endif() + + if(CUDAToolkit_FIND_REQUIRED) + message(FATAL_ERROR ${_CUDAToolkit_fail_message}) + else() + if(NOT CUDAToolkit_FIND_QUIETLY) + message(STATUS ${_CUDAToolkit_fail_message}) + endif() + set(CUDAToolkit_FOUND FALSE) + unset(_CUDAToolkit_fail_message) + return() + endif() + endmacro() + + # For NVCC we can easily deduce the SDK binary directory from the compiler path. + if(CMAKE_CUDA_COMPILER_LOADED AND NOT CUDAToolkit_BIN_DIR AND CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") + get_filename_component(CUDAToolkit_BIN_DIR "${CMAKE_CUDA_COMPILER}" DIRECTORY) + set(CUDAToolkit_BIN_DIR "${CUDAToolkit_BIN_DIR}" CACHE PATH "") + # Try language provided path first. + _CUDAToolkit_find_root_dir(SEARCH_PATHS "${CUDAToolkit_BIN_DIR}" FIND_FLAGS NO_DEFAULT_PATH) + mark_as_advanced(CUDAToolkit_BIN_DIR) + endif() + + # Try `CMAKE_CUDA_COMPILER` and `ENV{CUDACXX}` + if(NOT CUDAToolkit_ROOT_DIR) + _CUDAToolkit_find_root_dir(COMPILER_PATHS) + endif() + + # Try user provided path + if(NOT CUDAToolkit_ROOT_DIR AND DEFINED CUDAToolkit_ROOT) + _CUDAToolkit_find_root_dir(SEARCH_PATHS "${CUDAToolkit_ROOT}" FIND_FLAGS PATH_SUFFIXES bin NO_DEFAULT_PATH) + if(NOT CUDAToolkit_ROOT_DIR) + # If the user specified CUDAToolkit_ROOT but the toolkit could not be found, this is an error. 
+ _CUDAToolkit_find_failure_message(VARIABLE) + endif() + endif() + + if(NOT CUDAToolkit_ROOT_DIR AND DEFINED ENV{CUDAToolkit_ROOT}) + _CUDAToolkit_find_root_dir(SEARCH_PATHS "$ENV{CUDAToolkit_ROOT}" FIND_FLAGS PATH_SUFFIXES bin NO_DEFAULT_PATH) + if(NOT CUDAToolkit_ROOT_DIR) + # If the user specified ENV{CUDAToolkit_ROOT} but the toolkit could not be found, this is an error. + _CUDAToolkit_find_failure_message(ENV) + endif() + endif() + + # Try users PATH, and CUDA_PATH env variable + if(NOT CUDAToolkit_ROOT_DIR) + _CUDAToolkit_find_root_dir(FIND_FLAGS PATHS ENV CUDA_PATH PATH_SUFFIXES bin) + endif() + + # Try guessing where CUDA is installed + if(NOT CUDAToolkit_ROOT_DIR) + _CUDAToolkit_guess_root_dir() + if(NOT CUDAToolkit_ROOT_DIR) + _CUDAToolkit_find_failure_message(GUESS) + endif() + endif() + + _CUDAToolkit_find_version_file( _CUDAToolkit_version_file ) + if(_CUDAToolkit_version_file) + # CUDAToolkit_LIBRARY_ROOT contains the device library and version file. + get_filename_component(CUDAToolkit_LIBRARY_ROOT "${_CUDAToolkit_version_file}" DIRECTORY ABSOLUTE) + endif() + unset(_CUDAToolkit_version_file) + + if(CUDAToolkit_NVCC_EXECUTABLE AND + CMAKE_CUDA_COMPILER_VERSION AND + CUDAToolkit_NVCC_EXECUTABLE STREQUAL CMAKE_CUDA_COMPILER) + # Need to set these based off the already computed CMAKE_CUDA_COMPILER_VERSION value + # This if statement will always match, but is used to provide variables for MATCH 1,2,3... 
+ if(CMAKE_CUDA_COMPILER_VERSION MATCHES [=[([0-9]+)\.([0-9]+)\.([0-9]+)]=]) + set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}") + set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}") + set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}") + set(CUDAToolkit_VERSION "${CMAKE_CUDA_COMPILER_VERSION}") + endif() + elseif(CUDAToolkit_NVCC_EXECUTABLE) + # Compute the version by invoking nvcc + execute_process(COMMAND ${CUDAToolkit_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT) + if(NVCC_OUT MATCHES [=[ V([0-9]+)\.([0-9]+)\.([0-9]+)]=]) + set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}") + set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}") + set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}") + set(CUDAToolkit_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}") + endif() + unset(NVCC_OUT) + else() + _CUDAToolkit_find_version_file(version_file) + _CUDAToolkit_parse_version_file("${version_file}") + endif() +endif() + +# Figure out the target directory when either crosscompiling +# or if we don't have `nvcc` and need to deduce the target arch +if(CMAKE_CROSSCOMPILING OR NOT CUDAToolkit_NVCC_EXECUTABLE) + # When a language is enabled we can use its compiler's target architecture. + if(CMAKE_CUDA_COMPILER_LOADED AND CMAKE_CUDA_COMPILER_ARCHITECTURE_ID) + set(_CUDA_TARGET_PROCESSOR "${CMAKE_CUDA_COMPILER_ARCHITECTURE_ID}") + elseif(CMAKE_CXX_COMPILER_LOADED AND CMAKE_CXX_COMPILER_ARCHITECTURE_ID) + set(_CUDA_TARGET_PROCESSOR "${CMAKE_CXX_COMPILER_ARCHITECTURE_ID}") + elseif(CMAKE_C_COMPILER_LOADED AND CMAKE_C_COMPILER_ARCHITECTURE_ID) + set(_CUDA_TARGET_PROCESSOR "${CMAKE_C_COMPILER_ARCHITECTURE_ID}") + elseif(CMAKE_SYSTEM_PROCESSOR) + set(_CUDA_TARGET_PROCESSOR "${CMAKE_SYSTEM_PROCESSOR}") + elseif(CMAKE_CROSSCOMPILING) + message(FATAL_ERROR "Cross-compiling with the CUDA toolkit requires CMAKE_SYSTEM_PROCESSOR to be set.") + endif() + # Keep in sync with equivalent table in CMakeDetermineCUDACompiler and FindCUDA! 
+ if(_CUDA_TARGET_PROCESSOR STREQUAL "armv7-a") + # Support for NVPACK + set(CUDAToolkit_TARGET_NAMES "armv7-linux-androideabi") + elseif(_CUDA_TARGET_PROCESSOR MATCHES "arm") + set(CUDAToolkit_TARGET_NAMES "armv7-linux-gnueabihf") + elseif(_CUDA_TARGET_PROCESSOR MATCHES "aarch64") + if(ANDROID_ARCH_NAME STREQUAL "arm64") + set(CUDAToolkit_TARGET_NAMES "aarch64-linux-androideabi") + elseif (CMAKE_SYSTEM_NAME STREQUAL "QNX") + set(CUDAToolkit_TARGET_NAMES "aarch64-qnx") + else() + set(CUDAToolkit_TARGET_NAMES "aarch64-linux" "sbsa-linux") + endif() + elseif(_CUDA_TARGET_PROCESSOR STREQUAL "x86_64") + set(CUDAToolkit_TARGET_NAMES "x86_64-linux") + endif() + unset(_CUDA_TARGET_PROCESSOR) + + foreach(CUDAToolkit_TARGET_NAME IN LISTS CUDAToolkit_TARGET_NAMES) + if(EXISTS "${CUDAToolkit_ROOT_DIR}/targets/${CUDAToolkit_TARGET_NAME}") + set(CUDAToolkit_TARGET_DIR "${CUDAToolkit_ROOT_DIR}/targets/${CUDAToolkit_TARGET_NAME}") + # add known CUDA target root path to the set of directories we search for programs, libraries and headers + list(PREPEND CMAKE_FIND_ROOT_PATH "${CUDAToolkit_TARGET_DIR}") + + # Mark that we need to pop the root search path changes after we have + # found all cuda libraries so that searches for our cross-compilation + # libraries work when another cuda sdk is in CMAKE_PREFIX_PATH or + # PATH + set(_CUDAToolkit_Pop_ROOT_PATH True) + break() + endif() + endforeach() +endif() + + #If not already set we simply use the toolkit root +if(NOT CUDAToolkit_TARGET_DIR) + set(CUDAToolkit_TARGET_DIR "${CUDAToolkit_ROOT_DIR}") +endif() + +# Determine windows search path suffix for libraries +if(CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows") + if(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "AMD64") + set(_CUDAToolkit_win_search_dirs lib/x64) + set(_CUDAToolkit_win_stub_search_dirs lib/x64/stubs) + endif() +endif() + +# We don't need to verify the cuda_runtime header when we are using `nvcc` include paths +# as the compiler being enabled means the header was found +if(NOT 
CUDAToolkit_INCLUDE_DIRECTORIES) + # Otherwise use CUDAToolkit_TARGET_DIR to guess where the `cuda_runtime.h` is located + # On a scattered installation /usr, on a non-scattered something like /usr/local/cuda or /usr/local/cuda-10.2/targets/aarch64-linux. + if(EXISTS "${CUDAToolkit_TARGET_DIR}/include/cuda_runtime.h") + set(CUDAToolkit_INCLUDE_DIRECTORIES "${CUDAToolkit_TARGET_DIR}/include") + else() + message(STATUS "Unable to find cuda_runtime.h in \"${CUDAToolkit_TARGET_DIR}/include\" for CUDAToolkit_INCLUDE_DIRECTORIES.") + endif() +endif() + +# The NVHPC layout moves math library headers and libraries to a sibling directory and it could be nested under +# the version of the CUDA toolchain +# Create a separate variable so this directory can be selectively added to math targets. +find_path(CUDAToolkit_CUBLAS_INCLUDE_DIR cublas_v2.h PATHS + ${CUDAToolkit_INCLUDE_DIRECTORIES} + NO_DEFAULT_PATH) + +if(NOT CUDAToolkit_CUBLAS_INCLUDE_DIR) + file(REAL_PATH "${CUDAToolkit_TARGET_DIR}" CUDAToolkit_MATH_INCLUDE_DIR) + cmake_path(APPEND CUDAToolkit_MATH_INCLUDE_DIR "../../math_libs/") + if(EXISTS "${CUDAToolkit_MATH_INCLUDE_DIR}/${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}/") + cmake_path(APPEND CUDAToolkit_MATH_INCLUDE_DIR "${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}/") + endif() + cmake_path(APPEND CUDAToolkit_MATH_INCLUDE_DIR "include") + cmake_path(NORMAL_PATH CUDAToolkit_MATH_INCLUDE_DIR) + + find_path(CUDAToolkit_CUBLAS_INCLUDE_DIR cublas_v2.h PATHS + ${CUDAToolkit_MATH_INCLUDE_DIR} + NO_DEFAULT_PATH + ) + if(CUDAToolkit_CUBLAS_INCLUDE_DIR) + list(APPEND CUDAToolkit_INCLUDE_DIRECTORIES "${CUDAToolkit_CUBLAS_INCLUDE_DIR}") + endif() +endif() +unset(CUDAToolkit_CUBLAS_INCLUDE_DIR CACHE) +unset(CUDAToolkit_CUBLAS_INCLUDE_DIR) + +# Find the CUDA Runtime Library libcudart +find_library(CUDA_CUDART + NAMES cudart + PATHS ${CUDAToolkit_IMPLICIT_LIBRARY_DIRECTORIES} ${CUDAToolkit_TARGET_DIR} + PATH_SUFFIXES lib64 ${_CUDAToolkit_win_search_dirs} +) 
+find_library(CUDA_CUDART + NAMES cudart + PATHS ${CUDAToolkit_IMPLICIT_LIBRARY_DIRECTORIES} ${CUDAToolkit_TARGET_DIR} + PATH_SUFFIXES lib64/stubs ${_CUDAToolkit_win_stub_search_dirs} lib/stubs stubs +) + +if(NOT CUDA_CUDART AND NOT CUDAToolkit_FIND_QUIETLY) + message(STATUS "Unable to find cudart library.") +endif() + +#----------------------------------------------------------------------------- +# Perform version comparison and validate all required variables are set. +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(CUDAToolkit + REQUIRED_VARS + CUDAToolkit_INCLUDE_DIRECTORIES + CUDA_CUDART + CUDAToolkit_BIN_DIR + VERSION_VAR + CUDAToolkit_VERSION +) + +unset(CUDAToolkit_ROOT_DIR) +mark_as_advanced(CUDA_CUDART + CUDAToolkit_NVCC_EXECUTABLE + CUDAToolkit_SENTINEL_FILE + ) + +#----------------------------------------------------------------------------- +# Construct result variables +if(CUDAToolkit_FOUND) + set(CUDAToolkit_INCLUDE_DIRS "${CUDAToolkit_INCLUDE_DIRECTORIES}") + get_filename_component(CUDAToolkit_LIBRARY_DIR ${CUDA_CUDART} DIRECTORY ABSOLUTE) + + # Build search paths without any symlinks + file(REAL_PATH "${CUDAToolkit_LIBRARY_DIR}" _cmake_search_dir) + set(CUDAToolkit_LIBRARY_SEARCH_DIRS "${_cmake_search_dir}") + + # Detect we are in a splayed nvhpc toolkit layout and add extra + # search paths without symlinks + # + # When the `nvcc` compiler output is parsed we have already resolved + # symlinks so we have `cuda/12.X/targets/....` and not `cuda/12.X/lib64`. 
+ if(CUDAToolkit_LIBRARY_DIR MATCHES ".*/cuda/${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}/(lib64$|targets/)") + # Search location for math_libs/ + block(SCOPE_FOR POLICIES) + cmake_policy(SET CMP0152 NEW) + file(REAL_PATH "${CUDAToolkit_LIBRARY_DIR}/../../../../../" _cmake_search_dir) + list(APPEND CUDAToolkit_LIBRARY_SEARCH_DIRS "${_cmake_search_dir}") + + # Search location for extras like cupti + file(REAL_PATH "${CUDAToolkit_LIBRARY_DIR}/../../../" _cmake_search_dir) + list(APPEND CUDAToolkit_LIBRARY_SEARCH_DIRS "${_cmake_search_dir}") + endblock() + endif() + + if(DEFINED CUDAToolkit_IMPLICIT_LIBRARY_DIRECTORIES) + list(APPEND CUDAToolkit_LIBRARY_SEARCH_DIRS "${CUDAToolkit_IMPLICIT_LIBRARY_DIRECTORIES}") + endif() + + # If no `CUDAToolkit_LIBRARY_ROOT` exists set it based on CUDAToolkit_LIBRARY_DIR + if(NOT DEFINED CUDAToolkit_LIBRARY_ROOT) + foreach(CUDAToolkit_search_loc IN LISTS CUDAToolkit_LIBRARY_DIR CUDAToolkit_BIN_DIR) + get_filename_component(CUDAToolkit_possible_lib_root "${CUDAToolkit_search_loc}" DIRECTORY ABSOLUTE) + if(EXISTS "${CUDAToolkit_possible_lib_root}/nvvm/") + set(CUDAToolkit_LIBRARY_ROOT "${CUDAToolkit_possible_lib_root}") + break() + endif() + endforeach() + unset(CUDAToolkit_search_loc) + unset(CUDAToolkit_possible_lib_root) + endif() +else() + # clear cache results when we fail + unset(_cmake_CUDAToolkit_implicit_link_directories CACHE) + unset(_cmake_CUDAToolkit_include_directories CACHE) + unset(CUDA_CUDART CACHE) + unset(CUDAToolkit_BIN_DIR CACHE) + unset(CUDAToolkit_NVCC_EXECUTABLE CACHE) + unset(CUDAToolkit_SENTINEL_FILE CACHE) +endif() +unset(CUDAToolkit_IMPLICIT_LIBRARY_DIRECTORIES) +unset(CUDAToolkit_INCLUDE_DIRECTORIES) + +# CUDAToolkit_LIBRARY_ROOT is accidentally set to the target directory in some environments +# when the CUDA language is enabled, so patch it out +if(CUDAToolkit_LIBRARY_ROOT MATCHES "^(.*)/targets/([^/]*)$") + set(CUDAToolkit_LIBRARY_ROOT "${CMAKE_MATCH_1}") +endif() + 
+#----------------------------------------------------------------------------- +# Construct import targets +if(CUDAToolkit_FOUND) + + function(_CUDAToolkit_find_and_add_import_lib lib_name) + cmake_parse_arguments(arg "" "" "ALT;DEPS;EXTRA_PATH_SUFFIXES;EXTRA_INCLUDE_DIRS;ONLY_SEARCH_FOR;LIBRARY_SEARCH_DIRS" ${ARGN}) + + if(NOT arg_LIBRARY_SEARCH_DIRS) + set(arg_LIBRARY_SEARCH_DIRS "${CUDAToolkit_LIBRARY_SEARCH_DIRS}") + endif() + + if(arg_ONLY_SEARCH_FOR) + set(search_names ${arg_ONLY_SEARCH_FOR}) + else() + set(search_names ${lib_name} ${arg_ALT}) + endif() + + find_library(CUDA_${lib_name}_LIBRARY + NAMES ${search_names} + HINTS ${arg_LIBRARY_SEARCH_DIRS} + ENV CUDA_PATH + PATH_SUFFIXES nvidia/current lib64 ${_CUDAToolkit_win_search_dirs} lib + # Support NVHPC splayed math library layout + math_libs/${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}/lib64 + math_libs/lib64 + ${arg_EXTRA_PATH_SUFFIXES} + ) + # Don't try any stub directories until we have exhausted all other + # search locations. + set(CUDA_IMPORT_PROPERTY IMPORTED_LOCATION) + set(CUDA_IMPORT_TYPE UNKNOWN) + if(NOT CUDA_${lib_name}_LIBRARY) + find_library(CUDA_${lib_name}_LIBRARY + NAMES ${search_names} + HINTS ${arg_LIBRARY_SEARCH_DIRS} + ENV CUDA_PATH + PATH_SUFFIXES lib64/stubs ${_CUDAToolkit_win_stub_search_dirs} lib/stubs stubs + ) + endif() + if(CUDA_${lib_name}_LIBRARY MATCHES "/stubs/" AND NOT CUDA_${lib_name}_LIBRARY MATCHES "\\.a$" AND NOT WIN32) + # Use a SHARED library with IMPORTED_IMPLIB, but not IMPORTED_LOCATION, + # to indicate that the stub is for linkers but not dynamic loaders. + # It will not contribute any RPATH entry. When encountered as + # a private transitive dependency of another shared library, + # it will be passed explicitly to linkers so they can find it + # even when the runtime library file does not exist on disk. 
+ set(CUDA_IMPORT_PROPERTY IMPORTED_IMPLIB) + set(CUDA_IMPORT_TYPE SHARED) + endif() + + mark_as_advanced(CUDA_${lib_name}_LIBRARY) + + if (NOT TARGET CUDA::${lib_name} AND CUDA_${lib_name}_LIBRARY) + add_library(CUDA::${lib_name} ${CUDA_IMPORT_TYPE} IMPORTED) + target_include_directories(CUDA::${lib_name} SYSTEM INTERFACE "${CUDAToolkit_INCLUDE_DIRS}") + if(DEFINED CUDAToolkit_MATH_INCLUDE_DIR) + string(FIND ${CUDA_${lib_name}_LIBRARY} "math_libs" math_libs) + if(NOT ${math_libs} EQUAL -1) + target_include_directories(CUDA::${lib_name} SYSTEM INTERFACE "${CUDAToolkit_MATH_INCLUDE_DIR}") + endif() + endif() + set_property(TARGET CUDA::${lib_name} PROPERTY ${CUDA_IMPORT_PROPERTY} "${CUDA_${lib_name}_LIBRARY}") + foreach(dep ${arg_DEPS}) + if(TARGET CUDA::${dep}) + target_link_libraries(CUDA::${lib_name} INTERFACE CUDA::${dep}) + endif() + endforeach() + if(arg_EXTRA_INCLUDE_DIRS) + target_include_directories(CUDA::${lib_name} SYSTEM INTERFACE "${arg_EXTRA_INCLUDE_DIRS}") + endif() + endif() + endfunction() + + if(NOT TARGET CUDA::toolkit) + add_library(CUDA::toolkit IMPORTED INTERFACE) + target_include_directories(CUDA::toolkit SYSTEM INTERFACE "${CUDAToolkit_INCLUDE_DIRS}") + target_link_directories(CUDA::toolkit INTERFACE "${CUDAToolkit_LIBRARY_DIR}") + endif() + + # setup dependencies that are required for cudart/cudart_static when building + # on linux. These are generally only required when using the CUDA toolkit + # when CUDA language is disabled + if(NOT TARGET CUDA::cudart_static_deps) + add_library(CUDA::cudart_static_deps IMPORTED INTERFACE) + if(UNIX AND (CMAKE_C_COMPILER_LOADED OR CMAKE_CXX_COMPILER_LOADED)) + find_package(Threads REQUIRED) + target_link_libraries(CUDA::cudart_static_deps INTERFACE Threads::Threads ${CMAKE_DL_LIBS}) + endif() + + if(UNIX AND NOT APPLE AND NOT (CMAKE_SYSTEM_NAME STREQUAL "QNX")) + # On Linux, you must link against librt when using the static cuda runtime. 
+ find_library(CUDAToolkit_rt_LIBRARY rt) + mark_as_advanced(CUDAToolkit_rt_LIBRARY) + if(NOT CUDAToolkit_rt_LIBRARY) + message(WARNING "Could not find librt library, needed by CUDA::cudart_static") + else() + target_link_libraries(CUDA::cudart_static_deps INTERFACE ${CUDAToolkit_rt_LIBRARY}) + endif() + endif() + endif() + + _CUDAToolkit_find_and_add_import_lib(cuda_driver ALT cuda DEPS cudart_static_deps) + _CUDAToolkit_find_and_add_import_lib(cudart DEPS cudart_static_deps) + _CUDAToolkit_find_and_add_import_lib(cudart_static DEPS cudart_static_deps) + + if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.0.0) + _CUDAToolkit_find_and_add_import_lib(nvJitLink) + _CUDAToolkit_find_and_add_import_lib(nvJitLink_static DEPS cudart_static_deps nvptxcompiler_static) + endif() + + if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.4.0) + _CUDAToolkit_find_and_add_import_lib(nvfatbin DEPS cudart_static_deps) + _CUDAToolkit_find_and_add_import_lib(nvfatbin_static DEPS cudart_static_deps) + endif() + + _CUDAToolkit_find_and_add_import_lib(culibos) # it's a static library + foreach (cuda_lib cublasLt cufft nvjpeg) + _CUDAToolkit_find_and_add_import_lib(${cuda_lib}) + _CUDAToolkit_find_and_add_import_lib(${cuda_lib}_static DEPS cudart_static_deps culibos) + endforeach() + foreach (cuda_lib curand nppc) + _CUDAToolkit_find_and_add_import_lib(${cuda_lib}) + _CUDAToolkit_find_and_add_import_lib(${cuda_lib}_static DEPS culibos) + endforeach() + + _CUDAToolkit_find_and_add_import_lib(cusparse DEPS nvJitLink) + _CUDAToolkit_find_and_add_import_lib(cusparse_static DEPS nvJitLink_static culibos) + + if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 11.0.0) + # cublas depends on cublasLt + # https://docs.nvidia.com/cuda/archive/11.0/cublas#static-library + _CUDAToolkit_find_and_add_import_lib(cublas DEPS cublasLt culibos) + _CUDAToolkit_find_and_add_import_lib(cublas_static DEPS cublasLt_static culibos) + else() + _CUDAToolkit_find_and_add_import_lib(cublas DEPS culibos) + 
_CUDAToolkit_find_and_add_import_lib(cublas_static DEPS culibos) + endif() + + if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 11.4) + _CUDAToolkit_find_and_add_import_lib(cuFile ALT cufile DEPS culibos) + _CUDAToolkit_find_and_add_import_lib(cuFile_static ALT cufile_static DEPS culibos) + + _CUDAToolkit_find_and_add_import_lib(cuFile_rdma ALT cufile_rdma DEPS cuFile culibos) + _CUDAToolkit_find_and_add_import_lib(cuFile_rdma_static ALT cufile_rdma_static DEPS cuFile_static culibos) + endif() + + if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 11.6) + _CUDAToolkit_find_and_add_import_lib(cudla) + endif() + + + # cuFFTW depends on cuFFT + _CUDAToolkit_find_and_add_import_lib(cufftw DEPS cufft) + _CUDAToolkit_find_and_add_import_lib(cufftw_static DEPS cufft_static) + if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 9.2) + _CUDAToolkit_find_and_add_import_lib(cufft_static_nocallback DEPS culibos) + endif() + + # cuSOLVER depends on cuBLAS, and cuSPARSE + set(cusolver_deps cublas cusparse) + set(cusolver_static_deps cublas_static cusparse_static culibos) + if(CUDAToolkit_VERSION VERSION_GREATER 11.2.1) + # cusolver depends on libcusolver_metis and cublasLt + # https://docs.nvidia.com/cuda/archive/11.2.2/cusolver#link-dependency + list(APPEND cusolver_deps cublasLt) + _CUDAToolkit_find_and_add_import_lib(cusolver_metis_static ALT metis_static) # implementation detail static lib + list(APPEND cusolver_static_deps cusolver_metis_static cublasLt_static) + endif() + if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 10.1.2) + # cusolver depends on liblapack_static.a starting with CUDA 10.1 update 2, + # https://docs.nvidia.com/cuda/archive/11.5.0/cusolver#static-link-lapack + _CUDAToolkit_find_and_add_import_lib(cusolver_lapack_static ALT lapack_static) # implementation detail static lib + list(APPEND cusolver_static_deps cusolver_lapack_static) + endif() + _CUDAToolkit_find_and_add_import_lib(cusolver DEPS ${cusolver_deps}) + _CUDAToolkit_find_and_add_import_lib(cusolver_static DEPS 
${cusolver_static_deps}) + unset(cusolver_deps) + unset(cusolver_static_deps) + + # nvGRAPH depends on cuRAND, and cuSOLVER. + _CUDAToolkit_find_and_add_import_lib(nvgraph DEPS curand cusolver) + _CUDAToolkit_find_and_add_import_lib(nvgraph_static DEPS curand_static cusolver_static) + + # Process the majority of the NPP libraries. + foreach (cuda_lib nppial nppicc nppidei nppif nppig nppim nppist nppitc npps nppicom nppisu) + _CUDAToolkit_find_and_add_import_lib(${cuda_lib} DEPS nppc) + _CUDAToolkit_find_and_add_import_lib(${cuda_lib}_static DEPS nppc_static) + endforeach() + + find_path(CUDAToolkit_CUPTI_INCLUDE_DIR cupti.h PATHS + "${CUDAToolkit_ROOT_DIR}/extras/CUPTI/include" + ${CUDAToolkit_INCLUDE_DIRS} + PATH_SUFFIXES "../extras/CUPTI/include" + "../../../extras/CUPTI/include" + NO_DEFAULT_PATH) + mark_as_advanced(CUDAToolkit_CUPTI_INCLUDE_DIR) + + if(CUDAToolkit_CUPTI_INCLUDE_DIR) + set(_cmake_cupti_extra_paths extras/CUPTI/lib64/ + extras/CUPTI/lib/ + ../extras/CUPTI/lib64/ + ../extras/CUPTI/lib/ + ../../../extras/CUPTI/lib64/ + ../../../extras/CUPTI/lib/) + _CUDAToolkit_find_and_add_import_lib(cupti + EXTRA_PATH_SUFFIXES ${_cmake_cupti_extra_paths} + EXTRA_INCLUDE_DIRS "${CUDAToolkit_CUPTI_INCLUDE_DIR}") + _CUDAToolkit_find_and_add_import_lib(cupti_static + EXTRA_PATH_SUFFIXES ${_cmake_cupti_extra_paths} + EXTRA_INCLUDE_DIRS "${CUDAToolkit_CUPTI_INCLUDE_DIR}") + if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 10.2.0) + _CUDAToolkit_find_and_add_import_lib(nvperf_host + EXTRA_PATH_SUFFIXES ${_cmake_cupti_extra_paths} + EXTRA_INCLUDE_DIRS "${CUDAToolkit_CUPTI_INCLUDE_DIR}") + _CUDAToolkit_find_and_add_import_lib(nvperf_host_static + EXTRA_PATH_SUFFIXES ${_cmake_cupti_extra_paths} + EXTRA_INCLUDE_DIRS "${CUDAToolkit_CUPTI_INCLUDE_DIR}") + _CUDAToolkit_find_and_add_import_lib(nvperf_target + EXTRA_PATH_SUFFIXES ${_cmake_cupti_extra_paths} + EXTRA_INCLUDE_DIRS "${CUDAToolkit_CUPTI_INCLUDE_DIR}") + endif() + if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 11.3.0) + 
_CUDAToolkit_find_and_add_import_lib(pcsamplingutil + EXTRA_PATH_SUFFIXES ${_cmake_cupti_extra_paths} + EXTRA_INCLUDE_DIRS "${CUDAToolkit_CUPTI_INCLUDE_DIR}") + endif() + endif() + + if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 11.1.0) + if(NOT TARGET CUDA::nvptxcompiler_static) + _CUDAToolkit_find_and_add_import_lib(nvptxcompiler_static) + if(TARGET CUDA::nvptxcompiler_static) + target_link_libraries(CUDA::nvptxcompiler_static INTERFACE CUDA::cudart_static_deps) + endif() + endif() + endif() + + _CUDAToolkit_find_and_add_import_lib(nvrtc_builtins ALT nvrtc-builtins) + _CUDAToolkit_find_and_add_import_lib(nvrtc) + if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 11.5.0) + _CUDAToolkit_find_and_add_import_lib(nvrtc_builtins_static ALT nvrtc-builtins_static) + if(NOT TARGET CUDA::nvrtc_static) + _CUDAToolkit_find_and_add_import_lib(nvrtc_static DEPS nvrtc_builtins_static nvptxcompiler_static) + if(TARGET CUDA::nvrtc_static AND WIN32 AND NOT (BORLAND OR MINGW OR CYGWIN)) + target_link_libraries(CUDA::nvrtc_static INTERFACE Ws2_32.lib) + endif() + endif() + endif() + + _CUDAToolkit_find_and_add_import_lib(nvml ALT nvidia-ml nvml) + _CUDAToolkit_find_and_add_import_lib(nvml_static ONLY_SEARCH_FOR libnvidia-ml.a libnvml.a) + + if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 10.0) + # Header-only variant. Uses dlopen(). 
+ if(NOT TARGET CUDA::nvtx3) + add_library(CUDA::nvtx3 INTERFACE IMPORTED) + target_include_directories(CUDA::nvtx3 SYSTEM INTERFACE "${CUDAToolkit_INCLUDE_DIRS}") + target_link_libraries(CUDA::nvtx3 INTERFACE ${CMAKE_DL_LIBS}) + endif() + endif() + if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.9) + if(NOT TARGET CUDA::nvtx3_interop) + _CUDAToolkit_find_and_add_import_lib(nvtx3_interop ALT nvtx3interop) + endif() + endif() + + # nvToolsExt is removed starting in 12.9 + if(CUDAToolkit_VERSION VERSION_LESS 12.9) + if(WIN32) + # nvtools can be installed outside the CUDA toolkit directory + # so prefer the NVTOOLSEXT_PATH windows only environment variable + # In addition on windows the most common name is nvToolsExt64_1 + find_library(CUDA_nvToolsExt_LIBRARY + NAMES nvToolsExt64_1 nvToolsExt64 nvToolsExt + PATHS ENV NVTOOLSEXT_PATH + ENV CUDA_PATH + PATH_SUFFIXES lib/x64 lib + ) + endif() + _CUDAToolkit_find_and_add_import_lib(nvToolsExt ALT nvToolsExt64) + + if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 10.0) + # nvToolsExt is deprecated since nvtx3 introduction. + # Warn only if the project requires a sufficiently new CMake to make migration possible. + if(TARGET CUDA::nvToolsExt AND CMAKE_MINIMUM_REQUIRED_VERSION VERSION_GREATER_EQUAL 3.25) + set_property(TARGET CUDA::nvToolsExt PROPERTY DEPRECATION "nvToolsExt has been superseded by nvtx3 since CUDA 10.0 and CMake 3.25. 
Use CUDA::nvtx3 and include instead.") + endif() + endif() + endif() + + _CUDAToolkit_find_and_add_import_lib(OpenCL) + + find_program(CUDA_bin2c_EXECUTABLE + NAMES bin2c + HINTS ${CUDAToolkit_BIN_DIR} + NO_DEFAULT_PATH + ) + if(NOT TARGET CUDA::bin2c AND CUDA_bin2c_EXECUTABLE) + add_executable(CUDA::bin2c IMPORTED) + set_property(TARGET CUDA::bin2c PROPERTY IMPORTED_LOCATION "${CUDA_bin2c_EXECUTABLE}") + endif() + + _CUDAToolkit_find_and_add_import_lib( + sanitizer + ONLY_SEARCH_FOR sanitizer-public + EXTRA_PATH_SUFFIXES + "../compute-sanitizer" + "../../../compute-sanitizer" + "../Sanitizer" + "../../../Sanitizer" + "../extras/Sanitizer" + "../../../extras/Sanitizer" + EXTRA_INCLUDE_DIRS "${CUDAToolkit_CUPTI_INCLUDE_DIR}" + ) + if(TARGET CUDA::sanitizer) + get_property(loc TARGET CUDA::sanitizer PROPERTY IMPORTED_LOCATION) + get_filename_component(sanitizer_dir "${loc}" DIRECTORY) + target_include_directories(CUDA::sanitizer INTERFACE "${sanitizer_dir}/include") + endif() +endif() + +if(_CUDAToolkit_Pop_ROOT_PATH) + list(REMOVE_AT CMAKE_FIND_ROOT_PATH 0) + unset(_CUDAToolkit_Pop_ROOT_PATH) +endif() + +unset(_CUDAToolkit_win_search_dirs) +unset(_CUDAToolkit_win_stub_search_dirs) diff --git a/cpp/tests/utilities/identify_stream_usage.cpp b/cpp/tests/utilities/identify_stream_usage.cpp index 7c6ba60d52e..61abc4577da 100644 --- a/cpp/tests/utilities/identify_stream_usage.cpp +++ b/cpp/tests/utilities/identify_stream_usage.cpp @@ -11,13 +11,13 @@ #include #include +#include +#include #include #include #include #include -#include -#include // This file is compiled into a separate library that is dynamically loaded with LD_PRELOAD at // runtime to libcudf to override some stream-related symbols in libcudf. The goal of such a library @@ -119,234 +119,128 @@ void check_stream_and_error(cudaStream_t stream) } } -/** - * @brief Container for CUDA APIs that have been overloaded using DEFINE_OVERLOAD. - * - * This variable must be initialized before everything else. 
- * - * @see find_originals for a description of the priorities - */ -__attribute__((init_priority(1001))) std::unordered_map originals; +class sanitizer_subscriber { + public: + sanitizer_subscriber(); + ~sanitizer_subscriber(); -/** - * @brief Macro for generating functions to override existing CUDA functions. - * - * Define a new function with the provided signature that checks the used - * stream and raises an exception if it is one of CUDA's default streams. If - * not, the new function forwards all arguments to the original function. - * - * Note that since this only defines the function, we do not need default - * parameter values since those will be provided by the original declarations - * in CUDA itself. - * - * @see find_originals for a description of the priorities - * - * @param function The function to overload. - * @param signature The function signature (must include names, not just types). - * @parameter arguments The function arguments (names only, no types). - */ -#define DEFINE_OVERLOAD(function, signature, arguments) \ - using function##_t = cudaError_t (*)(signature); \ - \ - cudaError_t function(signature) \ - { \ - check_stream_and_error(stream); \ - return ((function##_t)originals[#function])(arguments); \ - } \ - __attribute__((constructor(1002))) void queue_##function() { originals[#function] = nullptr; } + private: + Sanitizer_SubscriberHandle handle; -/** - * @brief Helper macro to define macro arguments that contain a comma. - */ -#define ARG(...) __VA_ARGS__ + static void check_result(SanitizerResult result); -// clang-format off -/* - We need to overload all the functions from the runtime API (assuming that we - don't use the driver API) that accept streams. The main webpage for APIs is - https://docs.nvidia.com/cuda/cuda-runtime-api/modules.html#modules. 
Here are - the modules containing any APIs using streams as of 9/20/2022: - - https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html - - https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EVENT.html#group__CUDART__EVENT - Done - - https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EXTRES__INTEROP.html#group__CUDART__EXTRES__INTEROP - - https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EXECUTION.html#group__CUDART__EXECUTION - Done - - https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY - Done - - https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY__POOLS.html#group__CUDART__MEMORY__POOLS - Done - - https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__OPENGL__DEPRECATED.html#group__CUDART__OPENGL__DEPRECATED - - https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EGL.html#group__CUDART__EGL - - https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__INTEROP.html#group__CUDART__INTEROP - - https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__GRAPH.html#group__CUDART__GRAPH - - https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__HIGHLEVEL.html#group__CUDART__HIGHLEVEL - */ -// clang-format on - -// Event APIS: -// https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EVENT.html#group__CUDART__EVENT -DEFINE_OVERLOAD(cudaEventRecord, ARG(cudaEvent_t event, cudaStream_t stream), ARG(event, stream)); - -DEFINE_OVERLOAD(cudaEventRecordWithFlags, - ARG(cudaEvent_t event, cudaStream_t stream, unsigned int flags), - ARG(event, stream, flags)); - -// Execution APIS: -// https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EXECUTION.html#group__CUDART__EXECUTION -DEFINE_OVERLOAD(cudaLaunchKernel, - ARG(void const* func, - dim3 gridDim, - dim3 blockDim, - void** args, - size_t sharedMem, - cudaStream_t stream), - ARG(func, gridDim, blockDim, args, sharedMem, stream)); + template + static void check_stream_arg(const 
Sanitizer_CallbackData* cbdata); -#if CUDART_VERSION >= 13000 -// We need to define the __cudaLaunchKernel ABI as -// it isn't part of cuda_runtime.h when compiling as a C++ source -extern "C" cudaError_t CUDARTAPI __cudaLaunchKernel(cudaKernel_t kernel, - dim3 gridDim, - dim3 blockDim, - void** args, - size_t sharedMem, - cudaStream_t stream); -extern "C" cudaError_t CUDARTAPI __cudaLaunchKernel_ptsz(cudaKernel_t kernel, - dim3 gridDim, - dim3 blockDim, - void** args, - size_t sharedMem, - cudaStream_t stream); -DEFINE_OVERLOAD(__cudaLaunchKernel, - ARG(cudaKernel_t kernel, - dim3 gridDim, - dim3 blockDim, - void** args, - size_t sharedMem, - cudaStream_t stream), - ARG(kernel, gridDim, blockDim, args, sharedMem, stream)); -DEFINE_OVERLOAD(__cudaLaunchKernel_ptsz, - ARG(cudaKernel_t kernel, - dim3 gridDim, - dim3 blockDim, - void** args, - size_t sharedMem, - cudaStream_t stream), - ARG(kernel, gridDim, blockDim, args, sharedMem, stream)); -#endif + void callback(Sanitizer_CallbackDomain domain, Sanitizer_CallbackId cbid, const void* cbdata); +}; + +sanitizer_subscriber::sanitizer_subscriber() +{ + const auto cb = [](void* userdata, + Sanitizer_CallbackDomain domain, + Sanitizer_CallbackId cbid, + const void* cbdata) { + auto* subscriber = static_cast(userdata); + subscriber->callback(domain, cbid, cbdata); + }; + check_result(sanitizerSubscribe(&this->handle, cb, this)); + + check_result(sanitizerEnableDomain(1, this->handle, SANITIZER_CB_DOMAIN_RUNTIME_API)); +} -DEFINE_OVERLOAD(cudaLaunchCooperativeKernel, - ARG(void const* func, - dim3 gridDim, - dim3 blockDim, - void** args, - size_t sharedMem, - cudaStream_t stream), - ARG(func, gridDim, blockDim, args, sharedMem, stream)); -DEFINE_OVERLOAD(cudaLaunchHostFunc, - ARG(cudaStream_t stream, cudaHostFn_t fn, void* userData), - ARG(stream, fn, userData)); - -// Memory transfer APIS: -// https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY 
+sanitizer_subscriber::~sanitizer_subscriber() { check_result(sanitizerUnsubscribe(this->handle)); } + +void sanitizer_subscriber::check_result(SanitizerResult result) +{ + if (result != SANITIZER_SUCCESS) { + const char* str; + sanitizerGetResultString(result, &str); + throw std::runtime_error(std::string("Sanitizer error: ") + str); + } +} + +template +void sanitizer_subscriber::check_stream_arg(const Sanitizer_CallbackData* cbdata) +{ + const auto* args = static_cast(cbdata->functionParams); + check_stream_and_error(args->*Field); +} + +// `generated_cuda_runtime_api_meta.h` is provided by the CUDA Toolkit/Compute Sanitizer. +// It defines versioned callback parameter structs named like +// `cudaMemcpyAsync_v3020_params`, where the numeric suffix identifies the CUDA runtime +// API version associated with that parameter layout. +#define CHECK_STREAM_ARG(call, version, field) \ + case SANITIZER_CBID_RUNTIME_API_##call: { \ + using args_t = call##_v##version##_params; \ + check_stream_arg(runtime_cbdata); \ + } break + +void sanitizer_subscriber::callback(Sanitizer_CallbackDomain domain, + Sanitizer_CallbackId cbid, + const void* cbdata) +{ + switch (domain) { + case SANITIZER_CB_DOMAIN_RUNTIME_API: { + const auto* runtime_cbdata = static_cast(cbdata); + + if (runtime_cbdata->callbackSite == SANITIZER_API_ENTER) { + switch (cbid) { + CHECK_STREAM_ARG(cudaEventRecord, 3020, stream); + CHECK_STREAM_ARG(cudaEventRecord_ptsz, 7000, stream); + CHECK_STREAM_ARG(cudaEventRecordWithFlags, 11010, stream); + CHECK_STREAM_ARG(cudaEventRecordWithFlags_ptsz, 11010, stream); + CHECK_STREAM_ARG(cudaLaunchKernel, 7000, stream); + CHECK_STREAM_ARG(cudaLaunchKernel_ptsz, 7000, stream); + CHECK_STREAM_ARG(cudaLaunchCooperativeKernel, 9000, stream); + CHECK_STREAM_ARG(cudaLaunchCooperativeKernel_ptsz, 9000, stream); + CHECK_STREAM_ARG(cudaLaunchHostFunc, 10000, stream); + CHECK_STREAM_ARG(cudaLaunchHostFunc_ptsz, 10000, stream); #if CUDART_VERSION >= 13000 -DEFINE_OVERLOAD( - 
cudaMemPrefetchAsync, - ARG(void const* devPtr, size_t count, cudaMemLocation loc, int flags, cudaStream_t stream), - ARG(devPtr, count, loc, flags, stream)); + CHECK_STREAM_ARG(cudaMemPrefetchAsync, 12020, stream); + CHECK_STREAM_ARG(cudaMemPrefetchAsync_ptsz, 12020, stream); #else -DEFINE_OVERLOAD(cudaMemPrefetchAsync, - ARG(void const* devPtr, size_t count, int dstDevice, cudaStream_t stream), - ARG(devPtr, count, dstDevice, stream)); + CHECK_STREAM_ARG(cudaMemPrefetchAsync, 8000, stream); + CHECK_STREAM_ARG(cudaMemPrefetchAsync_ptsz, 8000, stream); + CHECK_STREAM_ARG(cudaMemPrefetchAsync_v2, 12020, stream); + CHECK_STREAM_ARG(cudaMemPrefetchAsync_v2_ptsz, 12020, stream); #endif -DEFINE_OVERLOAD(cudaMemcpy2DAsync, - ARG(void* dst, - size_t dpitch, - void const* src, - size_t spitch, - size_t width, - size_t height, - cudaMemcpyKind kind, - cudaStream_t stream), - ARG(dst, dpitch, src, spitch, width, height, kind, stream)); -DEFINE_OVERLOAD(cudaMemcpy2DFromArrayAsync, - ARG(void* dst, - size_t dpitch, - cudaArray_const_t src, - size_t wOffset, - size_t hOffset, - size_t width, - size_t height, - cudaMemcpyKind kind, - cudaStream_t stream), - ARG(dst, dpitch, src, wOffset, hOffset, width, height, kind, stream)); -DEFINE_OVERLOAD(cudaMemcpy2DToArrayAsync, - ARG(cudaArray_t dst, - size_t wOffset, - size_t hOffset, - void const* src, - size_t spitch, - size_t width, - size_t height, - cudaMemcpyKind kind, - cudaStream_t stream), - ARG(dst, wOffset, hOffset, src, spitch, width, height, kind, stream)); -DEFINE_OVERLOAD(cudaMemcpy3DAsync, - ARG(cudaMemcpy3DParms const* p, cudaStream_t stream), - ARG(p, stream)); -DEFINE_OVERLOAD(cudaMemcpy3DPeerAsync, - ARG(cudaMemcpy3DPeerParms const* p, cudaStream_t stream), - ARG(p, stream)); -DEFINE_OVERLOAD( - cudaMemcpyAsync, - ARG(void* dst, void const* src, size_t count, cudaMemcpyKind kind, cudaStream_t stream), - ARG(dst, src, count, kind, stream)); -DEFINE_OVERLOAD(cudaMemcpyFromSymbolAsync, - ARG(void* dst, - void const* 
symbol, - size_t count, - size_t offset, - cudaMemcpyKind kind, - cudaStream_t stream), - ARG(dst, symbol, count, offset, kind, stream)); -DEFINE_OVERLOAD(cudaMemcpyToSymbolAsync, - ARG(void const* symbol, - void const* src, - size_t count, - size_t offset, - cudaMemcpyKind kind, - cudaStream_t stream), - ARG(symbol, src, count, offset, kind, stream)); -DEFINE_OVERLOAD( - cudaMemset2DAsync, - ARG(void* devPtr, size_t pitch, int value, size_t width, size_t height, cudaStream_t stream), - ARG(devPtr, pitch, value, width, height, stream)); -DEFINE_OVERLOAD( - cudaMemset3DAsync, - ARG(cudaPitchedPtr pitchedDevPtr, int value, cudaExtent extent, cudaStream_t stream), - ARG(pitchedDevPtr, value, extent, stream)); -DEFINE_OVERLOAD(cudaMemsetAsync, - ARG(void* devPtr, int value, size_t count, cudaStream_t stream), - ARG(devPtr, value, count, stream)); - -// Memory allocation APIS: -// https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY__POOLS.html#group__CUDART__MEMORY__POOLS -DEFINE_OVERLOAD(cudaFreeAsync, ARG(void* devPtr, cudaStream_t stream), ARG(devPtr, stream)); -DEFINE_OVERLOAD(cudaMallocAsync, - ARG(void** devPtr, size_t size, cudaStream_t stream), - ARG(devPtr, size, stream)); -DEFINE_OVERLOAD(cudaMallocFromPoolAsync, - ARG(void** ptr, size_t size, cudaMemPool_t memPool, cudaStream_t stream), - ARG(ptr, size, memPool, stream)); - -/** - * @brief Function to collect all the original CUDA symbols corresponding to overloaded functions. - * - * Note on priorities: - * - `originals` must be initialized first, so it is 1001. - * - The function names must be added to originals next in the macro, so those are 1002. - * - Finally, this function actually finds the original symbols so it is 1003. 
- */ -__attribute__((constructor(1003))) void find_originals() -{ - for (auto it : originals) { - originals[it.first] = dlsym(RTLD_NEXT, it.first.data()); + CHECK_STREAM_ARG(cudaMemcpy2DAsync, 3020, stream); + CHECK_STREAM_ARG(cudaMemcpy2DAsync_ptsz, 7000, stream); + CHECK_STREAM_ARG(cudaMemcpy2DFromArrayAsync, 3020, stream); + CHECK_STREAM_ARG(cudaMemcpy2DFromArrayAsync_ptsz, 7000, stream); + CHECK_STREAM_ARG(cudaMemcpy2DToArrayAsync, 3020, stream); + CHECK_STREAM_ARG(cudaMemcpy2DToArrayAsync_ptsz, 7000, stream); + CHECK_STREAM_ARG(cudaMemcpy3DAsync, 3020, stream); + CHECK_STREAM_ARG(cudaMemcpy3DAsync_ptsz, 7000, stream); + CHECK_STREAM_ARG(cudaMemcpy3DPeerAsync, 4000, stream); + CHECK_STREAM_ARG(cudaMemcpy3DPeerAsync_ptsz, 7000, stream); + CHECK_STREAM_ARG(cudaMemcpyAsync, 3020, stream); + CHECK_STREAM_ARG(cudaMemcpyAsync_ptsz, 7000, stream); + CHECK_STREAM_ARG(cudaMemcpyFromSymbolAsync, 3020, stream); + CHECK_STREAM_ARG(cudaMemcpyFromSymbolAsync_ptsz, 7000, stream); + CHECK_STREAM_ARG(cudaMemcpyToSymbolAsync, 3020, stream); + CHECK_STREAM_ARG(cudaMemcpyToSymbolAsync_ptsz, 7000, stream); + CHECK_STREAM_ARG(cudaMemset2DAsync, 3020, stream); + CHECK_STREAM_ARG(cudaMemset2DAsync_ptsz, 7000, stream); + CHECK_STREAM_ARG(cudaMemset3DAsync, 3020, stream); + CHECK_STREAM_ARG(cudaMemset3DAsync_ptsz, 7000, stream); + CHECK_STREAM_ARG(cudaMemsetAsync, 3020, stream); + CHECK_STREAM_ARG(cudaMemsetAsync_ptsz, 7000, stream); + CHECK_STREAM_ARG(cudaFreeAsync, 11020, hStream); + CHECK_STREAM_ARG(cudaFreeAsync_ptsz, 11020, hStream); + CHECK_STREAM_ARG(cudaMallocAsync, 11020, hStream); + CHECK_STREAM_ARG(cudaMallocAsync_ptsz, 11020, hStream); + CHECK_STREAM_ARG(cudaMallocFromPoolAsync, 11020, stream); + CHECK_STREAM_ARG(cudaMallocFromPoolAsync_ptsz, 11020, stream); + } + } + } break; + default: break; } } + +#undef CHECK_STREAM_ARG + +sanitizer_subscriber subscriber; diff --git a/dependencies.yaml b/dependencies.yaml index 77b4b456b08..cae4816eec5 100644 --- a/dependencies.yaml +++ 
b/dependencies.yaml @@ -147,13 +147,6 @@ files: - develop - iwyu - py_version - stream_tests: - output: none - includes: - - build_all - - build_base - - cuda - - cuda_version docs: output: none includes: @@ -657,6 +650,7 @@ dependencies: - cuda-cudart-dev - cuda-nvrtc-dev - cuda-nvtx-dev + - cuda-sanitizer-api - libcufile-dev - libcurand-dev - libnvjitlink-dev diff --git a/python/pylibcudf/tests/conftest.py b/python/pylibcudf/tests/conftest.py index 9b5638a4621..dc78c748e63 100644 --- a/python/pylibcudf/tests/conftest.py +++ b/python/pylibcudf/tests/conftest.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 # Tell ruff it's OK that some imports occur after the sys.path.insert # ruff: noqa: E402 @@ -363,3 +363,16 @@ def has_nulls(request): ) def has_nans(request): return request.param + + +@pytest.fixture(scope="session") +def patch_cupy_stream(request): + import cupy as cp + + # TODO: Remove this version conditional once we require CuPy 14 + if hasattr(cp.cuda.Stream, "from_external"): + return cp.cuda.Stream.from_external(plc.utils.CUDF_DEFAULT_STREAM) + else: + version, stream_ptr = plc.utils.CUDF_DEFAULT_STREAM.__cuda_stream__() + assert version == 0 + return cp.cuda.ExternalStream(stream_ptr) diff --git a/python/pylibcudf/tests/test_column_from_array.py b/python/pylibcudf/tests/test_column_from_array.py index 31cf54864b3..39a4fff011f 100644 --- a/python/pylibcudf/tests/test_column_from_array.py +++ b/python/pylibcudf/tests/test_column_from_array.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 import pyarrow as pa @@ -50,7 +50,7 @@ def shape(request): @pytest.fixture(params=CUPY_DTYPES, ids=repr) -def cp_array(request, shape): +def cp_array(request, shape, patch_cupy_stream): dtype = request.param size = np.prod(shape) if dtype == np.bool_: @@ -59,7 +59,8 @@ def cp_array(request, shape): ).reshape(shape) else: np_arr = np.arange(size, dtype=dtype).reshape(shape) - return cp.asarray(np_arr), np_arr + with patch_cupy_stream: + return cp.asarray(np_arr), np_arr @pytest.fixture(params=NUMPY_DTYPES, ids=repr) @@ -104,16 +105,18 @@ def test_from_numpy_array(np_array): assert_column_eq(expected, got) -def test_non_c_contiguous_raises(cp_array): +def test_non_c_contiguous_raises(cp_array, patch_cupy_stream): cp_arr = cp_array[0] if len(cp_arr.shape) == 1: return + with patch_cupy_stream: + fortran_arr = cp.asfortranarray(cp_arr) with pytest.raises( ValueError, match="Data must be C-contiguous", ): - plc.Column.from_array(cp.asfortranarray(cp_arr)) + plc.Column.from_array(fortran_arr) def test_row_limit_exceed_raises(): @@ -134,11 +137,12 @@ def __init__(self, shape): plc.Column.from_array(Foo((SIZE_TYPE_LIMIT, 1))) -def test_flat_size_exceeds_size_type_limit(): +def test_flat_size_exceeds_size_type_limit(patch_cupy_stream): nrows = 2**16 ncols = (SIZE_TYPE_LIMIT // nrows) + 1 - arr = cp.zeros((nrows, ncols), dtype=np.int32) + with patch_cupy_stream: + arr = cp.zeros((nrows, ncols), dtype=np.int32) with pytest.raises( ValueError, @@ -191,8 +195,9 @@ def test_from_zero_dimensional_array(): ([[], []], np.int32, pa.array([[], []], type=pa.list_(pa.int32()))), ], ) -def test_empty_array(np_or_cp_array, arr, dtype, expect): - arr = np_or_cp_array(arr, dtype=dtype) +def test_empty_array(np_or_cp_array, arr, dtype, expect, patch_cupy_stream): + with patch_cupy_stream: + arr = np_or_cp_array(arr, dtype=dtype) col = plc.Column.from_array(arr) assert_column_eq(expect, col) diff --git a/python/pylibcudf/tests/test_interop.py 
b/python/pylibcudf/tests/test_interop.py index a21ed0277b0..b7ce9547f8d 100644 --- a/python/pylibcudf/tests/test_interop.py +++ b/python/pylibcudf/tests/test_interop.py @@ -143,8 +143,9 @@ def test_round_trip_dlpack_plc_table(): @pytest.mark.parametrize("array", [np.array, cp.array]) -def test_round_trip_dlpack_array(array): - arr = array([1, 2, 3]) +def test_round_trip_dlpack_array(array, patch_cupy_stream): + with patch_cupy_stream: + arr = array([1, 2, 3]) result = plc.interop.from_dlpack(arr.__dlpack__()) expected = pa.table({"a": [1, 2, 3]}) assert_table_eq(expected, result) diff --git a/python/pylibcudf/tests/test_reshape.py b/python/pylibcudf/tests/test_reshape.py index 1fb406d9719..e0c50542de7 100644 --- a/python/pylibcudf/tests/test_reshape.py +++ b/python/pylibcudf/tests/test_reshape.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 import cupy as cp @@ -50,7 +50,7 @@ def test_tile(reshape_data, cnt): ("float64", TypeId.FLOAT64), ], ) -def test_table_to_array(dtype, type_id): +def test_table_to_array(dtype, type_id, patch_cupy_stream): arrow_type = pa.from_numpy_dtype(getattr(cp, dtype)) arrs = [ pa.array([1, 2, 3], type=arrow_type), @@ -68,5 +68,6 @@ def test_table_to_array(dtype, type_id): got.nbytes, ) - expect = cp.array([[1, 4], [2, 5], [3, 6]], dtype=dtype) - cp.testing.assert_array_equal(expect, got) + with patch_cupy_stream: + expect = cp.array([[1, 4], [2, 5], [3, 6]], dtype=dtype) + cp.testing.assert_array_equal(expect, got) From 3700502e078c204124dda09ee7904f4ddb8f676d Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 6 May 2026 12:00:17 -0500 Subject: [PATCH 19/36] Run all nvbench benchmarks with timeout in smoketest (#20538) Updates the benchmark smoketest script to run all nvbench benchmarks. This should catch any invalid benchmarks and issues like segfaults that occur at runtime. 
We set a 1-minute timeout in case of hangs, since these are smoke tests. The `--profile` flag ensures only a quick one-off run is used. Authors: - Bradley Dice (https://github.com/bdice) - David Wendt (https://github.com/davidwendt) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - David Wendt (https://github.com/davidwendt) - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/cudf/pull/20538 --- .github/workflows/test.yaml | 12 ++++++++++++ ci/run_cudf_benchmark_smoketests.sh | 24 +++++++++++++++++++++--- ci/test_cpp.sh | 7 ------- ci/test_cpp_benchmarks.sh | 13 +++++++++++++ 4 files changed, 46 insertions(+), 10 deletions(-) create mode 100755 ci/test_cpp_benchmarks.sh diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 95439e65744..b4977f60def 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -39,6 +39,18 @@ jobs: date: ${{ inputs.date }} script: ci/test_cpp.sh sha: ${{ inputs.sha }} + conda-cpp-benchmark-tests: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@main + with: + build_type: ${{ inputs.build_type }} + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + node_type: "gpu-l4-latest-1" + arch: "amd64" + container_image: "rapidsai/ci-conda:26.06-latest" + script: ci/test_cpp_benchmarks.sh conda-cpp-memcheck-tests: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@main diff --git a/ci/run_cudf_benchmark_smoketests.sh b/ci/run_cudf_benchmark_smoketests.sh index 8069e60bd82..c9e9873a6f3 100755 --- a/ci/run_cudf_benchmark_smoketests.sh +++ b/ci/run_cudf_benchmark_smoketests.sh @@ -21,6 +21,24 @@ else exit 1 fi -# Ensure that benchmarks are runnable -# Run a small nvbench benchmark -./STRINGS_NVBENCH --profile --benchmark 0 --devices 0 +EXITCODE=0 +# Run all nvbench benchmarks with --profile and rmm_mode=cuda +for bench in *_NVBENCH; do + if [[ -x "$bench" && -f "$bench" ]]; 
then + start_time=$(date +%s) + echo "Running $bench with --profile..." + "./$bench" --profile --devices 0 -q --rmm_mode cuda + SUITEERROR=$? + end_time=$(date +%s) + duration=$((end_time - start_time)) + if (( SUITEERROR == 0 )); then + echo "Benchmark $bench passed in $duration seconds" + else + echo "Benchmark $bench failed in $duration seconds: $SUITEERROR" + EXITCODE=$SUITEERROR + fi + fi +done + +echo "Test script exiting with value: $EXITCODE" +exit ${EXITCODE} diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh index f548cc0a9e8..4ae71ff3081 100755 --- a/ci/test_cpp.sh +++ b/ci/test_cpp.sh @@ -32,12 +32,5 @@ if (( SUITEERROR == 0 )); then SUITEERROR=$? fi -# Ensure that benchmarks are runnable -if (( SUITEERROR == 0 )); then - rapids-logger "Run tests of libcudf benchmarks" - timeout 30m ./ci/run_cudf_benchmark_smoketests.sh - SUITEERROR=$? -fi - rapids-logger "Test script exiting with value: $EXITCODE" exit ${EXITCODE} diff --git a/ci/test_cpp_benchmarks.sh b/ci/test_cpp_benchmarks.sh new file mode 100755 index 00000000000..a2f34c047d8 --- /dev/null +++ b/ci/test_cpp_benchmarks.sh @@ -0,0 +1,13 @@ +#!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. +# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +# Support invoking test_cpp_benchmarks.sh outside the script directory +cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../ + +source ./ci/test_cpp_common.sh + +rapids-logger "Run tests of libcudf benchmarks" +./ci/run_cudf_benchmark_smoketests.sh From f0b2a424bb6e55609de4645b3ce9a3a89eb78eda Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Wed, 6 May 2026 10:35:34 -0700 Subject: [PATCH 20/36] Fix a crash in the ORC reader with malformed stripe footers (#22383) `read_orc` segfaults on malformed ORC files whose stripe footer's `ColumnEncoding` array has fewer entries than the file footer's type count. 
This PR adds a check of `stripe_footer.columns.size()` against `ff.types.size()` to avoid OOB access to the stripe footer. Authors: - Vukasin Milovanovic (https://github.com/vuule) Approvers: - Muhammad Haseeb (https://github.com/mhaseeb123) - Bradley Dice (https://github.com/bdice) - Tianyu Liu (https://github.com/kingcrimsontianyu) URL: https://github.com/rapidsai/cudf/pull/22383 --- cpp/src/io/orc/aggregate_orc_metadata.cpp | 5 +++++ .../tests/data/orc/stripe_footer_no_encodings.orc | Bin 0 -> 597 bytes python/cudf/cudf/tests/input_output/test_orc.py | 10 ++++++++++ 3 files changed, 15 insertions(+) create mode 100644 python/cudf/cudf/tests/data/orc/stripe_footer_no_encodings.orc diff --git a/cpp/src/io/orc/aggregate_orc_metadata.cpp b/cpp/src/io/orc/aggregate_orc_metadata.cpp index 8da5c355190..a4522461777 100644 --- a/cpp/src/io/orc/aggregate_orc_metadata.cpp +++ b/cpp/src/io/orc/aggregate_orc_metadata.cpp @@ -267,6 +267,11 @@ aggregate_orc_metadata::select_stripes( {buffer->data(), buffer->size()}); protobuf_reader(sf_data.data(), sf_data.size()) .read(per_file_metadata[mapping.source_idx].stripefooters[i]); + auto const& stripe_footer = per_file_metadata[mapping.source_idx].stripefooters[i]; + auto const num_types = per_file_metadata[mapping.source_idx].ff.types.size(); + CUDF_EXPECTS(stripe_footer.columns.size() >= num_types, + "Invalid ColumnEncoding field in a stripe footer.", + std::out_of_range); mapping.stripe_info[i].stripe_footer = &per_file_metadata[mapping.source_idx].stripefooters[i]; if (stripe->indexLength == 0) { row_grp_idx_present = false; } diff --git a/python/cudf/cudf/tests/data/orc/stripe_footer_no_encodings.orc b/python/cudf/cudf/tests/data/orc/stripe_footer_no_encodings.orc new file mode 100644 index 0000000000000000000000000000000000000000..70ecbdc70529b6ea665e13d6accb40100ba1c516 GIT binary patch literal 597 zcmZ9KK}*9h7>1K}o2B$nib&aMyZiaBS|s^IGNt6C0)5zJGjSmS8LFe@c$R$|7|Cz<|8 
zSRSQRx+i1L_eMyb+oJ3?y|Fh&wGXG_n=Zh#^?#hbU0!sJ*NzR42CU(MzxVQ~>+|Jo F{~OQGpZ)*< literal 0 HcmV?d00001 diff --git a/python/cudf/cudf/tests/input_output/test_orc.py b/python/cudf/cudf/tests/input_output/test_orc.py index 42af9926f7f..b8f6ee0395a 100644 --- a/python/cudf/cudf/tests/input_output/test_orc.py +++ b/python/cudf/cudf/tests/input_output/test_orc.py @@ -584,6 +584,16 @@ def test_orc_read_incorrect_ps_length(): cudf.read_orc(buf) +def test_orc_read_stripe_footer_no_encodings(datadir): + # Crafted ORC whose stripe footer's ColumnEncoding list is empty even though + # the file footer declares one data column. The reader used to index the + # encoding list out of bounds and segfault; it now raises IndexError from + # the early stripe-footer validation in aggregate_orc_metadata. + path = datadir / "stripe_footer_no_encodings.orc" + with pytest.raises(IndexError): + cudf.read_orc(path) + + def test_orc_reader_tzif_timestamps(datadir): # Contains timstamps in the range covered by the TZif file # Other timedate tests only cover "future" times From df9ea2470dd5cf28ec017c978877024a781620e1 Mon Sep 17 00:00:00 2001 From: Yunsong Wang <12716979+PointKernel@users.noreply.github.com> Date: Wed, 6 May 2026 12:11:52 -0700 Subject: [PATCH 21/36] Rename build/probe to right/left in hash_join and distinct_hash_join (#22382) There have been prior discussions about unifying the join interfaces to avoid, or at least reduce, the mixed use of probe/build and left/right terminology, which can be confusing. This is the first PR in a series that renames join operations to replace the probe/build terminology with left/right. The probe/build roles are not deterministic and can vary depending on the algorithm, whereas left/right provides a consistent and unambiguous reference, minimizing confusion. This PR applies the renaming to `hash_join` and `distinct_hash_join`. There are no functional changes, only naming updates. 
Authors: - Yunsong Wang (https://github.com/PointKernel) Approvers: - Tianyu Liu (https://github.com/kingcrimsontianyu) - Lawrence Mitchell (https://github.com/wence-) - Shruti Shivakumar (https://github.com/shrshi) URL: https://github.com/rapidsai/cudf/pull/22382 --- cpp/benchmarks/join/distinct_join.cu | 16 +- .../cudf/detail/join/distinct_hash_join.cuh | 36 ++-- cpp/include/cudf/detail/join/hash_join.hpp | 50 ++--- cpp/include/cudf/join/distinct_hash_join.hpp | 40 ++-- cpp/include/cudf/join/hash_join.hpp | 142 +++++++------- cpp/src/join/distinct_hash_join.cu | 158 ++++++++-------- cpp/src/join/hash_join/common.cuh | 8 +- cpp/src/join/hash_join/dispatch.cuh | 28 +-- .../hash_join/full_join_match_context.cpp | 7 +- cpp/src/join/hash_join/full_join_retrieve.cu | 6 +- cpp/src/join/hash_join/full_join_size.cu | 6 +- cpp/src/join/hash_join/full_join_size_impl.cu | 28 +-- cpp/src/join/hash_join/hash_join.cu | 108 +++++------ .../hash_join/inner_join_match_context.cpp | 7 +- cpp/src/join/hash_join/inner_join_retrieve.cu | 6 +- cpp/src/join/hash_join/inner_join_size.cu | 6 +- .../hash_join/left_join_match_context.cpp | 7 +- cpp/src/join/hash_join/left_join_retrieve.cu | 6 +- cpp/src/join/hash_join/left_join_size.cu | 6 +- cpp/src/join/hash_join/match_context.cu | 33 ++-- cpp/src/join/hash_join/retrieve_impl.cuh | 74 ++++---- cpp/src/join/hash_join/size_impl.cuh | 74 ++++---- cpp/tests/join/distinct_join_tests.cpp | 176 +++++++++--------- cpp/tests/join/join_tests.cpp | 24 +-- 24 files changed, 524 insertions(+), 528 deletions(-) diff --git a/cpp/benchmarks/join/distinct_join.cu b/cpp/benchmarks/join/distinct_join.cu index 6fe8928128c..3f656e3423c 100644 --- a/cpp/benchmarks/join/distinct_join.cu +++ b/cpp/benchmarks/join/distinct_join.cu @@ -18,11 +18,11 @@ void nvbench_distinct_inner_join(nvbench::state& state, { auto dtypes = cycle_dtypes(get_type_or_group(static_cast(DataType)), num_keys); - auto join = [](cudf::table_view const& probe_input, - cudf::table_view 
const& build_input, + auto join = [](cudf::table_view const& left_input, + cudf::table_view const& right_input, cudf::null_equality compare_nulls) { - auto hj_obj = cudf::distinct_hash_join{build_input, compare_nulls, LOAD_FACTOR}; - return hj_obj.inner_join(probe_input); + auto hj_obj = cudf::distinct_hash_join{right_input, compare_nulls, LOAD_FACTOR}; + return hj_obj.inner_join(left_input); }; BM_join(state, dtypes, join); @@ -36,11 +36,11 @@ void nvbench_distinct_left_join(nvbench::state& state, { auto dtypes = cycle_dtypes(get_type_or_group(static_cast(DataType)), num_keys); - auto join = [](cudf::table_view const& probe_input, - cudf::table_view const& build_input, + auto join = [](cudf::table_view const& left_input, + cudf::table_view const& right_input, cudf::null_equality compare_nulls) { - auto hj_obj = cudf::distinct_hash_join{build_input, compare_nulls, LOAD_FACTOR}; - return hj_obj.left_join(probe_input); + auto hj_obj = cudf::distinct_hash_join{right_input, compare_nulls, LOAD_FACTOR}; + return hj_obj.left_join(left_input); }; BM_join(state, dtypes, join); diff --git a/cpp/include/cudf/detail/join/distinct_hash_join.cuh b/cpp/include/cudf/detail/join/distinct_hash_join.cuh index ecd2324d95c..3d0b1852e89 100644 --- a/cpp/include/cudf/detail/join/distinct_hash_join.cuh +++ b/cpp/include/cudf/detail/join/distinct_hash_join.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ #pragma once @@ -25,14 +25,14 @@ using cudf::detail::row::lhs_index_type; using cudf::detail::row::rhs_index_type; /** - * @brief A custom comparator used for the build table insertion + * @brief A custom comparator used for the right table insertion */ struct always_not_equal { __device__ constexpr bool operator()( cuco::pair const&, cuco::pair const&) const noexcept { - // All build table keys are distinct thus `false` no matter what + // All right table keys are distinct thus `false` no matter what return false; } }; @@ -76,11 +76,11 @@ struct primitive_comparator_adapter { }; /** - * @brief Distinct hash join that builds hash table in creation and probes results in subsequent - * `*_join` member functions. + * @brief Distinct hash join that builds a hash table with the right table on construction and + * probes results in subsequent `*_join` member functions. * - * This class enables the distinct hash join scheme that builds hash table once, and probes as many - * times as needed (possibly in parallel). + * This class enables the distinct hash join scheme that builds with the right table once and + * probes with many left tables (possibly in parallel). */ class distinct_hash_join { public: @@ -104,15 +104,15 @@ class distinct_hash_join { }; /** - * @brief Constructor that internally builds the hash table based on the given `build` table. + * @brief Constructor that internally builds the hash table from the given `right` table. * - * @throw cudf::logic_error if the number of columns in `build` table is 0. + * @throw cudf::logic_error if the number of columns in `right` table is 0. * - * @param build The build table, from which the hash table is built + * @param right The right table, from which the hash table is built * @param compare_nulls Controls whether null join-key values should match or not. * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ - distinct_hash_join(cudf::table_view const& build, + distinct_hash_join(cudf::table_view const& right, cudf::null_equality compare_nulls, rmm::cuda_stream_view stream); @@ -121,7 +121,7 @@ class distinct_hash_join { * * @param load_factor The hash table occupancy ratio in (0,1]. A value of 0.5 means 50% occupancy. */ - distinct_hash_join(cudf::table_view const& build, + distinct_hash_join(cudf::table_view const& right, cudf::null_equality compare_nulls, double load_factor, rmm::cuda_stream_view stream); @@ -131,7 +131,7 @@ class distinct_hash_join { */ std::pair>, std::unique_ptr>> - inner_join(cudf::table_view const& probe, + inner_join(cudf::table_view const& left, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const; @@ -139,7 +139,7 @@ class distinct_hash_join { * @copydoc cudf::distinct_hash_join::left_join */ std::unique_ptr> left_join( - cudf::table_view const& probe, + cudf::table_view const& left, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const; @@ -156,11 +156,11 @@ class distinct_hash_join { rmm::mr::polymorphic_allocator, cuco_storage_type>; - bool _has_nested_columns; ///< True if nested columns are present in build and probe tables + bool _has_nested_columns; ///< True if nested columns are present in right and left tables cudf::null_equality _nulls_equal; ///< Whether to consider nulls as equal - cudf::table_view _build; ///< Input table to build the hash map + cudf::table_view _right; ///< Input table to build the hash map std::shared_ptr - _preprocessed_build; ///< Input table preprocssed for row operators - hash_table_type _hash_table; ///< Hash table built on `_build` + _preprocessed_right; ///< Input table preprocssed for row operators + hash_table_type _hash_table; ///< Hash table built on `_right` }; } // namespace cudf::detail diff --git a/cpp/include/cudf/detail/join/hash_join.hpp b/cpp/include/cudf/detail/join/hash_join.hpp index df3d0b6c2fe..b1b96ca7218 100644 --- 
a/cpp/include/cudf/detail/join/hash_join.hpp +++ b/cpp/include/cudf/detail/join/hash_join.hpp @@ -29,8 +29,8 @@ class preprocessed_table; namespace cudf { namespace detail { /** - * @brief Hash join that builds hash table in creation and probes results in subsequent `*_join` - * member functions. + * @brief Hash join that builds a hash table with the right table on construction and probes + * results in subsequent `*_join` member functions. * * User-defined hash function can be passed via the template parameter `Hasher` * @@ -50,17 +50,17 @@ class hash_join { hash_join& operator=(hash_join&&) = delete; /** - * @brief Constructor that internally builds the hash table based on the given `build` table. + * @brief Constructor that internally builds the hash table from the given `right` table. * - * @throw cudf::logic_error if the number of columns in `build` table is 0. + * @throw cudf::logic_error if the number of columns in `right` table is 0. * - * @param build The build table, from which the hash table is built. - * @param has_nulls Flag to indicate if the there exists any nulls in the `build` table or - * any `probe` table that will be used later for join. + * @param right The right table, from which the hash table is built. + * @param has_nulls Flag to indicate if the there exists any nulls in the `right` table or + * any `left` table that will be used later for join. * @param compare_nulls Controls whether null join-key values should match or not. * @param stream CUDA stream used for device memory operations and kernel launches. */ - hash_join(cudf::table_view const& build, + hash_join(cudf::table_view const& right, bool has_nulls, cudf::null_equality compare_nulls, rmm::cuda_stream_view stream); @@ -70,7 +70,7 @@ class hash_join { * * @param load_factor The hash table occupancy ratio in (0,1]. A value of 0.5 means 50% occupancy. 
*/ - hash_join(cudf::table_view const& build, + hash_join(cudf::table_view const& right, bool has_nulls, cudf::null_equality compare_nulls, double load_factor, @@ -81,7 +81,7 @@ class hash_join { */ [[nodiscard]] std::pair>, std::unique_ptr>> - inner_join(cudf::table_view const& probe, + inner_join(cudf::table_view const& left, std::optional output_size, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const; @@ -91,7 +91,7 @@ class hash_join { */ [[nodiscard]] std::pair>, std::unique_ptr>> - left_join(cudf::table_view const& probe, + left_join(cudf::table_view const& left, std::optional output_size, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const; @@ -101,7 +101,7 @@ class hash_join { */ [[nodiscard]] std::pair>, std::unique_ptr>> - full_join(cudf::table_view const& probe, + full_join(cudf::table_view const& left, std::optional output_size, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const; @@ -109,19 +109,19 @@ class hash_join { /** * @copydoc cudf::hash_join::inner_join_size */ - [[nodiscard]] std::size_t inner_join_size(cudf::table_view const& probe, + [[nodiscard]] std::size_t inner_join_size(cudf::table_view const& left, rmm::cuda_stream_view stream) const; /** * @copydoc cudf::hash_join::left_join_size */ - [[nodiscard]] std::size_t left_join_size(cudf::table_view const& probe, + [[nodiscard]] std::size_t left_join_size(cudf::table_view const& left, rmm::cuda_stream_view stream) const; /** * @copydoc cudf::hash_join::full_join_size */ - [[nodiscard]] std::size_t full_join_size(cudf::table_view const& probe, + [[nodiscard]] std::size_t full_join_size(cudf::table_view const& left, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const; @@ -129,7 +129,7 @@ class hash_join { * @copydoc cudf::hash_join::inner_join_match_context */ [[nodiscard]] cudf::join_match_context inner_join_match_context( - cudf::table_view const& probe, + cudf::table_view const& left, rmm::cuda_stream_view stream, 
rmm::device_async_resource_ref mr) const; @@ -137,7 +137,7 @@ class hash_join { * @copydoc cudf::hash_join::left_join_match_context */ [[nodiscard]] cudf::join_match_context left_join_match_context( - cudf::table_view const& probe, + cudf::table_view const& left, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const; @@ -145,39 +145,39 @@ class hash_join { * @copydoc cudf::hash_join::full_join_match_context */ [[nodiscard]] cudf::join_match_context full_join_match_context( - cudf::table_view const& probe, + cudf::table_view const& left, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const; private: bool const _is_empty; ///< true if `_hash_table` is empty - bool const _has_nulls; ///< true if nulls are present in either build table or any probe table + bool const _has_nulls; ///< true if nulls are present in either right table or any left table cudf::null_equality const _nulls_equal; ///< whether to consider nulls as equal - cudf::table_view _build; ///< input table to build the hash map + cudf::table_view _right; ///< input table to build the hash map std::shared_ptr - _preprocessed_build; ///< input table preprocssed for row operators + _preprocessed_right; ///< input table preprocssed for row operators std::unique_ptr _impl; ///< CUDA hash table implementation [[nodiscard]] std::unique_ptr> make_match_counts( join_kind join, - cudf::table_view const& probe, + cudf::table_view const& left, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const; template [[nodiscard]] std::pair>, std::unique_ptr>> - join_retrieve(cudf::table_view const& probe, + join_retrieve(cudf::table_view const& left, std::optional output_size, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const; template - [[nodiscard]] std::size_t join_size(cudf::table_view const& probe, + [[nodiscard]] std::size_t join_size(cudf::table_view const& left, rmm::cuda_stream_view stream) const; template - [[nodiscard]] std::size_t 
join_size(cudf::table_view const& probe, + [[nodiscard]] std::size_t join_size(cudf::table_view const& left, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const; }; diff --git a/cpp/include/cudf/join/distinct_hash_join.hpp b/cpp/include/cudf/join/distinct_hash_join.hpp index da1338f4351..4d1b686978a 100644 --- a/cpp/include/cudf/join/distinct_hash_join.hpp +++ b/cpp/include/cudf/join/distinct_hash_join.hpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -33,13 +33,13 @@ class distinct_hash_join; } // namespace detail /** - * @brief Distinct hash join that builds hash table in creation and probes results in subsequent - * `*_join` member functions + * @brief Distinct hash join that builds a hash table with the right table on construction and + * probes results in subsequent `*_join` member functions * - * This class enables the distinct hash join scheme that builds hash table once, and probes as many - * times as needed (possibly in parallel). + * This class enables the distinct hash join scheme that builds with the right table once and + * probes with many left tables (possibly in parallel). * - * @note Behavior is undefined if the build table contains duplicates. + * @note Behavior is undefined if the right table contains duplicates. 
* @note All NaNs are considered as equal */ class distinct_hash_join { @@ -54,17 +54,17 @@ class distinct_hash_join { /** * @brief Constructs a distinct hash join object for subsequent probe calls * - * @throw cudf::logic_error if the build table has no columns + * @throw cudf::logic_error if the right table has no columns * @throw std::invalid_argument if load_factor is not greater than 0 and less than or equal to 1 * - * @param build The build table that contains distinct elements + * @param right The right table that contains distinct elements * @param compare_nulls Controls whether null join-key values should match or not * @param load_factor The desired ratio of filled slots to total slots in the hash table, must be * in range (0,1]. For example, 0.5 indicates a target of 50% occupancy. Note that the actual * occupancy achieved may be slightly lower than the specified value. * @param stream CUDA stream used for device memory operations and kernel launches */ - distinct_hash_join(cudf::table_view const& build, + distinct_hash_join(cudf::table_view const& right, null_equality compare_nulls = null_equality::EQUAL, double load_factor = 0.5, rmm::cuda_stream_view stream = cudf::get_default_stream()); @@ -73,39 +73,39 @@ class distinct_hash_join { * @brief Returns the row indices that can be used to construct the result of performing * an inner join between two tables. @see cudf::inner_join(). * - * @param probe The probe table, from which the keys are probed + * @param left The left table, from which the keys are probed * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned indices' device memory. 
* - * @return A pair of columns [`probe_indices`, `build_indices`] that can be used to + * @return A pair of columns [`left_indices`, `right_indices`] that can be used to * construct the result of performing an inner join between two tables - * with `build` and `probe` as the join keys. + * with `left` and `right` as the join keys. */ [[nodiscard]] std::pair>, std::unique_ptr>> - inner_join(cudf::table_view const& probe, + inner_join(cudf::table_view const& left, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) const; /** - * @brief Returns the build table indices that can be used to construct the result of performing + * @brief Returns the right table indices that can be used to construct the result of performing * a left join between two tables. * - * @note For a given row index `i` of the probe table, the resulting `build_indices[i]` contains - * the row index of the matched row from the build table if there is a match. Otherwise, contains + * @note For a given row index `i` of the left table, the resulting `right_indices[i]` contains + * the row index of the matched row from the right table if there is a match. Otherwise, contains * `JoinNoMatch`. * - * @param probe The probe table, from which the keys are probed + * @param left The left table, from which the keys are probed * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device * memory. * - * @return A `build_indices` column that can be used to construct the result of - * performing a left join between two tables with `build` and `probe` as the join + * @return A `right_indices` column that can be used to construct the result of + * performing a left join between two tables with `left` and `right` as the join * keys. 
*/ [[nodiscard]] std::unique_ptr> left_join( - cudf::table_view const& probe, + cudf::table_view const& left, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) const; diff --git a/cpp/include/cudf/join/hash_join.hpp b/cpp/include/cudf/join/hash_join.hpp index ace38dcb3f3..0865fb784cb 100644 --- a/cpp/include/cudf/join/hash_join.hpp +++ b/cpp/include/cudf/join/hash_join.hpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -45,8 +45,8 @@ class hash_join; } // namespace detail /** - * @brief The enum class to specify if any of the input join tables (`build` table and any later - * `probe` table) has nulls. + * @brief The enum class to specify if any of the input join tables (`right` table and any later + * `left` table) has nulls. * * This is used upon hash_join object construction to specify the existence of nulls in all the * possible input tables. If such null existence is unknown, `YES` should be used as the default @@ -55,11 +55,11 @@ class hash_join; enum class nullable_join : bool { YES, NO }; /** - * @brief Hash join that builds hash table in creation and probes results in subsequent `*_join` - * member functions. + * @brief Hash join that builds a hash table with the right table on construction and probes + * results in subsequent `*_join` member functions. * - * This class enables the hash join scheme that builds hash table once, and probes as many times as - * needed (possibly in parallel). + * This class enables the hash join scheme that builds with the right table once and probes + * with many left tables (possibly in parallel). */ class hash_join { public: @@ -76,16 +76,16 @@ class hash_join { /** * @brief Construct a hash join object for subsequent probe calls. 
* - * @note The `hash_join` object must not outlive the table viewed by `build`, else behavior is + * @note The `hash_join` object must not outlive the table viewed by `right`, else behavior is * undefined. * - * @throws std::invalid_argument if the build table has no columns + * @throws std::invalid_argument if the right table has no columns * - * @param build The build table, from which the hash table is built + * @param right The right table, from which the hash table is built * @param compare_nulls Controls whether null join-key values should match or not * @param stream CUDA stream used for device memory operations and kernel launches */ - hash_join(cudf::table_view const& build, + hash_join(cudf::table_view const& right, null_equality compare_nulls, rmm::cuda_stream_view stream = cudf::get_default_stream()); @@ -94,12 +94,12 @@ class hash_join { * * @throws std::invalid_argument if load_factor is not greater than 0 and less than or equal to 1 * - * @param has_nulls Flag to indicate if there exists any nulls in the `build` table or - * any `probe` table that will be used later for join + * @param has_nulls Flag to indicate if there exists any nulls in the `right` table or + * any `left` table that will be used later for join * @param load_factor The hash table occupancy ratio in (0,1]. A value of 0.5 means 50% desired * occupancy. */ - hash_join(cudf::table_view const& build, + hash_join(cudf::table_view const& right, nullable_join has_nulls, null_equality compare_nulls, double load_factor, @@ -110,22 +110,22 @@ class hash_join { * an inner join between two tables. @see cudf::inner_join(). Behavior is undefined if the * provided `output_size` is smaller than the actual output size. * - * @throw std::invalid_argument If the input probe table has nulls while this hash_join object was + * @throw std::invalid_argument If the input left table has nulls while this hash_join object was * not constructed with null check. 
* - * @param probe The probe table, from which the tuples are probed + * @param left The left table, from which the tuples are probed * @param output_size Optional value which allows users to specify the exact output size * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device * memory. * * @return A pair of columns [`left_indices`, `right_indices`] that can be used to construct - * the result of performing an inner join between two tables with `build` and `probe` + * the result of performing an inner join between two tables with `left` and `right` * as the join keys . */ [[nodiscard]] std::pair>, std::unique_ptr>> - inner_join(cudf::table_view const& probe, + inner_join(cudf::table_view const& left, std::optional output_size = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) const; @@ -135,22 +135,22 @@ class hash_join { * a left join between two tables. @see cudf::left_join(). Behavior is undefined if the * provided `output_size` is smaller than the actual output size. * - * @throw std::invalid_argument If the input probe table has nulls while this hash_join object was + * @throw std::invalid_argument If the input left table has nulls while this hash_join object was * not constructed with null check. * - * @param probe The probe table, from which the tuples are probed + * @param left The left table, from which the tuples are probed * @param output_size Optional value which allows users to specify the exact output size * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device * memory. 
* * @return A pair of columns [`left_indices`, `right_indices`] that can be used to construct - * the result of performing a left join between two tables with `build` and `probe` + * the result of performing a left join between two tables with `left` and `right` * as the join keys. */ [[nodiscard]] std::pair>, std::unique_ptr>> - left_join(cudf::table_view const& probe, + left_join(cudf::table_view const& left, std::optional output_size = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) const; @@ -160,151 +160,151 @@ class hash_join { * a full join between two tables. @see cudf::full_join(). Behavior is undefined if the * provided `output_size` is smaller than the actual output size. * - * @throw std::invalid_argument If the input probe table has nulls while this hash_join object was + * @throw std::invalid_argument If the input left table has nulls while this hash_join object was * not constructed with null check. * - * @param probe The probe table, from which the tuples are probed + * @param left The left table, from which the tuples are probed * @param output_size Optional value which allows users to specify the exact output size * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device * memory. * * @return A pair of columns [`left_indices`, `right_indices`] that can be used to construct - * the result of performing a full join between two tables with `build` and `probe` + * the result of performing a full join between two tables with `left` and `right` * as the join keys . 
*/ [[nodiscard]] std::pair>, std::unique_ptr>> - full_join(cudf::table_view const& probe, + full_join(cudf::table_view const& left, std::optional output_size = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) const; /** * Returns the exact number of matches (rows) when performing an inner join with the specified - * probe table. + * left table. * - * @throw std::invalid_argument If the input probe table has nulls while this hash_join object was + * @throw std::invalid_argument If the input left table has nulls while this hash_join object was * not constructed with null check. * - * @param probe The probe table, from which the tuples are probed + * @param left The left table, from which the tuples are probed * @param stream CUDA stream used for device memory operations and kernel launches * * @return The exact number of output when performing an inner join between two tables with - * `build` and `probe` as the join keys . + * `left` and `right` as the join keys . */ [[nodiscard]] std::size_t inner_join_size( - cudf::table_view const& probe, rmm::cuda_stream_view stream = cudf::get_default_stream()) const; + cudf::table_view const& left, rmm::cuda_stream_view stream = cudf::get_default_stream()) const; /** - * Returns the exact number of matches (rows) when performing a left join with the specified probe + * Returns the exact number of matches (rows) when performing a left join with the specified left * table. * - * @throw std::invalid_argument If the input probe table has nulls while this hash_join object was + * @throw std::invalid_argument If the input left table has nulls while this hash_join object was * not constructed with null check. 
* - * @param probe The probe table, from which the tuples are probed + * @param left The left table, from which the tuples are probed * @param stream CUDA stream used for device memory operations and kernel launches * - * @return The exact number of output when performing a left join between two tables with `build` - * and `probe` as the join keys . + * @return The exact number of output when performing a left join between two tables with `left` + * and `right` as the join keys . */ [[nodiscard]] std::size_t left_join_size( - cudf::table_view const& probe, rmm::cuda_stream_view stream = cudf::get_default_stream()) const; + cudf::table_view const& left, rmm::cuda_stream_view stream = cudf::get_default_stream()) const; /** - * Returns the exact number of matches (rows) when performing a full join with the specified probe + * Returns the exact number of matches (rows) when performing a full join with the specified left * table. * - * @throw std::invalid_argument If the input probe table has nulls while this hash_join object was + * @throw std::invalid_argument If the input left table has nulls while this hash_join object was * not constructed with null check. * - * @param probe The probe table, from which the tuples are probed + * @param left The left table, from which the tuples are probed * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the intermediate table and columns' device * memory. * - * @return The exact number of output when performing a full join between two tables with `build` - * and `probe` as the join keys . + * @return The exact number of output when performing a full join between two tables with `left` + * and `right` as the join keys . 
*/ [[nodiscard]] std::size_t full_join_size( - cudf::table_view const& probe, + cudf::table_view const& left, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) const; /** - * @brief Returns context information about matches between the probe and build tables. + * @brief Returns context information about matches between the left and right tables. * - * This method computes, for each row in the probe table, how many matching rows exist in - * the build table according to inner join semantics, and returns the number of matches through a + * This method computes, for each row in the left table, how many matching rows exist in + * the right table according to inner join semantics, and returns the number of matches through a * join_match_context object. * * This is particularly useful for: * - Determining the total size of a potential join result without materializing it * - Planning partitioned join operations for large datasets * - * @throw std::invalid_argument If the input probe table has nulls while this hash_join object was + * @throw std::invalid_argument If the input left table has nulls while this hash_join object was * not constructed with null check. 
* - * @param probe The probe table to join with the pre-processed build table + * @param left The left table to join with the pre-processed right table * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the result device memory * - * @return A join_match_context object containing the probe table view and a device vector - * of match counts for each row in the probe table + * @return A join_match_context object containing the left table view and a device vector + * of match counts for each row in the left table */ [[nodiscard]] cudf::join_match_context inner_join_match_context( - cudf::table_view const& probe, + cudf::table_view const& left, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) const; /** - * @brief Returns context information about matches between the probe and build tables. + * @brief Returns context information about matches between the left and right tables. * - * This method computes, for each row in the probe table, how many matching rows exist in - * the build table according to left join semantics, and returns the number of matches through a + * This method computes, for each row in the left table, how many matching rows exist in + * the right table according to left join semantics, and returns the number of matches through a * join_match_context object. * - * For left join, every row in the probe table will have at least one match (either with a - * matching row from the build table or with a null placeholder). + * For left join, every row in the left table will have at least one match (either with a + * matching row from the right table or with a null placeholder). 
* - * @throw std::invalid_argument If the input probe table has nulls while this hash_join object was + * @throw std::invalid_argument If the input left table has nulls while this hash_join object was * not constructed with null check. * - * @param probe The probe table to join with the pre-processed build table + * @param left The left table to join with the pre-processed right table * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the result device memory * - * @return A join_match_context object containing the probe table view and a device vector - * of match counts for each row in the probe table + * @return A join_match_context object containing the left table view and a device vector + * of match counts for each row in the left table */ [[nodiscard]] cudf::join_match_context left_join_match_context( - cudf::table_view const& probe, + cudf::table_view const& left, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) const; /** - * @brief Returns context information about matches between the probe and build tables. + * @brief Returns context information about matches between the left and right tables. * - * This method computes, for each row in the probe table, how many matching rows exist in - * the build table according to full join semantics, and returns the number of matches through a + * This method computes, for each row in the left table, how many matching rows exist in + * the right table according to full join semantics, and returns the number of matches through a * join_match_context object. * - * For full join, this includes matches for probe table rows, and the result may need to be - * combined with unmatched rows from the build table to get the complete picture. 
+ * For full join, this includes matches for left table rows, and the result may need to be + * combined with unmatched rows from the right table to get the complete picture. * - * @throw std::invalid_argument If the input probe table has nulls while this hash_join object was + * @throw std::invalid_argument If the input left table has nulls while this hash_join object was * not constructed with null check. * - * @param probe The probe table to join with the pre-processed build table + * @param left The left table to join with the pre-processed right table * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the result device memory * - * @return A join_match_context object containing the probe table view and a device vector - * of match counts for each row in the probe table + * @return A join_match_context object containing the left table view and a device vector + * of match counts for each row in the left table */ [[nodiscard]] cudf::join_match_context full_join_match_context( - cudf::table_view const& probe, + cudf::table_view const& left, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) const; diff --git a/cpp/src/join/distinct_hash_join.cu b/cpp/src/join/distinct_hash_join.cu index 36b057d33a4..718f96bf29d 100644 --- a/cpp/src/join/distinct_hash_join.cu +++ b/cpp/src/join/distinct_hash_join.cu @@ -103,7 +103,7 @@ struct output_fn { * @param hash_table The hash table to search in * @param iter Iterator over hash values * @param d_equal Equality comparator - * @param probe The probe table + * @param left The left table * @param hasher Hash function * @param nulls_equal Null equality setting * @param found_begin Output iterator for found indices @@ -117,27 +117,27 @@ template {0}; auto const row_bitmask = - cudf::detail::bitmask_and(probe, stream, cudf::get_current_device_resource_ref()).first; + 
cudf::detail::bitmask_and(left, stream, cudf::get_current_device_resource_ref()).first; auto const pred = cudf::detail::row_is_valid{reinterpret_cast(row_bitmask.data())}; hash_table.find_if_async(iter, - iter + probe_table_num_rows, + iter + left_table_num_rows, stencil, pred, d_equal, @@ -149,22 +149,22 @@ void find_matches_in_hash_table(HashTableType const& hash_table, } // namespace -distinct_hash_join::distinct_hash_join(cudf::table_view const& build, +distinct_hash_join::distinct_hash_join(cudf::table_view const& right, cudf::null_equality compare_nulls, rmm::cuda_stream_view stream) - : distinct_hash_join{build, compare_nulls, CUCO_DESIRED_LOAD_FACTOR, stream} + : distinct_hash_join{right, compare_nulls, CUCO_DESIRED_LOAD_FACTOR, stream} { } -distinct_hash_join::distinct_hash_join(cudf::table_view const& build, +distinct_hash_join::distinct_hash_join(cudf::table_view const& right, cudf::null_equality compare_nulls, double load_factor, rmm::cuda_stream_view stream) - : _has_nested_columns{cudf::has_nested_columns(build)}, + : _has_nested_columns{cudf::has_nested_columns(right)}, _nulls_equal{compare_nulls}, - _build{build}, - _preprocessed_build{cudf::detail::row::equality::preprocessed_table::create(_build, stream)}, - _hash_table{cuco::extent{static_cast(build.num_rows())}, + _right{right}, + _preprocessed_right{cudf::detail::row::equality::preprocessed_table::create(_right, stream)}, + _hash_table{cuco::extent{static_cast(right.num_rows())}, load_factor, cuco::empty_key{cuco::pair{std::numeric_limits::max(), rhs_index_type{cudf::JoinNoMatch}}}, @@ -176,41 +176,41 @@ distinct_hash_join::distinct_hash_join(cudf::table_view const& build, stream.value()} { CUDF_FUNC_RANGE(); - CUDF_EXPECTS(0 != this->_build.num_columns(), "Hash join build table is empty"); + CUDF_EXPECTS(0 != this->_right.num_columns(), "Hash join right table is empty"); CUDF_EXPECTS(load_factor > 0 && load_factor <= 1, "Invalid load factor: must be greater than 0 and less than or equal to 
1.", std::invalid_argument); - size_type const build_table_num_rows{_build.num_rows()}; + size_type const right_table_num_rows{_right.num_rows()}; - if (build_table_num_rows == 0) { return; } + if (right_table_num_rows == 0) { return; } auto const build_hash_table = [&](auto iter) { - if (this->_nulls_equal == cudf::null_equality::EQUAL or (not cudf::nullable(build))) { - this->_hash_table.insert_async(iter, iter + build_table_num_rows, stream.value()); + if (this->_nulls_equal == cudf::null_equality::EQUAL or (not cudf::nullable(right))) { + this->_hash_table.insert_async(iter, iter + right_table_num_rows, stream.value()); } else { auto stencil = cuda::counting_iterator{0}; auto const row_bitmask = - cudf::detail::bitmask_and(_build, stream, cudf::get_current_device_resource_ref()).first; + cudf::detail::bitmask_and(_right, stream, cudf::get_current_device_resource_ref()).first; auto const pred = cudf::detail::row_is_valid{reinterpret_cast(row_bitmask.data())}; // insert valid rows this->_hash_table.insert_if_async( - iter, iter + build_table_num_rows, stencil, pred, stream.value()); + iter, iter + right_table_num_rows, stencil, pred, stream.value()); } }; - if (cudf::detail::is_primitive_row_op_compatible(_build)) { + if (cudf::detail::is_primitive_row_op_compatible(_right)) { auto const d_hasher = cudf::detail::row::primitive::row_hasher{nullate::DYNAMIC{has_nulls}, - this->_preprocessed_build}; + this->_preprocessed_right}; auto const iter = cudf::detail::make_counting_transform_iterator( 0, primitive_keys_fn{d_hasher}); build_hash_table(iter); } else { - auto const row_hasher = detail::row::hash::row_hasher{this->_preprocessed_build}; + auto const row_hasher = detail::row::hash::row_hasher{this->_preprocessed_right}; auto const d_hasher = row_hasher.device_hasher(nullate::DYNAMIC{has_nulls}); auto const iter = @@ -222,54 +222,54 @@ distinct_hash_join::distinct_hash_join(cudf::table_view const& build, std::pair>, std::unique_ptr>> 
-distinct_hash_join::inner_join(cudf::table_view const& probe, +distinct_hash_join::inner_join(cudf::table_view const& left, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const { cudf::scoped_range range{"distinct_hash_join::inner_join"}; - size_type const probe_table_num_rows{probe.num_rows()}; + size_type const left_table_num_rows{left.num_rows()}; // If output size is zero, return immediately - if (probe_table_num_rows == 0) { + if (left_table_num_rows == 0) { return std::pair(std::make_unique>(0, stream, mr), std::make_unique>(0, stream, mr)); } - auto build_indices = - std::make_unique>(probe_table_num_rows, stream, mr); - auto probe_indices = - std::make_unique>(probe_table_num_rows, stream, mr); + auto right_indices = + std::make_unique>(left_table_num_rows, stream, mr); + auto left_indices = + std::make_unique>(left_table_num_rows, stream, mr); - auto found_indices = rmm::device_uvector(probe_table_num_rows, stream); + auto found_indices = rmm::device_uvector(left_table_num_rows, stream); auto const found_begin = thrust::make_transform_output_iterator(found_indices.begin(), output_fn{}); - auto preprocessed_probe = cudf::detail::row::equality::preprocessed_table::create(probe, stream); - if (cudf::detail::is_primitive_row_op_compatible(_build)) { + auto preprocessed_left = cudf::detail::row::equality::preprocessed_table::create(left, stream); + if (cudf::detail::is_primitive_row_op_compatible(_right)) { auto const d_hasher = - cudf::detail::row::primitive::row_hasher{nullate::DYNAMIC{has_nulls}, preprocessed_probe}; + cudf::detail::row::primitive::row_hasher{nullate::DYNAMIC{has_nulls}, preprocessed_left}; auto const d_equal = cudf::detail::row::primitive::row_equality_comparator{ - nullate::DYNAMIC{has_nulls}, preprocessed_probe, _preprocessed_build, _nulls_equal}; + nullate::DYNAMIC{has_nulls}, preprocessed_left, _preprocessed_right, _nulls_equal}; auto const iter = cudf::detail::make_counting_transform_iterator( 0, 
primitive_keys_fn{d_hasher}); find_matches_in_hash_table(this->_hash_table, iter, primitive_comparator_adapter{d_equal}, - probe, + left, hasher{}, _nulls_equal, found_begin, stream); } else { auto const two_table_equal = - cudf::detail::row::equality::two_table_comparator(preprocessed_probe, _preprocessed_build); + cudf::detail::row::equality::two_table_comparator(preprocessed_left, _preprocessed_right); - auto const probe_row_hasher = cudf::detail::row::hash::row_hasher{preprocessed_probe}; - auto const d_probe_hasher = probe_row_hasher.device_hasher(nullate::DYNAMIC{has_nulls}); - auto const iter = cudf::detail::make_counting_transform_iterator( - 0, build_keys_fn{d_probe_hasher}); + auto const left_row_hasher = cudf::detail::row::hash::row_hasher{preprocessed_left}; + auto const d_left_hasher = left_row_hasher.device_hasher(nullate::DYNAMIC{has_nulls}); + auto const iter = cudf::detail::make_counting_transform_iterator( + 0, build_keys_fn{d_left_hasher}); if (_has_nested_columns) { auto const device_comparator = @@ -277,7 +277,7 @@ distinct_hash_join::inner_join(cudf::table_view const& probe, find_matches_in_hash_table(this->_hash_table, iter, comparator_adapter{device_comparator}, - probe, + left, hasher{}, _nulls_equal, found_begin, @@ -288,7 +288,7 @@ distinct_hash_join::inner_join(cudf::table_view const& probe, find_matches_in_hash_table(this->_hash_table, iter, comparator_adapter{device_comparator}, - probe, + left, hasher{}, _nulls_equal, found_begin, @@ -303,10 +303,10 @@ distinct_hash_join::inner_join(cudf::table_view const& probe, return cuda::std::tuple{*(found_iter + idx), idx}; })); auto const output_begin = - thrust::make_zip_iterator(build_indices->begin(), probe_indices->begin()); + thrust::make_zip_iterator(right_indices->begin(), left_indices->begin()); auto const output_end = cudf::detail::copy_if(tuple_iter, - tuple_iter + probe_table_num_rows, + tuple_iter + left_table_num_rows, found_indices.begin(), output_begin, 
cuda::proclaim_return_type( @@ -314,38 +314,38 @@ distinct_hash_join::inner_join(cudf::table_view const& probe, stream); auto const actual_size = std::distance(output_begin, output_end); - build_indices->resize(actual_size, stream); - probe_indices->resize(actual_size, stream); + right_indices->resize(actual_size, stream); + left_indices->resize(actual_size, stream); - return {std::move(probe_indices), std::move(build_indices)}; + return {std::move(left_indices), std::move(right_indices)}; } std::unique_ptr> distinct_hash_join::left_join( - cudf::table_view const& probe, + cudf::table_view const& left, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const { cudf::scoped_range range{"distinct_hash_join::left_join"}; - size_type const probe_table_num_rows{probe.num_rows()}; + size_type const left_table_num_rows{left.num_rows()}; // If output size is zero, return empty - if (probe_table_num_rows == 0) { + if (left_table_num_rows == 0) { return std::make_unique>(0, stream, mr); } - auto build_indices = - std::make_unique>(probe_table_num_rows, stream, mr); + auto right_indices = + std::make_unique>(left_table_num_rows, stream, mr); auto const output_begin = - thrust::make_transform_output_iterator(build_indices->begin(), output_fn{}); + thrust::make_transform_output_iterator(right_indices->begin(), output_fn{}); - auto preprocessed_probe = cudf::detail::row::equality::preprocessed_table::create(probe, stream); + auto preprocessed_left = cudf::detail::row::equality::preprocessed_table::create(left, stream); - if (cudf::detail::is_primitive_row_op_compatible(_build)) { + if (cudf::detail::is_primitive_row_op_compatible(_right)) { auto const d_hasher = - cudf::detail::row::primitive::row_hasher{nullate::DYNAMIC{has_nulls}, preprocessed_probe}; + cudf::detail::row::primitive::row_hasher{nullate::DYNAMIC{has_nulls}, preprocessed_left}; auto const d_equal = cudf::detail::row::primitive::row_equality_comparator{ - nullate::DYNAMIC{has_nulls}, 
preprocessed_probe, _preprocessed_build, _nulls_equal}; + nullate::DYNAMIC{has_nulls}, preprocessed_left, _preprocessed_right, _nulls_equal}; auto const iter = cudf::detail::make_counting_transform_iterator( 0, primitive_keys_fn{d_hasher}); @@ -353,26 +353,26 @@ std::unique_ptr> distinct_hash_join::left_join( find_matches_in_hash_table(this->_hash_table, iter, primitive_comparator_adapter{d_equal}, - probe, + left, hasher{}, _nulls_equal, output_begin, stream); } else { - // If build table is empty, return probe table - if (this->_build.num_rows() == 0) { + // If right table is empty, return left table + if (this->_right.num_rows() == 0) { thrust::fill(rmm::exec_policy_nosync(stream, cudf::get_current_device_resource_ref()), - build_indices->begin(), - build_indices->end(), + right_indices->begin(), + right_indices->end(), cudf::JoinNoMatch); } else { auto const two_table_equal = - cudf::detail::row::equality::two_table_comparator(preprocessed_probe, _preprocessed_build); + cudf::detail::row::equality::two_table_comparator(preprocessed_left, _preprocessed_right); - auto const probe_row_hasher = cudf::detail::row::hash::row_hasher{preprocessed_probe}; - auto const d_probe_hasher = probe_row_hasher.device_hasher(nullate::DYNAMIC{has_nulls}); - auto const iter = cudf::detail::make_counting_transform_iterator( - 0, build_keys_fn{d_probe_hasher}); + auto const left_row_hasher = cudf::detail::row::hash::row_hasher{preprocessed_left}; + auto const d_left_hasher = left_row_hasher.device_hasher(nullate::DYNAMIC{has_nulls}); + auto const iter = cudf::detail::make_counting_transform_iterator( + 0, build_keys_fn{d_left_hasher}); if (_has_nested_columns) { auto const device_comparator = @@ -380,7 +380,7 @@ std::unique_ptr> distinct_hash_join::left_join( find_matches_in_hash_table(this->_hash_table, iter, comparator_adapter{device_comparator}, - probe, + left, hasher{}, _nulls_equal, output_begin, @@ -391,7 +391,7 @@ std::unique_ptr> distinct_hash_join::left_join( 
find_matches_in_hash_table(this->_hash_table, iter, comparator_adapter{device_comparator}, - probe, + left, hasher{}, _nulls_equal, output_begin, @@ -399,34 +399,34 @@ std::unique_ptr> distinct_hash_join::left_join( } } } - return build_indices; + return right_indices; } } // namespace detail distinct_hash_join::~distinct_hash_join() = default; -distinct_hash_join::distinct_hash_join(cudf::table_view const& build, +distinct_hash_join::distinct_hash_join(cudf::table_view const& right, null_equality compare_nulls, double load_factor, rmm::cuda_stream_view stream) - : _impl{std::make_unique(build, compare_nulls, load_factor, stream)} + : _impl{std::make_unique(right, compare_nulls, load_factor, stream)} { } std::pair>, std::unique_ptr>> -distinct_hash_join::inner_join(cudf::table_view const& probe, +distinct_hash_join::inner_join(cudf::table_view const& left, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const { - return _impl->inner_join(probe, stream, mr); + return _impl->inner_join(left, stream, mr); } std::unique_ptr> distinct_hash_join::left_join( - cudf::table_view const& probe, + cudf::table_view const& left, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const { - return _impl->left_join(probe, stream, mr); + return _impl->left_join(left, stream, mr); } } // namespace cudf diff --git a/cpp/src/join/hash_join/common.cuh b/cpp/src/join/hash_join/common.cuh index f523564f20f..d80f981099e 100644 --- a/cpp/src/join/hash_join/common.cuh +++ b/cpp/src/join/hash_join/common.cuh @@ -24,17 +24,17 @@ using hash_table_t = typename cudf::detail::hash_join::imp bool is_trivial_join(table_view const& left, table_view const& right, join_kind join_type); -void validate_hash_join_probe(table_view const& build, table_view const& probe, bool has_nulls); +void validate_hash_join_probe(table_view const& right, table_view const& left, bool has_nulls); std::unique_ptr> make_join_match_counts( - table_view const& build, - std::shared_ptr const& 
preprocessed_build, + table_view const& right, + std::shared_ptr const& preprocessed_right, cudf::detail::hash_table_t const& hash_table, bool is_empty, bool has_nulls, null_equality compare_nulls, join_kind join, - table_view const& probe, + table_view const& left, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); diff --git a/cpp/src/join/hash_join/dispatch.cuh b/cpp/src/join/hash_join/dispatch.cuh index 43f3c6edf3e..2092817e293 100644 --- a/cpp/src/join/hash_join/dispatch.cuh +++ b/cpp/src/join/hash_join/dispatch.cuh @@ -42,7 +42,7 @@ class pair_equal { }; /** - * @brief Extracts the build-side row index from a cuco hash table slot. + * @brief Extracts the right-side row index from a cuco hash table slot. */ struct output_fn { __device__ constexpr cudf::size_type operator()( @@ -75,34 +75,34 @@ class primitive_pair_equal { template decltype(auto) dispatch_join_comparator( - table_view const& build_table, - table_view const& probe_table, - std::shared_ptr const& preprocessed_build, - std::shared_ptr const& preprocessed_probe, + table_view const& right_table, + table_view const& left_table, + std::shared_ptr const& preprocessed_right, + std::shared_ptr const& preprocessed_left, bool has_nulls, null_equality compare_nulls, Fn&& fn) { - auto const probe_nulls = cudf::nullate::DYNAMIC{has_nulls}; + auto const left_nulls = cudf::nullate::DYNAMIC{has_nulls}; - if (cudf::detail::is_primitive_row_op_compatible(build_table)) { - auto const d_hasher = cudf::detail::row::primitive::row_hasher{probe_nulls, preprocessed_probe}; + if (cudf::detail::is_primitive_row_op_compatible(right_table)) { + auto const d_hasher = cudf::detail::row::primitive::row_hasher{left_nulls, preprocessed_left}; auto const d_equal = cudf::detail::row::primitive::row_equality_comparator{ - probe_nulls, preprocessed_probe, preprocessed_build, compare_nulls}; + left_nulls, preprocessed_left, preprocessed_right, compare_nulls}; return std::forward(fn)(primitive_pair_equal{d_equal}, 
d_hasher); } auto const d_hasher = - cudf::detail::row::hash::row_hasher{preprocessed_probe}.device_hasher(probe_nulls); + cudf::detail::row::hash::row_hasher{preprocessed_left}.device_hasher(left_nulls); auto const row_comparator = - cudf::detail::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build}; + cudf::detail::row::equality::two_table_comparator{preprocessed_left, preprocessed_right}; - if (cudf::detail::has_nested_columns(probe_table)) { - auto const d_equal = row_comparator.equal_to(probe_nulls, compare_nulls); + if (cudf::detail::has_nested_columns(left_table)) { + auto const d_equal = row_comparator.equal_to(left_nulls, compare_nulls); return std::forward(fn)(pair_equal{d_equal}, d_hasher); } - auto const d_equal = row_comparator.equal_to(probe_nulls, compare_nulls); + auto const d_equal = row_comparator.equal_to(left_nulls, compare_nulls); return std::forward(fn)(pair_equal{d_equal}, d_hasher); } diff --git a/cpp/src/join/hash_join/full_join_match_context.cpp b/cpp/src/join/hash_join/full_join_match_context.cpp index b2a6f063f22..f065064e7f6 100644 --- a/cpp/src/join/hash_join/full_join_match_context.cpp +++ b/cpp/src/join/hash_join/full_join_match_context.cpp @@ -12,17 +12,16 @@ namespace cudf::detail { template cudf::join_match_context hash_join::full_join_match_context( - cudf::table_view const& probe, + cudf::table_view const& left, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const { cudf::scoped_range range{"hash_join::full_join_match_context"}; - return cudf::join_match_context{probe, - make_match_counts(join_kind::FULL_JOIN, probe, stream, mr)}; + return cudf::join_match_context{left, make_match_counts(join_kind::FULL_JOIN, left, stream, mr)}; } template cudf::join_match_context cudf::hash_join::impl_type::full_join_match_context( - cudf::table_view const& probe, + cudf::table_view const& left, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const; diff --git 
a/cpp/src/join/hash_join/full_join_retrieve.cu b/cpp/src/join/hash_join/full_join_retrieve.cu index cf0b33e6ef7..7830b6aea6d 100644 --- a/cpp/src/join/hash_join/full_join_retrieve.cu +++ b/cpp/src/join/hash_join/full_join_retrieve.cu @@ -10,17 +10,17 @@ namespace cudf::detail { template std::pair>, std::unique_ptr>> -hash_join::full_join(cudf::table_view const& probe, +hash_join::full_join(cudf::table_view const& left, std::optional output_size, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const { - return this->template join_retrieve(probe, output_size, stream, mr); + return this->template join_retrieve(left, output_size, stream, mr); } template std::pair>, std::unique_ptr>> -hash_join::full_join(cudf::table_view const& probe, +hash_join::full_join(cudf::table_view const& left, std::optional output_size, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const; diff --git a/cpp/src/join/hash_join/full_join_size.cu b/cpp/src/join/hash_join/full_join_size.cu index b892289d277..b73f19b4240 100644 --- a/cpp/src/join/hash_join/full_join_size.cu +++ b/cpp/src/join/hash_join/full_join_size.cu @@ -8,15 +8,15 @@ namespace cudf::detail { template -std::size_t hash_join::full_join_size(cudf::table_view const& probe, +std::size_t hash_join::full_join_size(cudf::table_view const& left, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const { - return this->template join_size(probe, stream, mr); + return this->template join_size(left, stream, mr); } template std::size_t hash_join::full_join_size( - cudf::table_view const& probe, + cudf::table_view const& left, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const; diff --git a/cpp/src/join/hash_join/full_join_size_impl.cu b/cpp/src/join/hash_join/full_join_size_impl.cu index 4ce0f2b57aa..308bfa62da4 100644 --- a/cpp/src/join/hash_join/full_join_size_impl.cu +++ b/cpp/src/join/hash_join/full_join_size_impl.cu @@ -53,20 +53,20 @@ std::size_t 
compute_left_join_complement_size(cudf::device_span } // namespace std::size_t get_full_join_size( - cudf::table_view const& build_table, - cudf::table_view const& probe_table, - std::shared_ptr const& preprocessed_build, - std::shared_ptr const& preprocessed_probe, + cudf::table_view const& right_table, + cudf::table_view const& left_table, + std::shared_ptr const& preprocessed_right, + std::shared_ptr const& preprocessed_left, cudf::detail::hash_table_t const& hash_table, bool has_nulls, null_equality compare_nulls, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - std::size_t join_size = compute_join_output_size(build_table, - probe_table, - preprocessed_build, - preprocessed_probe, + std::size_t join_size = compute_join_output_size(right_table, + left_table, + preprocessed_right, + preprocessed_left, hash_table, has_nulls, compare_nulls, @@ -79,18 +79,18 @@ std::size_t get_full_join_size( auto const out_build_begin = thrust::make_transform_output_iterator(right_indices->begin(), output_fn{}); - retrieve_left_join_build_indices(build_table, - probe_table, - preprocessed_build, - preprocessed_probe, + retrieve_left_join_build_indices(right_table, + left_table, + preprocessed_right, + preprocessed_left, hash_table, has_nulls, compare_nulls, out_build_begin, stream); - auto const left_table_row_count = probe_table.num_rows(); - auto const right_table_row_count = build_table.num_rows(); + auto const left_table_row_count = left_table.num_rows(); + auto const right_table_row_count = right_table.num_rows(); return join_size + compute_left_join_complement_size( *right_indices, left_table_row_count, right_table_row_count, stream); diff --git a/cpp/src/join/hash_join/hash_join.cu b/cpp/src/join/hash_join/hash_join.cu index 28448cbb677..5c7679ae588 100644 --- a/cpp/src/join/hash_join/hash_join.cu +++ b/cpp/src/join/hash_join/hash_join.cu @@ -43,81 +43,81 @@ bool is_trivial_join(table_view const& left, table_view const& right, join_kind return false; } 
-void validate_hash_join_probe(table_view const& build, table_view const& probe, bool has_nulls) +void validate_hash_join_probe(table_view const& right, table_view const& left, bool has_nulls) { - CUDF_EXPECTS(0 != probe.num_columns(), "Hash join probe table is empty", std::invalid_argument); - CUDF_EXPECTS(build.num_columns() == probe.num_columns(), + CUDF_EXPECTS(0 != left.num_columns(), "Hash join left table is empty", std::invalid_argument); + CUDF_EXPECTS(right.num_columns() == left.num_columns(), "Mismatch in number of columns to be joined on", std::invalid_argument); - CUDF_EXPECTS(has_nulls || !cudf::has_nested_nulls(probe), - "Probe table has nulls while build table was not hashed with null check.", + CUDF_EXPECTS(has_nulls || !cudf::has_nested_nulls(left), + "Left table has nulls while right table was not hashed with null check.", std::invalid_argument); - CUDF_EXPECTS(cudf::have_same_types(build, probe), + CUDF_EXPECTS(cudf::have_same_types(right, left), "Mismatch in joining column data types", cudf::data_type_error); } namespace { void build_hash_join( - cudf::table_view const& build, - std::shared_ptr const& preprocessed_build, + cudf::table_view const& right, + std::shared_ptr const& preprocessed_right, cudf::detail::hash_table_t& hash_table, bool has_nested_nulls, null_equality nulls_equal, [[maybe_unused]] bitmask_type const* bitmask, rmm::cuda_stream_view stream) { - CUDF_EXPECTS(0 != build.num_columns(), "Selected build dataset is empty", std::invalid_argument); - CUDF_EXPECTS(0 != build.num_rows(), "Build side table has no rows", std::invalid_argument); + CUDF_EXPECTS(0 != right.num_columns(), "Selected right dataset is empty", std::invalid_argument); + CUDF_EXPECTS(0 != right.num_rows(), "Right side table has no rows", std::invalid_argument); - auto insert_rows = [&](auto const& build, auto const& d_hasher) { + auto insert_rows = [&](auto const& right, auto const& d_hasher) { auto const iter = cudf::detail::make_counting_transform_iterator(0, 
pair_fn{d_hasher}); - if (nulls_equal == cudf::null_equality::EQUAL or not nullable(build)) { - hash_table.insert(iter, iter + build.num_rows(), stream.value()); + if (nulls_equal == cudf::null_equality::EQUAL or not nullable(right)) { + hash_table.insert(iter, iter + right.num_rows(), stream.value()); } else { auto const stencil = cuda::counting_iterator{0}; auto const pred = row_is_valid{bitmask}; - hash_table.insert_if(iter, iter + build.num_rows(), stencil, pred, stream.value()); + hash_table.insert_if(iter, iter + right.num_rows(), stencil, pred, stream.value()); } }; auto const nulls = nullate::DYNAMIC{has_nested_nulls}; - if (cudf::detail::is_primitive_row_op_compatible(build)) { - auto const d_hasher = cudf::detail::row::primitive::row_hasher{nulls, preprocessed_build}; + if (cudf::detail::is_primitive_row_op_compatible(right)) { + auto const d_hasher = cudf::detail::row::primitive::row_hasher{nulls, preprocessed_right}; - insert_rows(build, d_hasher); + insert_rows(right, d_hasher); } else { - auto const row_hash = detail::row::hash::row_hasher{preprocessed_build}; + auto const row_hash = detail::row::hash::row_hasher{preprocessed_right}; auto const d_hasher = row_hash.device_hasher(nulls); - insert_rows(build, d_hasher); + insert_rows(right, d_hasher); } } } // namespace template -hash_join::hash_join(cudf::table_view const& build, +hash_join::hash_join(cudf::table_view const& right, bool has_nulls, cudf::null_equality compare_nulls, rmm::cuda_stream_view stream) - : hash_join{build, has_nulls, compare_nulls, CUCO_DESIRED_LOAD_FACTOR, stream} + : hash_join{right, has_nulls, compare_nulls, CUCO_DESIRED_LOAD_FACTOR, stream} { } template -hash_join::hash_join(cudf::table_view const& build, +hash_join::hash_join(cudf::table_view const& right, bool has_nulls, cudf::null_equality compare_nulls, double load_factor, rmm::cuda_stream_view stream) : _has_nulls(has_nulls), - _is_empty{build.num_rows() == 0}, + _is_empty{right.num_rows() == 0}, 
_nulls_equal{compare_nulls}, _impl{std::make_unique(impl{typename impl::hash_table_t{ - cuco::extent{static_cast(build.num_rows())}, + cuco::extent{static_cast(right.num_rows())}, load_factor, cuco::empty_key{cuco::pair{std::numeric_limits::max(), cudf::JoinNoMatch}}, {}, @@ -126,11 +126,11 @@ hash_join::hash_join(cudf::table_view const& build, {}, rmm::mr::polymorphic_allocator{}, stream.value()}})}, - _build{build}, - _preprocessed_build{cudf::detail::row::equality::preprocessed_table::create(_build, stream)} + _right{right}, + _preprocessed_right{cudf::detail::row::equality::preprocessed_table::create(_right, stream)} { CUDF_FUNC_RANGE(); - CUDF_EXPECTS(0 != build.num_columns(), "Hash join build table is empty", std::invalid_argument); + CUDF_EXPECTS(0 != right.num_columns(), "Hash join right table is empty", std::invalid_argument); CUDF_EXPECTS(load_factor > 0 && load_factor <= 1, "Invalid load factor: must be greater than 0 and less than or equal to 1.", std::invalid_argument); @@ -138,9 +138,9 @@ hash_join::hash_join(cudf::table_view const& build, if (_is_empty) { return; } auto const row_bitmask = - cudf::detail::bitmask_and(build, stream, cudf::get_current_device_resource_ref()).first; - cudf::detail::build_hash_join(_build, - _preprocessed_build, + cudf::detail::bitmask_and(right, stream, cudf::get_current_device_resource_ref()).first; + cudf::detail::build_hash_join(_right, + _preprocessed_right, _impl->_hash_table, _has_nulls, _nulls_equal, @@ -148,12 +148,12 @@ hash_join::hash_join(cudf::table_view const& build, stream); } -template hash_join::hash_join(cudf::table_view const& build, +template hash_join::hash_join(cudf::table_view const& right, bool has_nulls, cudf::null_equality compare_nulls, rmm::cuda_stream_view stream); -template hash_join::hash_join(cudf::table_view const& build, +template hash_join::hash_join(cudf::table_view const& right, bool has_nulls, cudf::null_equality compare_nulls, double load_factor, @@ -170,93 +170,93 @@ namespace cudf 
{ hash_join::~hash_join() = default; -hash_join::hash_join(cudf::table_view const& build, +hash_join::hash_join(cudf::table_view const& right, null_equality compare_nulls, rmm::cuda_stream_view stream) : hash_join( - build, nullable_join::YES, compare_nulls, cudf::detail::CUCO_DESIRED_LOAD_FACTOR, stream) + right, nullable_join::YES, compare_nulls, cudf::detail::CUCO_DESIRED_LOAD_FACTOR, stream) { } -hash_join::hash_join(cudf::table_view const& build, +hash_join::hash_join(cudf::table_view const& right, nullable_join has_nulls, null_equality compare_nulls, double load_factor, rmm::cuda_stream_view stream) : _impl{std::make_unique( - build, has_nulls == nullable_join::YES, compare_nulls, load_factor, stream)} + right, has_nulls == nullable_join::YES, compare_nulls, load_factor, stream)} { } std::pair>, std::unique_ptr>> -hash_join::inner_join(cudf::table_view const& probe, +hash_join::inner_join(cudf::table_view const& left, std::optional output_size, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const { - return _impl->inner_join(probe, output_size, stream, mr); + return _impl->inner_join(left, output_size, stream, mr); } std::pair>, std::unique_ptr>> -hash_join::left_join(cudf::table_view const& probe, +hash_join::left_join(cudf::table_view const& left, std::optional output_size, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const { - return _impl->left_join(probe, output_size, stream, mr); + return _impl->left_join(left, output_size, stream, mr); } std::pair>, std::unique_ptr>> -hash_join::full_join(cudf::table_view const& probe, +hash_join::full_join(cudf::table_view const& left, std::optional output_size, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const { - return _impl->full_join(probe, output_size, stream, mr); + return _impl->full_join(left, output_size, stream, mr); } -std::size_t hash_join::inner_join_size(cudf::table_view const& probe, +std::size_t hash_join::inner_join_size(cudf::table_view 
const& left, rmm::cuda_stream_view stream) const { - return _impl->inner_join_size(probe, stream); + return _impl->inner_join_size(left, stream); } -std::size_t hash_join::left_join_size(cudf::table_view const& probe, +std::size_t hash_join::left_join_size(cudf::table_view const& left, rmm::cuda_stream_view stream) const { - return _impl->left_join_size(probe, stream); + return _impl->left_join_size(left, stream); } -std::size_t hash_join::full_join_size(cudf::table_view const& probe, +std::size_t hash_join::full_join_size(cudf::table_view const& left, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const { - return _impl->full_join_size(probe, stream, mr); + return _impl->full_join_size(left, stream, mr); } cudf::join_match_context hash_join::inner_join_match_context( - cudf::table_view const& probe, + cudf::table_view const& left, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const { - return _impl->inner_join_match_context(probe, stream, mr); + return _impl->inner_join_match_context(left, stream, mr); } -cudf::join_match_context hash_join::left_join_match_context(cudf::table_view const& probe, +cudf::join_match_context hash_join::left_join_match_context(cudf::table_view const& left, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const { - return _impl->left_join_match_context(probe, stream, mr); + return _impl->left_join_match_context(left, stream, mr); } -cudf::join_match_context hash_join::full_join_match_context(cudf::table_view const& probe, +cudf::join_match_context hash_join::full_join_match_context(cudf::table_view const& left, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const { - return _impl->full_join_match_context(probe, stream, mr); + return _impl->full_join_match_context(left, stream, mr); } } // namespace cudf diff --git a/cpp/src/join/hash_join/inner_join_match_context.cpp b/cpp/src/join/hash_join/inner_join_match_context.cpp index 93b2ee7da5a..cf107c7a422 100644 --- 
a/cpp/src/join/hash_join/inner_join_match_context.cpp +++ b/cpp/src/join/hash_join/inner_join_match_context.cpp @@ -12,17 +12,16 @@ namespace cudf::detail { template cudf::join_match_context hash_join::inner_join_match_context( - cudf::table_view const& probe, + cudf::table_view const& left, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const { cudf::scoped_range range{"hash_join::inner_join_match_context"}; - return cudf::join_match_context{probe, - make_match_counts(join_kind::INNER_JOIN, probe, stream, mr)}; + return cudf::join_match_context{left, make_match_counts(join_kind::INNER_JOIN, left, stream, mr)}; } template cudf::join_match_context cudf::hash_join::impl_type::inner_join_match_context( - cudf::table_view const& probe, + cudf::table_view const& left, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const; diff --git a/cpp/src/join/hash_join/inner_join_retrieve.cu b/cpp/src/join/hash_join/inner_join_retrieve.cu index aad679d216c..57386367a81 100644 --- a/cpp/src/join/hash_join/inner_join_retrieve.cu +++ b/cpp/src/join/hash_join/inner_join_retrieve.cu @@ -10,17 +10,17 @@ namespace cudf::detail { template std::pair>, std::unique_ptr>> -hash_join::inner_join(cudf::table_view const& probe, +hash_join::inner_join(cudf::table_view const& left, std::optional output_size, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const { - return this->template join_retrieve(probe, output_size, stream, mr); + return this->template join_retrieve(left, output_size, stream, mr); } template std::pair>, std::unique_ptr>> -hash_join::inner_join(cudf::table_view const& probe, +hash_join::inner_join(cudf::table_view const& left, std::optional output_size, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const; diff --git a/cpp/src/join/hash_join/inner_join_size.cu b/cpp/src/join/hash_join/inner_join_size.cu index 3bcd250a80c..2fedd93593e 100644 --- a/cpp/src/join/hash_join/inner_join_size.cu +++ 
b/cpp/src/join/hash_join/inner_join_size.cu @@ -8,13 +8,13 @@ namespace cudf::detail { template -std::size_t hash_join::inner_join_size(cudf::table_view const& probe, +std::size_t hash_join::inner_join_size(cudf::table_view const& left, rmm::cuda_stream_view stream) const { - return this->template join_size(probe, stream); + return this->template join_size(left, stream); } template std::size_t hash_join::inner_join_size( - cudf::table_view const& probe, rmm::cuda_stream_view stream) const; + cudf::table_view const& left, rmm::cuda_stream_view stream) const; } // namespace cudf::detail diff --git a/cpp/src/join/hash_join/left_join_match_context.cpp b/cpp/src/join/hash_join/left_join_match_context.cpp index 86cc963ec3f..59bb6255a79 100644 --- a/cpp/src/join/hash_join/left_join_match_context.cpp +++ b/cpp/src/join/hash_join/left_join_match_context.cpp @@ -12,17 +12,16 @@ namespace cudf::detail { template cudf::join_match_context hash_join::left_join_match_context( - cudf::table_view const& probe, + cudf::table_view const& left, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const { cudf::scoped_range range{"hash_join::left_join_match_context"}; - return cudf::join_match_context{probe, - make_match_counts(join_kind::LEFT_JOIN, probe, stream, mr)}; + return cudf::join_match_context{left, make_match_counts(join_kind::LEFT_JOIN, left, stream, mr)}; } template cudf::join_match_context cudf::hash_join::impl_type::left_join_match_context( - cudf::table_view const& probe, + cudf::table_view const& left, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const; diff --git a/cpp/src/join/hash_join/left_join_retrieve.cu b/cpp/src/join/hash_join/left_join_retrieve.cu index 5737703aba8..d84eb05cd4c 100644 --- a/cpp/src/join/hash_join/left_join_retrieve.cu +++ b/cpp/src/join/hash_join/left_join_retrieve.cu @@ -10,17 +10,17 @@ namespace cudf::detail { template std::pair>, std::unique_ptr>> -hash_join::left_join(cudf::table_view const& probe, 
+hash_join::left_join(cudf::table_view const& left, std::optional output_size, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const { - return this->template join_retrieve(probe, output_size, stream, mr); + return this->template join_retrieve(left, output_size, stream, mr); } template std::pair>, std::unique_ptr>> -hash_join::left_join(cudf::table_view const& probe, +hash_join::left_join(cudf::table_view const& left, std::optional output_size, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const; diff --git a/cpp/src/join/hash_join/left_join_size.cu b/cpp/src/join/hash_join/left_join_size.cu index 24e9c64dc5d..4bc382165d3 100644 --- a/cpp/src/join/hash_join/left_join_size.cu +++ b/cpp/src/join/hash_join/left_join_size.cu @@ -8,13 +8,13 @@ namespace cudf::detail { template -std::size_t hash_join::left_join_size(cudf::table_view const& probe, +std::size_t hash_join::left_join_size(cudf::table_view const& left, rmm::cuda_stream_view stream) const { - return this->template join_size(probe, stream); + return this->template join_size(left, stream); } template std::size_t hash_join::left_join_size( - cudf::table_view const& probe, rmm::cuda_stream_view stream) const; + cudf::table_view const& left, rmm::cuda_stream_view stream) const; } // namespace cudf::detail diff --git a/cpp/src/join/hash_join/match_context.cu b/cpp/src/join/hash_join/match_context.cu index f990b3b5c42..b6653be623c 100644 --- a/cpp/src/join/hash_join/match_context.cu +++ b/cpp/src/join/hash_join/match_context.cu @@ -25,19 +25,18 @@ struct clamp_zero_to_one { } // namespace std::unique_ptr> make_join_match_counts( - table_view const& build, - std::shared_ptr const& preprocessed_build, + table_view const& right, + std::shared_ptr const& preprocessed_right, cudf::detail::hash_table_t const& hash_table, bool is_empty, bool has_nulls, null_equality compare_nulls, join_kind join, - table_view const& probe, + table_view const& left, rmm::cuda_stream_view stream, 
rmm::device_async_resource_ref mr) { - auto match_counts = - std::make_unique>(probe.num_rows(), stream, mr); + auto match_counts = std::make_unique>(left.num_rows(), stream, mr); if (is_empty) { thrust::fill(rmm::exec_policy_nosync(stream, cudf::get_current_device_resource_ref()), @@ -47,19 +46,19 @@ std::unique_ptr> make_join_match_counts( return match_counts; } - CUDF_EXPECTS(has_nulls || !cudf::has_nested_nulls(probe), - "Probe table has nulls while build table was not hashed with null check.", + CUDF_EXPECTS(has_nulls || !cudf::has_nested_nulls(left), + "Left table has nulls while right table was not hashed with null check.", std::invalid_argument); - auto const preprocessed_probe = - cudf::detail::row::equality::preprocessed_table::create(probe, stream); - auto const probe_table_num_rows = probe.num_rows(); + auto const preprocessed_left = + cudf::detail::row::equality::preprocessed_table::create(left, stream); + auto const left_table_num_rows = left.num_rows(); auto count_matches = [&](auto equality, auto d_hasher) { auto const iter = cudf::detail::make_counting_transform_iterator(0, pair_fn{d_hasher}); if (join == join_kind::INNER_JOIN) { hash_table.count_each(iter, - iter + probe_table_num_rows, + iter + left_table_num_rows, equality, hash_table.hash_function(), match_counts->begin(), @@ -69,7 +68,7 @@ std::unique_ptr> make_join_match_counts( auto const output = thrust::make_transform_output_iterator(match_counts->begin(), clamp_zero_to_one{}); hash_table.count_each(iter, - iter + probe_table_num_rows, + iter + left_table_num_rows, equality, hash_table.hash_function(), output, @@ -78,7 +77,7 @@ std::unique_ptr> make_join_match_counts( }; dispatch_join_comparator( - build, probe, preprocessed_build, preprocessed_probe, has_nulls, compare_nulls, count_matches); + right, left, preprocessed_right, preprocessed_left, has_nulls, compare_nulls, count_matches); return match_counts; } @@ -86,18 +85,18 @@ std::unique_ptr> make_join_match_counts( template 
std::unique_ptr> hash_join::make_match_counts( join_kind join, - cudf::table_view const& probe, + cudf::table_view const& left, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const { - return make_join_match_counts(_build, - _preprocessed_build, + return make_join_match_counts(_right, + _preprocessed_right, _impl->_hash_table, _is_empty, _has_nulls, _nulls_equal, join, - probe, + left, stream, mr); } diff --git a/cpp/src/join/hash_join/retrieve_impl.cuh b/cpp/src/join/hash_join/retrieve_impl.cuh index d365710a73c..58b29562a2c 100644 --- a/cpp/src/join/hash_join/retrieve_impl.cuh +++ b/cpp/src/join/hash_join/retrieve_impl.cuh @@ -23,10 +23,10 @@ template std::pair>, std::unique_ptr>> probe_join_hash_table( - cudf::table_view const& build_table, - cudf::table_view const& probe_table, - std::shared_ptr const& preprocessed_build, - std::shared_ptr const& preprocessed_probe, + cudf::table_view const& right_table, + cudf::table_view const& left_table, + std::shared_ptr const& preprocessed_right, + std::shared_ptr const& preprocessed_left, cudf::detail::hash_table_t const& hash_table, bool has_nulls, null_equality compare_nulls, @@ -41,10 +41,10 @@ probe_join_hash_table( std::size_t const join_size = output_size ? 
*output_size - : compute_join_output_size(build_table, - probe_table, - preprocessed_build, - preprocessed_probe, + : compute_join_output_size(right_table, + left_table, + preprocessed_right, + preprocessed_left, hash_table, has_nulls, compare_nulls, @@ -60,7 +60,7 @@ probe_join_hash_table( cudf::prefetch::detail::prefetch(*left_indices, stream); cudf::prefetch::detail::prefetch(*right_indices, stream); - auto const probe_table_num_rows = probe_table.num_rows(); + auto const left_table_num_rows = left_table.num_rows(); auto const out_probe_begin = thrust::make_transform_output_iterator(left_indices->begin(), output_fn{}); auto const out_build_begin = @@ -70,7 +70,7 @@ probe_join_hash_table( auto const iter = cudf::detail::make_counting_transform_iterator(0, pair_fn{d_hasher}); if constexpr (Join == join_kind::INNER_JOIN) { hash_table.retrieve(iter, - iter + probe_table_num_rows, + iter + left_table_num_rows, equality, hash_table.hash_function(), out_probe_begin, @@ -79,7 +79,7 @@ probe_join_hash_table( } else { [[maybe_unused]] auto out_probe_end = hash_table .retrieve_outer(iter, - iter + probe_table_num_rows, + iter + left_table_num_rows, equality, hash_table.hash_function(), out_probe_begin, @@ -95,10 +95,10 @@ probe_join_hash_table( } }; - dispatch_join_comparator(build_table, - probe_table, - preprocessed_build, - preprocessed_probe, + dispatch_join_comparator(right_table, + left_table, + preprocessed_right, + preprocessed_left, has_nulls, compare_nulls, retrieve_results); @@ -108,22 +108,22 @@ probe_join_hash_table( template void retrieve_left_join_build_indices( - cudf::table_view const& build_table, - cudf::table_view const& probe_table, - std::shared_ptr const& preprocessed_build, - std::shared_ptr const& preprocessed_probe, + cudf::table_view const& right_table, + cudf::table_view const& left_table, + std::shared_ptr const& preprocessed_right, + std::shared_ptr const& preprocessed_left, cudf::detail::hash_table_t const& hash_table, bool has_nulls, 
null_equality compare_nulls, RightOutputIterator out_build_begin, rmm::cuda_stream_view stream) { - auto const probe_table_num_rows = probe_table.num_rows(); + auto const left_table_num_rows = left_table.num_rows(); auto retrieve_results = [&](auto equality, auto d_hasher) { auto const iter = cudf::detail::make_counting_transform_iterator(0, pair_fn{d_hasher}); hash_table.retrieve_outer(iter, - iter + probe_table_num_rows, + iter + left_table_num_rows, equality, hash_table.hash_function(), cuda::make_discard_iterator(), @@ -131,10 +131,10 @@ void retrieve_left_join_build_indices( stream.value()); }; - dispatch_join_comparator(build_table, - probe_table, - preprocessed_build, - preprocessed_probe, + dispatch_join_comparator(right_table, + left_table, + preprocessed_right, + preprocessed_left, has_nulls, compare_nulls, retrieve_results); @@ -144,36 +144,36 @@ template template std::pair>, std::unique_ptr>> -hash_join::join_retrieve(cudf::table_view const& probe, +hash_join::join_retrieve(cudf::table_view const& left, std::optional output_size, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const { CUDF_FUNC_RANGE(); - validate_hash_join_probe(_build, probe, _has_nulls); + validate_hash_join_probe(_right, left, _has_nulls); if constexpr (Join == join_kind::INNER_JOIN) { - if (is_trivial_join(probe, _build, Join)) { + if (is_trivial_join(left, _right, Join)) { return std::pair(std::make_unique>(0, stream, mr), std::make_unique>(0, stream, mr)); } } else { - if (_is_empty) { return get_trivial_left_join_indices(probe, stream, mr); } + if (_is_empty) { return get_trivial_left_join_indices(left, stream, mr); } - if (is_trivial_join(probe, _build, Join)) { + if (is_trivial_join(left, _right, Join)) { return std::pair(std::make_unique>(0, stream, mr), std::make_unique>(0, stream, mr)); } } - auto const preprocessed_probe = - cudf::detail::row::equality::preprocessed_table::create(probe, stream); + auto const preprocessed_left = + 
cudf::detail::row::equality::preprocessed_table::create(left, stream); - auto join_indices = cudf::detail::probe_join_hash_table(_build, - probe, - _preprocessed_build, - preprocessed_probe, + auto join_indices = cudf::detail::probe_join_hash_table(_right, + left, + _preprocessed_right, + preprocessed_left, _impl->_hash_table, _has_nulls, _nulls_equal, @@ -183,7 +183,7 @@ hash_join::join_retrieve(cudf::table_view const& probe, if constexpr (Join == join_kind::FULL_JOIN) { auto complement_indices = detail::get_left_join_indices_complement( - join_indices.second, probe.num_rows(), _build.num_rows(), stream, mr); + join_indices.second, left.num_rows(), _right.num_rows(), stream, mr); return detail::concatenate_vector_pairs(join_indices, complement_indices, stream); } else { return join_indices; diff --git a/cpp/src/join/hash_join/size_impl.cuh b/cpp/src/join/hash_join/size_impl.cuh index 6fb2da63d4f..3e20ebc7367 100644 --- a/cpp/src/join/hash_join/size_impl.cuh +++ b/cpp/src/join/hash_join/size_impl.cuh @@ -13,10 +13,10 @@ namespace cudf::detail { std::size_t get_full_join_size( - cudf::table_view const& build_table, - cudf::table_view const& probe_table, - std::shared_ptr const& preprocessed_build, - std::shared_ptr const& preprocessed_probe, + cudf::table_view const& right_table, + cudf::table_view const& left_table, + std::shared_ptr const& preprocessed_right, + std::shared_ptr const& preprocessed_left, cudf::detail::hash_table_t const& hash_table, bool has_nulls, null_equality compare_nulls, @@ -25,10 +25,10 @@ std::size_t get_full_join_size( template std::size_t compute_join_output_size( - table_view const& build_table, - table_view const& probe_table, - std::shared_ptr const& preprocessed_build, - std::shared_ptr const& preprocessed_probe, + table_view const& right_table, + table_view const& left_table, + std::shared_ptr const& preprocessed_right, + std::shared_ptr const& preprocessed_left, cudf::detail::hash_table_t const& hash_table, bool has_nulls, 
cudf::null_equality nulls_equal, @@ -36,34 +36,34 @@ std::size_t compute_join_output_size( { static_assert(Join == join_kind::INNER_JOIN || Join == join_kind::LEFT_JOIN); - if (build_table.num_rows() == 0) { - return Join == join_kind::INNER_JOIN ? 0 : probe_table.num_rows(); + if (right_table.num_rows() == 0) { + return Join == join_kind::INNER_JOIN ? 0 : left_table.num_rows(); } - auto const probe_table_num_rows = probe_table.num_rows(); + auto const left_table_num_rows = left_table.num_rows(); return dispatch_join_comparator( - build_table, - probe_table, - preprocessed_build, - preprocessed_probe, + right_table, + left_table, + preprocessed_right, + preprocessed_left, has_nulls, nulls_equal, [&](auto equality, auto d_hasher) { auto const iter = cudf::detail::make_counting_transform_iterator(0, pair_fn{d_hasher}); if constexpr (Join == join_kind::LEFT_JOIN) { return hash_table.count_outer( - iter, iter + probe_table_num_rows, equality, hash_table.hash_function(), stream.value()); + iter, iter + left_table_num_rows, equality, hash_table.hash_function(), stream.value()); } else { return hash_table.count( - iter, iter + probe_table_num_rows, equality, hash_table.hash_function(), stream.value()); + iter, iter + left_table_num_rows, equality, hash_table.hash_function(), stream.value()); } }); } template template -std::size_t hash_join::join_size(cudf::table_view const& probe, +std::size_t hash_join::join_size(cudf::table_view const& left, rmm::cuda_stream_view stream) const { static_assert(Join == join_kind::INNER_JOIN || Join == join_kind::LEFT_JOIN); @@ -73,20 +73,20 @@ std::size_t hash_join::join_size(cudf::table_view const& probe, if constexpr (Join == join_kind::INNER_JOIN) { if (_is_empty) { return 0; } } else { - if (_is_empty) { return probe.num_rows(); } + if (_is_empty) { return left.num_rows(); } } - CUDF_EXPECTS(_has_nulls || !cudf::has_nested_nulls(probe), - "Probe table has nulls while build table was not hashed with null check.", + 
CUDF_EXPECTS(_has_nulls || !cudf::has_nested_nulls(left), + "Left table has nulls while right table was not hashed with null check.", std::invalid_argument); - auto const preprocessed_probe = - cudf::detail::row::equality::preprocessed_table::create(probe, stream); + auto const preprocessed_left = + cudf::detail::row::equality::preprocessed_table::create(left, stream); - return cudf::detail::compute_join_output_size(_build, - probe, - _preprocessed_build, - preprocessed_probe, + return cudf::detail::compute_join_output_size(_right, + left, + _preprocessed_right, + preprocessed_left, _impl->_hash_table, _has_nulls, _nulls_equal, @@ -95,7 +95,7 @@ std::size_t hash_join::join_size(cudf::table_view const& probe, template template -std::size_t hash_join::join_size(cudf::table_view const& probe, +std::size_t hash_join::join_size(cudf::table_view const& left, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const { @@ -103,19 +103,19 @@ std::size_t hash_join::join_size(cudf::table_view const& probe, CUDF_FUNC_RANGE(); - if (_is_empty) { return probe.num_rows(); } + if (_is_empty) { return left.num_rows(); } - CUDF_EXPECTS(_has_nulls || !cudf::has_nested_nulls(probe), - "Probe table has nulls while build table was not hashed with null check.", + CUDF_EXPECTS(_has_nulls || !cudf::has_nested_nulls(left), + "Left table has nulls while right table was not hashed with null check.", std::invalid_argument); - auto const preprocessed_probe = - cudf::detail::row::equality::preprocessed_table::create(probe, stream); + auto const preprocessed_left = + cudf::detail::row::equality::preprocessed_table::create(left, stream); - return cudf::detail::get_full_join_size(_build, - probe, - _preprocessed_build, - preprocessed_probe, + return cudf::detail::get_full_join_size(_right, + left, + _preprocessed_right, + preprocessed_left, _impl->_hash_table, _has_nulls, _nulls_equal, diff --git a/cpp/tests/join/distinct_join_tests.cpp b/cpp/tests/join/distinct_join_tests.cpp index 
e3864f06e97..744caddeb49 100644 --- a/cpp/tests/join/distinct_join_tests.cpp +++ b/cpp/tests/join/distinct_join_tests.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -39,8 +39,8 @@ std::unique_ptr> get_left_indices(cudf::siz struct DistinctJoinTest : public cudf::test::BaseFixture { void compare_to_reference( - cudf::table_view const& build_table, - cudf::table_view const& probe_table, + cudf::table_view const& right_table, + cudf::table_view const& left_table, std::pair>, std::unique_ptr>> const& result, cudf::table_view const& expected_table, @@ -54,8 +54,8 @@ struct DistinctJoinTest : public cudf::test::BaseFixture { auto build_indices_col = cudf::column_view{build_indices_span}; auto probe_indices_col = cudf::column_view{probe_indices_span}; - auto joined_cols = cudf::gather(probe_table, probe_indices_col, oob_policy)->release(); - auto right_cols = cudf::gather(build_table, build_indices_col, oob_policy)->release(); + auto joined_cols = cudf::gather(left_table, probe_indices_col, oob_policy)->release(); + auto right_cols = cudf::gather(right_table, build_indices_col, oob_policy)->release(); joined_cols.insert(joined_cols.end(), std::make_move_iterator(right_cols.begin()), @@ -76,19 +76,19 @@ TEST_F(DistinctJoinTest, IntegerInnerJoin) auto const init = cudf::numeric_scalar{0}; - auto build = cudf::sequence(size, init, cudf::numeric_scalar{1}); - auto probe = cudf::sequence(size, init, cudf::numeric_scalar{2}); + auto right = cudf::sequence(size, init, cudf::numeric_scalar{1}); + auto left = cudf::sequence(size, init, cudf::numeric_scalar{2}); - auto build_table = cudf::table_view{{build->view()}}; - auto probe_table = cudf::table_view{{probe->view()}}; + auto right_table = cudf::table_view{{right->view()}}; + auto left_table = cudf::table_view{{left->view()}}; - auto distinct_join = 
cudf::distinct_hash_join{build_table}; + auto distinct_join = cudf::distinct_hash_join{right_table}; - auto result = distinct_join.inner_join(probe_table); + auto result = distinct_join.inner_join(left_table); auto constexpr gold_size = size / 2; auto gold = cudf::sequence(gold_size, init, cudf::numeric_scalar{2}); - this->compare_to_reference(build_table, probe_table, result, cudf::table_view{{gold->view()}}); + this->compare_to_reference(right_table, left_table, result, cudf::table_view{{gold->view()}}); } TEST_F(DistinctJoinTest, InnerJoinNoNulls) @@ -109,11 +109,11 @@ TEST_F(DistinctJoinTest, InnerJoinNoNulls) cols1.push_back(col1_1.release()); cols1.push_back(col1_2.release()); - Table build(std::move(cols0)); - Table probe(std::move(cols1)); + Table right(std::move(cols0)); + Table left(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view()}; - auto result = distinct_join.inner_join(probe.view()); + auto distinct_join = cudf::distinct_hash_join{right.view()}; + auto result = distinct_join.inner_join(left.view()); column_wrapper col_gold_0{{1, 2}}; strcol_wrapper col_gold_1({"s0", "s0"}); @@ -130,7 +130,7 @@ TEST_F(DistinctJoinTest, InnerJoinNoNulls) cols_gold.push_back(col_gold_5.release()); Table gold(std::move(cols_gold)); - this->compare_to_reference(build.view(), probe.view(), result, gold.view()); + this->compare_to_reference(right.view(), left.view(), result, gold.view()); } TEST_F(DistinctJoinTest, PrimitiveInnerJoinNoNulls) @@ -151,11 +151,11 @@ TEST_F(DistinctJoinTest, PrimitiveInnerJoinNoNulls) cols1.push_back(col1_1.release()); cols1.push_back(col1_2.release()); - Table build(std::move(cols0)); - Table probe(std::move(cols1)); + Table right(std::move(cols0)); + Table left(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view()}; - auto result = distinct_join.inner_join(probe.view()); + auto distinct_join = cudf::distinct_hash_join{right.view()}; + auto result = distinct_join.inner_join(left.view()); 
column_wrapper col_gold_0{{1, 2}}; column_wrapper col_gold_1({0, 0}); @@ -172,7 +172,7 @@ TEST_F(DistinctJoinTest, PrimitiveInnerJoinNoNulls) cols_gold.push_back(col_gold_5.release()); Table gold(std::move(cols_gold)); - this->compare_to_reference(build.view(), probe.view(), result, gold.view()); + this->compare_to_reference(right.view(), left.view(), result, gold.view()); } TEST_F(DistinctJoinTest, InnerJoinWithNulls) @@ -193,8 +193,8 @@ TEST_F(DistinctJoinTest, InnerJoinWithNulls) cols1.push_back(col1_1.release()); cols1.push_back(col1_2.release()); - Table build(std::move(cols0)); - Table probe(std::move(cols1)); + Table right(std::move(cols0)); + Table left(std::move(cols1)); // Create gold table once column_wrapper col_gold_0{{3, 2}}; @@ -217,10 +217,10 @@ TEST_F(DistinctJoinTest, InnerJoinWithNulls) for (auto load_factor : load_factors) { auto distinct_join = - cudf::distinct_hash_join{build.view(), cudf::null_equality::EQUAL, load_factor}; - auto result = distinct_join.inner_join(probe.view()); + cudf::distinct_hash_join{right.view(), cudf::null_equality::EQUAL, load_factor}; + auto result = distinct_join.inner_join(left.view()); - this->compare_to_reference(build.view(), probe.view(), result, gold.view()); + this->compare_to_reference(right.view(), left.view(), result, gold.view()); } } @@ -242,8 +242,8 @@ TEST_F(DistinctJoinTest, PrimitiveInnerJoinWithNulls) cols1.push_back(col1_1.release()); cols1.push_back(col1_2.release()); - Table build(std::move(cols0)); - Table probe(std::move(cols1)); + Table right(std::move(cols0)); + Table left(std::move(cols1)); // Create gold table once column_wrapper col_gold_0{{3, 2}}; @@ -266,10 +266,10 @@ TEST_F(DistinctJoinTest, PrimitiveInnerJoinWithNulls) for (auto load_factor : load_factors) { auto distinct_join = - cudf::distinct_hash_join{build.view(), cudf::null_equality::EQUAL, load_factor}; - auto result = distinct_join.inner_join(probe.view()); + cudf::distinct_hash_join{right.view(), cudf::null_equality::EQUAL, 
load_factor}; + auto result = distinct_join.inner_join(left.view()); - this->compare_to_reference(build.view(), probe.view(), result, gold.view()); + this->compare_to_reference(right.view(), left.view(), result, gold.view()); } } @@ -316,11 +316,11 @@ TEST_F(DistinctJoinTest, InnerJoinWithStructsAndNulls) cols1.push_back(col1_2.release()); cols1.push_back(col1_3.release()); - Table probe(std::move(cols0)); - Table build(std::move(cols1)); + Table left(std::move(cols0)); + Table right(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view()}; - auto result = distinct_join.inner_join(probe.view()); + auto distinct_join = cudf::distinct_hash_join{right.view()}; + auto result = distinct_join.inner_join(left.view()); column_wrapper col_gold_0{{3, 2}}; strcol_wrapper col_gold_1({"s1", "s0"}, {true, true}); @@ -354,10 +354,10 @@ TEST_F(DistinctJoinTest, InnerJoinWithStructsAndNulls) cols_gold.push_back(col_gold_7.release()); Table gold(std::move(cols_gold)); - this->compare_to_reference(build.view(), probe.view(), result, gold.view()); + this->compare_to_reference(right.view(), left.view(), result, gold.view()); } -TEST_F(DistinctJoinTest, EmptyBuildTableInnerJoin) +TEST_F(DistinctJoinTest, EmptyRightTableInnerJoin) { column_wrapper col0_0; column_wrapper col0_1; @@ -371,16 +371,16 @@ TEST_F(DistinctJoinTest, EmptyBuildTableInnerJoin) cols1.push_back(col1_0.release()); cols1.push_back(col1_1.release()); - Table build(std::move(cols0)); - Table probe(std::move(cols1)); + Table right(std::move(cols0)); + Table left(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view()}; - auto result = distinct_join.inner_join(probe.view()); + auto distinct_join = cudf::distinct_hash_join{right.view()}; + auto result = distinct_join.inner_join(left.view()); - this->compare_to_reference(build.view(), probe.view(), result, build.view()); + this->compare_to_reference(right.view(), left.view(), result, right.view()); } -TEST_F(DistinctJoinTest, 
EmptyBuildTableLeftJoin) +TEST_F(DistinctJoinTest, EmptyRightTableLeftJoin) { column_wrapper col0_0; column_wrapper col0_1; @@ -394,18 +394,18 @@ TEST_F(DistinctJoinTest, EmptyBuildTableLeftJoin) cols1.push_back(col1_0.release()); cols1.push_back(col1_1.release()); - Table build(std::move(cols0)); - Table probe(std::move(cols1)); + Table right(std::move(cols0)); + Table left(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view()}; - auto result = distinct_join.left_join(probe.view()); + auto distinct_join = cudf::distinct_hash_join{right.view()}; + auto result = distinct_join.left_join(left.view()); auto gather_map = std::pair{get_left_indices(result->size()), std::move(result)}; this->compare_to_reference( - build.view(), probe.view(), gather_map, probe.view(), cudf::out_of_bounds_policy::NULLIFY); + right.view(), left.view(), gather_map, left.view(), cudf::out_of_bounds_policy::NULLIFY); } -TEST_F(DistinctJoinTest, EmptyProbeTableInnerJoin) +TEST_F(DistinctJoinTest, EmptyLeftTableInnerJoin) { column_wrapper col0_0{{2, 2, 0, 4, 3}}; column_wrapper col0_1{{1, 0, 1, 2, 1}, {true, false, true, true, true}}; @@ -419,16 +419,16 @@ TEST_F(DistinctJoinTest, EmptyProbeTableInnerJoin) cols1.push_back(col1_0.release()); cols1.push_back(col1_1.release()); - Table build(std::move(cols0)); - Table probe(std::move(cols1)); + Table right(std::move(cols0)); + Table left(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view()}; - auto result = distinct_join.inner_join(probe.view()); + auto distinct_join = cudf::distinct_hash_join{right.view()}; + auto result = distinct_join.inner_join(left.view()); - this->compare_to_reference(build.view(), probe.view(), result, probe.view()); + this->compare_to_reference(right.view(), left.view(), result, left.view()); } -TEST_F(DistinctJoinTest, EmptyProbeTableLeftJoin) +TEST_F(DistinctJoinTest, EmptyLeftTableLeftJoin) { column_wrapper col0_0{{2, 2, 0, 4, 3}}; column_wrapper col0_1{{1, 0, 1, 2, 
1}, {true, false, true, true, true}}; @@ -442,15 +442,15 @@ TEST_F(DistinctJoinTest, EmptyProbeTableLeftJoin) cols1.push_back(col1_0.release()); cols1.push_back(col1_1.release()); - Table build(std::move(cols0)); - Table probe(std::move(cols1)); + Table right(std::move(cols0)); + Table left(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view()}; - auto result = distinct_join.left_join(probe.view()); + auto distinct_join = cudf::distinct_hash_join{right.view()}; + auto result = distinct_join.left_join(left.view()); auto gather_map = std::pair{get_left_indices(result->size()), std::move(result)}; this->compare_to_reference( - build.view(), probe.view(), gather_map, probe.view(), cudf::out_of_bounds_policy::NULLIFY); + right.view(), left.view(), gather_map, left.view(), cudf::out_of_bounds_policy::NULLIFY); } TEST_F(DistinctJoinTest, LeftJoinNoNulls) @@ -467,8 +467,8 @@ TEST_F(DistinctJoinTest, LeftJoinNoNulls) cols1.push_back(col1_0.release()); cols1.push_back(col1_1.release()); - Table probe(std::move(cols0)); - Table build(std::move(cols1)); + Table left(std::move(cols0)); + Table right(std::move(cols1)); column_wrapper col_gold_0({3, 1, 2, 0, 3}); strcol_wrapper col_gold_1({"s0", "s1", "s2", "s4", "s1"}); @@ -481,12 +481,12 @@ TEST_F(DistinctJoinTest, LeftJoinNoNulls) cols_gold.push_back(col_gold_3.release()); Table gold(std::move(cols_gold)); - auto distinct_join = cudf::distinct_hash_join{build.view()}; - auto result = distinct_join.left_join(probe.view()); + auto distinct_join = cudf::distinct_hash_join{right.view()}; + auto result = distinct_join.left_join(left.view()); auto gather_map = std::pair{get_left_indices(result->size()), std::move(result)}; this->compare_to_reference( - build.view(), probe.view(), gather_map, gold.view(), cudf::out_of_bounds_policy::NULLIFY); + right.view(), left.view(), gather_map, gold.view(), cudf::out_of_bounds_policy::NULLIFY); } TEST_F(DistinctJoinTest, PrimitiveLeftJoinNoNulls) @@ -503,8 +503,8 @@ 
TEST_F(DistinctJoinTest, PrimitiveLeftJoinNoNulls) cols1.push_back(col1_0.release()); cols1.push_back(col1_1.release()); - Table probe(std::move(cols0)); - Table build(std::move(cols1)); + Table left(std::move(cols0)); + Table right(std::move(cols1)); column_wrapper col_gold_0({3, 1, 2, 0, 3}); column_wrapper col_gold_1({0, 1, 2, 4, 1}); @@ -517,12 +517,12 @@ TEST_F(DistinctJoinTest, PrimitiveLeftJoinNoNulls) cols_gold.push_back(col_gold_3.release()); Table gold(std::move(cols_gold)); - auto distinct_join = cudf::distinct_hash_join{build.view()}; - auto result = distinct_join.left_join(probe.view()); + auto distinct_join = cudf::distinct_hash_join{right.view()}; + auto result = distinct_join.left_join(left.view()); auto gather_map = std::pair{get_left_indices(result->size()), std::move(result)}; this->compare_to_reference( - build.view(), probe.view(), gather_map, gold.view(), cudf::out_of_bounds_policy::NULLIFY); + right.view(), left.view(), gather_map, gold.view(), cudf::out_of_bounds_policy::NULLIFY); } TEST_F(DistinctJoinTest, LeftJoinWithNulls) @@ -539,11 +539,11 @@ TEST_F(DistinctJoinTest, LeftJoinWithNulls) cols1.push_back(col1_0.release()); cols1.push_back(col1_1.release()); - Table probe(std::move(cols0)); - Table build(std::move(cols1)); + Table left(std::move(cols0)); + Table right(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view()}; - auto result = distinct_join.left_join(probe.view()); + auto distinct_join = cudf::distinct_hash_join{right.view()}; + auto result = distinct_join.left_join(left.view()); auto gather_map = std::pair{get_left_indices(result->size()), std::move(result)}; column_wrapper col_gold_0{{3, 1, 2, 0, 2}, {true, true, true, true, true}}; @@ -559,7 +559,7 @@ TEST_F(DistinctJoinTest, LeftJoinWithNulls) Table gold(std::move(cols_gold)); this->compare_to_reference( - build.view(), probe.view(), gather_map, gold.view(), cudf::out_of_bounds_policy::NULLIFY); + right.view(), left.view(), gather_map, gold.view(), 
cudf::out_of_bounds_policy::NULLIFY); } TEST_F(DistinctJoinTest, PrimitiveLeftJoinWithNulls) @@ -576,11 +576,11 @@ TEST_F(DistinctJoinTest, PrimitiveLeftJoinWithNulls) cols1.push_back(col1_0.release()); cols1.push_back(col1_1.release()); - Table probe(std::move(cols0)); - Table build(std::move(cols1)); + Table left(std::move(cols0)); + Table right(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view()}; - auto result = distinct_join.left_join(probe.view()); + auto distinct_join = cudf::distinct_hash_join{right.view()}; + auto result = distinct_join.left_join(left.view()); auto gather_map = std::pair{get_left_indices(result->size()), std::move(result)}; column_wrapper col_gold_0{{3, 1, 2, 0, 2}, {true, true, true, true, true}}; @@ -596,7 +596,7 @@ TEST_F(DistinctJoinTest, PrimitiveLeftJoinWithNulls) Table gold(std::move(cols_gold)); this->compare_to_reference( - build.view(), probe.view(), gather_map, gold.view(), cudf::out_of_bounds_policy::NULLIFY); + right.view(), left.view(), gather_map, gold.view(), cudf::out_of_bounds_policy::NULLIFY); } TEST_F(DistinctJoinTest, LeftJoinWithStructsAndNulls) @@ -621,11 +621,11 @@ TEST_F(DistinctJoinTest, LeftJoinWithStructsAndNulls) cols0.push_back(col0.release()); cols1.push_back(col1.release()); - Table probe(std::move(cols0)); - Table build(std::move(cols1)); + Table left(std::move(cols0)); + Table right(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view()}; - auto result = distinct_join.left_join(probe.view()); + auto distinct_join = cudf::distinct_hash_join{right.view()}; + auto result = distinct_join.left_join(left.view()); auto gather_map = std::pair{get_left_indices(result->size()), std::move(result)}; auto col0_gold_names_col = strcol_wrapper{ @@ -658,7 +658,7 @@ TEST_F(DistinctJoinTest, LeftJoinWithStructsAndNulls) Table gold(std::move(cols_gold)); this->compare_to_reference( - build.view(), probe.view(), gather_map, gold.view(), cudf::out_of_bounds_policy::NULLIFY); 
+ right.view(), left.view(), gather_map, gold.view(), cudf::out_of_bounds_policy::NULLIFY); } TEST_F(DistinctJoinTest, InvalidLoadFactor) @@ -699,14 +699,14 @@ TEST_F(DistinctJoinTest, DistinctLargeExtentOverflowPrevention) auto const init = cudf::numeric_scalar{0}; auto build_col = cudf::sequence(table_size, init, cudf::numeric_scalar{1}); - auto build_table = cudf::table_view{{build_col->view()}}; + auto right_table = cudf::table_view{{build_col->view()}}; cudf::table empty_probe_table{}; // This should succeed with size_t extent - would have failed with int32_t extent // in scenarios approaching the overflow boundary EXPECT_NO_THROW({ auto distinct_join = cudf::distinct_hash_join( - build_table, cudf::null_equality::EQUAL, load_factor, cudf::get_default_stream()); + right_table, cudf::null_equality::EQUAL, load_factor, cudf::get_default_stream()); auto result = distinct_join.inner_join(empty_probe_table); }); } diff --git a/cpp/tests/join/join_tests.cpp b/cpp/tests/join/join_tests.cpp index 0afa6de2062..d08c7fcf8a6 100644 --- a/cpp/tests/join/join_tests.cpp +++ b/cpp/tests/join/join_tests.cpp @@ -2401,9 +2401,9 @@ TEST_F(JoinTest, HashJoinFullMatchContext) } } -TEST_F(JoinTest, HashJoinMatchContextEmptyBuild) +TEST_F(JoinTest, HashJoinMatchContextEmptyRight) { - // Test match context with empty build table + // Test match context with empty right table column_wrapper col0_0{{3, 1, 2}}; column_wrapper col1_0{}; // Empty @@ -2855,7 +2855,7 @@ struct JoinTestLists : public cudf::test::BaseFixture { [], 3 [5, 6] 4 */ - lcw build{{{0}, {1}, {{2, 0}, null_at(1)}, {}, {5, 6}}, null_at(0)}; + lcw right{{{0}, {1}, {{2, 0}, null_at(1)}, {}, {5, 6}}, null_at(0)}; /* [ @@ -2868,7 +2868,7 @@ struct JoinTestLists : public cudf::test::BaseFixture { [6] 6 ] */ - lcw probe{{{1}, {3}, {0}, {}, {{2, 0}, null_at(1)}, {5}, {6}}, null_at(2)}; + lcw left{{{1}, {3}, {0}, {}, {{2, 0}, null_at(1)}, {5}, {6}}, null_at(2)}; auto column_view_from_device_uvector(rmm::device_uvector const& 
vector) { @@ -2893,23 +2893,23 @@ struct JoinTestLists : public cudf::test::BaseFixture { JoinFunc join_func, cudf::out_of_bounds_policy oob_policy) { - auto const build_tv = cudf::table_view{{build}}; - auto const probe_tv = cudf::table_view{{probe}}; + auto const right_tv = cudf::table_view{{right}}; + auto const left_tv = cudf::table_view{{left}}; auto const [left_result_map, right_result_map] = - join_func(build_tv, - probe_tv, + join_func(right_tv, + left_tv, nulls_equal, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const left_result_table = - sort_and_gather(build_tv, column_view_from_device_uvector(*left_result_map), oob_policy); + sort_and_gather(right_tv, column_view_from_device_uvector(*left_result_map), oob_policy); auto const right_result_table = - sort_and_gather(probe_tv, column_view_from_device_uvector(*right_result_map), oob_policy); + sort_and_gather(left_tv, column_view_from_device_uvector(*right_result_map), oob_policy); - auto const left_gold_table = sort_and_gather(build_tv, left_gold_map, oob_policy); - auto const right_gold_table = sort_and_gather(probe_tv, right_gold_map, oob_policy); + auto const left_gold_table = sort_and_gather(right_tv, left_gold_map, oob_policy); + auto const right_gold_table = sort_and_gather(left_tv, right_gold_map, oob_policy); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*left_result_table, *left_gold_table); CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*right_result_table, *right_gold_table); From d9195b609800ef03fbc28ee97e42d938d0ad462e Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 6 May 2026 21:00:51 +0000 Subject: [PATCH 22/36] remove pylibcudf calls --- python/cudf/cudf/core/groupby/groupby.py | 27 +++++++----------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 8538953ea7e..816b466653c 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ 
b/python/cudf/cudf/core/groupby/groupby.py @@ -3018,29 +3018,18 @@ def _bool_reduce(self, op: str, *, skipna: bool, min_count: int): # nulls are preserved through the aggregation (min/max skip # nulls). For ``skipna=False``, nulls are replaced with True so # they don't flip ``all`` to False and always make ``any`` True. - def _to_bool_col(col): - from cudf.core.column import ColumnBase + bool_dtype = np.dtype(np.bool_) + def _to_bool_col(col): if is_dtype_obj_string(col.dtype): - counts_plc = plc.strings.attributes.count_characters( - col.plc_column - ) - gt_plc = plc.binaryop.binary_operation( - counts_plc, - plc.Scalar.from_py(0), - plc.binaryop.BinaryOperator.GREATER, - plc.DataType(plc.TypeId.BOOL8), - ) - bool_col = ColumnBase.create(gt_plc, np.dtype(np.bool_)) + bool_col = col.count_characters() > np.int8(0) else: # For numeric/bool inputs, cast to bool preserving nulls. - ne_plc = plc.binaryop.binary_operation( - col.plc_column, - plc.Scalar.from_py(0), - plc.binaryop.BinaryOperator.NOT_EQUAL, - plc.DataType(plc.TypeId.BOOL8), - ) - bool_col = ColumnBase.create(ne_plc, np.dtype(np.bool_)) + bool_col = col != 0 + # Normalize away pandas-extension bool dtypes so the downstream + # aggregation always sees ``np.bool_``. 
+ if bool_col.dtype != bool_dtype: + bool_col = ColumnBase.create(bool_col.plc_column, bool_dtype) if not skipna: bool_col = bool_col.fillna(True) return bool_col From e9dd32bbc4cad7f9ac678d494c921c9a786cda01 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Thu, 7 May 2026 21:48:58 -0500 Subject: [PATCH 23/36] Update python/cudf/cudf/core/groupby/groupby.py Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- python/cudf/cudf/core/groupby/groupby.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 63b231152ce..153b459f0ba 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -3028,8 +3028,7 @@ def _to_bool_col(col): bool_col = col != 0 # Normalize away pandas-extension bool dtypes so the downstream # aggregation always sees ``np.bool_``. - if bool_col.dtype != bool_dtype: - bool_col = ColumnBase.create(bool_col.plc_column, bool_dtype) + bool_col = bool_col.astype(bool_dtype, copy=False) if not skipna: bool_col = bool_col.fillna(True) return bool_col From e03db070fb4ced664e28a4eac5a0fedccbfb43c0 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Wed, 6 May 2026 13:22:57 -0700 Subject: [PATCH 24/36] Correctly handle blocks with "block byte size" fields in the Avro reader (#22387) When the number of elements in the Avro block is stored as a negative number, the block also includes its size in bytes. This PR allows the reader to correctly parse such files. 
Authors: - Vukasin Milovanovic (https://github.com/vuule) Approvers: - Lawrence Mitchell (https://github.com/wence-) - Muhammad Haseeb (https://github.com/mhaseeb123) URL: https://github.com/rapidsai/cudf/pull/22387 --- cpp/src/io/avro/avro.cpp | 14 ++++++-- .../cudf/cudf/tests/data/avro/hang_input.avro | Bin 0 -> 101 bytes .../cudf/cudf/tests/input_output/test_avro.py | 32 ++++++++++++++++++ 3 files changed, 43 insertions(+), 3 deletions(-) create mode 100644 python/cudf/cudf/tests/data/avro/hang_input.avro diff --git a/cpp/src/io/avro/avro.cpp b/cpp/src/io/avro/avro.cpp index bf7d983d481..4639ea6ba23 100644 --- a/cpp/src/io/avro/avro.cpp +++ b/cpp/src/io/avro/avro.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -64,8 +64,16 @@ bool container::parse(file_metadata* md, size_t max_num_rows, size_t first_row) sig4 |= get_raw() << 24; if (sig4 != avro_magic) { return false; } for (;;) { - auto num_md_items = static_cast(get_encoded()); - if (num_md_items == 0) { break; } + auto md_items_signed = get_encoded(); + if (md_items_signed == 0) { break; } + if (md_items_signed < 0) { + // A negative count means a block's byte size follows. Read it and discard it. 
+ [[maybe_unused]] auto const md_block_size = get_encoded(); + md_items_signed = -md_items_signed; + } + // Check that the claimed item count can fit in the remaining input + if (md_items_signed > (m_end - m_cur) / 2) { return false; } + auto const num_md_items = static_cast(md_items_signed); for (uint32_t i = 0; i < num_md_items; i++) { auto const key = get_encoded(); auto const value = get_encoded(); diff --git a/python/cudf/cudf/tests/data/avro/hang_input.avro b/python/cudf/cudf/tests/data/avro/hang_input.avro new file mode 100644 index 0000000000000000000000000000000000000000..b26cb797fe8e1343a7560135f4d79e31902ca1d8 GIT binary patch literal 101 zcmeZI%3@>^ODrqO*DFrWNX<>`VyspwsVqoUvQjEaP0lY$QPNS$OUwoHfy}hb)SQ%J pC9CLam}psIPH8Gorlis(G_Aa2CKFT0s@-SqzWdK0sALQg0sxX>BQF2| literal 0 HcmV?d00001 diff --git a/python/cudf/cudf/tests/input_output/test_avro.py b/python/cudf/cudf/tests/input_output/test_avro.py index f982af4a85a..9820c6c9334 100644 --- a/python/cudf/cudf/tests/input_output/test_avro.py +++ b/python/cudf/cudf/tests/input_output/test_avro.py @@ -5,6 +5,9 @@ import datetime import io import pathlib +import subprocess +import sys +import textwrap import fastavro import numpy as np @@ -641,3 +644,32 @@ def test_avro_reader_multiblock( actual_df = cudf.read_avro(buffer, skiprows=skip_rows, num_rows=num_rows) assert_eq(expected_df, actual_df) + + +def test_avro_reader_no_hang_on_truncated_schema(datadir): + path = datadir / "avro" / "hang_input.avro" + assert path.is_file(), path + + script = textwrap.dedent( + f""" + import cudf + try: + cudf.read_avro({str(path)!r}) + except Exception: + pass + """ + ) + + timeout_s = 10 + try: + subprocess.run( + [sys.executable, "-c", script], + timeout=timeout_s, + check=False, + capture_output=True, + ) + except subprocess.TimeoutExpired: + pytest.fail( + f"cudf.read_avro hung on malformed input {path.name!r} " + f"(no completion within {timeout_s}s)" + ) From 62c8c5a99c664474d71eb77238ea75296d28005e Mon Sep 17 00:00:00 
2001 From: Paul Taylor <178183+trxcllnt@users.noreply.github.com> Date: Wed, 6 May 2026 13:29:07 -0700 Subject: [PATCH 25/36] Use `token.rapids.nvidia.com` when issuing S3 bucket creds in devcontainers (#22338) Set AWS_IDP_URL and update AWS_ROLE_ARN to use `token.rapids.nvidia.com` Authors: - Paul Taylor (https://github.com/trxcllnt) Approvers: - Gil Forsyth (https://github.com/gforsyth) URL: https://github.com/rapidsai/cudf/pull/22338 --- .devcontainer/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 4be36d4402c..b4b2ecb69e0 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -37,7 +37,8 @@ ENV LIBCUDF_KERNEL_CACHE_PATH="/home/coder/cudf/cpp/build/${PYTHON_PACKAGE_MANAG ### # sccache configuration ### -ENV AWS_ROLE_ARN="arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs" +ENV AWS_IDP_URL="https://token.rapids.nvidia.com" +ENV AWS_ROLE_ARN="arn:aws:iam::279114543810:role/rapids-token-sccache-devs" ENV SCCACHE_REGION="us-east-2" ENV SCCACHE_BUCKET="rapids-sccache-devs" ENV SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE=true From 6ffe708fa1d5f486abab54aeb53244bf3ff192a8 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Wed, 6 May 2026 16:41:36 -0400 Subject: [PATCH 26/36] Use static cudart by default (#22397) Issue: https://github.com/rapidsai/build-planning/issues/235 Authors: - Kyle Edwards (https://github.com/KyleFromNVIDIA) Approvers: - Bradley Dice (https://github.com/bdice) - MithunR (https://github.com/mythrocks) URL: https://github.com/rapidsai/cudf/pull/22397 --- .agents/skills/build-test-cudf-java/SKILL.md | 2 +- conda/recipes/cudf/recipe.yaml | 1 - cpp/CMakeLists.txt | 16 ++++++---------- cpp/cmake/Modules/JitifyPreprocessKernels.cmake | 2 +- cpp/tests/CMakeLists.txt | 2 +- java/README.md | 7 ------- java/ci/build-in-docker.sh | 6 +----- java/pom.xml | 2 -- java/src/main/native/CMakeLists.txt | 10 +--------- python/libcudf/CMakeLists.txt | 4 +--- 10 
files changed, 12 insertions(+), 40 deletions(-) diff --git a/.agents/skills/build-test-cudf-java/SKILL.md b/.agents/skills/build-test-cudf-java/SKILL.md index 6284a5e4230..ca9eb575c37 100644 --- a/.agents/skills/build-test-cudf-java/SKILL.md +++ b/.agents/skills/build-test-cudf-java/SKILL.md @@ -51,7 +51,7 @@ export MAVEN_OPTS="--add-opens java.base/java.lang=ALL-UNNAMED --add-opens java. Export `MVN_COMMON_OPTS` to match the CI build configuration in `java/ci/build-in-docker.sh`. For example: ```bash -export MVN_COMMON_OPTS="-DCUDF_CPP_BUILD_DIR=$CUDF_CPP_BUILD_DIR -DBUILD_SHARED_LIBS=OFF -DCUDF_USE_PER_THREAD_DEFAULT_STREAM=ON -DCUDA_STATIC_CUFILE=ON -DCUDA_STATIC_RUNTIME=ON -DCUDF_JNI_LIBCUDF_STATIC=ON" +export MVN_COMMON_OPTS="-DCUDF_CPP_BUILD_DIR=$CUDF_CPP_BUILD_DIR -DBUILD_SHARED_LIBS=OFF -DCUDF_USE_PER_THREAD_DEFAULT_STREAM=ON -DCUDA_STATIC_CUFILE=ON -DCUDF_JNI_LIBCUDF_STATIC=ON" ``` ## Building cudf-java diff --git a/conda/recipes/cudf/recipe.yaml b/conda/recipes/cudf/recipe.yaml index f0f7768b1cd..f8516466a58 100644 --- a/conda/recipes/cudf/recipe.yaml +++ b/conda/recipes/cudf/recipe.yaml @@ -98,7 +98,6 @@ requirements: - pylibcudf =${{ version }} - ${{ pin_compatible("rmm", upper_bound="x.x") }} - fsspec >=0.6.0 - - cuda-cudart - if: cuda_major == "12" then: cuda-python >=12.9.2,<13.0 else: cuda-python >=13.0.1,<14.0 diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index c2485171c71..6d684af8d99 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -58,9 +58,6 @@ option(CUDA_ENABLE_LINEINFO ) option(CUDA_WARNINGS_AS_ERRORS "Enable -Werror=all-warnings for all CUDA compilation" ON) -# cudart can be statically linked or dynamically linked. 
The python ecosystem wants dynamic linking -option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF) - set(DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL ON) if(NOT BUILD_SHARED_LIBS) @@ -97,7 +94,6 @@ message( VERBOSE "CUDF: Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler): ${CUDA_ENABLE_LINEINFO}" ) -message(VERBOSE "CUDF: Statically link the CUDA runtime: ${CUDA_STATIC_RUNTIME}") message(VERBOSE "CUDF: Build with remote IO (e.g. AWS S3) support through KvikIO: ${CUDF_KVIKIO_REMOTE_IO}" ) @@ -1012,7 +1008,7 @@ if(TARGET conda_env) target_link_libraries(cudf PRIVATE conda_env) endif() -rapids_cuda_set_runtime(cudf USE_STATIC ${CUDA_STATIC_RUNTIME}) +rapids_cuda_set_runtime(cudf USE_STATIC ON) file( WRITE "${CUDF_BINARY_DIR}/fatbin.ld" @@ -1059,7 +1055,7 @@ if(CUDF_BUILD_TESTUTIL) PUBLIC cudf PRIVATE $ ) - rapids_cuda_set_runtime(cudftest_default_stream USE_STATIC ${CUDA_STATIC_RUNTIME}) + rapids_cuda_set_runtime(cudftest_default_stream USE_STATIC ON) add_library(cudf::cudftest_default_stream ALIAS cudftest_default_stream) @@ -1090,7 +1086,7 @@ if(CUDF_BUILD_TESTUTIL) cudftestutil INTERFACE "$" "$" ) - rapids_cuda_set_runtime(cudftestutil USE_STATIC ${CUDA_STATIC_RUNTIME}) + rapids_cuda_set_runtime(cudftestutil USE_STATIC ON) add_library(cudf::cudftestutil ALIAS cudftestutil) add_library(cudftestutil_impl INTERFACE) @@ -1151,7 +1147,7 @@ if(CUDF_BUILD_STREAMS_TEST_UTIL) endif() set(sanitizer_relative_genex - "$,$>" + "$,$>" ) set_target_properties( ${_tgt} @@ -1166,9 +1162,9 @@ if(CUDF_BUILD_STREAMS_TEST_UTIL) ${_tgt} PRIVATE "$:${CUDF_CXX_FLAGS}>>" ) target_include_directories(${_tgt} PRIVATE "$") - target_link_libraries(${_tgt} PUBLIC CUDA::cudart rmm::rmm CUDA::sanitizer) + target_link_libraries(${_tgt} PUBLIC CUDA::cudart_static rmm::rmm CUDA::sanitizer) - rapids_cuda_set_runtime(${_tgt} USE_STATIC ${CUDA_STATIC_RUNTIME}) + rapids_cuda_set_runtime(${_tgt} USE_STATIC ON) add_library(cudf::${_tgt} ALIAS ${_tgt}) if("${_mode}" STREQUAL 
"testing") diff --git a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake index 10ba33eb397..d035e1ea6ab 100644 --- a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake +++ b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake @@ -9,7 +9,7 @@ add_executable(jitify_preprocess "${JITIFY_INCLUDE_DIR}/jitify2_preprocess.cpp") target_compile_definitions(jitify_preprocess PRIVATE "_FILE_OFFSET_BITS=64") -rapids_cuda_set_runtime(jitify_preprocess USE_STATIC ${CUDA_STATIC_RUNTIME}) +rapids_cuda_set_runtime(jitify_preprocess USE_STATIC ON) target_link_libraries(jitify_preprocess PUBLIC ${CMAKE_DL_LIBS}) # Take a list of files to JIT-compile and run them through jitify_preprocess. diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 68cde65c57b..a45b7280127 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -61,7 +61,7 @@ function(ConfigureTest CMAKE_TEST_NAME) ${CMAKE_TEST_NAME} PRIVATE cudf::cudftestutil_objects nvtx3::nvtx3-cpp $ "${_CUDF_TEST_EXTRA_LIBS}" ) - rapids_cuda_set_runtime(${CMAKE_TEST_NAME} USE_STATIC ${CUDA_STATIC_RUNTIME}) + rapids_cuda_set_runtime(${CMAKE_TEST_NAME} USE_STATIC ON) rapids_test_add( NAME ${CMAKE_TEST_NAME} COMMAND ${CMAKE_TEST_NAME} diff --git a/java/README.md b/java/README.md index 7b33f303cf3..e1552712587 100644 --- a/java/README.md +++ b/java/README.md @@ -79,13 +79,6 @@ If you decide to build without Docker and the build script, examining the cmake settings in the [Java CI build script](ci/build-in-docker.sh) can be helpful if you are encountering difficulties during the build. -## Statically Linking the CUDA Runtime - -If you use the default cmake options libcudart will be dynamically linked to libcudf and libcudfjni. -To build with a static CUDA runtime, build libcudf with the `-DCUDA_STATIC_RUNTIME=ON` as a cmake -parameter, and similarly build with `-DCUDA_STATIC_RUNTIME=ON` when building the Java bindings -with Maven. 
- ### Building with a libcudf Archive When statically linking the CUDA runtime, it is recommended to build cuDF as an archive rather than diff --git a/java/ci/build-in-docker.sh b/java/ci/build-in-docker.sh index 66140f387fd..e15536c8b6b 100755 --- a/java/ci/build-in-docker.sh +++ b/java/ci/build-in-docker.sh @@ -1,7 +1,7 @@ #!/bin/bash # -# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # @@ -10,7 +10,6 @@ gcc --version SKIP_JAVA_TESTS=${SKIP_JAVA_TESTS:-true} BUILD_CPP_TESTS=${BUILD_CPP_TESTS:-OFF} -ENABLE_CUDA_STATIC_RUNTIME=${ENABLE_CUDA_STATIC_RUNTIME:-ON} ENABLE_PTDS=${ENABLE_PTDS:-ON} RMM_LOGGING_LEVEL=${RMM_LOGGING_LEVEL:-OFF} ENABLE_NVTX=${ENABLE_NVTX:-ON} @@ -27,7 +26,6 @@ OUT_PATH="$WORKSPACE/$OUT" echo "SIGN_FILE: $SIGN_FILE,\ SKIP_JAVA_TESTS: $SKIP_JAVA_TESTS,\ BUILD_CPP_TESTS: $BUILD_CPP_TESTS,\ - ENABLE_CUDA_STATIC_RUNTIME: $ENABLE_CUDA_STATIC_RUNTIME,\ ENABLED_PTDS: $ENABLE_PTDS,\ ENABLE_NVTX: $ENABLE_NVTX,\ ENABLE_GDS: $ENABLE_GDS,\ @@ -47,7 +45,6 @@ mkdir -p "$LIBCUDF_BUILD_PATH" cd "$LIBCUDF_BUILD_PATH" cmake .. 
-G"${CMAKE_GENERATOR}" \ -DCMAKE_INSTALL_PREFIX=$INSTALL_PREFIX \ - -DCUDA_STATIC_RUNTIME="$ENABLE_CUDA_STATIC_RUNTIME" \ -DUSE_NVTX="$ENABLE_NVTX" \ -DCUDF_LARGE_STRINGS_DISABLED=ON \ -DCUDF_USE_ARROW_STATIC=ON \ @@ -70,7 +67,6 @@ BUILD_ARG=( "-Dmaven.repo.local=$WORKSPACE/.m2" "-DskipTests=$SKIP_JAVA_TESTS" "-DCUDF_USE_PER_THREAD_DEFAULT_STREAM=$ENABLE_PTDS" - "-DCUDA_STATIC_RUNTIME=$ENABLE_CUDA_STATIC_RUNTIME" "-DCUDF_JNI_LIBCUDF_STATIC=ON" "-DUSE_GDS=$ENABLE_GDS" "-Dtest=*,!CuFileTest,!CudaFatalTest,!ColumnViewNonEmptyNullsTest" diff --git a/java/pom.xml b/java/pom.xml index 5df61ec4352..12af51eba71 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -153,7 +153,6 @@ false OFF - OFF OFF OFF RAPIDS @@ -484,7 +483,6 @@ - diff --git a/java/src/main/native/CMakeLists.txt b/java/src/main/native/CMakeLists.txt index 1e7df3802b9..208bc4035c9 100644 --- a/java/src/main/native/CMakeLists.txt +++ b/java/src/main/native/CMakeLists.txt @@ -29,7 +29,6 @@ option(USE_NVTX "Build with NVTX support" ON) option(BUILD_SHARED_LIBS "Build cuDF JNI shared libraries" ON) option(BUILD_TESTS "Configure CMake to build tests" ON) option(CUDF_USE_PER_THREAD_DEFAULT_STREAM "Build with per-thread default stream" OFF) -option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF) option(USE_GDS "Build with GPUDirect Storage (GDS)/cuFile support" OFF) option(CUDF_JNI_LIBCUDF_STATIC "Link with libcudf.a" OFF) option(CUDF_JNI_ENABLE_PROFILING "Build with profiling support" ON) @@ -41,7 +40,6 @@ message(VERBOSE "CUDF_JNI: Configure CMake to build tests: ${BUILD_TESTS}") message(VERBOSE "CUDF_JNI: Build with per-thread default stream: ${CUDF_USE_PER_THREAD_DEFAULT_STREAM}" ) -message(VERBOSE "CUDF_JNI: Statically link the CUDA runtime: ${CUDA_STATIC_RUNTIME}") message(VERBOSE "CUDF_JNI: Build with GPUDirect Storage support: ${USE_GDS}") message(VERBOSE "CUDF_JNI: Link with libcudf statically: ${CUDF_JNI_LIBCUDF_STATIC}") @@ -279,13 +277,7 @@ target_link_libraries( # cudart can be statically 
linked or dynamically linked. The python ecosystem wants dynamic # linking -if(CUDA_STATIC_RUNTIME) - # Tell CMake what CUDA language runtime to use - set_target_properties(cudfjni PROPERTIES CUDA_RUNTIME_LIBRARY Static) -else() - # Tell CMake what CUDA language runtime to use - set_target_properties(cudfjni PROPERTIES CUDA_RUNTIME_LIBRARY Shared) -endif() +set_target_properties(cudfjni PROPERTIES CUDA_RUNTIME_LIBRARY Static) # ################################################################################################## # * install shared libraries ---------------------------------------------------------------------- diff --git a/python/libcudf/CMakeLists.txt b/python/libcudf/CMakeLists.txt index 6feea8e8ba6..7f5176048ad 100644 --- a/python/libcudf/CMakeLists.txt +++ b/python/libcudf/CMakeLists.txt @@ -1,6 +1,6 @@ # ============================================================================= # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 # cmake-format: on # ============================================================================= @@ -63,8 +63,6 @@ if(NOT USE_NVCOMP_RUNTIME_WHEEL) endif() endif() -set(CUDA_STATIC_RUNTIME ON) - set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib) add_subdirectory(../../cpp cudf-cpp) From 6598b6399c017dc9cc5e892cfdccfca79ae6277d Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 6 May 2026 17:33:21 -0500 Subject: [PATCH 27/36] Fix `to_array` to return non-corrupted data (#22342) Fixes #22136 This PR guarded the homogeneous numeric `DataFrame.to_cupy` fast path so it only uses `table_to_array` when `dtype` is `None` or exactly matches the source column `dtype`.
Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) - https://github.com/apps/pre-commit-ci Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/22342 --- python/cudf/cudf/core/frame.py | 1 + .../cudf/tests/dataframe/methods/test_to_cupy.py | 14 +++++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 8c933649af2..fc9cd5b2cd3 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -842,6 +842,7 @@ def to_cupy( self._num_columns > 1 and na_value is None and self._columns[0].dtype.kind in {"i", "u", "f", "b"} + and (dtype is None or dtype == self._columns[0].dtype) and all( not col.nullable and col.dtype == self._columns[0].dtype for col in self._columns diff --git a/python/cudf/cudf/tests/dataframe/methods/test_to_cupy.py b/python/cudf/cudf/tests/dataframe/methods/test_to_cupy.py index 44ee7a4278d..3eb69e0e928 100644 --- a/python/cudf/cudf/tests/dataframe/methods/test_to_cupy.py +++ b/python/cudf/cudf/tests/dataframe/methods/test_to_cupy.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 import cupy as cp @@ -64,6 +64,18 @@ def test_dataframe_to_cupy(): np.testing.assert_array_equal(df[k].to_numpy(), mat[:, i]) +@pytest.mark.parametrize("in_dtype", ["int32", "int64", "float32", "float64"]) +@pytest.mark.parametrize("out_dtype", ["int32", "int64", "float32", "float64"]) +def test_dataframe_to_cupy_dtype(in_dtype, out_dtype): + data = np.arange(12, dtype=in_dtype).reshape(3, 4) + df = cudf.DataFrame(data) + + result = df.to_cupy(dtype=out_dtype) + + assert result.dtype == np.dtype(out_dtype) + np.testing.assert_allclose(result.get(), data.astype(out_dtype)) + + @pytest.mark.parametrize("has_nulls", [False, True]) @pytest.mark.parametrize("use_na_value", [False, True]) def test_dataframe_to_cupy_single_column(has_nulls, use_na_value): From aa0a7070655a98701281c2ce5e01b84e747fdafa Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 6 May 2026 16:21:33 -0700 Subject: [PATCH 28/36] Use cudaStream_t instead of cuda_stream_view in pylibcudf Cython (#22368) Contributes to https://github.com/rapidsai/rmm/issues/2359 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/22368 --- .../cudf_polars/utils/cuda_stream.py | 5 +- python/pylibcudf/pylibcudf/binaryop.pxd | 5 +- python/pylibcudf/pylibcudf/binaryop.pyi | 6 +- python/pylibcudf/pylibcudf/binaryop.pyx | 16 +- python/pylibcudf/pylibcudf/column.pxd | 19 +- python/pylibcudf/pylibcudf/column.pyi | 32 ++- python/pylibcudf/pylibcudf/column.pyx | 118 ++++---- .../pylibcudf/pylibcudf/column_factories.pxd | 15 +- .../pylibcudf/pylibcudf/column_factories.pyi | 16 +- .../pylibcudf/pylibcudf/column_factories.pyx | 58 ++-- python/pylibcudf/pylibcudf/concatenate.pxd | 5 +- python/pylibcudf/pylibcudf/concatenate.pyi | 6 +- python/pylibcudf/pylibcudf/concatenate.pyx | 16 +- .../pylibcudf/pylibcudf/contiguous_split.pxd | 8 +- .../pylibcudf/pylibcudf/contiguous_split.pyi | 14 +- 
.../pylibcudf/pylibcudf/contiguous_split.pyx | 46 ++-- python/pylibcudf/pylibcudf/copying.pxd | 27 +- python/pylibcudf/pylibcudf/copying.pyi | 32 ++- python/pylibcudf/pylibcudf/copying.pyx | 116 ++++---- python/pylibcudf/pylibcudf/datetime.pxd | 23 +- python/pylibcudf/pylibcudf/datetime.pyi | 24 +- python/pylibcudf/pylibcudf/datetime.pyx | 93 ++++--- .../pylibcudf/experimental/_join_streams.pxd | 5 +- .../pylibcudf/experimental/_join_streams.pyi | 8 +- .../pylibcudf/experimental/_join_streams.pyx | 20 +- python/pylibcudf/pylibcudf/filling.pxd | 13 +- python/pylibcudf/pylibcudf/filling.pyi | 17 +- python/pylibcudf/pylibcudf/filling.pyx | 48 ++-- python/pylibcudf/pylibcudf/groupby.pxd | 15 +- python/pylibcudf/pylibcudf/groupby.pyi | 14 +- python/pylibcudf/pylibcudf/groupby.pyx | 68 +++-- python/pylibcudf/pylibcudf/hashing.pxd | 23 +- python/pylibcudf/pylibcudf/hashing.pyi | 24 +- python/pylibcudf/pylibcudf/hashing.pyx | 93 ++++--- python/pylibcudf/pylibcudf/interop.pxd | 7 +- python/pylibcudf/pylibcudf/interop.pyi | 12 +- python/pylibcudf/pylibcudf/interop.pyx | 17 +- python/pylibcudf/pylibcudf/io/avro.pxd | 5 +- python/pylibcudf/pylibcudf/io/avro.pyi | 6 +- python/pylibcudf/pylibcudf/io/avro.pyx | 8 +- python/pylibcudf/pylibcudf/io/csv.pxd | 7 +- python/pylibcudf/pylibcudf/io/csv.pyi | 8 +- python/pylibcudf/pylibcudf/io/csv.pyx | 13 +- .../pylibcudf/io/experimental/hybrid_scan.pxd | 2 +- .../pylibcudf/io/experimental/hybrid_scan.pyi | 20 +- .../pylibcudf/io/experimental/hybrid_scan.pyx | 66 ++--- python/pylibcudf/pylibcudf/io/json.pxd | 11 +- python/pylibcudf/pylibcudf/io/json.pyi | 10 +- python/pylibcudf/pylibcudf/io/json.pyx | 31 ++- python/pylibcudf/pylibcudf/io/orc.pxd | 9 +- python/pylibcudf/pylibcudf/io/orc.pyi | 10 +- python/pylibcudf/pylibcudf/io/orc.pyx | 24 +- python/pylibcudf/pylibcudf/io/parquet.pxd | 8 +- python/pylibcudf/pylibcudf/io/parquet.pyi | 11 +- python/pylibcudf/pylibcudf/io/parquet.pyx | 26 +- python/pylibcudf/pylibcudf/io/text.pxd | 5 +- 
python/pylibcudf/pylibcudf/io/text.pyi | 6 +- python/pylibcudf/pylibcudf/io/text.pyx | 14 +- python/pylibcudf/pylibcudf/io/timezone.pxd | 6 +- python/pylibcudf/pylibcudf/io/timezone.pyi | 6 +- python/pylibcudf/pylibcudf/io/timezone.pyx | 14 +- python/pylibcudf/pylibcudf/io/types.pxd | 5 +- python/pylibcudf/pylibcudf/io/types.pyx | 3 +- python/pylibcudf/pylibcudf/join.pxd | 35 ++- python/pylibcudf/pylibcudf/join.pyi | 40 +-- python/pylibcudf/pylibcudf/join.pyx | 211 ++++++++------ python/pylibcudf/pylibcudf/json.pxd | 5 +- python/pylibcudf/pylibcudf/json.pyi | 6 +- python/pylibcudf/pylibcudf/json.pyx | 12 +- python/pylibcudf/pylibcudf/labeling.pxd | 5 +- python/pylibcudf/pylibcudf/labeling.pyi | 6 +- python/pylibcudf/pylibcudf/labeling.pyx | 12 +- .../pylibcudf/pylibcudf/libcudf/binaryop.pxd | 10 +- .../pylibcudf/libcudf/column/column.pxd | 6 +- .../libcudf/column/column_factories.pxd | 28 +- .../pylibcudf/libcudf/concatenate.pxd | 6 +- .../pylibcudf/libcudf/contiguous_split.pxd | 8 +- .../pylibcudf/pylibcudf/libcudf/copying.pxd | 40 +-- .../pylibcudf/pylibcudf/libcudf/datetime.pxd | 24 +- .../libcudf/detail/utilities/stream_pool.pxd | 29 +- .../pylibcudf/libcudf/distinct_count.pxd | 6 +- .../pylibcudf/pylibcudf/libcudf/filling.pxd | 14 +- .../pylibcudf/pylibcudf/libcudf/groupby.pxd | 12 +- python/pylibcudf/pylibcudf/libcudf/hash.pxd | 22 +- .../pylibcudf/pylibcudf/libcudf/interop.pxd | 24 +- .../pylibcudf/pylibcudf/libcudf/io/avro.pxd | 4 +- python/pylibcudf/pylibcudf/libcudf/io/csv.pxd | 6 +- .../pylibcudf/libcudf/io/hybrid_scan.pxd | 20 +- .../pylibcudf/pylibcudf/libcudf/io/json.pxd | 6 +- python/pylibcudf/pylibcudf/libcudf/io/orc.pxd | 8 +- .../pylibcudf/libcudf/io/orc_metadata.pxd | 6 +- .../pylibcudf/libcudf/io/parquet.pxd | 12 +- .../pylibcudf/pylibcudf/libcudf/io/text.pxd | 4 +- .../pylibcudf/libcudf/io/timezone.pxd | 4 +- python/pylibcudf/pylibcudf/libcudf/join.pxd | 52 ++-- python/pylibcudf/pylibcudf/libcudf/json.pxd | 4 +- 
.../pylibcudf/pylibcudf/libcudf/labeling.pxd | 4 +- .../pylibcudf/libcudf/lists/combine.pxd | 8 +- .../pylibcudf/libcudf/lists/contains.pxd | 12 +- .../libcudf/lists/count_elements.pxd | 4 +- .../pylibcudf/libcudf/lists/explode.pxd | 4 +- .../pylibcudf/libcudf/lists/extract.pxd | 6 +- .../pylibcudf/libcudf/lists/filling.pxd | 6 +- .../pylibcudf/libcudf/lists/gather.pxd | 4 +- .../libcudf/lists/lists_column_view.pxd | 4 +- .../pylibcudf/libcudf/lists/reverse.pxd | 4 +- .../libcudf/lists/set_operations.pxd | 10 +- .../pylibcudf/libcudf/lists/sorting.pxd | 6 +- .../libcudf/lists/stream_compaction.pxd | 6 +- python/pylibcudf/pylibcudf/libcudf/merge.pxd | 4 +- .../pylibcudf/pylibcudf/libcudf/null_mask.pxd | 16 +- .../libcudf/nvtext/byte_pair_encode.pxd | 6 +- .../pylibcudf/libcudf/nvtext/deduplicate.pxd | 8 +- .../libcudf/nvtext/edit_distance.pxd | 6 +- .../libcudf/nvtext/generate_ngrams.pxd | 8 +- .../pylibcudf/libcudf/nvtext/jaccard.pxd | 4 +- .../pylibcudf/libcudf/nvtext/minhash.pxd | 10 +- .../libcudf/nvtext/ngrams_tokenize.pxd | 4 +- .../pylibcudf/libcudf/nvtext/normalize.pxd | 8 +- .../pylibcudf/libcudf/nvtext/replace.pxd | 6 +- .../pylibcudf/libcudf/nvtext/stemmer.pxd | 8 +- .../pylibcudf/libcudf/nvtext/tokenize.pxd | 18 +- .../libcudf/nvtext/wordpiece_tokenize.pxd | 6 +- .../pylibcudf/libcudf/partitioning.pxd | 10 +- .../pylibcudf/pylibcudf/libcudf/quantiles.pxd | 6 +- python/pylibcudf/pylibcudf/libcudf/reduce.pxd | 8 +- .../pylibcudf/pylibcudf/libcudf/replace.pxd | 18 +- .../pylibcudf/pylibcudf/libcudf/reshape.pxd | 8 +- .../pylibcudf/pylibcudf/libcudf/rolling.pxd | 10 +- python/pylibcudf/pylibcudf/libcudf/round.pxd | 6 +- .../pylibcudf/libcudf/scalar/scalar.pxd | 18 +- .../libcudf/scalar/scalar_factories.pxd | 18 +- python/pylibcudf/pylibcudf/libcudf/search.pxd | 8 +- .../pylibcudf/pylibcudf/libcudf/sorting.pxd | 26 +- .../pylibcudf/libcudf/stream_compaction.pxd | 18 +- .../pylibcudf/libcudf/strings/attributes.pxd | 8 +- 
.../pylibcudf/libcudf/strings/capitalize.pxd | 8 +- .../pylibcudf/libcudf/strings/case.pxd | 8 +- .../pylibcudf/libcudf/strings/char_types.pxd | 6 +- .../pylibcudf/libcudf/strings/combine.pxd | 12 +- .../pylibcudf/libcudf/strings/contains.pxd | 12 +- .../strings/convert/convert_booleans.pxd | 6 +- .../strings/convert/convert_datetime.pxd | 8 +- .../strings/convert/convert_durations.pxd | 6 +- .../strings/convert/convert_fixed_point.pxd | 8 +- .../strings/convert/convert_floats.pxd | 8 +- .../strings/convert/convert_integers.pxd | 16 +- .../libcudf/strings/convert/convert_ipv4.pxd | 8 +- .../libcudf/strings/convert/convert_lists.pxd | 4 +- .../libcudf/strings/convert/convert_urls.pxd | 6 +- .../pylibcudf/libcudf/strings/extract.pxd | 8 +- .../pylibcudf/libcudf/strings/find.pxd | 20 +- .../libcudf/strings/find_multiple.pxd | 6 +- .../pylibcudf/libcudf/strings/findall.pxd | 6 +- .../pylibcudf/libcudf/strings/padding.pxd | 8 +- .../pylibcudf/libcudf/strings/repeat.pxd | 6 +- .../pylibcudf/libcudf/strings/replace.pxd | 8 +- .../pylibcudf/libcudf/strings/replace_re.pxd | 8 +- .../pylibcudf/libcudf/strings/reverse.pxd | 4 +- .../libcudf/strings/split/partition.pxd | 6 +- .../pylibcudf/libcudf/strings/split/split.pxd | 20 +- .../libcudf/strings/strings_column_view.pxd | 6 +- .../pylibcudf/libcudf/strings/strip.pxd | 4 +- .../pylibcudf/libcudf/strings/substring.pxd | 6 +- .../pylibcudf/libcudf/strings/translate.pxd | 6 +- .../pylibcudf/libcudf/strings/wrap.pxd | 4 +- .../libcudf/structs/structs_column_view.pxd | 4 +- .../pylibcudf/libcudf/table/table.pxd | 6 +- .../pylibcudf/pylibcudf/libcudf/transform.pxd | 20 +- .../pylibcudf/pylibcudf/libcudf/transpose.pxd | 4 +- python/pylibcudf/pylibcudf/libcudf/unary.pxd | 14 +- .../pylibcudf/libcudf/unique_count.pxd | 6 +- .../libcudf/utilities/default_stream.pxd | 7 +- python/pylibcudf/pylibcudf/lists.pxd | 39 ++- python/pylibcudf/pylibcudf/lists.pyi | 40 +-- python/pylibcudf/pylibcudf/lists.pyx | 167 ++++++----- 
python/pylibcudf/pylibcudf/merge.pxd | 5 +- python/pylibcudf/pylibcudf/merge.pyi | 6 +- python/pylibcudf/pylibcudf/merge.pyx | 12 +- python/pylibcudf/pylibcudf/null_mask.pxd | 17 +- python/pylibcudf/pylibcudf/null_mask.pyi | 16 +- python/pylibcudf/pylibcudf/null_mask.pyx | 64 +++-- .../pylibcudf/nvtext/byte_pair_encode.pxd | 5 +- .../pylibcudf/nvtext/byte_pair_encode.pyi | 8 +- .../pylibcudf/nvtext/byte_pair_encode.pyx | 23 +- .../pylibcudf/nvtext/deduplicate.pxd | 9 +- .../pylibcudf/nvtext/deduplicate.pyi | 10 +- .../pylibcudf/nvtext/deduplicate.pyx | 34 +-- .../pylibcudf/nvtext/edit_distance.pxd | 7 +- .../pylibcudf/nvtext/edit_distance.pyi | 8 +- .../pylibcudf/nvtext/edit_distance.pyx | 19 +- .../pylibcudf/nvtext/generate_ngrams.pxd | 9 +- .../pylibcudf/nvtext/generate_ngrams.pyi | 10 +- .../pylibcudf/nvtext/generate_ngrams.pyx | 30 +- python/pylibcudf/pylibcudf/nvtext/jaccard.pxd | 5 +- python/pylibcudf/pylibcudf/nvtext/jaccard.pyi | 6 +- python/pylibcudf/pylibcudf/nvtext/jaccard.pyx | 12 +- python/pylibcudf/pylibcudf/nvtext/minhash.pxd | 11 +- python/pylibcudf/pylibcudf/nvtext/minhash.pyi | 12 +- python/pylibcudf/pylibcudf/nvtext/minhash.pyx | 39 +-- .../pylibcudf/nvtext/ngrams_tokenize.pxd | 5 +- .../pylibcudf/nvtext/ngrams_tokenize.pyi | 6 +- .../pylibcudf/nvtext/ngrams_tokenize.pyx | 12 +- .../pylibcudf/pylibcudf/nvtext/normalize.pxd | 9 +- .../pylibcudf/pylibcudf/nvtext/normalize.pyi | 10 +- .../pylibcudf/pylibcudf/nvtext/normalize.pyx | 28 +- python/pylibcudf/pylibcudf/nvtext/replace.pxd | 7 +- python/pylibcudf/pylibcudf/nvtext/replace.pyi | 8 +- python/pylibcudf/pylibcudf/nvtext/replace.pyx | 27 +- python/pylibcudf/pylibcudf/nvtext/stemmer.pxd | 7 +- python/pylibcudf/pylibcudf/nvtext/stemmer.pyi | 8 +- python/pylibcudf/pylibcudf/nvtext/stemmer.pyx | 21 +- .../pylibcudf/pylibcudf/nvtext/tokenize.pxd | 17 +- .../pylibcudf/pylibcudf/nvtext/tokenize.pyi | 20 +- .../pylibcudf/pylibcudf/nvtext/tokenize.pyx | 79 +++--- 
.../pylibcudf/nvtext/wordpiece_tokenize.pxd | 5 +- .../pylibcudf/nvtext/wordpiece_tokenize.pyi | 8 +- .../pylibcudf/nvtext/wordpiece_tokenize.pyx | 19 +- python/pylibcudf/pylibcudf/partitioning.pxd | 7 +- python/pylibcudf/pylibcudf/partitioning.pyi | 8 +- python/pylibcudf/pylibcudf/partitioning.pyx | 30 +- python/pylibcudf/pylibcudf/quantiles.pxd | 7 +- python/pylibcudf/pylibcudf/quantiles.pyi | 8 +- python/pylibcudf/pylibcudf/quantiles.pyx | 21 +- python/pylibcudf/pylibcudf/reduce.pxd | 11 +- python/pylibcudf/pylibcudf/reduce.pyi | 12 +- python/pylibcudf/pylibcudf/reduce.pyx | 36 +-- python/pylibcudf/pylibcudf/replace.pxd | 11 +- python/pylibcudf/pylibcudf/replace.pyi | 12 +- python/pylibcudf/pylibcudf/replace.pyx | 51 ++-- python/pylibcudf/pylibcudf/reshape.pxd | 9 +- python/pylibcudf/pylibcudf/reshape.pyi | 10 +- python/pylibcudf/pylibcudf/reshape.pyx | 28 +- python/pylibcudf/pylibcudf/rolling.pxd | 9 +- python/pylibcudf/pylibcudf/rolling.pyi | 10 +- python/pylibcudf/pylibcudf/rolling.pyx | 34 ++- python/pylibcudf/pylibcudf/round.pxd | 5 +- python/pylibcudf/pylibcudf/round.pyi | 6 +- python/pylibcudf/pylibcudf/round.pyx | 19 +- python/pylibcudf/pylibcudf/scalar.pxd | 7 +- python/pylibcudf/pylibcudf/scalar.pyi | 19 +- python/pylibcudf/pylibcudf/scalar.pyx | 259 +++++++++++------- python/pylibcudf/pylibcudf/search.pxd | 9 +- python/pylibcudf/pylibcudf/search.pyi | 10 +- python/pylibcudf/pylibcudf/search.pyx | 30 +- python/pylibcudf/pylibcudf/sorting.pxd | 27 +- python/pylibcudf/pylibcudf/sorting.pyi | 28 +- python/pylibcudf/pylibcudf/sorting.pyx | 109 ++++---- .../pylibcudf/pylibcudf/stream_compaction.pxd | 17 +- .../pylibcudf/pylibcudf/stream_compaction.pyi | 18 +- .../pylibcudf/pylibcudf/stream_compaction.pyx | 73 ++--- .../pylibcudf/strings/attributes.pxd | 9 +- .../pylibcudf/strings/attributes.pyi | 10 +- .../pylibcudf/strings/attributes.pyx | 30 +- .../pylibcudf/strings/capitalize.pxd | 9 +- .../pylibcudf/strings/capitalize.pyi | 10 +- 
.../pylibcudf/strings/capitalize.pyx | 32 ++- python/pylibcudf/pylibcudf/strings/case.pxd | 9 +- python/pylibcudf/pylibcudf/strings/case.pyi | 10 +- python/pylibcudf/pylibcudf/strings/case.pyx | 30 +- .../pylibcudf/strings/char_types.pxd | 7 +- .../pylibcudf/strings/char_types.pyi | 8 +- .../pylibcudf/strings/char_types.pyx | 21 +- .../pylibcudf/pylibcudf/strings/combine.pxd | 9 +- .../pylibcudf/pylibcudf/strings/combine.pyi | 10 +- .../pylibcudf/pylibcudf/strings/combine.pyx | 38 +-- .../pylibcudf/pylibcudf/strings/contains.pxd | 11 +- .../pylibcudf/pylibcudf/strings/contains.pyi | 12 +- .../pylibcudf/pylibcudf/strings/contains.pyx | 43 +-- .../strings/convert/convert_booleans.pxd | 7 +- .../strings/convert/convert_booleans.pyi | 8 +- .../strings/convert/convert_booleans.pyx | 21 +- .../strings/convert/convert_datetime.pxd | 9 +- .../strings/convert/convert_datetime.pyi | 10 +- .../strings/convert/convert_datetime.pyx | 30 +- .../strings/convert/convert_durations.pxd | 7 +- .../strings/convert/convert_durations.pyi | 8 +- .../strings/convert/convert_durations.pyx | 21 +- .../strings/convert/convert_fixed_point.pxd | 9 +- .../strings/convert/convert_fixed_point.pyi | 10 +- .../strings/convert/convert_fixed_point.pyx | 30 +- .../strings/convert/convert_floats.pxd | 9 +- .../strings/convert/convert_floats.pyi | 10 +- .../strings/convert/convert_floats.pyx | 32 ++- .../strings/convert/convert_integers.pxd | 15 +- .../strings/convert/convert_integers.pyi | 16 +- .../strings/convert/convert_integers.pyx | 59 ++-- .../strings/convert/convert_ipv4.pxd | 9 +- .../strings/convert/convert_ipv4.pyi | 10 +- .../strings/convert/convert_ipv4.pyx | 30 +- .../strings/convert/convert_lists.pxd | 5 +- .../strings/convert/convert_lists.pyi | 6 +- .../strings/convert/convert_lists.pyx | 14 +- .../strings/convert/convert_urls.pxd | 7 +- .../strings/convert/convert_urls.pyi | 8 +- .../strings/convert/convert_urls.pyx | 25 +- .../pylibcudf/pylibcudf/strings/extract.pxd | 9 +- 
.../pylibcudf/pylibcudf/strings/extract.pyi | 10 +- .../pylibcudf/pylibcudf/strings/extract.pyx | 30 +- python/pylibcudf/pylibcudf/strings/find.pxd | 13 +- python/pylibcudf/pylibcudf/strings/find.pyi | 14 +- python/pylibcudf/pylibcudf/strings/find.pyx | 56 ++-- .../pylibcudf/strings/find_multiple.pxd | 7 +- .../pylibcudf/strings/find_multiple.pyi | 8 +- .../pylibcudf/strings/find_multiple.pyx | 21 +- .../pylibcudf/pylibcudf/strings/findall.pxd | 7 +- .../pylibcudf/pylibcudf/strings/findall.pyi | 8 +- .../pylibcudf/pylibcudf/strings/findall.pyx | 21 +- .../pylibcudf/pylibcudf/strings/padding.pxd | 9 +- .../pylibcudf/pylibcudf/strings/padding.pyi | 10 +- .../pylibcudf/pylibcudf/strings/padding.pyx | 30 +- python/pylibcudf/pylibcudf/strings/repeat.pxd | 5 +- python/pylibcudf/pylibcudf/strings/repeat.pyi | 6 +- python/pylibcudf/pylibcudf/strings/repeat.pyx | 14 +- .../pylibcudf/pylibcudf/strings/replace.pxd | 9 +- .../pylibcudf/pylibcudf/strings/replace.pyi | 10 +- .../pylibcudf/pylibcudf/strings/replace.pyx | 32 ++- .../pylibcudf/strings/replace_re.pxd | 7 +- .../pylibcudf/strings/replace_re.pyi | 10 +- .../pylibcudf/strings/replace_re.pyx | 27 +- .../pylibcudf/pylibcudf/strings/reverse.pyi | 6 +- .../pylibcudf/pylibcudf/strings/reverse.pyx | 12 +- python/pylibcudf/pylibcudf/strings/slice.pxd | 5 +- python/pylibcudf/pylibcudf/strings/slice.pyi | 6 +- python/pylibcudf/pylibcudf/strings/slice.pyx | 20 +- .../pylibcudf/strings/split/partition.pxd | 7 +- .../pylibcudf/strings/split/partition.pyi | 8 +- .../pylibcudf/strings/split/partition.pyx | 25 +- .../pylibcudf/strings/split/split.pxd | 19 +- .../pylibcudf/strings/split/split.pyi | 20 +- .../pylibcudf/strings/split/split.pyx | 82 +++--- python/pylibcudf/pylibcudf/strings/strip.pxd | 5 +- python/pylibcudf/pylibcudf/strings/strip.pyi | 6 +- python/pylibcudf/pylibcudf/strings/strip.pyx | 14 +- .../pylibcudf/pylibcudf/strings/translate.pxd | 7 +- .../pylibcudf/pylibcudf/strings/translate.pyi | 8 +- 
.../pylibcudf/pylibcudf/strings/translate.pyx | 21 +- python/pylibcudf/pylibcudf/strings/wrap.pxd | 5 +- python/pylibcudf/pylibcudf/strings/wrap.pyi | 6 +- python/pylibcudf/pylibcudf/strings/wrap.pyx | 12 +- python/pylibcudf/pylibcudf/table.pxd | 7 +- python/pylibcudf/pylibcudf/table.pyi | 10 +- python/pylibcudf/pylibcudf/table.pyx | 29 +- python/pylibcudf/pylibcudf/transform.pxd | 19 +- python/pylibcudf/pylibcudf/transform.pyi | 20 +- python/pylibcudf/pylibcudf/transform.pyx | 92 ++++--- python/pylibcudf/pylibcudf/transpose.pxd | 5 +- python/pylibcudf/pylibcudf/transpose.pyi | 6 +- python/pylibcudf/pylibcudf/transpose.pyx | 12 +- python/pylibcudf/pylibcudf/unary.pxd | 15 +- python/pylibcudf/pylibcudf/unary.pyi | 16 +- python/pylibcudf/pylibcudf/unary.pyx | 59 ++-- python/pylibcudf/pylibcudf/utils.pxd | 6 +- python/pylibcudf/pylibcudf/utils.pyi | 9 +- python/pylibcudf/pylibcudf/utils.pyx | 6 +- python/pylibcudf/tests/test_experimental.py | 23 +- .../pylibcudf/tests/test_stream_protocol.py | 74 +++++ 357 files changed, 3470 insertions(+), 2967 deletions(-) create mode 100644 python/pylibcudf/tests/test_stream_protocol.py diff --git a/python/cudf_polars/cudf_polars/utils/cuda_stream.py b/python/cudf_polars/cudf_polars/utils/cuda_stream.py index a42252157b4..c0708d3bea8 100644 --- a/python/cudf_polars/cudf_polars/utils/cuda_stream.py +++ b/python/cudf_polars/cudf_polars/utils/cuda_stream.py @@ -13,6 +13,7 @@ if TYPE_CHECKING: from collections.abc import Callable, Sequence + from pylibcudf.utils import CudaStreamLike from rmm.pylibrmm.stream import Stream @@ -27,7 +28,7 @@ def get_cuda_stream() -> Stream: def join_cuda_streams( - *, downstreams: Sequence[Stream], upstreams: Sequence[Stream] + *, downstreams: Sequence[CudaStreamLike], upstreams: Sequence[CudaStreamLike] ) -> None: """ Join multiple CUDA streams. 
@@ -46,7 +47,7 @@ def join_cuda_streams( def get_joined_cuda_stream( - get_cuda_stream: Callable[[], Stream], *, upstreams: Sequence[Stream] + get_cuda_stream: Callable[[], Stream], *, upstreams: Sequence[CudaStreamLike] ) -> Stream: """ Return a CUDA stream that is joined to the given streams. diff --git a/python/pylibcudf/pylibcudf/binaryop.pxd b/python/pylibcudf/pylibcudf/binaryop.pxd index 29c9f3d98ea..a34a02b2191 100644 --- a/python/pylibcudf/pylibcudf/binaryop.pxd +++ b/python/pylibcudf/pylibcudf/binaryop.pxd @@ -1,10 +1,9 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool from pylibcudf.libcudf.binaryop cimport binary_operator from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .scalar cimport Scalar @@ -25,7 +24,7 @@ cpdef Column binary_operation( RightBinaryOperand rhs, binary_operator op, DataType output_type, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/binaryop.pyi b/python/pylibcudf/pylibcudf/binaryop.pyi index 52263440db3..1f3c9a2cb64 100644 --- a/python/pylibcudf/pylibcudf/binaryop.pyi +++ b/python/pylibcudf/pylibcudf/binaryop.pyi @@ -1,14 +1,14 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from enum import IntEnum from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar from pylibcudf.types import DataType +from pylibcudf.utils import CudaStreamLike class BinaryOperator(IntEnum): ADD = ... 
@@ -52,7 +52,7 @@ def binary_operation( rhs: Column | Scalar, op: BinaryOperator, output_type: DataType, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def is_supported_operation( diff --git a/python/pylibcudf/pylibcudf/binaryop.pyx b/python/pylibcudf/pylibcudf/binaryop.pyx index a46b6aaaa81..20a69d60727 100644 --- a/python/pylibcudf/pylibcudf/binaryop.pyx +++ b/python/pylibcudf/pylibcudf/binaryop.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from cython.operator import dereference @@ -20,6 +20,7 @@ from .column cimport Column from .scalar cimport Scalar from .types cimport DataType from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["BinaryOperator", "binary_operation", "is_supported_operation"] @@ -28,7 +29,7 @@ cpdef Column binary_operation( RightBinaryOperand rhs, binary_operator op, DataType output_type, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a binary operation between a column and another column or scalar. 
@@ -61,7 +62,8 @@ cpdef Column binary_operation( The result of the binary operation """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if LeftBinaryOperand is Column and RightBinaryOperand is Column: @@ -71,7 +73,7 @@ cpdef Column binary_operation( rhs.view(), op, output_type.c_obj, - stream.view(), + _cs, mr.get_mr() ) elif LeftBinaryOperand is Column and RightBinaryOperand is Scalar: @@ -81,7 +83,7 @@ cpdef Column binary_operation( dereference(rhs.c_obj), op, output_type.c_obj, - stream.view(), + _cs, mr.get_mr() ) elif LeftBinaryOperand is Scalar and RightBinaryOperand is Column: @@ -91,13 +93,13 @@ cpdef Column binary_operation( rhs.view(), op, output_type.c_obj, - stream.view(), + _cs, mr.get_mr() ) else: raise ValueError(f"Invalid arguments {lhs} and {rhs}") - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef bool is_supported_operation( diff --git a/python/pylibcudf/pylibcudf/column.pxd b/python/pylibcudf/pylibcudf/column.pxd index 7348d68f6de..429f85f39b0 100644 --- a/python/pylibcudf/pylibcudf/column.pxd +++ b/python/pylibcudf/pylibcudf/column.pxd @@ -6,7 +6,6 @@ from libcpp.vector cimport vector from libc.stdint cimport uint64_t from rmm.librmm.device_buffer cimport device_buffer -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport ( @@ -27,7 +26,7 @@ cdef class OwnerWithCAI: cdef dict cai @staticmethod - cdef create(column_view cv, object owner, Stream stream) + cdef create(column_view cv, object owner, object stream) cdef class OwnerMaskWithCAI: @@ -38,7 +37,7 @@ cdef class OwnerMaskWithCAI: cdef create(column_view cv, object owner) -cdef gpumemoryview _copy_array_to_device(object buf, Stream stream=*) 
+cdef gpumemoryview _copy_array_to_device(object buf, object stream=*) cdef class Column: @@ -61,7 +60,7 @@ cdef class Column: @staticmethod cdef Column from_libcudf( unique_ptr[column] libcudf_col, - Stream stream, + object stream, DeviceMemoryResource mr ) @@ -72,7 +71,7 @@ cdef class Column: cdef Column from_column_view_of_arbitrary( const column_view& cv, object owner, - Stream stream, + object stream, ) @staticmethod @@ -81,10 +80,10 @@ cdef class Column: tuple shape, DataType dtype, Column base=*, - Stream stream=*, + object stream=*, ) - cpdef Scalar to_scalar(self, Stream stream=*, DeviceMemoryResource mr=*) + cpdef Scalar to_scalar(self, object stream=*, DeviceMemoryResource mr=*) cpdef DataType type(self) cpdef Column child(self, size_type index) cpdef size_type num_children(self) @@ -95,7 +94,7 @@ cdef class Column: cpdef object data(self) cpdef object null_mask(self) cpdef list children(self) - cpdef Column copy(self, Stream stream=*, DeviceMemoryResource mr=*) + cpdef Column copy(self, object stream=*, DeviceMemoryResource mr=*) cpdef uint64_t device_buffer_size(self) cpdef Column with_mask(self, object, size_type, bint validate=*) @@ -108,10 +107,10 @@ cdef class ListsColumnView: cpdef child(self) cpdef offsets(self) cdef lists_column_view view(self) nogil - cpdef Column get_sliced_child(self, Stream stream=*) + cpdef Column get_sliced_child(self, object stream=*) cdef class StructsColumnView: cdef Column _column cdef structs_column_view view(self) nogil - cpdef Column get_sliced_child(self, int index, Stream stream=*) + cpdef Column get_sliced_child(self, int index, object stream=*) diff --git a/python/pylibcudf/pylibcudf/column.pyi b/python/pylibcudf/pylibcudf/column.pyi index 3ac4641ac13..3ff7f53f356 100644 --- a/python/pylibcudf/pylibcudf/column.pyi +++ b/python/pylibcudf/pylibcudf/column.pyi @@ -6,12 +6,12 @@ from typing import Any, Protocol, TypedDict from rmm.pylibrmm.device_buffer import DeviceBuffer from rmm.pylibrmm.memory_resource import 
DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf._interop_helpers import ArrowLike, ColumnMetadata from pylibcudf.scalar import Scalar from pylibcudf.span import Span from pylibcudf.types import DataType +from pylibcudf.utils import CudaStreamLike class ArrayInterfaceBase(TypedDict): shape: tuple[int, ...] @@ -64,7 +64,7 @@ class Column: def num_children(self) -> int: ... def copy( self, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def device_buffer_size(self) -> int: ... @@ -77,19 +77,19 @@ class Column: def from_scalar( scalar: Scalar, size: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def to_scalar( self, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Scalar: ... @staticmethod def all_null_like( like: Column, size: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... @staticmethod @@ -99,32 +99,34 @@ class Column: def to_arrow( self, metadata: ColumnMetadata | str | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> ArrowLike: ... # Private methods below are included because polars is currently using them, # but we want to remove stubs for these private methods eventually def _to_schema(self, metadata: Any = None) -> Any: ... - def _to_host_array(self, stream: Stream) -> Any: ... + def _to_host_array(self, stream: CudaStreamLike) -> Any: ... @staticmethod def from_arrow( obj: ArrowLike, dtype: DataType | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
@classmethod def from_cuda_array_interface( - cls, obj: SupportsCudaArrayInterface, stream: Stream | None = None + cls, + obj: SupportsCudaArrayInterface, + stream: CudaStreamLike | None = None, ) -> Column: ... @classmethod def from_array_interface( - cls, obj: SupportsArrayInterface, stream: Stream | None = None + cls, obj: SupportsArrayInterface, stream: CudaStreamLike | None = None ) -> Column: ... @classmethod def from_array( cls, obj: SupportsCudaArrayInterface | SupportsArrayInterface, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> Column: ... @staticmethod def struct_from_children(children: Sequence[Column]) -> Column: ... @@ -132,21 +134,23 @@ class Column: def from_iterable_of_py( obj: Iterable, dtype: DataType | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> Column: ... class ListsColumnView: def __init__(self, column: Column): ... def child(self) -> Column: ... def offsets(self) -> Column: ... - def get_sliced_child(self, stream: Stream | None = None) -> Column: ... + def get_sliced_child( + self, stream: CudaStreamLike | None = None + ) -> Column: ... class StructsColumnView: def __init__(self, column: Column): ... def child(self) -> Column: ... def offsets(self) -> Column: ... def get_sliced_child( - self, index: int, stream: Stream | None = None + self, index: int, stream: CudaStreamLike | None = None ) -> Column: ... 
def is_c_contiguous( diff --git a/python/pylibcudf/pylibcudf/column.pyx b/python/pylibcudf/pylibcudf/column.pyx index 96137f96256..fc8745dae26 100644 --- a/python/pylibcudf/pylibcudf/column.pyx +++ b/python/pylibcudf/pylibcudf/column.pyx @@ -67,6 +67,7 @@ from itertools import accumulate import functools import operator from typing import Iterable +from cuda.bindings.cyruntime cimport cudaStream_t try: import pyarrow as pa @@ -96,7 +97,7 @@ cdef class _ArrowColumnHolder: cdef class OwnerWithCAI: """An interface for column view's data with gpumemoryview via CAI.""" @staticmethod - cdef create(column_view cv, object owner, Stream stream): + cdef create(column_view cv, object owner, object stream): obj = OwnerWithCAI() obj.owner = owner # The default size of 0 will be applied for any type that stores data in the @@ -108,7 +109,7 @@ cdef class OwnerWithCAI: # Cast to Python integers before multiplying to avoid overflow. size = int(cv.size()) * int(cpp_size_of(cv.type())) elif cv.type().id() == type_id.STRING: - size = strings_column_view(cv).chars_size(stream.view()) + size = strings_column_view(cv).chars_size((stream).view().value()) obj.cai = { "shape": (size,), @@ -156,7 +157,7 @@ class ArrayInterfaceWrapper: self.__array_interface__ = iface -cdef gpumemoryview _copy_array_to_device(object buf, Stream stream=None): +cdef gpumemoryview _copy_array_to_device(object buf, object stream=None): """ Copy a host-side array.array buffer to device memory. 
@@ -175,11 +176,11 @@ cdef gpumemoryview _copy_array_to_device(object buf, Stream stream=None): cdef memoryview mv = memoryview(buf) cdef uintptr_t ptr = mv.obj.buffer_info()[0] cdef size_t nbytes = len(mv) * mv.itemsize - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) return gpumemoryview(DeviceBuffer.to_device( ptr, - stream + _stream )) @@ -401,7 +402,7 @@ cdef class Column: def from_arrow( obj: ArrowLike, dtype: DataType | None = None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ) -> ArrowLike: """ @@ -453,7 +454,8 @@ cdef class Column: cdef _ArrowColumnHolder result cdef unique_ptr[arrow_column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if hasattr(obj, "__arrow_c_device_array__"): @@ -469,7 +471,7 @@ cdef class Column: c_result = make_unique[arrow_column]( move(dereference(c_schema)), move(dereference(c_device_array)), - stream.view(), + _cs, result.mr.get_mr(), ) result.col.swap(c_result) @@ -477,7 +479,7 @@ cdef class Column: return Column.from_column_view_of_arbitrary( result.col.get().view(), result, - stream, + _stream, ) elif hasattr(obj, "__arrow_c_array__"): schema, h_array = obj.__arrow_c_array__() @@ -490,7 +492,7 @@ cdef class Column: c_result = make_unique[arrow_column]( move(dereference(c_schema)), move(dereference(c_array)), - stream.view(), + _cs, result.mr.get_mr(), ) result.col.swap(c_result) @@ -498,7 +500,7 @@ cdef class Column: return Column.from_column_view_of_arbitrary( result.col.get().view(), result, - stream, + _stream, ) elif hasattr(obj, "__arrow_c_stream__"): arrow_stream = obj.__arrow_c_stream__() @@ -514,7 +516,7 @@ cdef class Column: with nogil: c_result = make_unique[arrow_column]( move(dereference(c_arrow_stream)), - stream.view(), + _cs, result.mr.get_mr(), ) result.col.swap(c_result) @@ -522,7 +524,7 @@ cdef class Column: return 
Column.from_column_view_of_arbitrary( result.col.get().view(), result, - stream, + _stream, ) elif hasattr(obj, "__arrow_c_device_stream__"): # TODO: When we add support for this case, it should be moved above @@ -656,7 +658,7 @@ cdef class Column: @staticmethod cdef Column from_libcudf( unique_ptr[column] libcudf_col, - Stream stream, + object stream, DeviceMemoryResource mr ): """Create a Column from a libcudf column. @@ -667,6 +669,7 @@ cdef class Column: """ assert stream is not None, "stream cannot be None" assert mr is not None, "mr cannot be None" + cdef Stream _stream = stream cdef DataType dtype = DataType.from_libcudf(libcudf_col.get().type()) cdef size_type size = libcudf_col.get().size() @@ -677,13 +680,13 @@ cdef class Column: # Note that when converting to cudf Column objects we'll need to pull # out the base object. cdef gpumemoryview data = gpumemoryview( - DeviceBuffer.c_from_unique_ptr(move(contents.data), stream, mr) + DeviceBuffer.c_from_unique_ptr(move(contents.data), _stream, mr) ) cdef gpumemoryview mask = None if null_count > 0: mask = gpumemoryview( - DeviceBuffer.c_from_unique_ptr(move(contents.null_mask), stream, mr) + DeviceBuffer.c_from_unique_ptr(move(contents.null_mask), _stream, mr) ) children = [] @@ -772,7 +775,7 @@ cdef class Column: cdef Column from_column_view_of_arbitrary( const column_view& cv, object owner, - Stream stream, + object stream, ): """Create a Column from a libcudf column_view into an arbitrary owner. @@ -818,7 +821,7 @@ cdef class Column: def from_scalar( Scalar slr, size_type size, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create a Column from a Scalar. 
@@ -839,18 +842,19 @@ cdef class Column: """ cdef const scalar* c_scalar = slr.get() cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = make_column_from_scalar( dereference(c_scalar), size, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) - cpdef Scalar to_scalar(self, Stream stream=None, DeviceMemoryResource mr=None): + cpdef Scalar to_scalar(self, object stream=None, DeviceMemoryResource mr=None): """ Return the first value of 1-element column as a Scalar. @@ -873,11 +877,12 @@ cdef class Column: cdef column_view cv = self.view() cdef unique_ptr[scalar] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - result = get_element(cv, 0, stream.view(), mr.get_mr()) + result = get_element(cv, 0, _cs, mr.get_mr()) return Scalar.from_libcudf(move(result)) @@ -885,7 +890,7 @@ cdef class Column: def all_null_like( Column like, size_type size, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create an all null column from a template. @@ -904,18 +909,19 @@ cdef class Column: Column An all-null column of `size` rows and type matching `like`. 
""" - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) - cdef Scalar slr = Scalar.empty_like(like, stream, mr) + cdef Scalar slr = Scalar.empty_like(like, _stream, mr) cdef unique_ptr[column] c_result with nogil: c_result = make_column_from_scalar( dereference(slr.get()), size, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) @staticmethod cdef Column _wrap_nested_list_column( @@ -923,7 +929,7 @@ cdef class Column: tuple shape, DataType dtype, Column base=None, - Stream stream=None, + object stream=None, ): """ Construct a list Column from a gpumemoryview and array @@ -937,7 +943,7 @@ cdef class Column: """ ndim = len(shape) flat_size = functools.reduce(operator.mul, shape) - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) if base is None: base = Column( @@ -958,9 +964,9 @@ cdef class Column: offsets_col = sequence( outer_len + 1, - Scalar.from_py(0, int32_dtype, stream=stream), - Scalar.from_py(shape[i], int32_dtype, stream=stream), - stream, + Scalar.from_py(0, int32_dtype, stream=_stream), + Scalar.from_py(shape[i], int32_dtype, stream=_stream), + _stream, ) nested = Column( @@ -976,7 +982,7 @@ cdef class Column: return nested @classmethod - def from_array_interface(cls, obj, Stream stream=None): + def from_array_interface(cls, obj, object stream=None): """ Create a Column from an object implementing the NumPy Array Interface. 
@@ -1016,21 +1022,21 @@ cdef class Column: cdef const unsigned char* ptr cdef const unsigned char[:] view - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) if nbytes > 0: ptr = data_ptr view = ( ptr)[:nbytes] - dbuf = DeviceBuffer.to_device(view, stream) + dbuf = DeviceBuffer.to_device(view, _stream) else: - dbuf = DeviceBuffer(size=0, stream=stream) + dbuf = DeviceBuffer(size=0, stream=_stream) return Column._wrap_nested_list_column( - gpumemoryview(dbuf), shape, dtype, None, stream + gpumemoryview(dbuf), shape, dtype, None, _stream ) @classmethod - def from_cuda_array_interface(cls, obj, Stream stream=None): + def from_cuda_array_interface(cls, obj, object stream=None): """ Create a Column from an object implementing the CUDA Array Interface. @@ -1069,7 +1075,7 @@ cdef class Column: ) @classmethod - def from_array(cls, obj, Stream stream=None): + def from_array(cls, obj, object stream=None): """ Create a Column from any object which supports the NumPy or CUDA array interface. @@ -1115,7 +1121,7 @@ cdef class Column: def from_iterable_of_py( obj: Iterable, dtype: DataType | None = None, - Stream stream=None + object stream=None ) -> Column: """ Create a Column from a Python iterable of scalar values or nested iterables. 
@@ -1364,14 +1370,15 @@ cdef class Column: """The children of the column.""" return self._children - cpdef Column copy(self, Stream stream=None, DeviceMemoryResource mr=None): + cpdef Column copy(self, object stream=None, DeviceMemoryResource mr=None): """Create a copy of the column.""" cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = make_unique[column](self.view(), stream.view(), mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + c_result = make_unique[column](self.view(), _cs, mr.get_mr()) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef uint64_t device_buffer_size(self): """ @@ -1419,10 +1426,12 @@ cdef class Column: return PyCapsule_New(raw_schema_ptr, 'arrow_schema', _release_schema) - def _to_host_array(self, Stream stream): + def _to_host_array(self, object stream): cdef ArrowArray* raw_host_array_ptr + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() with nogil: - raw_host_array_ptr = to_arrow_host_raw(self.view(), stream.view()) + raw_host_array_ptr = to_arrow_host_raw(self.view(), _cs) return PyCapsule_New(raw_host_array_ptr, "arrow_array", _release_array) @@ -1484,7 +1493,7 @@ cdef class ListsColumnView: """ return lists_column_view(self._column.view()) - cpdef Column get_sliced_child(self, Stream stream=None): + cpdef Column get_sliced_child(self, object stream=None): """ Get the list elements child properly sliced to match parent's view. 
@@ -1498,9 +1507,9 @@ cdef class ListsColumnView: Column The sliced elements column """ - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) - cdef column_view c_child = self.view().get_sliced_child(stream.view()) + cdef column_view c_child = self.view().get_sliced_child(_stream.view().value()) return Column.from_column_view(c_child, self._column.child(1)) @@ -1522,7 +1531,7 @@ cdef class StructsColumnView: """ return structs_column_view(self._column.view()) - cpdef Column get_sliced_child(self, int index, Stream stream=None): + cpdef Column get_sliced_child(self, int index, object stream=None): """ Get the struct elements child properly sliced to match parent's view. @@ -1538,9 +1547,10 @@ cdef class StructsColumnView: Column The sliced elements column """ - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) - cdef column_view c_child = self.view().get_sliced_child(index, stream.view()) + cdef cudaStream_t _cs = _stream.view().value() + cdef column_view c_child = self.view().get_sliced_child(index, _cs) return Column.from_column_view(c_child, self._column.child(index)) diff --git a/python/pylibcudf/pylibcudf/column_factories.pxd b/python/pylibcudf/pylibcudf/column_factories.pxd index d26b3396e30..3f9841c045d 100644 --- a/python/pylibcudf/pylibcudf/column_factories.pxd +++ b/python/pylibcudf/pylibcudf/column_factories.pxd @@ -2,7 +2,6 @@ # SPDX-License-Identifier: Apache-2.0 from pylibcudf.libcudf.types cimport mask_state from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .types cimport DataType, size_type, type_id @@ -20,7 +19,7 @@ cpdef Column make_numeric_column( DataType type_, size_type size, MaskArg mask, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -28,7 +27,7 @@ cpdef Column make_fixed_point_column( DataType type_, size_type size, MaskArg mask, - Stream stream = *, + object stream = *, 
DeviceMemoryResource mr = *, ) @@ -36,7 +35,7 @@ cpdef Column make_timestamp_column( DataType type_, size_type size, MaskArg mask, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -44,7 +43,7 @@ cpdef Column make_duration_column( DataType type_, size_type size, MaskArg mask, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -52,18 +51,18 @@ cpdef Column make_fixed_width_column( DataType type_, size_type size, MaskArg mask, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) cpdef Column make_empty_column( MakeEmptyColumnOperand type_or_id, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) cpdef Column make_empty_lists_column( DataType child_type, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) diff --git a/python/pylibcudf/pylibcudf/column_factories.pyi b/python/pylibcudf/pylibcudf/column_factories.pyi index 66d46d88949..a9e92c5f823 100644 --- a/python/pylibcudf/pylibcudf/column_factories.pyi +++ b/python/pylibcudf/pylibcudf/column_factories.pyi @@ -1,53 +1,53 @@ # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.types import DataType, MaskState, TypeId +from pylibcudf.utils import CudaStreamLike def make_numeric_column( type_: DataType, size: int, mstate: MaskState, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def make_fixed_point_column( type_: DataType, size: int, mstate: MaskState, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
def make_timestamp_column( type_: DataType, size: int, mstate: MaskState, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def make_duration_column( type_: DataType, size: int, mstate: MaskState, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def make_fixed_width_column( type_: DataType, size: int, mstate: MaskState, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def make_empty_column( type_or_id: DataType | TypeId, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def make_empty_lists_column( child_type: DataType, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/column_factories.pyx b/python/pylibcudf/pylibcudf/column_factories.pyx index 0848f1aff03..45d590f4106 100644 --- a/python/pylibcudf/pylibcudf/column_factories.pyx +++ b/python/pylibcudf/pylibcudf/column_factories.pyx @@ -20,6 +20,7 @@ from .types cimport DataType, type_id from .types import MaskState, TypeId from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ @@ -34,7 +35,7 @@ __all__ = [ cpdef Column make_empty_column( MakeEmptyColumnOperand type_or_id, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Creates an empty column of the specified type. 
@@ -53,7 +54,7 @@ cpdef Column make_empty_column( """ cdef unique_ptr[column] result cdef type_id id - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) mr = _get_memory_resource(mr) if MakeEmptyColumnOperand is object: @@ -75,14 +76,14 @@ cpdef Column make_empty_column( raise TypeError( "Must pass a TypeId or DataType" ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column make_numeric_column( DataType type_, size_type size, MaskArg mstate, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Creates an empty numeric column. @@ -102,7 +103,8 @@ cpdef Column make_numeric_column( state = mstate else: raise TypeError("Invalid mask argument") - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -110,17 +112,17 @@ cpdef Column make_numeric_column( type_.c_obj, size, state, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column make_fixed_point_column( DataType type_, size_type size, MaskArg mstate, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): @@ -136,7 +138,8 @@ cpdef Column make_fixed_point_column( state = mstate else: raise TypeError("Invalid mask argument") - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -144,18 +147,18 @@ cpdef Column make_fixed_point_column( type_.c_obj, size, state, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column make_timestamp_column( DataType type_, size_type size, MaskArg mstate, - Stream stream=None, + object stream=None, DeviceMemoryResource 
mr=None, ): @@ -171,7 +174,8 @@ cpdef Column make_timestamp_column( state = mstate else: raise TypeError("Invalid mask argument") - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -179,18 +183,18 @@ cpdef Column make_timestamp_column( type_.c_obj, size, state, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column make_duration_column( DataType type_, size_type size, MaskArg mstate, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): @@ -206,7 +210,8 @@ cpdef Column make_duration_column( state = mstate else: raise TypeError("Invalid mask argument") - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -214,18 +219,18 @@ cpdef Column make_duration_column( type_.c_obj, size, state, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column make_fixed_width_column( DataType type_, size_type size, MaskArg mstate, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): @@ -241,7 +246,8 @@ cpdef Column make_fixed_width_column( state = mstate else: raise TypeError("Invalid mask argument") - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -249,16 +255,16 @@ cpdef Column make_fixed_width_column( type_.c_obj, size, state, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column make_empty_lists_column( DataType child_type, - Stream stream=None, + object stream=None, 
DeviceMemoryResource mr=None, ): """Creates an empty column of the specified type. @@ -276,10 +282,10 @@ cpdef Column make_empty_lists_column( An empty Column """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) mr = _get_memory_resource(mr) with nogil: result = cpp_make_empty_lists_column(child_type.c_obj) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/concatenate.pxd b/python/pylibcudf/pylibcudf/concatenate.pxd index 60adf27c9a3..60189ba4406 100644 --- a/python/pylibcudf/pylibcudf/concatenate.pxd +++ b/python/pylibcudf/pylibcudf/concatenate.pxd @@ -1,9 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from .table cimport Table -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource @@ -11,4 +10,4 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource # unify the column and table paths without using runtime dispatch instead. In this case # we choose to prioritize API consistency over performance, so we use the same function # with a bit of runtime dispatch overhead. -cpdef concatenate(list objects, Stream stream=*, DeviceMemoryResource mr=*) +cpdef concatenate(list objects, object stream = *, DeviceMemoryResource mr=*) diff --git a/python/pylibcudf/pylibcudf/concatenate.pyi b/python/pylibcudf/pylibcudf/concatenate.pyi index 18e8bff2e2f..59379e01c46 100644 --- a/python/pylibcudf/pylibcudf/concatenate.pyi +++ b/python/pylibcudf/pylibcudf/concatenate.pyi @@ -1,14 +1,14 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.table import Table +from pylibcudf.utils import CudaStreamLike def concatenate[ColumnOrTable: (Column, Table)]( objects: list[ColumnOrTable], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> ColumnOrTable: ... diff --git a/python/pylibcudf/pylibcudf/concatenate.pyx b/python/pylibcudf/pylibcudf/concatenate.pyx index 36fa0984a68..9921d5b1a39 100644 --- a/python/pylibcudf/pylibcudf/concatenate.pyx +++ b/python/pylibcudf/pylibcudf/concatenate.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -16,10 +16,11 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .column cimport Column from .table cimport Table from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["concatenate"] -cpdef concatenate(list objects, Stream stream=None, DeviceMemoryResource mr=None): +cpdef concatenate(list objects, object stream=None, DeviceMemoryResource mr=None): """Concatenate columns or tables. 
Parameters @@ -41,7 +42,8 @@ cpdef concatenate(list objects, Stream stream=None, DeviceMemoryResource mr=None cdef vector[column_view] c_columns cdef vector[table_view] c_tables - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) cdef unique_ptr[column] c_col_result @@ -53,17 +55,17 @@ cpdef concatenate(list objects, Stream stream=None, DeviceMemoryResource mr=None with nogil: c_tbl_result = cpp_concatenate.concatenate( - c_tables, stream.view(), mr.get_mr() + c_tables, _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_tbl_result), stream, mr) + return Table.from_libcudf(move(c_tbl_result), _stream, mr) elif isinstance(objects[0], Column): for column in objects: c_columns.push_back((column).view()) with nogil: c_col_result = cpp_concatenate.concatenate( - c_columns, stream.view(), mr.get_mr() + c_columns, _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_col_result), stream, mr) + return Column.from_libcudf(move(c_col_result), _stream, mr) else: raise ValueError("input must be a list of Columns or Tables") diff --git a/python/pylibcudf/pylibcudf/contiguous_split.pxd b/python/pylibcudf/pylibcudf/contiguous_split.pxd index a294e70a4a6..95259723dfa 100644 --- a/python/pylibcudf/pylibcudf/contiguous_split.pxd +++ b/python/pylibcudf/pylibcudf/contiguous_split.pxd @@ -32,13 +32,13 @@ cdef class HostBuffer: cdef class PackedColumns: cdef unique_ptr[packed_columns] c_obj - cdef Stream stream + cdef object stream cdef DeviceMemoryResource mr @staticmethod cdef PackedColumns from_libcudf( unique_ptr[packed_columns] data, - Stream stream, + object stream, DeviceMemoryResource mr ) cpdef tuple release(self) @@ -58,10 +58,10 @@ cdef class ChunkedPack: cpdef PackedColumns pack(Table input) -cpdef Table unpack(PackedColumns input, Stream stream=*) +cpdef Table unpack(PackedColumns input, object stream = *) cpdef Table unpack_from_memoryviews( memoryview metadata, object 
gpu_data, - Stream stream=*, + object stream = *, ) diff --git a/python/pylibcudf/pylibcudf/contiguous_split.pyi b/python/pylibcudf/pylibcudf/contiguous_split.pyi index df241c079ae..6e0e653b5bb 100644 --- a/python/pylibcudf/pylibcudf/contiguous_split.pyi +++ b/python/pylibcudf/pylibcudf/contiguous_split.pyi @@ -2,28 +2,30 @@ # SPDX-License-Identifier: Apache-2.0 from rmm.mr import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.gpumemoryview import gpumemoryview from pylibcudf.span import Span from pylibcudf.table import Table +from pylibcudf.utils import CudaStreamLike class PackedColumns: def __init__(self): ... def release( - self, stream: Stream | None = None + self, stream: CudaStreamLike | None = None ) -> tuple[memoryview[bytes], gpumemoryview]: ... def pack( input: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> PackedColumns: ... -def unpack(input: PackedColumns, stream: Stream | None = None) -> Table: ... +def unpack( + input: PackedColumns, stream: CudaStreamLike | None = None +) -> Table: ... def unpack_from_memoryviews( metadata: memoryview[bytes], gpu_data: Span, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> Table: ... class ChunkedPack: @@ -32,7 +34,7 @@ class ChunkedPack: def create( input: Table, user_buffer_size: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, temp_mr: DeviceMemoryResource | None = None, ) -> ChunkedPack: ... def has_next(self) -> bool: ... 
diff --git a/python/pylibcudf/pylibcudf/contiguous_split.pyx b/python/pylibcudf/pylibcudf/contiguous_split.pyx index 6b24def5dc8..239d89d6470 100644 --- a/python/pylibcudf/pylibcudf/contiguous_split.pyx +++ b/python/pylibcudf/pylibcudf/contiguous_split.pyx @@ -15,6 +15,8 @@ from cuda.bindings.cyruntime cimport ( cudaError_t, cudaMemcpyAsync, cudaMemcpyKind, + cudaStream_t, + cudaStreamSynchronize, ) from pylibcudf.libcudf.contiguous_split cimport ( @@ -27,7 +29,6 @@ from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.utilities.span cimport device_span -from rmm.librmm.cuda_stream_view cimport cuda_stream_view from rmm.pylibrmm.device_buffer cimport DeviceBuffer from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream @@ -36,6 +37,7 @@ from .gpumemoryview cimport gpumemoryview from .table cimport Table from .span import is_span from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ @@ -105,7 +107,7 @@ cdef class PackedColumns: @staticmethod cdef PackedColumns from_libcudf( unique_ptr[packed_columns] data, - Stream stream, + object stream, DeviceMemoryResource mr ): """Create a Python PackedColumns from a libcudf packed_columns.""" @@ -163,7 +165,7 @@ cdef class ChunkedPack: def create( Table input, size_t user_buffer_size, - Stream stream=None, + object stream=None, DeviceMemoryResource temp_mr=None, ): """ @@ -184,16 +186,16 @@ cdef class ChunkedPack: ------- New ChunkedPack object. 
""" - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) temp_mr = _get_memory_resource(temp_mr) cdef unique_ptr[chunked_pack] obj = chunked_pack.create( - input.view(), user_buffer_size, stream.view(), temp_mr.get_mr() + input.view(), user_buffer_size, _stream.view().value(), temp_mr.get_mr() ) cdef ChunkedPack out = ChunkedPack.__new__(ChunkedPack) out.table = input out.mr = temp_mr - out.stream = stream + out.stream = _stream out.c_obj = move(obj) return out @@ -292,7 +294,8 @@ cdef class ChunkedPack: dereference(self.c_obj).get_total_contiguous_size() ) ) - cdef cuda_stream_view stream = self.stream.view() + cdef Stream py_stream = self.stream + cdef cudaStream_t stream = py_stream.view().value() with nogil: while dereference(self.c_obj).has_next(): size = dereference(self.c_obj).next(d_span) @@ -301,22 +304,22 @@ cdef class ChunkedPack: d_span.data(), size, cudaMemcpyKind.cudaMemcpyDeviceToHost, - stream.value(), + stream, ) offset += size if err != cudaError.cudaSuccess: - stream.synchronize() + cudaStreamSynchronize(stream) raise RuntimeError( f"Memcpy in pack_to_host failed error: {err}" ) - stream.synchronize() + cudaStreamSynchronize(stream) return ( self.build_metadata(), memoryview(HostBuffer.from_unique_ptr(move(h_buf))), ) -cpdef PackedColumns pack(Table input, Stream stream=None, DeviceMemoryResource mr=None): +cpdef PackedColumns pack(Table input, object stream=None, DeviceMemoryResource mr=None): """Deep-copy a table into a serialized contiguous memory format. Later use `unpack` or `unpack_from_memoryviews` to unpack the serialized @@ -346,16 +349,17 @@ cpdef PackedColumns pack(Table input, Stream stream=None, DeviceMemoryResource m For details, see :cpp:func:`pack`. 
""" cdef unique_ptr[packed_columns] pack - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: pack = move(make_unique[packed_columns]( - cpp_pack(input.view(), stream.view(), mr.get_mr()) + cpp_pack(input.view(), _cs, mr.get_mr()) )) - return PackedColumns.from_libcudf(move(pack), stream, mr) + return PackedColumns.from_libcudf(move(pack), _stream, mr) -cpdef Table unpack(PackedColumns input, Stream stream=None): +cpdef Table unpack(PackedColumns input, object stream=None): """Deserialize the result of `pack`. Copies the result of a serialized table into a table. @@ -375,16 +379,16 @@ cpdef Table unpack(PackedColumns input, Stream stream=None): Copy of the packed columns. """ cdef table_view v - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) with nogil: v = cpp_unpack(dereference(input.c_obj)) - return Table.from_table_view_of_arbitrary(v, input, stream) + return Table.from_table_view_of_arbitrary(v, input, _stream) cpdef Table unpack_from_memoryviews( memoryview metadata, object gpu_data, - Stream stream=None, + object stream=None, ): """Deserialize the result of `pack`. @@ -406,7 +410,7 @@ cpdef Table unpack_from_memoryviews( Table Copy of the packed columns. """ - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) cdef device_span[uint8_t] d_span = _get_device_span(gpu_data) if metadata.nbytes == 0: @@ -416,7 +420,7 @@ cpdef Table unpack_from_memoryviews( # used for any operations. 
return Table.from_libcudf( make_unique[table](table_view()), - stream, + _stream, _get_memory_resource(), ) @@ -428,4 +432,4 @@ cpdef Table unpack_from_memoryviews( cdef table_view v with nogil: v = cpp_unpack(metadata_ptr, gpu_data_ptr) - return Table.from_table_view_of_arbitrary(v, gpu_data, stream) + return Table.from_table_view_of_arbitrary(v, gpu_data, _stream) diff --git a/python/pylibcudf/pylibcudf/copying.pxd b/python/pylibcudf/pylibcudf/copying.pxd index caaa590de15..4143e846994 100644 --- a/python/pylibcudf/pylibcudf/copying.pxd +++ b/python/pylibcudf/pylibcudf/copying.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool as cbool @@ -9,7 +9,6 @@ from pylibcudf.libcudf.copying cimport ( from pylibcudf.libcudf.types cimport size_type from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .scalar cimport Scalar @@ -40,7 +39,7 @@ cpdef Table gather( Table source_table, Column gather_map, out_of_bounds_policy bounds_policy, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -48,19 +47,19 @@ cpdef Table scatter( TableOrListOfScalars source, Column scatter_map, Table target_table, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef ColumnOrTable empty_like( - ColumnOrTable input, Stream stream=*, DeviceMemoryResource mr=* + ColumnOrTable input, object stream = *, DeviceMemoryResource mr=* ) cpdef Column allocate_like( Column input_column, mask_allocation_policy policy, size=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -70,7 +69,7 @@ cpdef Column copy_range_in_place( size_type input_begin, size_type input_end, size_type target_begin, - Stream stream=*, + object stream = *, ) cpdef Column copy_range( @@ -79,7 +78,7 @@ cpdef Column 
copy_range( size_type input_begin, size_type input_end, size_type target_begin, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -87,19 +86,19 @@ cpdef Column shift( Column input, size_type offset, Scalar fill_value, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) -cpdef list slice(ColumnOrTable input, list indices, Stream stream=*) +cpdef list slice(ColumnOrTable input, list indices, object stream = *) -cpdef list split(ColumnOrTable input, list splits, Stream stream=*) +cpdef list split(ColumnOrTable input, list splits, object stream = *) cpdef Column copy_if_else( LeftCopyIfElseOperand lhs, RightCopyIfElseOperand rhs, Column boolean_mask, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -107,13 +106,13 @@ cpdef Table boolean_mask_scatter( TableOrListOfScalars input, Table target, Column boolean_mask, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Scalar get_element( Column input_column, size_type index, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/copying.pyi b/python/pylibcudf/pylibcudf/copying.pyi index 04acecc2f1b..bdff6cddad5 100644 --- a/python/pylibcudf/pylibcudf/copying.pyi +++ b/python/pylibcudf/pylibcudf/copying.pyi @@ -1,15 +1,15 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from enum import IntEnum from typing import TypeVar from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar from pylibcudf.table import Table +from pylibcudf.utils import CudaStreamLike class MaskAllocationPolicy(IntEnum): NEVER = ... 
@@ -26,26 +26,26 @@ def gather( source_table: Table, gather_map: Column, bounds_policy: OutOfBoundsPolicy, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def scatter( source: Table | list[Scalar], scatter_map: Column, target_table: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def empty_like( input: ColumnOrTable, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> ColumnOrTable: ... def allocate_like( input_column: Column, policy: MaskAllocationPolicy, size: int | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def copy_range_in_place( @@ -54,7 +54,7 @@ def copy_range_in_place( input_begin: int, input_end: int, target_begin: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> Column: ... def copy_range( input_column: Column, @@ -62,39 +62,43 @@ def copy_range( input_begin: int, input_end: int, target_begin: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def shift( input: Column, offset: int, fill_value: Scalar, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def slice( - input: ColumnOrTable, indices: list[int], stream: Stream | None = None + input: ColumnOrTable, + indices: list[int], + stream: CudaStreamLike | None = None, ) -> list[ColumnOrTable]: ... def split( - input: ColumnOrTable, splits: list[int], stream: Stream | None = None + input: ColumnOrTable, + splits: list[int], + stream: CudaStreamLike | None = None, ) -> list[ColumnOrTable]: ... 
def copy_if_else( lhs: Column | Scalar, rhs: Column | Scalar, boolean_mask: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def boolean_mask_scatter( input: Table | list[Scalar], target: Table, boolean_mask: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def get_element( input_column: Column, index: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Scalar: ... diff --git a/python/pylibcudf/pylibcudf/copying.pyx b/python/pylibcudf/pylibcudf/copying.pyx index f8f44e03938..30be1ea7d0a 100644 --- a/python/pylibcudf/pylibcudf/copying.pyx +++ b/python/pylibcudf/pylibcudf/copying.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from cython.operator import dereference @@ -40,6 +40,7 @@ from .column cimport Column from .scalar cimport Scalar from .table cimport Table from .utils cimport _as_vector, _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ @@ -64,7 +65,7 @@ cpdef Table gather( Table source_table, Column gather_map, out_of_bounds_policy bounds_policy, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Select rows from source_table according to the provided gather_map. @@ -94,7 +95,8 @@ cpdef Table gather( If the gather_map contains nulls. 
""" cdef unique_ptr[table] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -102,18 +104,18 @@ cpdef Table gather( source_table.view(), gather_map.view(), bounds_policy, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Table scatter( TableOrListOfScalars source, Column scatter_map, Table target_table, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Scatter from source into target_table according to scatter_map. @@ -155,7 +157,8 @@ cpdef Table scatter( """ cdef unique_ptr[table] c_result cdef vector[reference_wrapper[const scalar]] source_scalars - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if TableOrListOfScalars is Table: @@ -164,7 +167,7 @@ cpdef Table scatter( source.view(), scatter_map.view(), target_table.view(), - stream.view(), + _cs, mr.get_mr() ) else: @@ -174,14 +177,14 @@ cpdef Table scatter( source_scalars, scatter_map.view(), target_table.view(), - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef ColumnOrTable empty_like( - ColumnOrTable input, Stream stream=None, DeviceMemoryResource mr=None + ColumnOrTable input, object stream=None, DeviceMemoryResource mr=None ): """Create an empty column or table with the same type as ``input``. 
@@ -201,23 +204,23 @@ cpdef ColumnOrTable empty_like( """ cdef unique_ptr[table] c_tbl_result cdef unique_ptr[column] c_col_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) mr = _get_memory_resource(mr) if ColumnOrTable is Column: with nogil: c_col_result = cpp_copying.empty_like(input.view()) - return Column.from_libcudf(move(c_col_result), stream, mr) + return Column.from_libcudf(move(c_col_result), _stream, mr) else: with nogil: c_tbl_result = cpp_copying.empty_like(input.view()) - return Table.from_libcudf(move(c_tbl_result), stream, mr) + return Table.from_libcudf(move(c_tbl_result), _stream, mr) cpdef Column allocate_like( Column input_column, mask_allocation_policy policy, size=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Allocate a column with the same type as input_column. @@ -244,7 +247,8 @@ cpdef Column allocate_like( cdef unique_ptr[column] c_result cdef size_type c_size = size if size is not None else input_column.size() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -252,11 +256,11 @@ cpdef Column allocate_like( input_column.view(), c_size, policy, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column copy_range_in_place( @@ -265,7 +269,7 @@ cpdef Column copy_range_in_place( size_type input_begin, size_type input_end, size_type target_begin, - Stream stream=None + object stream=None ): """Copy a range of elements from input_column to target_column. 
@@ -301,7 +305,8 @@ cpdef Column copy_range_in_place( """ cdef mutable_column_view target_view = target_column.mutable_view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() with nogil: cpp_copying.copy_range_in_place( @@ -310,7 +315,7 @@ cpdef Column copy_range_in_place( input_begin, input_end, target_begin, - stream.view() + _cs ) target_column.set_null_count(target_view.null_count()) @@ -321,7 +326,7 @@ cpdef Column copy_range( size_type input_begin, size_type input_end, size_type target_begin, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Copy a range of elements from input_column to target_column. @@ -357,7 +362,8 @@ cpdef Column copy_range( If target and source have different types. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -367,18 +373,18 @@ cpdef Column copy_range( input_begin, input_end, target_begin, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column shift( Column input, size_type offset, Scalar fill_value, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Shift the elements of input by offset. @@ -409,7 +415,8 @@ cpdef Column shift( of fixed width or string type. 
""" cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -417,13 +424,13 @@ cpdef Column shift( input.view(), offset, dereference(fill_value.c_obj), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) -cpdef list slice(ColumnOrTable input, list indices, Stream stream=None): +cpdef list slice(ColumnOrTable input, list indices, object stream=None): """Slice input according to indices. For details on the implementation, see :cpp:func:`slice`. @@ -454,11 +461,12 @@ cpdef list slice(ColumnOrTable input, list indices, Stream stream=None): cdef vector[column_view] c_col_result cdef vector[table_view] c_tbl_result cdef int i - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() if ColumnOrTable is Column: with nogil: - c_col_result = cpp_copying.slice(input.view(), c_indices, stream.view()) + c_col_result = cpp_copying.slice(input.view(), c_indices, _cs) return [ Column.from_column_view(c_col_result[i], input) @@ -466,7 +474,7 @@ cpdef list slice(ColumnOrTable input, list indices, Stream stream=None): ] else: with nogil: - c_tbl_result = cpp_copying.slice(input.view(), c_indices, stream.view()) + c_tbl_result = cpp_copying.slice(input.view(), c_indices, _cs) return [ Table.from_table_view(c_tbl_result[i], input) @@ -474,7 +482,7 @@ cpdef list slice(ColumnOrTable input, list indices, Stream stream=None): ] -cpdef list split(ColumnOrTable input, list splits, Stream stream=None): +cpdef list split(ColumnOrTable input, list splits, object stream=None): """Split input into multiple. For details on the implementation, see :cpp:func:`split`. 
@@ -497,11 +505,12 @@ cpdef list split(ColumnOrTable input, list splits, Stream stream=None): cdef vector[column_view] c_col_result cdef vector[table_view] c_tbl_result cdef int i - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() if ColumnOrTable is Column: with nogil: - c_col_result = cpp_copying.split(input.view(), c_splits, stream.view()) + c_col_result = cpp_copying.split(input.view(), c_splits, _cs) return [ Column.from_column_view(c_col_result[i], input) @@ -509,7 +518,7 @@ cpdef list split(ColumnOrTable input, list splits, Stream stream=None): ] else: with nogil: - c_tbl_result = cpp_copying.split(input.view(), c_splits, stream.view()) + c_tbl_result = cpp_copying.split(input.view(), c_splits, _cs) return [ Table.from_table_view(c_tbl_result[i], input) @@ -521,7 +530,7 @@ cpdef Column copy_if_else( LeftCopyIfElseOperand lhs, RightCopyIfElseOperand rhs, Column boolean_mask, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Copy elements from lhs or rhs into a new column according to boolean_mask. @@ -556,7 +565,8 @@ cpdef Column copy_if_else( columns), or if lhs and rhs are not of the same length (if both are columns). 
""" cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if LeftCopyIfElseOperand is Column and RightCopyIfElseOperand is Column: @@ -565,7 +575,7 @@ cpdef Column copy_if_else( lhs.view(), rhs.view(), boolean_mask.view(), - stream.view(), + _cs, mr.get_mr() ) elif LeftCopyIfElseOperand is Column and RightCopyIfElseOperand is Scalar: @@ -574,7 +584,7 @@ cpdef Column copy_if_else( lhs.view(), dereference(rhs.c_obj), boolean_mask.view(), - stream.view(), + _cs, mr.get_mr() ) elif LeftCopyIfElseOperand is Scalar and RightCopyIfElseOperand is Column: @@ -583,7 +593,7 @@ cpdef Column copy_if_else( dereference(lhs.c_obj), rhs.view(), boolean_mask.view(), - stream.view(), + _cs, mr.get_mr() ) else: @@ -592,18 +602,18 @@ cpdef Column copy_if_else( dereference(lhs.c_obj), dereference(rhs.c_obj), boolean_mask.view(), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Table boolean_mask_scatter( TableOrListOfScalars input, Table target, Column boolean_mask, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Scatter rows from input into target according to boolean_mask. 
@@ -641,7 +651,8 @@ cpdef Table boolean_mask_scatter( """ cdef unique_ptr[table] result cdef vector[reference_wrapper[const scalar]] source_scalars - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if TableOrListOfScalars is Table: @@ -650,7 +661,7 @@ cpdef Table boolean_mask_scatter( input.view(), target.view(), boolean_mask.view(), - stream.view(), + _cs, mr.get_mr() ) else: @@ -660,17 +671,17 @@ cpdef Table boolean_mask_scatter( source_scalars, target.view(), boolean_mask.view(), - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(result), stream, mr) + return Table.from_libcudf(move(result), _stream, mr) cpdef Scalar get_element( Column input_column, size_type index, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Get the element at index from input_column. @@ -697,12 +708,13 @@ cpdef Scalar get_element( If index is out of bounds. """ cdef unique_ptr[scalar] c_output - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_output = cpp_copying.get_element( - input_column.view(), index, stream.view(), mr.get_mr() + input_column.view(), index, _cs, mr.get_mr() ) return Scalar.from_libcudf(move(c_output)) diff --git a/python/pylibcudf/pylibcudf/datetime.pxd b/python/pylibcudf/pylibcudf/datetime.pxd index 1a93ee62c43..d7d15f0c19f 100644 --- a/python/pylibcudf/pylibcudf/datetime.pxd +++ b/python/pylibcudf/pylibcudf/datetime.pxd @@ -1,11 +1,10 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.libcudf.datetime cimport datetime_component, rounding_frequency from pylibcudf.scalar cimport Scalar from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream ctypedef fused ColumnOrScalar: Column @@ -14,54 +13,54 @@ ctypedef fused ColumnOrScalar: cpdef Column extract_datetime_component( Column input, datetime_component component, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) cpdef Column ceil_datetimes( Column input, rounding_frequency freq, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) cpdef Column floor_datetimes( Column input, rounding_frequency freq, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) cpdef Column round_datetimes( Column input, rounding_frequency freq, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) cpdef Column add_calendrical_months( Column timestamps, ColumnOrScalar months, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) cpdef Column day_of_year( - Column input, Stream stream = *, DeviceMemoryResource mr = * + Column input, object stream = *, DeviceMemoryResource mr = * ) cpdef Column is_leap_year( - Column input, Stream stream = *, DeviceMemoryResource mr = * + Column input, object stream = *, DeviceMemoryResource mr = * ) cpdef Column last_day_of_month( - Column input, Stream stream = *, DeviceMemoryResource mr = * + Column input, object stream = *, DeviceMemoryResource mr = * ) cpdef Column extract_quarter( - Column input, Stream stream = *, DeviceMemoryResource mr = * + Column input, object stream = *, DeviceMemoryResource mr = * ) cpdef Column days_in_month( - Column input, Stream stream = *, DeviceMemoryResource mr = * + Column input, object stream = *, DeviceMemoryResource mr = * ) diff --git a/python/pylibcudf/pylibcudf/datetime.pyi b/python/pylibcudf/pylibcudf/datetime.pyi 
index abcc608daa4..e671d2d18cf 100644 --- a/python/pylibcudf/pylibcudf/datetime.pyi +++ b/python/pylibcudf/pylibcudf/datetime.pyi @@ -1,13 +1,13 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from enum import IntEnum from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar +from pylibcudf.utils import CudaStreamLike class DatetimeComponent(IntEnum): YEAR = ... @@ -33,55 +33,55 @@ class RoundingFrequency(IntEnum): def extract_datetime_component( input: Column, component: DatetimeComponent, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def ceil_datetimes( input: Column, freq: RoundingFrequency, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def floor_datetimes( input: Column, freq: RoundingFrequency, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def round_datetimes( input: Column, freq: RoundingFrequency, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def add_calendrical_months( input: Column, months: Column | Scalar, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def day_of_year( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def is_leap_year( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
def last_day_of_month( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def extract_quarter( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def days_in_month( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/datetime.pyx b/python/pylibcudf/pylibcudf/datetime.pyx index 2a837c5b749..1e5270bad92 100644 --- a/python/pylibcudf/pylibcudf/datetime.pyx +++ b/python/pylibcudf/pylibcudf/datetime.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -30,6 +30,7 @@ from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .scalar cimport Scalar from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "DatetimeComponent", @@ -49,7 +50,7 @@ __all__ = [ cpdef Column extract_datetime_component( Column input, datetime_component component, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -73,19 +74,20 @@ cpdef Column extract_datetime_component( """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: result = cpp_extract_datetime_component( - input.view(), component, stream.view(), mr.get_mr() + input.view(), component, _cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column ceil_datetimes( Column input, 
rounding_frequency freq, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -109,17 +111,18 @@ cpdef Column ceil_datetimes( """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - result = cpp_ceil_datetimes(input.view(), freq, stream.view(), mr.get_mr()) - return Column.from_libcudf(move(result), stream, mr) + result = cpp_ceil_datetimes(input.view(), freq, _cs, mr.get_mr()) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column floor_datetimes( Column input, rounding_frequency freq, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -143,17 +146,18 @@ cpdef Column floor_datetimes( """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - result = cpp_floor_datetimes(input.view(), freq, stream.view(), mr.get_mr()) - return Column.from_libcudf(move(result), stream, mr) + result = cpp_floor_datetimes(input.view(), freq, _cs, mr.get_mr()) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column round_datetimes( Column input, rounding_frequency freq, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -177,17 +181,18 @@ cpdef Column round_datetimes( """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - result = cpp_round_datetimes(input.view(), freq, stream.view(), mr.get_mr()) - return Column.from_libcudf(move(result), stream, mr) + result = cpp_round_datetimes(input.view(), freq, _cs, mr.get_mr()) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column add_calendrical_months( Column input, 
ColumnOrScalar months, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -216,7 +221,8 @@ cpdef Column add_calendrical_months( cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -224,13 +230,13 @@ cpdef Column add_calendrical_months( input.view(), months.view() if ColumnOrScalar is Column else dereference(months.get()), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column day_of_year( - Column input, Stream stream=None, DeviceMemoryResource mr=None + Column input, object stream=None, DeviceMemoryResource mr=None ): """ Computes the day number since the start of @@ -253,15 +259,16 @@ cpdef Column day_of_year( """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - result = cpp_day_of_year(input.view(), stream.view(), mr.get_mr()) - return Column.from_libcudf(move(result), stream, mr) + result = cpp_day_of_year(input.view(), _cs, mr.get_mr()) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column is_leap_year( - Column input, Stream stream=None, DeviceMemoryResource mr=None + Column input, object stream=None, DeviceMemoryResource mr=None ): """ Check if the year of the given date is a leap year. 
@@ -283,15 +290,16 @@ cpdef Column is_leap_year( """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - result = cpp_is_leap_year(input.view(), stream.view(), mr.get_mr()) - return Column.from_libcudf(move(result), stream, mr) + result = cpp_is_leap_year(input.view(), _cs, mr.get_mr()) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column last_day_of_month( - Column input, Stream stream=None, DeviceMemoryResource mr=None + Column input, object stream=None, DeviceMemoryResource mr=None ): """ Computes the last day of the month. @@ -313,15 +321,16 @@ cpdef Column last_day_of_month( """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - result = cpp_last_day_of_month(input.view(), stream.view(), mr.get_mr()) - return Column.from_libcudf(move(result), stream, mr) + result = cpp_last_day_of_month(input.view(), _cs, mr.get_mr()) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column extract_quarter( - Column input, Stream stream=None, DeviceMemoryResource mr=None + Column input, object stream=None, DeviceMemoryResource mr=None ): """ Returns the quarter (ie. 
a value from {1, 2, 3, 4}) @@ -343,15 +352,16 @@ cpdef Column extract_quarter( """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - result = cpp_extract_quarter(input.view(), stream.view(), mr.get_mr()) - return Column.from_libcudf(move(result), stream, mr) + result = cpp_extract_quarter(input.view(), _cs, mr.get_mr()) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column days_in_month( - Column input, Stream stream=None, DeviceMemoryResource mr=None + Column input, object stream=None, DeviceMemoryResource mr=None ): """ Extract the number of days in the month. @@ -372,12 +382,13 @@ cpdef Column days_in_month( """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - result = cpp_days_in_month(input.view(), stream.view(), mr.get_mr()) - return Column.from_libcudf(move(result), stream, mr) + result = cpp_days_in_month(input.view(), _cs, mr.get_mr()) + return Column.from_libcudf(move(result), _stream, mr) DatetimeComponent.__str__ = DatetimeComponent.__repr__ RoundingFrequency.__str__ = RoundingFrequency.__repr__ diff --git a/python/pylibcudf/pylibcudf/experimental/_join_streams.pxd b/python/pylibcudf/pylibcudf/experimental/_join_streams.pxd index db9ca865197..832d572b467 100644 --- a/python/pylibcudf/pylibcudf/experimental/_join_streams.pxd +++ b/python/pylibcudf/pylibcudf/experimental/_join_streams.pxd @@ -1,6 +1,5 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 -from rmm.pylibrmm.stream cimport Stream -cpdef void join_streams(list streams, Stream stream) +cpdef void join_streams(list streams, object stream) diff --git a/python/pylibcudf/pylibcudf/experimental/_join_streams.pyi b/python/pylibcudf/pylibcudf/experimental/_join_streams.pyi index 522239c6a80..c9c2ba79e36 100644 --- a/python/pylibcudf/pylibcudf/experimental/_join_streams.pyi +++ b/python/pylibcudf/pylibcudf/experimental/_join_streams.pyi @@ -1,6 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 -from rmm.pylibrmm.stream import Stream +from pylibcudf.utils import CudaStreamLike -def join_streams(streams: list[Stream], stream: Stream) -> None: ... +def join_streams( + streams: list[CudaStreamLike], stream: CudaStreamLike +) -> None: ... diff --git a/python/pylibcudf/pylibcudf/experimental/_join_streams.pyx b/python/pylibcudf/pylibcudf/experimental/_join_streams.pyx index 7f3d2f228fb..d9efcb19ed9 100644 --- a/python/pylibcudf/pylibcudf/experimental/_join_streams.pyx +++ b/python/pylibcudf/pylibcudf/experimental/_join_streams.pyx @@ -1,21 +1,22 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 +from cuda.bindings.cyruntime cimport cudaStream_t from libcpp.vector cimport vector from pylibcudf.libcudf.detail.utilities cimport stream_pool as cpp_stream_pool +from pylibcudf.libcudf.detail.utilities.stream_pool cimport const_cudaStream_t from pylibcudf.libcudf.utilities.span cimport host_span -from rmm.librmm.cuda_stream_view cimport cuda_stream_view from rmm.pylibrmm.stream cimport Stream -ctypedef const cuda_stream_view const_cuda_stream_view +from ..utils cimport _get_stream __all__ = ["join_streams"] -cpdef void join_streams(list streams, Stream stream): +cpdef void join_streams(list streams, object stream): """Synchronize a stream to an event on a set of streams. This function synchronizes the joined stream with the waited-on streams @@ -42,15 +43,16 @@ cpdef void join_streams(list streams, Stream stream): >>> plc.experimental.join_streams([stream1, stream2], join_stream) >>> # ... continue work on join_stream ... """ - cdef Stream c_stream = stream - cdef vector[cuda_stream_view] c_streams + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() + cdef vector[cudaStream_t] c_streams c_streams.reserve(len(streams)) for s in streams: - c_streams.push_back((s).view()) + c_streams.push_back((_get_stream(s)).view().value()) with nogil: cpp_stream_pool.join_streams( - host_span[const_cuda_stream_view](c_streams.data(), c_streams.size()), - c_stream.view() + host_span[const_cudaStream_t](c_streams.data(), c_streams.size()), + _cs ) diff --git a/python/pylibcudf/pylibcudf/filling.pxd b/python/pylibcudf/pylibcudf/filling.pxd index b90d567b2c2..acb92e0212a 100644 --- a/python/pylibcudf/pylibcudf/filling.pxd +++ b/python/pylibcudf/pylibcudf/filling.pxd @@ -1,7 +1,6 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.libcudf.types cimport size_type -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .column cimport Column @@ -17,7 +16,7 @@ cpdef Column fill( size_type begin, size_type end, Scalar value, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -26,21 +25,21 @@ cpdef void fill_in_place( size_type c_begin, size_type c_end, Scalar value, - Stream stream = *, + object stream = *, ) cpdef Column sequence( size_type size, Scalar init, Scalar step, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) cpdef Table repeat( Table input_table, ColumnOrSize count, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -48,6 +47,6 @@ cpdef Column calendrical_month_sequence( size_type n, Scalar init, size_type months, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) diff --git a/python/pylibcudf/pylibcudf/filling.pyi b/python/pylibcudf/pylibcudf/filling.pyi index a1023f8016c..2789ecd5aca 100644 --- a/python/pylibcudf/pylibcudf/filling.pyi +++ b/python/pylibcudf/pylibcudf/filling.pyi @@ -1,32 +1,33 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 -from rmm.pylibrmm.stream import Stream - from pylibcudf.column import Column from pylibcudf.scalar import Scalar from pylibcudf.table import Table +from pylibcudf.utils import CudaStreamLike def fill( destination: Column, begin: int, end: int, value: Scalar, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> Column: ... def fill_in_place( destination: Column, begin: int, end: int, value: Scalar, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> None: ... 
def sequence( - size: int, init: Scalar, step: Scalar, stream: Stream | None = None + size: int, init: Scalar, step: Scalar, stream: CudaStreamLike | None = None ) -> Column: ... def repeat( - input_table: Table, count: Column | int, stream: Stream | None = None + input_table: Table, + count: Column | int, + stream: CudaStreamLike | None = None, ) -> Table: ... def calendrical_month_sequence( - n: int, init: Scalar, months: int, stream: Stream | None = None + n: int, init: Scalar, months: int, stream: CudaStreamLike | None = None ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/filling.pyx b/python/pylibcudf/pylibcudf/filling.pyx index 68e4862dfb8..ce6002eb24e 100644 --- a/python/pylibcudf/pylibcudf/filling.pyx +++ b/python/pylibcudf/pylibcudf/filling.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from cython.operator cimport dereference @@ -22,6 +22,7 @@ from .column cimport Column from .scalar cimport Scalar from .table cimport Table from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ @@ -37,7 +38,7 @@ cpdef Column fill( size_type begin, size_type end, Scalar value, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): @@ -68,7 +69,8 @@ cpdef Column fill( cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -77,17 +79,17 @@ cpdef Column fill( begin, end, dereference(( value).c_obj), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef void fill_in_place( Column destination, size_type begin, size_type end, Scalar value, - Stream stream=None, + object stream=None, ): """Fill 
destination column in place from begin to end with value. @@ -112,7 +114,8 @@ cpdef void fill_in_place( None """ - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() cdef mutable_column_view c_destination = destination.mutable_view() with nogil: @@ -121,7 +124,7 @@ cpdef void fill_in_place( begin, end, dereference(value.c_obj), - stream.view() + _cs ) destination.set_null_count(c_destination.null_count()) @@ -129,7 +132,7 @@ cpdef Column sequence( size_type size, Scalar init, Scalar step, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create a sequence column of size ``size`` with initial value ``init`` and step @@ -157,7 +160,8 @@ cpdef Column sequence( cdef unique_ptr[column] result cdef size_type c_size = size - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -165,16 +169,16 @@ cpdef Column sequence( c_size, dereference(init.c_obj), dereference(step.c_obj), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Table repeat( Table input_table, ColumnOrSize count, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Repeat rows of a Table. 
@@ -203,7 +207,8 @@ cpdef Table repeat( cdef unique_ptr[table] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if ColumnOrSize is Column: @@ -211,7 +216,7 @@ cpdef Table repeat( result = cpp_repeat( input_table.view(), count.view(), - stream.view(), + _cs, mr.get_mr() ) if ColumnOrSize is size_type: @@ -219,17 +224,17 @@ cpdef Table repeat( result = cpp_repeat( input_table.view(), count, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(result), stream, mr) + return Table.from_libcudf(move(result), _stream, mr) cpdef Column calendrical_month_sequence( size_type n, Scalar init, size_type months, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): @@ -256,7 +261,8 @@ cpdef Column calendrical_month_sequence( cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -264,7 +270,7 @@ cpdef Column calendrical_month_sequence( n, dereference(init.c_obj), months, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/groupby.pxd b/python/pylibcudf/pylibcudf/groupby.pxd index b5654ff6df8..a46146a145a 100644 --- a/python/pylibcudf/pylibcudf/groupby.pxd +++ b/python/pylibcudf/pylibcudf/groupby.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -19,7 +19,6 @@ from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.types cimport null_order, order from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .table cimport Table @@ -46,31 +45,31 @@ cdef class GroupBy: cdef unique_ptr[vector[null_order]] _null_precedence cpdef tuple aggregate( - self, list requests, Stream stream=*, DeviceMemoryResource mr=* + self, list requests, object stream = *, DeviceMemoryResource mr=* ) - cpdef tuple scan(self, list requests, Stream stream=*, DeviceMemoryResource mr=*) + cpdef tuple scan(self, list requests, object stream = *, DeviceMemoryResource mr=*) cpdef tuple shift( self, Table values, list offset, list fill_values, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef tuple replace_nulls( self, Table values, list replace_policies, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef tuple get_groups( - self, Table values=*, Stream stream=*, DeviceMemoryResource mr=* + self, Table values=*, object stream = *, DeviceMemoryResource mr=* ) @staticmethod cdef tuple _parse_outputs( pair[unique_ptr[table], vector[aggregation_result]] c_res, - Stream stream, + object stream, DeviceMemoryResource mr, ) diff --git a/python/pylibcudf/pylibcudf/groupby.pyi b/python/pylibcudf/pylibcudf/groupby.pyi index 75322706187..01c732175f4 100644 --- a/python/pylibcudf/pylibcudf/groupby.pyi +++ b/python/pylibcudf/pylibcudf/groupby.pyi @@ -1,8 +1,7 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.aggregation import Aggregation from pylibcudf.column import Column @@ -10,6 +9,7 @@ from pylibcudf.replace import ReplacePolicy from pylibcudf.scalar import Scalar from pylibcudf.table import Table from pylibcudf.types import NullOrder, NullPolicy, Order, Sorted +from pylibcudf.utils import CudaStreamLike class GroupByRequest: def __init__( @@ -28,13 +28,13 @@ class GroupBy: def aggregate( self, requests: list[GroupByRequest], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Table, list[Table]]: ... def scan( self, requests: list[GroupByRequest], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Table, list[Table]]: ... def shift( @@ -42,19 +42,19 @@ class GroupBy: values: Table, offset: list[int], fill_values: list[Scalar], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Table, Table]: ... def replace_nulls( self, value: Table, replace_policies: list[ReplacePolicy], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Table, Table]: ... def get_groups( self, values: Table | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[list[int], Table, Table]: ... diff --git a/python/pylibcudf/pylibcudf/groupby.pyx b/python/pylibcudf/pylibcudf/groupby.pyx index 94a292996a0..4b2f842a360 100644 --- a/python/pylibcudf/pylibcudf/groupby.pyx +++ b/python/pylibcudf/pylibcudf/groupby.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from cython.operator cimport dereference @@ -28,6 +28,7 @@ from .column cimport Column from .table cimport Table from .types cimport null_order, null_policy, order, sorted from .utils cimport _as_vector, _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["GroupBy", "GroupByRequest"] @@ -141,12 +142,13 @@ cdef class GroupBy: @staticmethod cdef tuple _parse_outputs( pair[unique_ptr[table], vector[aggregation_result]] c_res, - Stream stream, + object stream, DeviceMemoryResource mr, ): # Convert libcudf aggregation/scan outputs into pylibcudf objects. # This function is for internal use only. - cdef Table group_keys = Table.from_libcudf(move(c_res.first), stream, mr) + cdef Stream _stream = stream + cdef Table group_keys = Table.from_libcudf(move(c_res.first), _stream, mr) cdef int i, j cdef list results = [] @@ -155,13 +157,13 @@ cdef class GroupBy: inner_results = [] for j in range(c_res.second[i].results.size()): inner_results.append( - Column.from_libcudf(move(c_res.second[i].results[j]), stream, mr) + Column.from_libcudf(move(c_res.second[i].results[j]), _stream, mr) ) results.append(Table(inner_results)) return group_keys, results cpdef tuple aggregate( - self, list requests, Stream stream=None, DeviceMemoryResource mr=None + self, list requests, object stream=None, DeviceMemoryResource mr=None ): """Compute aggregations on columns. @@ -189,19 +191,20 @@ cdef class GroupBy: c_requests.push_back(move(request._to_libcudf_agg_request())) cdef pair[unique_ptr[table], vector[aggregation_result]] c_res - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) # TODO: Need to capture C++ exceptions indicating that an invalid type was used. # We rely on libcudf to tell us this rather than checking the types beforehand # ourselves. 
with nogil: c_res = dereference(self.c_obj).aggregate( - c_requests, stream.view(), mr.get_mr() + c_requests, _cs, mr.get_mr() ) - return GroupBy._parse_outputs(move(c_res), stream, mr) + return GroupBy._parse_outputs(move(c_res), _stream, mr) cpdef tuple scan( - self, list requests, Stream stream=None, DeviceMemoryResource mr=None + self, list requests, object stream=None, DeviceMemoryResource mr=None ): """Compute scans on columns. @@ -229,18 +232,23 @@ cdef class GroupBy: c_requests.push_back(move(request._to_libcudf_scan_request())) cdef pair[unique_ptr[table], vector[aggregation_result]] c_res - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_res = dereference(self.c_obj).scan(c_requests, stream.view(), mr.get_mr()) - return GroupBy._parse_outputs(move(c_res), stream, mr) + c_res = dereference(self.c_obj).scan( + c_requests, + _cs, + mr.get_mr(), + ) + return GroupBy._parse_outputs(move(c_res), _stream, mr) cpdef tuple shift( self, Table values, list offset, list fill_values, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Compute shifts on columns. 
@@ -269,26 +277,27 @@ cdef class GroupBy: cdef vector[size_type] c_offset = offset cdef pair[unique_ptr[table], unique_ptr[table]] c_res - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_res = dereference(self.c_obj).shift( values.view(), c_offset, c_fill_values, - stream.view(), + _cs, mr.get_mr() ) return ( - Table.from_libcudf(move(c_res.first), stream, mr), - Table.from_libcudf(move(c_res.second), stream, mr), + Table.from_libcudf(move(c_res.first), _stream, mr), + Table.from_libcudf(move(c_res.second), _stream, mr), ) cpdef tuple replace_nulls( self, Table value, list replace_policies, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Replace nulls in columns. @@ -312,22 +321,23 @@ cdef class GroupBy: """ cdef pair[unique_ptr[table], unique_ptr[table]] c_res cdef vector[replace_policy] c_replace_policies = replace_policies - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_res = dereference(self.c_obj).replace_nulls( value.view(), c_replace_policies, - stream.view(), + _cs, mr.get_mr() ) return ( - Table.from_libcudf(move(c_res.first), stream, mr), - Table.from_libcudf(move(c_res.second), stream, mr), + Table.from_libcudf(move(c_res.first), _stream, mr), + Table.from_libcudf(move(c_res.second), _stream, mr), ) cpdef tuple get_groups( - self, Table values=None, Stream stream=None, DeviceMemoryResource mr=None + self, Table values=None, object stream=None, DeviceMemoryResource mr=None ): """Get the grouped keys and values labels for each row. 
@@ -352,24 +362,24 @@ cdef class GroupBy: cdef groups c_groups cdef table_view empty_view - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) mr = _get_memory_resource(mr) if values: c_groups = dereference(self.c_obj).get_groups( - values.view(), stream.view(), mr.get_mr() + values.view(), _stream.view().value(), mr.get_mr() ) return ( c_groups.offsets, - Table.from_libcudf(move(c_groups.keys), stream, mr), - Table.from_libcudf(move(c_groups.values), stream, mr), + Table.from_libcudf(move(c_groups.keys), _stream, mr), + Table.from_libcudf(move(c_groups.values), _stream, mr), ) else: # c_groups.values is nullptr - call get_groups with empty table view c_groups = dereference(self.c_obj).get_groups( - empty_view, stream.view(), mr.get_mr() + empty_view, _stream.view().value(), mr.get_mr() ) return ( c_groups.offsets, - Table.from_libcudf(move(c_groups.keys), stream, mr), + Table.from_libcudf(move(c_groups.keys), _stream, mr), None, ) diff --git a/python/pylibcudf/pylibcudf/hashing.pxd b/python/pylibcudf/pylibcudf/hashing.pxd index 4febd6e4949..b824f2dbcb8 100644 --- a/python/pylibcudf/pylibcudf/hashing.pxd +++ b/python/pylibcudf/pylibcudf/hashing.pxd @@ -1,9 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libc.stdint cimport uint32_t, uint64_t from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .table cimport Table @@ -12,34 +11,34 @@ from .table cimport Table cpdef Column murmurhash3_x86_32( Table input, uint32_t seed=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) cpdef Table murmurhash3_x64_128( Table input, uint64_t seed=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) cpdef Column xxhash_32( Table input, uint32_t seed=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) cpdef Column xxhash_64( Table input, uint64_t seed=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) -cpdef Column md5(Table input, Stream stream=*, DeviceMemoryResource mr=*) -cpdef Column sha1(Table input, Stream stream=*, DeviceMemoryResource mr=*) -cpdef Column sha224(Table input, Stream stream=*, DeviceMemoryResource mr=*) -cpdef Column sha256(Table input, Stream stream=*, DeviceMemoryResource mr=*) -cpdef Column sha384(Table input, Stream stream=*, DeviceMemoryResource mr=*) -cpdef Column sha512(Table input, Stream stream=*, DeviceMemoryResource mr=*) +cpdef Column md5(Table input, object stream = *, DeviceMemoryResource mr=*) +cpdef Column sha1(Table input, object stream = *, DeviceMemoryResource mr=*) +cpdef Column sha224(Table input, object stream = *, DeviceMemoryResource mr=*) +cpdef Column sha256(Table input, object stream = *, DeviceMemoryResource mr=*) +cpdef Column sha384(Table input, object stream = *, DeviceMemoryResource mr=*) +cpdef Column sha512(Table input, object stream = *, DeviceMemoryResource mr=*) diff --git a/python/pylibcudf/pylibcudf/hashing.pyi b/python/pylibcudf/pylibcudf/hashing.pyi index 1b8d055368a..dae03796b9c 100644 --- a/python/pylibcudf/pylibcudf/hashing.pyi +++ b/python/pylibcudf/pylibcudf/hashing.pyi @@ -1,67 +1,67 @@ -# SPDX-FileCopyrightText: 
Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from typing import Final from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.table import Table +from pylibcudf.utils import CudaStreamLike LIBCUDF_DEFAULT_HASH_SEED: Final[int] def murmurhash3_x86_32( input: Table, seed: int = ..., - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def murmurhash3_x64_128( input: Table, seed: int = ..., - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def xxhash_32( input: Table, seed: int = ..., - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def xxhash_64( input: Table, seed: int = ..., - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def md5( input: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def sha1( input: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def sha224( input: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def sha256( input: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def sha384( input: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
def sha512( input: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/hashing.pyx b/python/pylibcudf/pylibcudf/hashing.pyx index d9db52720bf..941393cf949 100644 --- a/python/pylibcudf/pylibcudf/hashing.pyx +++ b/python/pylibcudf/pylibcudf/hashing.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libc.stdint cimport uint32_t, uint64_t from libcpp.memory cimport unique_ptr @@ -24,6 +24,7 @@ from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .table cimport Table from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "LIBCUDF_DEFAULT_HASH_SEED", @@ -44,7 +45,7 @@ LIBCUDF_DEFAULT_HASH_SEED = DEFAULT_HASH_SEED cpdef Column murmurhash3_x86_32( Table input, uint32_t seed=DEFAULT_HASH_SEED, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Computes the MurmurHash3 32-bit hash value of each row in the given table. @@ -65,24 +66,25 @@ cpdef Column murmurhash3_x86_32( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_murmurhash3_x86_32( input.view(), seed, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Table murmurhash3_x64_128( Table input, uint64_t seed=DEFAULT_HASH_SEED, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Computes the MurmurHash3 64-bit hash value of each row in the given table. 
@@ -103,24 +105,25 @@ cpdef Table murmurhash3_x64_128( """ cdef unique_ptr[table] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_murmurhash3_x64_128( input.view(), seed, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Column xxhash_32( Table input, uint32_t seed=DEFAULT_HASH_SEED, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Computes the xxHash 32-bit hash value of each row in the given table. @@ -142,24 +145,25 @@ cpdef Column xxhash_32( cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_xxhash_32( input.view(), seed, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column xxhash_64( Table input, uint64_t seed=DEFAULT_HASH_SEED, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Computes the xxHash 64-bit hash value of each row in the given table. @@ -181,23 +185,24 @@ cpdef Column xxhash_64( cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_xxhash_64( input.view(), seed, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column md5( Table input, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Computes the MD5 hash value of each row in the given table. 
@@ -220,16 +225,17 @@ cpdef Column md5( cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_md5(input.view(), stream.view(), mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + c_result = cpp_md5(input.view(), _cs, mr.get_mr()) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column sha1( Table input, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Computes the SHA-1 hash value of each row in the given table. @@ -250,17 +256,18 @@ cpdef Column sha1( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_sha1(input.view(), stream.view(), mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + c_result = cpp_sha1(input.view(), _cs, mr.get_mr()) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column sha224( Table input, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Computes the SHA-224 hash value of each row in the given table. @@ -281,17 +288,18 @@ cpdef Column sha224( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_sha224(input.view(), stream.view(), mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + c_result = cpp_sha224(input.view(), _cs, mr.get_mr()) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column sha256( Table input, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Computes the SHA-256 hash value of each row in the given table. 
@@ -312,17 +320,18 @@ cpdef Column sha256( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_sha256(input.view(), stream.view(), mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + c_result = cpp_sha256(input.view(), _cs, mr.get_mr()) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column sha384( Table input, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Computes the SHA-384 hash value of each row in the given table. @@ -343,17 +352,18 @@ cpdef Column sha384( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_sha384(input.view(), stream.view(), mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + c_result = cpp_sha384(input.view(), _cs, mr.get_mr()) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column sha512( Table input, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Computes the SHA-512 hash value of each row in the given table. 
@@ -374,9 +384,10 @@ cpdef Column sha512( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_sha512(input.view(), stream.view(), mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + c_result = cpp_sha512(input.view(), _cs, mr.get_mr()) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/interop.pxd b/python/pylibcudf/pylibcudf/interop.pxd index dfa62233541..942b9e806bc 100644 --- a/python/pylibcudf/pylibcudf/interop.pxd +++ b/python/pylibcudf/pylibcudf/interop.pxd @@ -1,12 +1,11 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from pylibcudf.table cimport Table -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource cpdef Table from_dlpack( - object managed_tensor, Stream stream=*, DeviceMemoryResource mr=* + object managed_tensor, object stream = *, DeviceMemoryResource mr=* ) -cpdef object to_dlpack(Table input, Stream stream=*, DeviceMemoryResource mr=*) +cpdef object to_dlpack(Table input, object stream = *, DeviceMemoryResource mr=*) diff --git a/python/pylibcudf/pylibcudf/interop.pyi b/python/pylibcudf/pylibcudf/interop.pyi index 0c10d71ec4f..34fe9394f7d 100644 --- a/python/pylibcudf/pylibcudf/interop.pyi +++ b/python/pylibcudf/pylibcudf/interop.pyi @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from collections.abc import Iterable, Mapping @@ -8,12 +8,12 @@ from typing import Any, overload import pyarrow as pa from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar from pylibcudf.table import Table from pylibcudf.types import DataType +from pylibcudf.utils import CudaStreamLike @dataclass class ColumnMetadata: @@ -33,14 +33,14 @@ def from_arrow( obj: pa.Array[Any], *, data_type: DataType | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... @overload def from_arrow( obj: pa.Table, *, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... @overload @@ -67,11 +67,11 @@ def to_arrow( ) -> pa.Scalar[Any]: ... def from_dlpack( managed_tensor: Any, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def to_dlpack( input: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Any: ... 
diff --git a/python/pylibcudf/pylibcudf/interop.pyx b/python/pylibcudf/pylibcudf/interop.pyx index ffc14415470..23c47bb090f 100644 --- a/python/pylibcudf/pylibcudf/interop.pyx +++ b/python/pylibcudf/pylibcudf/interop.pyx @@ -23,6 +23,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .table cimport Table from .utils cimport _get_stream, _get_memory_resource from ._interop_helpers import ColumnMetadata +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ @@ -35,7 +36,7 @@ __all__ = [ cpdef Table from_dlpack( - object managed_tensor, Stream stream=None, DeviceMemoryResource mr=None + object managed_tensor, object stream=None, DeviceMemoryResource mr=None ): """ Convert a DLPack DLTensor into a cudf table. @@ -65,7 +66,8 @@ cpdef Table from_dlpack( if dlpack_tensor is NULL: raise ValueError("PyCapsule object contained a NULL pointer") PyCapsule_SetName(managed_tensor, "used_dltensor") - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) # Note: A copy is always performed when converting the dlpack @@ -74,14 +76,14 @@ cpdef Table from_dlpack( # TODO: https://github.com/rapidsai/cudf/issues/10874 # TODO: https://github.com/rapidsai/cudf/issues/10849 with nogil: - c_result = cpp_from_dlpack(dlpack_tensor, stream.view(), mr.get_mr()) + c_result = cpp_from_dlpack(dlpack_tensor, _cs, mr.get_mr()) - cdef Table result = Table.from_libcudf(move(c_result), stream, mr) + cdef Table result = Table.from_libcudf(move(c_result), _stream, mr) dlpack_tensor.deleter(dlpack_tensor) return result -cpdef object to_dlpack(Table input, Stream stream=None, DeviceMemoryResource mr=None): +cpdef object to_dlpack(Table input, object stream=None, DeviceMemoryResource mr=None): """ Convert a cudf table into a DLPack DLTensor. 
@@ -109,11 +111,12 @@ cpdef object to_dlpack(Table input, Stream stream=None, DeviceMemoryResource mr= "Input is required to have null count as zero." ) cdef DLManagedTensor *dlpack_tensor - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - dlpack_tensor = cpp_to_dlpack(input.view(), stream.view(), mr.get_mr()) + dlpack_tensor = cpp_to_dlpack(input.view(), _cs, mr.get_mr()) return PyCapsule_New( dlpack_tensor, diff --git a/python/pylibcudf/pylibcudf/io/avro.pxd b/python/pylibcudf/pylibcudf/io/avro.pxd index d76f2c1e628..0e8cb7ee283 100644 --- a/python/pylibcudf/pylibcudf/io/avro.pxd +++ b/python/pylibcudf/pylibcudf/io/avro.pxd @@ -1,6 +1,5 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from pylibcudf.io.types cimport SourceInfo, TableWithMetadata @@ -29,5 +28,5 @@ cdef class AvroReaderOptionsBuilder: cpdef AvroReaderOptions build(self) cpdef TableWithMetadata read_avro( - AvroReaderOptions options, Stream stream = *, DeviceMemoryResource mr=* + AvroReaderOptions options, object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/io/avro.pyi b/python/pylibcudf/pylibcudf/io/avro.pyi index d7b6c87d388..7e41c39a2be 100644 --- a/python/pylibcudf/pylibcudf/io/avro.pyi +++ b/python/pylibcudf/pylibcudf/io/avro.pyi @@ -1,9 +1,9 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.io.types import SourceInfo, TableWithMetadata +from pylibcudf.utils import CudaStreamLike __all__ = ["AvroReaderOptions", "AvroReaderOptionsBuilder", "read_avro"] @@ -21,6 +21,6 @@ class AvroReaderOptionsBuilder: def read_avro( options: AvroReaderOptions, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> TableWithMetadata: ... diff --git a/python/pylibcudf/pylibcudf/io/avro.pyx b/python/pylibcudf/pylibcudf/io/avro.pyx index 9c5e2c05b11..f2bd021cdde 100644 --- a/python/pylibcudf/pylibcudf/io/avro.pyx +++ b/python/pylibcudf/pylibcudf/io/avro.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.string cimport string @@ -6,6 +6,7 @@ from libcpp.utility cimport move from libcpp.vector cimport vector from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from pylibcudf.io.types cimport SourceInfo, TableWithMetadata @@ -152,7 +153,7 @@ cdef class AvroReaderOptionsBuilder: cpdef TableWithMetadata read_avro( AvroReaderOptions options, - Stream stream = None, + object stream = None, DeviceMemoryResource mr=None, ): """ @@ -173,8 +174,9 @@ cpdef TableWithMetadata read_avro( Device memory resource used to allocate the returned table's device memory. 
""" cdef Stream s = _get_stream(stream) + cdef cudaStream_t _cs = s.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = move(cpp_read_avro(options.c_obj, s.view(), mr.get_mr())) + c_result = move(cpp_read_avro(options.c_obj, _cs, mr.get_mr())) return TableWithMetadata.from_libcudf(c_result, s, mr) diff --git a/python/pylibcudf/pylibcudf/io/csv.pxd b/python/pylibcudf/pylibcudf/io/csv.pxd index 2f138e3aaa1..4293452311d 100644 --- a/python/pylibcudf/pylibcudf/io/csv.pxd +++ b/python/pylibcudf/pylibcudf/io/csv.pxd @@ -1,10 +1,9 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool from libcpp.string cimport string from libcpp.vector cimport vector -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from pylibcudf.io.types cimport SinkInfo, SourceInfo, TableWithMetadata @@ -74,7 +73,7 @@ cdef class CsvReaderOptionsBuilder: cpdef CsvReaderOptions build(self) cpdef TableWithMetadata read_csv( - CsvReaderOptions options, Stream stream = *, DeviceMemoryResource mr=* + CsvReaderOptions options, object stream = *, DeviceMemoryResource mr=* ) cdef class CsvWriterOptions: @@ -98,6 +97,6 @@ cdef class CsvWriterOptionsBuilder: cpdef CsvWriterOptions build(self) -cpdef void write_csv(CsvWriterOptions options, Stream stream = *) +cpdef void write_csv(CsvWriterOptions options, object stream = *) cpdef bool is_supported_write_csv(DataType type) diff --git a/python/pylibcudf/pylibcudf/io/csv.pyi b/python/pylibcudf/pylibcudf/io/csv.pyi index ade964da509..41465b3ba43 100644 --- a/python/pylibcudf/pylibcudf/io/csv.pyi +++ b/python/pylibcudf/pylibcudf/io/csv.pyi @@ -4,7 +4,6 @@ from typing import Self from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.io.types import ( CompressionType, @@ -15,6 +14,7 
@@ from pylibcudf.io.types import ( ) from pylibcudf.table import Table from pylibcudf.types import DataType +from pylibcudf.utils import CudaStreamLike class CsvReaderOptions: def __init__(self): ... @@ -61,10 +61,12 @@ class CsvReaderOptionsBuilder: def read_csv( options: CsvReaderOptions, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> TableWithMetadata: ... -def write_csv(options: CsvWriterOptions, stream: Stream | None = None): ... +def write_csv( + options: CsvWriterOptions, stream: CudaStreamLike | None = None +): ... class CsvWriterOptions: def __init__(self): ... diff --git a/python/pylibcudf/pylibcudf/io/csv.pyx b/python/pylibcudf/pylibcudf/io/csv.pyx index 749cd45fcb5..1c3ae9cb0bf 100644 --- a/python/pylibcudf/pylibcudf/io/csv.pyx +++ b/python/pylibcudf/pylibcudf/io/csv.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool @@ -8,6 +8,7 @@ from libcpp.utility cimport move from libcpp.vector cimport vector from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from pylibcudf.io.types cimport SourceInfo, SinkInfo, TableWithMetadata @@ -672,7 +673,7 @@ cdef class CsvReaderOptionsBuilder: cpdef TableWithMetadata read_csv( CsvReaderOptions options, - Stream stream = None, + object stream = None, DeviceMemoryResource mr=None, ): """ @@ -694,9 +695,10 @@ cpdef TableWithMetadata read_csv( """ cdef table_with_metadata c_result cdef Stream s = _get_stream(stream) + cdef cudaStream_t _cs = s.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = move(cpp_read_csv(options.c_obj, s.view(), mr.get_mr())) + c_result = move(cpp_read_csv(options.c_obj, _cs, mr.get_mr())) cdef TableWithMetadata tbl_meta = TableWithMetadata.from_libcudf(c_result, s, mr) return tbl_meta @@ -882,7 +884,7 @@ cdef class CsvWriterOptionsBuilder: cpdef void write_csv( CsvWriterOptions options, - Stream stream = None, + object stream = None, ): """ Write to CSV format. 
@@ -900,8 +902,9 @@ cpdef void write_csv( CUDA stream used for device memory operations and kernel launches """ cdef Stream s = _get_stream(stream) + cdef cudaStream_t _cs = s.view().value() with nogil: - cpp_write_csv(move(options.c_obj), s.view()) + cpp_write_csv(move(options.c_obj), _cs) cpdef bool is_supported_write_csv(DataType type): diff --git a/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pxd b/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pxd index 298b36651c3..8c471831823 100644 --- a/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pxd +++ b/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pxd @@ -32,5 +32,5 @@ cdef class FileMetaData: cdef class HybridScanReader: cdef unique_ptr[cpp_hybrid_scan_reader] c_obj - cdef Stream stream + cdef Stream _stream cdef DeviceMemoryResource mr diff --git a/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pyi b/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pyi index 0f0429a66db..6f1fbc250d8 100644 --- a/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pyi +++ b/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pyi @@ -4,13 +4,13 @@ from enum import IntEnum from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.io.parquet import ParquetReaderOptions from pylibcudf.io.text import ByteRangeInfo from pylibcudf.io.types import TableWithMetadata from pylibcudf.span import Span +from pylibcudf.utils import CudaStreamLike class UseDataPageMask(IntEnum): YES: int @@ -44,7 +44,7 @@ class HybridScanReader: self, row_group_indices: list[int], options: ParquetReaderOptions, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> list[int]: ... 
def secondary_filters_byte_ranges( self, row_group_indices: list[int], options: ParquetReaderOptions @@ -54,20 +54,20 @@ class HybridScanReader: dictionary_page_data: list[Span], row_group_indices: list[int], options: ParquetReaderOptions, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> list[int]: ... def filter_row_groups_with_bloom_filters( self, bloom_filter_data: list[Span], row_group_indices: list[int], options: ParquetReaderOptions, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> list[int]: ... def build_row_mask_with_page_index_stats( self, row_group_indices: list[int], options: ParquetReaderOptions, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def filter_column_chunks_byte_ranges( @@ -80,7 +80,7 @@ class HybridScanReader: row_mask: Column, mask_data_pages: UseDataPageMask, options: ParquetReaderOptions, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> TableWithMetadata: ... def payload_column_chunks_byte_ranges( @@ -93,7 +93,7 @@ class HybridScanReader: row_mask: Column, mask_data_pages: UseDataPageMask, options: ParquetReaderOptions, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> TableWithMetadata: ... def all_column_chunks_byte_ranges( @@ -104,7 +104,7 @@ class HybridScanReader: row_group_indices: list[int], column_chunk_data: list[Span], options: ParquetReaderOptions, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> TableWithMetadata: ... 
def setup_chunking_for_filter_columns( @@ -116,7 +116,7 @@ class HybridScanReader: mask_data_pages: UseDataPageMask, column_chunk_data: list[Span], options: ParquetReaderOptions, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> None: ... def materialize_filter_columns_chunk( @@ -132,7 +132,7 @@ class HybridScanReader: mask_data_pages: UseDataPageMask, column_chunk_data: list[Span], options: ParquetReaderOptions, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> None: ... def materialize_payload_columns_chunk( diff --git a/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pyx b/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pyx index beb28f6a1b0..4d25a05d362 100644 --- a/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pyx +++ b/python/pylibcudf/pylibcudf/io/experimental/hybrid_scan.pyx @@ -225,7 +225,7 @@ cdef class HybridScanReader: self, list row_group_indices, ParquetReaderOptions options, - Stream stream=None + object stream=None ): """Filter row groups using column chunk statistics. @@ -243,7 +243,7 @@ cdef class HybridScanReader: list[int] Filtered row group indices """ - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) cdef vector[size_type] indices_vec = row_group_indices cdef vector[size_type] filtered = ( self.c_obj.get()[0].filter_row_groups_with_stats( @@ -251,7 +251,7 @@ cdef class HybridScanReader: indices_vec.data(), indices_vec.size() ), options.c_obj, - stream.view() + _stream.view().value() ) ) return list(filtered) @@ -295,7 +295,7 @@ cdef class HybridScanReader: list dictionary_page_data, list row_group_indices, ParquetReaderOptions options, - Stream stream=None + object stream=None ): """Filter row groups using column chunk dictionary pages. 
@@ -316,7 +316,7 @@ cdef class HybridScanReader: Filtered row group indices """ cdef vector[device_span[const_uint8_t]] spans_vec - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) for span in dictionary_page_data: spans_vec.push_back(_get_device_span(span)) @@ -329,7 +329,7 @@ cdef class HybridScanReader: ), host_span[const_size_type](indices_vec.data(), indices_vec.size()), options.c_obj, - stream.view() + _stream.view().value() ) return list(filtered) @@ -338,7 +338,7 @@ cdef class HybridScanReader: list bloom_filter_data, list row_group_indices, ParquetReaderOptions options, - Stream stream=None + object stream=None ): """Filter row groups using column chunk bloom filters. @@ -359,7 +359,7 @@ cdef class HybridScanReader: Filtered row group indices """ cdef vector[device_span[const_uint8_t]] spans_vec - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) for span in bloom_filter_data: spans_vec.push_back(_get_device_span(span)) @@ -372,7 +372,7 @@ cdef class HybridScanReader: ), host_span[const_size_type](indices_vec.data(), indices_vec.size()), options.c_obj, - stream.view() + _stream.view().value() ) return list(filtered) @@ -380,7 +380,7 @@ cdef class HybridScanReader: self, list row_group_indices, ParquetReaderOptions options, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Build a boolean column indicating surviving rows from page stats. 
@@ -402,16 +402,16 @@ cdef class HybridScanReader: Boolean column indicating surviving rows """ cdef vector[size_type] indices_vec = row_group_indices - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) mr = _get_memory_resource(mr) cdef unique_ptr[column] c_result = \ self.c_obj.get()[0].build_row_mask_with_page_index_stats( host_span[const_size_type](indices_vec.data(), indices_vec.size()), options.c_obj, - stream.view(), + _stream.view().value(), mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) def filter_column_chunks_byte_ranges( self, @@ -447,7 +447,7 @@ cdef class HybridScanReader: Column row_mask, cpp_use_data_page_mask mask_data_pages, ParquetReaderOptions options, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Materialize filter columns and update the row mask. @@ -477,7 +477,7 @@ cdef class HybridScanReader: cdef vector[size_type] indices_vec = row_group_indices cdef vector[device_span[const_uint8_t]] spans_vec - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) mr = _get_memory_resource(mr) for span in column_chunk_data: spans_vec.push_back(_get_device_span(span)) @@ -492,10 +492,10 @@ cdef class HybridScanReader: mask_view, mask_data_pages, options.c_obj, - stream.view(), + _stream.view().value(), mr.get_mr() ) - return TableWithMetadata.from_libcudf(c_result, stream, mr) + return TableWithMetadata.from_libcudf(c_result, _stream, mr) def payload_column_chunks_byte_ranges( self, @@ -531,7 +531,7 @@ cdef class HybridScanReader: Column row_mask, cpp_use_data_page_mask mask_data_pages, ParquetReaderOptions options, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Materialize payload columns and apply the row mask. 
@@ -561,7 +561,7 @@ cdef class HybridScanReader: cdef vector[size_type] indices_vec = row_group_indices cdef vector[device_span[const_uint8_t]] spans_vec - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) mr = _get_memory_resource(mr) for span in column_chunk_data: spans_vec.push_back(_get_device_span(span)) @@ -576,10 +576,10 @@ cdef class HybridScanReader: mask_view, mask_data_pages, options.c_obj, - stream.view(), + _stream.view().value(), mr.get_mr() ) - return TableWithMetadata.from_libcudf(c_result, stream, mr) + return TableWithMetadata.from_libcudf(c_result, _stream, mr) def all_column_chunks_byte_ranges( self, @@ -613,7 +613,7 @@ cdef class HybridScanReader: list row_group_indices, list column_chunk_data, ParquetReaderOptions options, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Materialize all columns. @@ -639,7 +639,7 @@ cdef class HybridScanReader: cdef vector[size_type] indices_vec = row_group_indices cdef vector[device_span[const_uint8_t]] spans_vec - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) mr = _get_memory_resource(mr) for span in column_chunk_data: spans_vec.push_back(_get_device_span(span)) @@ -650,10 +650,10 @@ cdef class HybridScanReader: spans_vec.data(), spans_vec.size() ), options.c_obj, - stream.view(), + _stream.view().value(), mr.get_mr() ) - return TableWithMetadata.from_libcudf(c_result, stream, mr) + return TableWithMetadata.from_libcudf(c_result, _stream, mr) def setup_chunking_for_filter_columns( self, @@ -664,7 +664,7 @@ cdef class HybridScanReader: cpp_use_data_page_mask mask_data_pages, list column_chunk_data, ParquetReaderOptions options, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Setup chunking information for filter columns. 
@@ -696,7 +696,7 @@ cdef class HybridScanReader: for span in column_chunk_data: spans_vec.push_back(_get_device_span(span)) - self.stream = _get_stream(stream) + self._stream = _get_stream(stream) self.mr = _get_memory_resource(mr) cdef column_view mask_view = row_mask.view() @@ -710,7 +710,7 @@ cdef class HybridScanReader: spans_vec.data(), spans_vec.size() ), options.c_obj, - self.stream.view(), + self._stream.view().value(), self.mr.get_mr() ) @@ -735,7 +735,7 @@ cdef class HybridScanReader: mask_view ) return TableWithMetadata.from_libcudf( - c_result, self.stream, self.mr + c_result, self._stream, self.mr ) def setup_chunking_for_payload_columns( @@ -747,7 +747,7 @@ cdef class HybridScanReader: cpp_use_data_page_mask mask_data_pages, list column_chunk_data, ParquetReaderOptions options, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Setup chunking information for payload columns. @@ -779,7 +779,7 @@ cdef class HybridScanReader: for span in column_chunk_data: spans_vec.push_back(_get_device_span(span)) - self.stream = _get_stream(stream) + self._stream = _get_stream(stream) self.mr = _get_memory_resource(mr) cdef column_view mask_view = row_mask.view() @@ -793,7 +793,7 @@ cdef class HybridScanReader: spans_vec.data(), spans_vec.size() ), options.c_obj, - self.stream.view(), + self._stream.view().value(), self.mr.get_mr() ) @@ -818,7 +818,7 @@ cdef class HybridScanReader: mask_view ) return TableWithMetadata.from_libcudf( - c_result, self.stream, self.mr + c_result, self._stream, self.mr ) def construct_row_group_passes( diff --git a/python/pylibcudf/pylibcudf/io/json.pxd b/python/pylibcudf/pylibcudf/io/json.pxd index 96bc102ef0b..e46942ea14b 100644 --- a/python/pylibcudf/pylibcudf/io/json.pxd +++ b/python/pylibcudf/pylibcudf/io/json.pxd @@ -1,10 +1,9 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool from libcpp.map cimport map from libcpp.vector cimport vector -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from pylibcudf.io.types cimport ( @@ -83,7 +82,7 @@ cdef class JsonReaderOptionsBuilder: cpdef build(self) cpdef TableWithMetadata read_json( - JsonReaderOptions options, Stream stream = *, DeviceMemoryResource mr = * + JsonReaderOptions options, object stream = *, DeviceMemoryResource mr = * ) cpdef TableWithMetadata read_json_from_string_column( @@ -93,7 +92,7 @@ cpdef TableWithMetadata read_json_from_string_column( list dtypes = *, compression_type compression = *, json_recovery_mode_t recovery_mode = *, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *) cdef class JsonWriterOptions: @@ -117,13 +116,13 @@ cdef class JsonWriterOptionsBuilder: cpdef JsonWriterOptionsBuilder utf8_escaped(self, bool val) cpdef JsonWriterOptions build(self) -cpdef void write_json(JsonWriterOptions options, Stream stream = *) +cpdef void write_json(JsonWriterOptions options, object stream = *) cpdef bool is_supported_write_json(DataType type) cpdef tuple chunked_read_json( JsonReaderOptions options, int chunk_size= *, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) diff --git a/python/pylibcudf/pylibcudf/io/json.pyi b/python/pylibcudf/pylibcudf/io/json.pyi index f19da874a0d..a03d8ef407c 100644 --- a/python/pylibcudf/pylibcudf/io/json.pyi +++ b/python/pylibcudf/pylibcudf/io/json.pyi @@ -4,7 +4,6 @@ from collections.abc import Mapping from typing import Self, TypeAlias from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.io.types import ( @@ -17,6 +16,7 @@ from pylibcudf.io.types import ( from pylibcudf.scalar import Scalar from pylibcudf.table import Table from pylibcudf.types import DataType +from pylibcudf.utils 
import CudaStreamLike ChildNameToTypeMap: TypeAlias = Mapping[str, ChildNameToTypeMap] @@ -73,7 +73,7 @@ class JsonReaderOptionsBuilder: def read_json( options: JsonReaderOptions, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> TableWithMetadata: ... def read_json_from_string_column( @@ -83,7 +83,7 @@ def read_json_from_string_column( dtypes: list | None = None, compression: CompressionType = CompressionType.NONE, recovery_mode: JSONRecoveryMode = JSONRecoveryMode.RECOVER_WITH_NULL, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> TableWithMetadata: ... @@ -105,12 +105,12 @@ class JsonWriterOptionsBuilder: def build(self) -> JsonWriterOptions: ... def write_json( - options: JsonWriterOptions, stream: Stream | None = None + options: JsonWriterOptions, stream: CudaStreamLike | None = None ) -> None: ... def chunked_read_json( options: JsonReaderOptions, chunk_size: int = 100_000_000, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[list[Column], list[str], ChildNameToTypeMap]: ... def is_supported_write_json(type: DataType) -> bool: ... diff --git a/python/pylibcudf/pylibcudf/io/json.pyx b/python/pylibcudf/pylibcudf/io/json.pyx index aa66c6fe5c2..1bce364fdd8 100644 --- a/python/pylibcudf/pylibcudf/io/json.pyx +++ b/python/pylibcudf/pylibcudf/io/json.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool from libcpp.map cimport map @@ -49,6 +49,7 @@ from pylibcudf.utils cimport _get_stream from cython.operator import dereference from rmm.pylibrmm.device_buffer cimport DeviceBuffer +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "chunked_read_json", @@ -704,7 +705,7 @@ cdef class JsonReaderOptionsBuilder: cpdef tuple chunked_read_json( JsonReaderOptions options, int chunk_size=100_000_000, - Stream stream = None, + object stream = None, DeviceMemoryResource mr = None, ): """ @@ -735,6 +736,7 @@ cpdef tuple chunked_read_json( child_names = None i = 0 cdef Stream s = _get_stream(stream) + cdef cudaStream_t _cs = s.view().value() mr = _get_memory_resource(mr) while True: options.enable_lines(True) @@ -743,7 +745,7 @@ cpdef tuple chunked_read_json( try: with nogil: - c_result = move(cpp_read_json(options.c_obj, s.view(), mr.get_mr())) + c_result = move(cpp_read_json(options.c_obj, _cs, mr.get_mr())) except (ValueError, OverflowError): break if meta_names is None: @@ -772,7 +774,7 @@ cpdef tuple chunked_read_json( cpdef TableWithMetadata read_json( JsonReaderOptions options, - Stream stream = None, + object stream = None, DeviceMemoryResource mr = None ): """ @@ -797,9 +799,10 @@ cpdef TableWithMetadata read_json( """ cdef table_with_metadata c_result cdef Stream s = _get_stream(stream) + cdef cudaStream_t _cs = s.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = move(cpp_read_json(options.c_obj, s.view(), mr.get_mr())) + c_result = move(cpp_read_json(options.c_obj, _cs, mr.get_mr())) return TableWithMetadata.from_libcudf(c_result, s, mr) @@ -810,7 +813,7 @@ cpdef TableWithMetadata read_json_from_string_column( list dtypes = None, compression_type compression = compression_type.NONE, json_recovery_mode_t recovery_mode = json_recovery_mode_t.RECOVER_WITH_NULL, - Stream stream = None, + object stream = None, DeviceMemoryResource mr = None ): """ @@ -852,7 +855,8 @@ cpdef 
TableWithMetadata read_json_from_string_column( cdef unique_ptr[column] c_join_string_column cdef column_contents c_contents cdef table_with_metadata c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) # Join the string column into a single string @@ -862,7 +866,7 @@ cpdef TableWithMetadata read_json_from_string_column( input.view(), dereference(c_separator), dereference(c_narep), - stream.view(), + _cs, mr.get_mr() ) ) @@ -870,7 +874,7 @@ cpdef TableWithMetadata read_json_from_string_column( # Create a new source from the joined string data cdef SourceInfo joined_source = SourceInfo( - [DeviceBuffer.c_from_unique_ptr(move(c_contents.data), stream, mr)]) + [DeviceBuffer.c_from_unique_ptr(move(c_contents.data), _stream, mr)]) # Create new options using the joined string as source cdef JsonReaderOptions options = ( @@ -886,9 +890,9 @@ cpdef TableWithMetadata read_json_from_string_column( # Read JSON from the joined string with nogil: - c_result = move(cpp_read_json(options.c_obj, stream.view(), mr.get_mr())) + c_result = move(cpp_read_json(options.c_obj, _cs, mr.get_mr())) - return TableWithMetadata.from_libcudf(c_result, stream, mr) + return TableWithMetadata.from_libcudf(c_result, _stream, mr) cdef class JsonWriterOptions: """ @@ -1090,7 +1094,7 @@ cdef class JsonWriterOptionsBuilder: return json_options -cpdef void write_json(JsonWriterOptions options, Stream stream = None): +cpdef void write_json(JsonWriterOptions options, object stream = None): """ Writes a set of columns to JSON format. 
@@ -1106,8 +1110,9 @@ cpdef void write_json(JsonWriterOptions options, Stream stream = None): None """ cdef Stream s = _get_stream(stream) + cdef cudaStream_t _cs = s.view().value() with nogil: - cpp_write_json(options.c_obj, s.view()) + cpp_write_json(options.c_obj, _cs) cpdef bool is_supported_write_json(DataType type): """Check if the dtype is supported for JSON writing diff --git a/python/pylibcudf/pylibcudf/io/orc.pxd b/python/pylibcudf/pylibcudf/io/orc.pxd index 24221163917..72ad5aac534 100644 --- a/python/pylibcudf/pylibcudf/io/orc.pxd +++ b/python/pylibcudf/pylibcudf/io/orc.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libc.stdint cimport uint64_t, int64_t @@ -9,7 +9,6 @@ from libcpp.optional cimport optional from libcpp.string cimport string from libcpp.vector cimport vector -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from pylibcudf.io.types cimport ( @@ -65,7 +64,7 @@ cdef class OrcReaderOptionsBuilder: cpdef OrcReaderOptions build(self) cpdef TableWithMetadata read_orc( - OrcReaderOptions options, Stream stream = *, DeviceMemoryResource mr=* + OrcReaderOptions options, object stream = *, DeviceMemoryResource mr=* ) cdef class OrcColumnStatistics: @@ -89,7 +88,7 @@ cdef class ParsedOrcStatistics: cpdef ParsedOrcStatistics read_parsed_orc_statistics( SourceInfo source_info, - Stream stream=* + object stream = * ) cdef class OrcWriterOptions: @@ -110,7 +109,7 @@ cdef class OrcWriterOptionsBuilder: cpdef OrcWriterOptionsBuilder metadata(self, TableInputMetadata meta) cpdef OrcWriterOptions build(self) -cpdef void write_orc(OrcWriterOptions options, Stream stream = *) +cpdef void write_orc(OrcWriterOptions options, object stream = *) cdef class OrcChunkedWriter: cdef unique_ptr[orc_chunked_writer] c_obj diff --git 
a/python/pylibcudf/pylibcudf/io/orc.pyi b/python/pylibcudf/pylibcudf/io/orc.pyi index dcf2b731bac..3cb6daff240 100644 --- a/python/pylibcudf/pylibcudf/io/orc.pyi +++ b/python/pylibcudf/pylibcudf/io/orc.pyi @@ -4,7 +4,6 @@ from typing import Any, Self from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.io.types import ( CompressionType, @@ -16,6 +15,7 @@ from pylibcudf.io.types import ( ) from pylibcudf.table import Table from pylibcudf.types import DataType +from pylibcudf.utils import CudaStreamLike class OrcReaderOptions: def set_num_rows(self, nrows: int) -> None: ... @@ -34,7 +34,7 @@ class OrcReaderOptionsBuilder: def read_orc( options: OrcReaderOptions, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> TableWithMetadata: ... @@ -59,7 +59,7 @@ class ParsedOrcStatistics: def read_parsed_orc_statistics( source_info: SourceInfo, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> ParsedOrcStatistics: ... class OrcWriterOptions: @@ -79,7 +79,7 @@ class OrcWriterOptionsBuilder: def build(self) -> OrcWriterOptions: ... def write_orc( - options: OrcWriterOptions, stream: Stream | None = None + options: OrcWriterOptions, stream: CudaStreamLike | None = None ) -> None: ... def is_supported_read_orc(compression: CompressionType) -> bool: ... def is_supported_write_orc(compression: CompressionType) -> bool: ... @@ -90,7 +90,7 @@ class OrcChunkedWriter: def write(self, table: Table) -> None: ... @staticmethod def from_options( - options: ChunkedOrcWriterOptions, stream: Stream | None = None + options: ChunkedOrcWriterOptions, stream: CudaStreamLike | None = None ) -> OrcChunkedWriter: ... 
 class ChunkedOrcWriterOptions:
diff --git a/python/pylibcudf/pylibcudf/io/orc.pyx b/python/pylibcudf/pylibcudf/io/orc.pyx
index 8c3687ec232..3a2fabc5683 100644
--- a/python/pylibcudf/pylibcudf/io/orc.pyx
+++ b/python/pylibcudf/pylibcudf/io/orc.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 from libcpp cimport bool
 from libcpp.string cimport string
@@ -8,6 +8,7 @@ from libcpp.vector cimport vector
 import datetime
 
 from rmm.pylibrmm.stream cimport Stream
+from cuda.bindings.cyruntime cimport cudaStream_t
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
 
 from pylibcudf.io.types cimport SourceInfo, TableWithMetadata, SinkInfo
@@ -444,7 +445,7 @@ cdef class OrcReaderOptionsBuilder:
 
 
 cpdef TableWithMetadata read_orc(
-    OrcReaderOptions options, Stream stream = None, DeviceMemoryResource mr=None
+    OrcReaderOptions options, object stream = None, DeviceMemoryResource mr=None
 ):
     """
     Read from ORC format.
@@ -465,17 +466,18 @@ cpdef TableWithMetadata read_orc(
     """
     cdef table_with_metadata c_result
     cdef Stream s = _get_stream(stream)
+    cdef cudaStream_t _cs = s.view().value()
     mr = _get_memory_resource(mr)
     with nogil:
-        c_result = move(cpp_read_orc(options.c_obj, s.view(), mr.get_mr()))
+        c_result = move(cpp_read_orc(options.c_obj, _cs, mr.get_mr()))
 
     return TableWithMetadata.from_libcudf(c_result, s, mr)
 
 
 cpdef ParsedOrcStatistics read_parsed_orc_statistics(
     SourceInfo source_info,
-    Stream stream=None
+    object stream=None
 ):
     """
     Read ORC statistics from a source.
@@ -494,8 +496,9 @@ cpdef ParsedOrcStatistics read_parsed_orc_statistics(
     """
     cdef Stream s = _get_stream(stream)
     cdef parsed_orc_statistics parsed
+    cdef cudaStream_t _cs = s.view().value()
     with nogil:
-        parsed = cpp_read_parsed_orc_statistics(source_info.c_obj, s.view())
+        parsed = cpp_read_parsed_orc_statistics(source_info.c_obj, _cs)
 
     return ParsedOrcStatistics.from_libcudf(parsed)
 
@@ -667,7 +670,7 @@ cdef class OrcWriterOptionsBuilder:
         return orc_options
 
 
-cpdef void write_orc(OrcWriterOptions options, Stream stream = None):
+cpdef void write_orc(OrcWriterOptions options, object stream = None):
     """
     Write to ORC format.
 
@@ -688,8 +691,9 @@ cpdef void write_orc(OrcWriterOptions options, Stream stream = None):
     None
     """
     cdef Stream s = _get_stream(stream)
+    cdef cudaStream_t _cs = s.view().value()
     with nogil:
-        cpp_write_orc(move(options.c_obj), s.view())
+        cpp_write_orc(move(options.c_obj), _cs)
 
 
 cdef class OrcChunkedWriter:
@@ -721,7 +725,7 @@ cdef class OrcChunkedWriter:
         self.c_obj.get()[0].write(table.view())
 
     @staticmethod
-    def from_options(ChunkedOrcWriterOptions options, Stream stream = None):
+    def from_options(ChunkedOrcWriterOptions options, object stream = None):
         """
         Creates a chunked ORC writer from options
 
@@ -740,7 +744,8 @@ cdef class OrcChunkedWriter:
             OrcChunkedWriter
         )
         cdef Stream s = _get_stream(stream)
-        orc_writer.c_obj.reset(new orc_chunked_writer(options.c_obj, s.view()))
+        cdef cudaStream_t _cs = s.view().value()
+        orc_writer.c_obj.reset(new orc_chunked_writer(options.c_obj, _cs))
         return orc_writer
diff --git a/python/pylibcudf/pylibcudf/io/parquet.pxd b/python/pylibcudf/pylibcudf/io/parquet.pxd
index d9350f77721..c98a90dd692 100644
--- a/python/pylibcudf/pylibcudf/io/parquet.pxd
+++ b/python/pylibcudf/pylibcudf/io/parquet.pxd
@@ -6,8 +6,8 @@ from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.vector cimport vector
 
-from rmm.pylibrmm.stream cimport Stream
 from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
+from rmm.pylibrmm.stream cimport Stream from pylibcudf.expressions cimport Expression @@ -74,7 +74,7 @@ cdef class ParquetReaderOptionsBuilder: cdef class ChunkedParquetReader: - cdef readonly Stream stream + cdef Stream _stream cdef DeviceMemoryResource mr cdef unique_ptr[cpp_chunked_parquet_reader] reader @@ -83,7 +83,7 @@ cdef class ChunkedParquetReader: cpdef read_parquet( - ParquetReaderOptions options, Stream stream = *, DeviceMemoryResource mr=* + ParquetReaderOptions options, object stream = *, DeviceMemoryResource mr=* ) @@ -180,7 +180,7 @@ cdef class ParquetWriterOptionsBuilder: cpdef ParquetWriterOptions build(self) -cpdef memoryview write_parquet(ParquetWriterOptions options, Stream stream = *) +cpdef memoryview write_parquet(ParquetWriterOptions options, object stream = *) cpdef bool is_supported_read_parquet(compression_type compression) diff --git a/python/pylibcudf/pylibcudf/io/parquet.pyi b/python/pylibcudf/pylibcudf/io/parquet.pyi index c0c31e22007..f0a092f63e0 100644 --- a/python/pylibcudf/pylibcudf/io/parquet.pyi +++ b/python/pylibcudf/pylibcudf/io/parquet.pyi @@ -5,7 +5,6 @@ from collections.abc import Mapping, Sequence from typing import Self from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.expressions import Expression from pylibcudf.io.types import ( @@ -20,6 +19,7 @@ from pylibcudf.io.types import ( ) from pylibcudf.table import Table from pylibcudf.types import TypeId +from pylibcudf.utils import CudaStreamLike class ParquetReaderOptions: def __init__(self): ... @@ -53,7 +53,7 @@ class ChunkedParquetReader: def __init__( self, options: ParquetReaderOptions, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, chunk_read_limit: int = 0, pass_read_limit: int = 1024000000, ) -> None: ... 
@@ -62,7 +62,7 @@ class ChunkedParquetReader: def read_parquet( options: ParquetReaderOptions, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> TableWithMetadata: ... @@ -101,7 +101,7 @@ class ParquetWriterOptionsBuilder: def build(self) -> ParquetWriterOptions: ... def write_parquet( - options: ParquetWriterOptions, stream: Stream | None = None + options: ParquetWriterOptions, stream: CudaStreamLike | None = None ) -> memoryview: ... def is_supported_read_parquet(compression: CompressionType) -> bool: ... def is_supported_write_parquet(compression: CompressionType) -> bool: ... @@ -112,7 +112,8 @@ class ChunkedParquetWriter: def write(self, table: Table, partitions_info: object = None) -> None: ... @staticmethod def from_options( - options: ChunkedParquetWriterOptions, stream: Stream | None = None + options: ChunkedParquetWriterOptions, + stream: CudaStreamLike | None = None, ) -> ChunkedParquetWriter: ... class ChunkedParquetWriterOptions: diff --git a/python/pylibcudf/pylibcudf/io/parquet.pyx b/python/pylibcudf/pylibcudf/io/parquet.pyx index c4bad082304..86904513cfa 100644 --- a/python/pylibcudf/pylibcudf/io/parquet.pyx +++ b/python/pylibcudf/pylibcudf/io/parquet.pyx @@ -46,6 +46,7 @@ from pylibcudf.libcudf.io.types cimport ( from pylibcudf.libcudf.types cimport size_type, type_id from pylibcudf.table cimport Table from pylibcudf.utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "ChunkedParquetReader", @@ -507,20 +508,21 @@ cdef class ChunkedParquetReader: def __init__( self, ParquetReaderOptions options, - Stream stream = None, + object stream = None, DeviceMemoryResource mr = None, size_t chunk_read_limit=0, size_t pass_read_limit=1024000000, ): - self.stream = _get_stream(stream) + self._stream = _get_stream(stream) self.mr = _get_memory_resource(mr) + cdef cudaStream_t stream_view = self._stream.view().value() with nogil: 
self.reader.reset( new cpp_chunked_parquet_reader( chunk_read_limit, pass_read_limit, options.c_obj, - self.stream.view(), + stream_view, self.mr.get_mr() ) ) @@ -560,11 +562,11 @@ cdef class ChunkedParquetReader: with nogil: c_result = move(self.reader.get()[0].read_chunk()) - return TableWithMetadata.from_libcudf(c_result, self.stream, mr) + return TableWithMetadata.from_libcudf(c_result, self._stream, mr) cpdef read_parquet( - ParquetReaderOptions options, Stream stream = None, DeviceMemoryResource mr=None + ParquetReaderOptions options, object stream = None, DeviceMemoryResource mr=None ): """ Read from Parquet format. @@ -584,9 +586,10 @@ cpdef read_parquet( Device memory resource used to allocate the returned table's device memory. """ cdef Stream s = _get_stream(stream) + cdef cudaStream_t _cs = s.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = move(cpp_read_parquet(options.c_obj, s.view(), mr.get_mr())) + c_result = move(cpp_read_parquet(options.c_obj, _cs, mr.get_mr())) return TableWithMetadata.from_libcudf(c_result, s, mr) @@ -640,7 +643,7 @@ cdef class ChunkedParquetWriter: self.c_obj.get()[0].write(table.view(), partitions) @staticmethod - def from_options(ChunkedParquetWriterOptions options, Stream stream = None): + def from_options(ChunkedParquetWriterOptions options, object stream = None): """ Creates a chunked Parquet writer from options @@ -659,8 +662,9 @@ cdef class ChunkedParquetWriter: ChunkedParquetWriter ) cdef Stream s = _get_stream(stream) + cdef cudaStream_t _cs = s.view().value() parquet_writer.c_obj.reset( - new cpp_chunked_parquet_writer(options.c_obj, s.view()) + new cpp_chunked_parquet_writer(options.c_obj, _cs) ) return parquet_writer @@ -1235,7 +1239,7 @@ cdef class ParquetWriterOptionsBuilder: return parquet_options -cpdef memoryview write_parquet(ParquetWriterOptions options, Stream stream = None): +cpdef memoryview write_parquet(ParquetWriterOptions options, object stream = None): """ Writes a set of columns 
to parquet format. @@ -1255,9 +1259,9 @@ cpdef memoryview write_parquet(ParquetWriterOptions options, Stream stream = Non """ cdef unique_ptr[vector[uint8_t]] c_result cdef Stream s = _get_stream(stream) - + cdef cudaStream_t _cs = s.view().value() with nogil: - c_result = cpp_write_parquet(move(options.c_obj), s.view()) + c_result = cpp_write_parquet(move(options.c_obj), _cs) return memoryview(HostBuffer.from_unique_ptr(move(c_result))) diff --git a/python/pylibcudf/pylibcudf/io/text.pxd b/python/pylibcudf/pylibcudf/io/text.pxd index 7623c8da26b..5276f9ffaba 100644 --- a/python/pylibcudf/pylibcudf/io/text.pxd +++ b/python/pylibcudf/pylibcudf/io/text.pxd @@ -1,10 +1,9 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr from libcpp.string cimport string from pylibcudf.column cimport Column -from pylibcudf.io.types cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from pylibcudf.libcudf.io.text cimport parse_options, data_chunk_source, byte_range_info @@ -23,7 +22,7 @@ cpdef Column multibyte_split( DataChunkSource source, str delimiter, ParseOptions options=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/io/text.pyi b/python/pylibcudf/pylibcudf/io/text.pyi index 66406c94dd2..581e45c3194 100644 --- a/python/pylibcudf/pylibcudf/io/text.pyi +++ b/python/pylibcudf/pylibcudf/io/text.pyi @@ -1,10 +1,10 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike class ByteRangeInfo: def __init__(self, offset: int, size: int) -> None: ... @@ -35,6 +35,6 @@ def multibyte_split( source: DataChunkSource, delimiter: str, options: ParseOptions | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/io/text.pyx b/python/pylibcudf/pylibcudf/io/text.pyx index 9fb220b0a37..be15701a4d8 100644 --- a/python/pylibcudf/pylibcudf/io/text.pyx +++ b/python/pylibcudf/pylibcudf/io/text.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from cython.operator cimport dereference @@ -9,10 +9,11 @@ from libcpp.utility cimport move from pylibcudf.column cimport Column from pylibcudf.utils cimport _get_stream, _get_memory_resource -from pylibcudf.io.types cimport Stream +from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.io cimport text as cpp_text +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "ByteRangeInfo", @@ -193,7 +194,7 @@ cpdef Column multibyte_split( DataChunkSource source, str delimiter, ParseOptions options=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -224,7 +225,8 @@ cpdef Column multibyte_split( cdef unique_ptr[column] c_result cdef unique_ptr[data_chunk_source] c_source = move(source.c_source) cdef string c_delimiter = delimiter.encode() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() 
mr = _get_memory_resource(mr) if options is None: @@ -237,8 +239,8 @@ cpdef Column multibyte_split( dereference(c_source), c_delimiter, c_options, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/io/timezone.pxd b/python/pylibcudf/pylibcudf/io/timezone.pxd index a2fa33d102d..9a12be928b2 100644 --- a/python/pylibcudf/pylibcudf/io/timezone.pxd +++ b/python/pylibcudf/pylibcudf/io/timezone.pxd @@ -1,11 +1,11 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from ..table cimport Table -from .types cimport Stream + from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource cpdef Table make_timezone_transition_table( - str tzif_dir, str timezone_name, Stream stream=*, DeviceMemoryResource mr=* + str tzif_dir, str timezone_name, object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/io/timezone.pyi b/python/pylibcudf/pylibcudf/io/timezone.pyi index d83f68424b4..f87dda70f70 100644 --- a/python/pylibcudf/pylibcudf/io/timezone.pyi +++ b/python/pylibcudf/pylibcudf/io/timezone.pyi @@ -1,14 +1,14 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.table import Table +from pylibcudf.utils import CudaStreamLike def make_timezone_transition_table( tzif_dir: str, timezone_name: str, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... 
diff --git a/python/pylibcudf/pylibcudf/io/timezone.pyx b/python/pylibcudf/pylibcudf/io/timezone.pyx index 0416df1cf0b..033ed15a1ba 100644 --- a/python/pylibcudf/pylibcudf/io/timezone.pyx +++ b/python/pylibcudf/pylibcudf/io/timezone.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -12,13 +12,14 @@ from pylibcudf.libcudf.table.table cimport table from ..utils cimport _get_stream, _get_memory_resource from ..table cimport Table -from .types cimport Stream +from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["make_timezone_transition_table"] cpdef Table make_timezone_transition_table( - str tzif_dir, str timezone_name, Stream stream=None, DeviceMemoryResource mr=None, + str tzif_dir, str timezone_name, object stream=None, DeviceMemoryResource mr=None, ): """ Creates a transition table to convert ORC timestamps to UTC. 
@@ -42,15 +43,16 @@ cpdef Table make_timezone_transition_table( cdef unique_ptr[table] c_result cdef string c_tzdir = tzif_dir.encode() cdef string c_tzname = timezone_name.encode() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_make_timezone_transition_table( make_optional[string](c_tzdir), c_tzname, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/io/types.pxd b/python/pylibcudf/pylibcudf/io/types.pxd index db7e2ad95c5..1e52f4faa05 100644 --- a/python/pylibcudf/pylibcudf/io/types.pxd +++ b/python/pylibcudf/pylibcudf/io/types.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libc.stdint cimport uint8_t, int32_t @@ -29,7 +29,6 @@ from pylibcudf.libcudf.utilities.span cimport host_span from pylibcudf.table cimport Table -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource cdef class PartitionInfo: @@ -86,7 +85,7 @@ cdef class TableWithMetadata: @staticmethod cdef TableWithMetadata from_libcudf( - table_with_metadata& tbl, Stream stream, DeviceMemoryResource mr + table_with_metadata& tbl, object stream, DeviceMemoryResource mr ) cdef class SourceInfo: diff --git a/python/pylibcudf/pylibcudf/io/types.pyx b/python/pylibcudf/pylibcudf/io/types.pyx index 1c4a7f49268..27c3bb47caf 100644 --- a/python/pylibcudf/pylibcudf/io/types.pyx +++ b/python/pylibcudf/pylibcudf/io/types.pyx @@ -33,7 +33,6 @@ from pylibcudf.libcudf.utilities.span cimport device_span, host_span from pylibcudf.span import is_span from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport 
Stream import codecs import errno @@ -396,7 +395,7 @@ cdef class TableWithMetadata: @staticmethod cdef TableWithMetadata from_libcudf( table_with_metadata& tbl_with_meta, - Stream stream, + object stream, DeviceMemoryResource mr ): """Create a Python TableWithMetadata from a libcudf table_with_metadata""" diff --git a/python/pylibcudf/pylibcudf/join.pxd b/python/pylibcudf/pylibcudf/join.pxd index 31a998029e3..f0b69a42621 100644 --- a/python/pylibcudf/pylibcudf/join.pxd +++ b/python/pylibcudf/pylibcudf/join.pxd @@ -1,10 +1,9 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr from pylibcudf.libcudf cimport join as cpp_join from pylibcudf.libcudf.types cimport null_equality -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .column cimport Column @@ -16,7 +15,7 @@ cpdef tuple inner_join( Table left_keys, Table right_keys, null_equality nulls_equal, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -24,7 +23,7 @@ cpdef tuple left_join( Table left_keys, Table right_keys, null_equality nulls_equal, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -32,7 +31,7 @@ cpdef tuple full_join( Table left_keys, Table right_keys, null_equality nulls_equal, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -40,7 +39,7 @@ cpdef Column left_semi_join( Table left_keys, Table right_keys, null_equality nulls_equal, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -48,19 +47,19 @@ cpdef Column left_anti_join( Table left_keys, Table right_keys, null_equality nulls_equal, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Table cross_join( - Table left, Table right, Stream stream=*, DeviceMemoryResource mr=* + Table left, Table right, object stream = *, 
DeviceMemoryResource mr=* ) cpdef tuple conditional_inner_join( Table left, Table right, Expression binary_predicate, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -68,7 +67,7 @@ cpdef tuple conditional_left_join( Table left, Table right, Expression binary_predicate, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -76,7 +75,7 @@ cpdef tuple conditional_full_join( Table left, Table right, Expression binary_predicate, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -84,7 +83,7 @@ cpdef Column conditional_left_semi_join( Table left, Table right, Expression binary_predicate, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -92,7 +91,7 @@ cpdef Column conditional_left_anti_join( Table left, Table right, Expression binary_predicate, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -103,7 +102,7 @@ cpdef tuple mixed_inner_join( Table right_conditional, Expression binary_predicate, null_equality nulls_equal, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -114,7 +113,7 @@ cpdef tuple mixed_left_join( Table right_conditional, Expression binary_predicate, null_equality nulls_equal, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -125,7 +124,7 @@ cpdef tuple mixed_full_join( Table right_conditional, Expression binary_predicate, null_equality nulls_equal, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -136,7 +135,7 @@ cpdef Column mixed_left_semi_join( Table right_conditional, Expression binary_predicate, null_equality nulls_equal, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -147,7 +146,7 @@ cpdef Column mixed_left_anti_join( Table right_conditional, Expression binary_predicate, null_equality nulls_equal, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/join.pyi b/python/pylibcudf/pylibcudf/join.pyi 
index 615eb914618..1cf86c7c704 100644 --- a/python/pylibcudf/pylibcudf/join.pyi +++ b/python/pylibcudf/pylibcudf/join.pyi @@ -4,12 +4,12 @@ from enum import IntEnum from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.expressions import Expression from pylibcudf.table import Table from pylibcudf.types import NullEquality +from pylibcudf.utils import CudaStreamLike class SetAsBuildTable(IntEnum): LEFT = ... @@ -19,76 +19,76 @@ def inner_join( left_keys: Table, right_keys: Table, nulls_equal: NullEquality, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Column, Column]: ... def left_join( left_keys: Table, right_keys: Table, nulls_equal: NullEquality, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Column, Column]: ... def full_join( left_keys: Table, right_keys: Table, nulls_equal: NullEquality, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Column, Column]: ... def left_semi_join( left_keys: Table, right_keys: Table, nulls_equal: NullEquality, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def left_anti_join( left_keys: Table, right_keys: Table, nulls_equal: NullEquality, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def cross_join( left: Table, right: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... 
def conditional_inner_join( left: Table, right: Table, binary_predicate: Expression, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Column, Column]: ... def conditional_left_join( left: Table, right: Table, binary_predicate: Expression, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Column, Column]: ... def conditional_full_join( left: Table, right: Table, binary_predicate: Expression, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Column, Column]: ... def conditional_left_semi_join( left: Table, right: Table, binary_predicate: Expression, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def conditional_left_anti_join( left: Table, right: Table, binary_predicate: Expression, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def mixed_inner_join( @@ -98,7 +98,7 @@ def mixed_inner_join( right_conditional: Table, binary_predicate: Expression, nulls_equal: NullEquality, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Column, Column]: ... def mixed_left_join( @@ -108,7 +108,7 @@ def mixed_left_join( right_conditional: Table, binary_predicate: Expression, nulls_equal: NullEquality, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Column, Column]: ... def mixed_full_join( @@ -118,7 +118,7 @@ def mixed_full_join( right_conditional: Table, binary_predicate: Expression, nulls_equal: NullEquality, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Column, Column]: ... 
def mixed_left_semi_join( @@ -128,7 +128,7 @@ def mixed_left_semi_join( right_conditional: Table, binary_predicate: Expression, nulls_equal: NullEquality, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def mixed_left_anti_join( @@ -138,7 +138,7 @@ def mixed_left_anti_join( right_conditional: Table, binary_predicate: Expression, nulls_equal: NullEquality, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... @@ -148,17 +148,17 @@ class FilteredJoin: build: Table, compare_nulls: NullEquality, load_factor: float = ..., - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> None: ... def semi_join( self, probe: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def anti_join( self, probe: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/join.pyx b/python/pylibcudf/pylibcudf/join.pyx index 61a321b27a8..78a44554dff 100644 --- a/python/pylibcudf/pylibcudf/join.pyx +++ b/python/pylibcudf/pylibcudf/join.pyx @@ -22,6 +22,7 @@ from .table cimport Table from .utils cimport _get_stream, _get_memory_resource from pylibcudf.libcudf.join import set_as_build_table as SetAsBuildTable # no-cython-lint # noqa: F401, deprecated +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "conditional_full_join", @@ -45,9 +46,10 @@ __all__ = [ ] cdef Column _column_from_gather_map( - cpp_join.gather_map_type gather_map, Stream stream, DeviceMemoryResource mr + cpp_join.gather_map_type gather_map, object stream, DeviceMemoryResource mr ): # helper to convert a gather map to a Column + cdef Stream _stream = _get_stream(stream) return Column.from_libcudf( move( make_unique[column]( @@ -55,9 +57,7 @@ cdef Column _column_from_gather_map( device_buffer(), 0 ) - ), - stream, - mr + ), _stream, mr ) @@ -65,7 +65,7 @@ cpdef tuple inner_join( Table left_keys, Table right_keys, null_equality nulls_equal, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform an inner join between two tables. 
@@ -89,16 +89,21 @@ cpdef tuple inner_join( """ cdef cpp_join.gather_map_pair_type c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_join.inner_join( - left_keys.view(), right_keys.view(), nulls_equal, stream.view(), mr.get_mr() + left_keys.view(), + right_keys.view(), + nulls_equal, + _cs, + mr.get_mr() ) return ( - _column_from_gather_map(move(c_result.first), stream, mr), - _column_from_gather_map(move(c_result.second), stream, mr), + _column_from_gather_map(move(c_result.first), _stream, mr), + _column_from_gather_map(move(c_result.second), _stream, mr), ) @@ -106,7 +111,7 @@ cpdef tuple left_join( Table left_keys, Table right_keys, null_equality nulls_equal, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a left join between two tables. @@ -130,16 +135,21 @@ cpdef tuple left_join( """ cdef cpp_join.gather_map_pair_type c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_join.left_join( - left_keys.view(), right_keys.view(), nulls_equal, stream.view(), mr.get_mr() + left_keys.view(), + right_keys.view(), + nulls_equal, + _cs, + mr.get_mr() ) return ( - _column_from_gather_map(move(c_result.first), stream, mr), - _column_from_gather_map(move(c_result.second), stream, mr), + _column_from_gather_map(move(c_result.first), _stream, mr), + _column_from_gather_map(move(c_result.second), _stream, mr), ) @@ -147,7 +157,7 @@ cpdef tuple full_join( Table left_keys, Table right_keys, null_equality nulls_equal, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a full join between two tables. 
@@ -171,16 +181,21 @@ cpdef tuple full_join( """ cdef cpp_join.gather_map_pair_type c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_join.full_join( - left_keys.view(), right_keys.view(), nulls_equal, stream.view(), mr.get_mr() + left_keys.view(), + right_keys.view(), + nulls_equal, + _cs, + mr.get_mr() ) return ( - _column_from_gather_map(move(c_result.first), stream, mr), - _column_from_gather_map(move(c_result.second), stream, mr), + _column_from_gather_map(move(c_result.first), _stream, mr), + _column_from_gather_map(move(c_result.second), _stream, mr), ) @@ -188,7 +203,7 @@ cpdef Column left_semi_join( Table left_keys, Table right_keys, null_equality nulls_equal, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a left semi join between two tables. @@ -211,7 +226,8 @@ cpdef Column left_semi_join( """ cdef cpp_join.gather_map_type c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) cdef unique_ptr[cpp_join.filtered_join] join_obj @@ -221,22 +237,22 @@ cpdef Column left_semi_join( new cpp_join.filtered_join( right_keys.view(), nulls_equal, - stream.view() + _cs ) ) c_result = join_obj.get()[0].semi_join( left_keys.view(), - stream.view(), + _cs, mr.get_mr() ) - return _column_from_gather_map(move(c_result), stream, mr) + return _column_from_gather_map(move(c_result), _stream, mr) cpdef Column left_anti_join( Table left_keys, Table right_keys, null_equality nulls_equal, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a left anti join between two tables. 
@@ -259,7 +275,8 @@ cpdef Column left_anti_join( """ cdef cpp_join.gather_map_type c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) cdef unique_ptr[cpp_join.filtered_join] join_obj @@ -269,19 +286,19 @@ cpdef Column left_anti_join( new cpp_join.filtered_join( right_keys.view(), nulls_equal, - stream.view() + _cs ) ) c_result = join_obj.get()[0].anti_join( left_keys.view(), - stream.view(), + _cs, mr.get_mr() ) - return _column_from_gather_map(move(c_result), stream, mr) + return _column_from_gather_map(move(c_result), _stream, mr) cpdef Table cross_join( - Table left, Table right, Stream stream=None, DeviceMemoryResource mr=None + Table left, Table right, object stream=None, DeviceMemoryResource mr=None ): """Perform a cross join on two tables. @@ -305,21 +322,22 @@ cpdef Table cross_join( """ cdef unique_ptr[table] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: result = cpp_join.cross_join( - left.view(), right.view(), stream.view(), mr.get_mr() + left.view(), right.view(), _cs, mr.get_mr() ) - return Table.from_libcudf(move(result), stream, mr) + return Table.from_libcudf(move(result), _stream, mr) cpdef tuple conditional_inner_join( Table left, Table right, Expression binary_predicate, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a conditional inner join between two tables. 
@@ -344,7 +362,8 @@ cpdef tuple conditional_inner_join( cdef cpp_join.gather_map_pair_type c_result cdef optional[size_t] output_size - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -353,12 +372,12 @@ cpdef tuple conditional_inner_join( right.view(), dereference(binary_predicate.c_obj.get()), output_size, - stream.view(), + _cs, mr.get_mr() ) return ( - _column_from_gather_map(move(c_result.first), stream, mr), - _column_from_gather_map(move(c_result.second), stream, mr), + _column_from_gather_map(move(c_result.first), _stream, mr), + _column_from_gather_map(move(c_result.second), _stream, mr), ) @@ -366,7 +385,7 @@ cpdef tuple conditional_left_join( Table left, Table right, Expression binary_predicate, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a conditional left join between two tables. @@ -391,7 +410,8 @@ cpdef tuple conditional_left_join( cdef cpp_join.gather_map_pair_type c_result cdef optional[size_t] output_size - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -400,12 +420,12 @@ cpdef tuple conditional_left_join( right.view(), dereference(binary_predicate.c_obj.get()), output_size, - stream.view(), + _cs, mr.get_mr() ) return ( - _column_from_gather_map(move(c_result.first), stream, mr), - _column_from_gather_map(move(c_result.second), stream, mr), + _column_from_gather_map(move(c_result.first), _stream, mr), + _column_from_gather_map(move(c_result.second), _stream, mr), ) @@ -413,7 +433,7 @@ cpdef tuple conditional_full_join( Table left, Table right, Expression binary_predicate, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a conditional full join between two tables. 
@@ -437,7 +457,8 @@ cpdef tuple conditional_full_join( """ cdef cpp_join.gather_map_pair_type c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -445,12 +466,12 @@ cpdef tuple conditional_full_join( left.view(), right.view(), dereference(binary_predicate.c_obj.get()), - stream.view(), + _cs, mr.get_mr() ) return ( - _column_from_gather_map(move(c_result.first), stream, mr), - _column_from_gather_map(move(c_result.second), stream, mr), + _column_from_gather_map(move(c_result.first), _stream, mr), + _column_from_gather_map(move(c_result.second), _stream, mr), ) @@ -458,7 +479,7 @@ cpdef Column conditional_left_semi_join( Table left, Table right, Expression binary_predicate, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a conditional left semi join between two tables. @@ -482,7 +503,8 @@ cpdef Column conditional_left_semi_join( cdef cpp_join.gather_map_type c_result cdef optional[size_t] output_size - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -491,17 +513,17 @@ cpdef Column conditional_left_semi_join( right.view(), dereference(binary_predicate.c_obj.get()), output_size, - stream.view(), + _cs, mr.get_mr() ) - return _column_from_gather_map(move(c_result), stream, mr) + return _column_from_gather_map(move(c_result), _stream, mr) cpdef Column conditional_left_anti_join( Table left, Table right, Expression binary_predicate, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a conditional left anti join between two tables. 
@@ -525,7 +547,8 @@ cpdef Column conditional_left_anti_join( cdef cpp_join.gather_map_type c_result cdef optional[size_t] output_size - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -534,10 +557,10 @@ cpdef Column conditional_left_anti_join( right.view(), dereference(binary_predicate.c_obj.get()), output_size, - stream.view(), + _cs, mr.get_mr() ) - return _column_from_gather_map(move(c_result), stream, mr) + return _column_from_gather_map(move(c_result), _stream, mr) cpdef tuple mixed_inner_join( @@ -547,7 +570,7 @@ cpdef tuple mixed_inner_join( Table right_conditional, Expression binary_predicate, null_equality nulls_equal, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a mixed inner join between two tables. @@ -578,7 +601,8 @@ cpdef tuple mixed_inner_join( cdef cpp_join.gather_map_pair_type c_result cdef cpp_join.output_size_data_type empty_optional - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -590,12 +614,12 @@ cpdef tuple mixed_inner_join( dereference(binary_predicate.c_obj.get()), nulls_equal, empty_optional, - stream.view(), + _cs, mr.get_mr() ) return ( - _column_from_gather_map(move(c_result.first), stream, mr), - _column_from_gather_map(move(c_result.second), stream, mr), + _column_from_gather_map(move(c_result.first), _stream, mr), + _column_from_gather_map(move(c_result.second), _stream, mr), ) @@ -606,7 +630,7 @@ cpdef tuple mixed_left_join( Table right_conditional, Expression binary_predicate, null_equality nulls_equal, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a mixed left join between two tables. 
@@ -637,7 +661,8 @@ cpdef tuple mixed_left_join( cdef cpp_join.gather_map_pair_type c_result cdef cpp_join.output_size_data_type empty_optional - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -649,12 +674,12 @@ cpdef tuple mixed_left_join( dereference(binary_predicate.c_obj.get()), nulls_equal, empty_optional, - stream.view(), + _cs, mr.get_mr() ) return ( - _column_from_gather_map(move(c_result.first), stream, mr), - _column_from_gather_map(move(c_result.second), stream, mr), + _column_from_gather_map(move(c_result.first), _stream, mr), + _column_from_gather_map(move(c_result.second), _stream, mr), ) @@ -665,7 +690,7 @@ cpdef tuple mixed_full_join( Table right_conditional, Expression binary_predicate, null_equality nulls_equal, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a mixed full join between two tables. @@ -696,7 +721,8 @@ cpdef tuple mixed_full_join( cdef cpp_join.gather_map_pair_type c_result cdef cpp_join.output_size_data_type empty_optional - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -708,12 +734,12 @@ cpdef tuple mixed_full_join( dereference(binary_predicate.c_obj.get()), nulls_equal, empty_optional, - stream.view(), + _cs, mr.get_mr() ) return ( - _column_from_gather_map(move(c_result.first), stream, mr), - _column_from_gather_map(move(c_result.second), stream, mr), + _column_from_gather_map(move(c_result.first), _stream, mr), + _column_from_gather_map(move(c_result.second), _stream, mr), ) @@ -724,7 +750,7 @@ cpdef Column mixed_left_semi_join( Table right_conditional, Expression binary_predicate, null_equality nulls_equal, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a mixed left semi join between two tables. 
@@ -753,7 +779,8 @@ cpdef Column mixed_left_semi_join( """ cdef cpp_join.gather_map_type c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -764,10 +791,10 @@ cpdef Column mixed_left_semi_join( right_conditional.view(), dereference(binary_predicate.c_obj.get()), nulls_equal, - stream.view(), + _cs, mr.get_mr() ) - return _column_from_gather_map(move(c_result), stream, mr) + return _column_from_gather_map(move(c_result), _stream, mr) cpdef Column mixed_left_anti_join( @@ -777,7 +804,7 @@ cpdef Column mixed_left_anti_join( Table right_conditional, Expression binary_predicate, null_equality nulls_equal, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a mixed left anti join between two tables. @@ -806,7 +833,8 @@ cpdef Column mixed_left_anti_join( """ cdef cpp_join.gather_map_type c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -817,10 +845,10 @@ cpdef Column mixed_left_anti_join( right_conditional.view(), dereference(binary_predicate.c_obj.get()), nulls_equal, - stream.view(), + _cs, mr.get_mr() ) - return _column_from_gather_map(move(c_result), stream, mr) + return _column_from_gather_map(move(c_result), _stream, mr) cdef class FilteredJoin: @@ -841,7 +869,7 @@ cdef class FilteredJoin: Table build, null_equality compare_nulls, double load_factor=0.5, - Stream stream=None, + object stream=None, ): """ Construct a filtered hash join object for subsequent probe calls. @@ -858,7 +886,8 @@ cdef class FilteredJoin: stream : Stream, optional CUDA stream used for device memory operations and kernel launches. 
""" - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() with nogil: self.c_obj.reset( @@ -866,14 +895,14 @@ cdef class FilteredJoin: build.view(), compare_nulls, load_factor, - stream.view() + _cs ) ) def semi_join( self, Table probe, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -898,21 +927,22 @@ cdef class FilteredJoin: """ cdef cpp_join.gather_map_type c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = self.c_obj.get()[0].semi_join( probe.view(), - stream.view(), + _cs, mr.get_mr() ) - return _column_from_gather_map(move(c_result), stream, mr) + return _column_from_gather_map(move(c_result), _stream, mr) def anti_join( self, Table probe, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -937,13 +967,14 @@ cdef class FilteredJoin: """ cdef cpp_join.gather_map_type c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = self.c_obj.get()[0].anti_join( probe.view(), - stream.view(), + _cs, mr.get_mr() ) - return _column_from_gather_map(move(c_result), stream, mr) + return _column_from_gather_map(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/json.pxd b/python/pylibcudf/pylibcudf/json.pxd index 5489fa26ee8..47cf3b37c63 100644 --- a/python/pylibcudf/pylibcudf/json.pxd +++ b/python/pylibcudf/pylibcudf/json.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column @@ -6,7 +6,6 @@ from pylibcudf.libcudf.json cimport get_json_object_options from pylibcudf.scalar cimport Scalar from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cdef class GetJsonObjectOptions: @@ -17,6 +16,6 @@ cpdef Column get_json_object( Column col, Scalar json_path, GetJsonObjectOptions options=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/json.pyi b/python/pylibcudf/pylibcudf/json.pyi index fa6bb08d510..a60bcb36f26 100644 --- a/python/pylibcudf/pylibcudf/json.pyi +++ b/python/pylibcudf/pylibcudf/json.pyi @@ -1,11 +1,11 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar +from pylibcudf.utils import CudaStreamLike class GetJsonObjectOptions: def __init__( @@ -26,6 +26,6 @@ def get_json_object( col: Column, json_path: Scalar, options: GetJsonObjectOptions | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/json.pyx b/python/pylibcudf/pylibcudf/json.pyx index b50bd4e7714..a470f6a1cb3 100644 --- a/python/pylibcudf/pylibcudf/json.pyx +++ b/python/pylibcudf/pylibcudf/json.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from cython.operator cimport dereference @@ -15,6 +15,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["GetJsonObjectOptions", "get_json_object"] @@ -120,7 +121,7 @@ cpdef Column get_json_object( Column col, Scalar json_path, GetJsonObjectOptions options=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -155,7 +156,8 @@ cpdef Column get_json_object( options = GetJsonObjectOptions() cdef cpp_json.get_json_object_options c_options = options.options - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -163,8 +165,8 @@ cpdef Column get_json_object( col.view(), dereference(c_json_path), c_options, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/labeling.pxd b/python/pylibcudf/pylibcudf/labeling.pxd index fc93568ed7c..0d8f02d48ce 100644 --- a/python/pylibcudf/pylibcudf/labeling.pxd +++ b/python/pylibcudf/pylibcudf/labeling.pxd @@ -1,11 +1,10 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool from pylibcudf.libcudf.labeling cimport inclusive from .column cimport Column -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource @@ -15,6 +14,6 @@ cpdef Column label_bins( inclusive left_inclusive, Column right_edges, inclusive right_inclusive, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/labeling.pyi b/python/pylibcudf/pylibcudf/labeling.pyi index e9ff5c97f0b..272edd43f5f 100644 --- a/python/pylibcudf/pylibcudf/labeling.pyi +++ b/python/pylibcudf/pylibcudf/labeling.pyi @@ -1,12 +1,12 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from enum import IntEnum from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike class Inclusive(IntEnum): YES = ... @@ -18,6 +18,6 @@ def label_bins( left_inclusive: Inclusive, right_edges: Column, right_inclusive: Inclusive, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/labeling.pyx b/python/pylibcudf/pylibcudf/labeling.pyx index 878390543cb..e3a052f7cb8 100644 --- a/python/pylibcudf/pylibcudf/labeling.pyx +++ b/python/pylibcudf/pylibcudf/labeling.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -14,6 +14,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .column cimport Column from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["Inclusive", "label_bins"] @@ -23,7 +24,7 @@ cpdef Column label_bins( inclusive left_inclusive, Column right_edges, inclusive right_inclusive, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Labels elements based on membership in the specified bins. @@ -54,7 +55,8 @@ cpdef Column label_bins( according to the specified bins. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -64,10 +66,10 @@ cpdef Column label_bins( left_inclusive, right_edges.view(), right_inclusive, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) Inclusive.__str__ = Inclusive.__repr__ diff --git a/python/pylibcudf/pylibcudf/libcudf/binaryop.pxd b/python/pylibcudf/pylibcudf/libcudf/binaryop.pxd index 7ec2c6fe31f..303b112f71e 100644 --- a/python/pylibcudf/pylibcudf/libcudf/binaryop.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/binaryop.pxd @@ -10,7 +10,7 @@ from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport scalar from pylibcudf.libcudf.types cimport data_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -57,7 +57,7 @@ cdef extern from "cudf/binaryop.hpp" namespace "cudf" nogil: const column_view& rhs, binary_operator op, data_type output_type, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except 
+libcudf_exception_handler @@ -66,7 +66,7 @@ cdef extern from "cudf/binaryop.hpp" namespace "cudf" nogil: const scalar& rhs, binary_operator op, data_type output_type, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -75,7 +75,7 @@ cdef extern from "cudf/binaryop.hpp" namespace "cudf" nogil: const column_view& rhs, binary_operator op, data_type output_type, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -84,7 +84,7 @@ cdef extern from "cudf/binaryop.hpp" namespace "cudf" nogil: const column_view& rhs, const string& op, data_type output_type, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/column/column.pxd b/python/pylibcudf/pylibcudf/libcudf/column/column.pxd index daefd24fb7b..b22eeb1dd40 100644 --- a/python/pylibcudf/pylibcudf/libcudf/column/column.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/column/column.pxd @@ -11,7 +11,7 @@ from pylibcudf.libcudf.column.column_view cimport ( from pylibcudf.libcudf.types cimport data_type, size_type from rmm.librmm.device_buffer cimport device_buffer -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -25,13 +25,13 @@ cdef extern from "cudf/column/column.hpp" namespace "cudf" nogil: column() except +libcudf_exception_handler column( const column& other, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler column( column_view view, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/column/column_factories.pxd b/python/pylibcudf/pylibcudf/libcudf/column/column_factories.pxd index 
5e17d3b89bd..f8cf3b38ccb 100644 --- a/python/pylibcudf/pylibcudf/libcudf/column/column_factories.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/column/column_factories.pxd @@ -13,7 +13,7 @@ from pylibcudf.libcudf.types cimport ( ) from rmm.librmm.device_buffer cimport device_buffer -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -22,7 +22,7 @@ cdef extern from "cudf/column/column_factories.hpp" namespace "cudf" nogil: data_type type, size_type size, mask_state state, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -31,7 +31,7 @@ cdef extern from "cudf/column/column_factories.hpp" namespace "cudf" nogil: size_type size, device_buffer mask, size_type null_count, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -39,7 +39,7 @@ cdef extern from "cudf/column/column_factories.hpp" namespace "cudf" nogil: data_type type, size_type size, mask_state state, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] make_fixed_point_column( @@ -47,14 +47,14 @@ cdef extern from "cudf/column/column_factories.hpp" namespace "cudf" nogil: size_type size, device_buffer mask, size_type null_count, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] make_timestamp_column( data_type type, size_type size, mask_state state, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] make_timestamp_column( @@ -62,14 +62,14 @@ cdef extern from "cudf/column/column_factories.hpp" namespace "cudf" nogil: size_type size, device_buffer mask, size_type null_count, - cuda_stream_view stream, + 
cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] make_duration_column( data_type type, size_type size, mask_state state, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] make_duration_column( @@ -77,14 +77,14 @@ cdef extern from "cudf/column/column_factories.hpp" namespace "cudf" nogil: size_type size, device_buffer mask, size_type null_count, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] make_fixed_width_column( data_type type, size_type size, mask_state state, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] make_fixed_width_column( @@ -92,27 +92,27 @@ cdef extern from "cudf/column/column_factories.hpp" namespace "cudf" nogil: size_type size, device_buffer mask, size_type null_count, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] make_column_from_scalar( const scalar& s, size_type size, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] make_dictionary_from_scalar( const scalar& s, size_type size, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] make_dictionary_column( unique_ptr[column] keys_column, unique_ptr[column] indices_column, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] make_empty_column( diff --git a/python/pylibcudf/pylibcudf/libcudf/concatenate.pxd b/python/pylibcudf/pylibcudf/libcudf/concatenate.pxd index 272f452a0a0..53cadee79c9 100644 --- 
a/python/pylibcudf/pylibcudf/libcudf/concatenate.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/concatenate.pxd @@ -8,7 +8,7 @@ from pylibcudf.libcudf.table.table cimport table, table_view from pylibcudf.libcudf.utilities.span cimport host_span from rmm.librmm.device_buffer cimport device_buffer -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -24,11 +24,11 @@ cdef extern from "cudf/concatenate.hpp" namespace "cudf" nogil: cdef unique_ptr[column] concatenate( const vector[column_view] columns, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[table] concatenate( const vector[table_view] tables, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd b/python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd index 9d839835465..dd439d0d01d 100644 --- a/python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd @@ -10,7 +10,7 @@ from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport size_type from pylibcudf.libcudf.utilities.span cimport device_span from rmm.librmm.device_buffer cimport device_buffer -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -32,7 +32,7 @@ cdef extern from "cudf/contiguous_split.hpp" namespace "cudf" nogil: unique_ptr[chunked_pack] create( const table_view & input, size_t user_buffer_size, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref temp_mr, ) except +libcudf_exception_handler @@ -43,13 +43,13 @@ cdef extern from "cudf/contiguous_split.hpp" namespace "cudf" 
nogil: cdef vector[contiguous_split_result] contiguous_split ( table_view input_table, vector[size_type] splits, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef packed_columns pack ( const table_view& input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/copying.pxd b/python/pylibcudf/pylibcudf/libcudf/copying.pxd index 2c3741342e9..36c95fa777c 100644 --- a/python/pylibcudf/pylibcudf/libcudf/copying.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/copying.pxd @@ -17,7 +17,7 @@ from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport size_type from rmm.librmm.device_buffer cimport device_buffer -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref ctypedef const scalar constscalar @@ -31,7 +31,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: const table_view& source_table, const column_view& gather_map, out_of_bounds_policy policy, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -39,7 +39,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: const column_view& input, size_type offset, const scalar& fill_values, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -47,7 +47,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: const table_view& source_table, const column_view& scatter_map, const table_view& target_table, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -55,7 +55,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: const vector[reference_wrapper[constscalar]]& 
source_scalars, const column_view& indices, const table_view& target, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -71,7 +71,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: cdef unique_ptr[column] allocate_like ( const column_view& input_column, mask_allocation_policy policy, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -79,7 +79,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: const column_view& input_column, size_type size, mask_allocation_policy policy, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -93,7 +93,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: size_type input_begin, size_type input_end, size_type target_begin, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler cdef unique_ptr[column] copy_range ( @@ -102,39 +102,39 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: size_type input_begin, size_type input_end, size_type target_begin, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef vector[column_view] slice ( const column_view& input_column, vector[size_type] indices, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler cdef vector[table_view] slice ( const table_view& input_table, vector[size_type] indices, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler cdef vector[column_view] split ( const column_view& input_column, vector[size_type] splits, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler cdef vector[table_view] split ( const table_view& input_table, vector[size_type] splits, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler cdef 
unique_ptr[column] copy_if_else ( const column_view& lhs, const column_view& rhs, const column_view& boolean_mask, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -142,7 +142,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: const scalar& lhs, const column_view& rhs, const column_view& boolean_mask, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -150,7 +150,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: const column_view& lhs, const scalar& rhs, const column_view boolean_mask, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -158,7 +158,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: const scalar& lhs, const scalar& rhs, const column_view boolean_mask, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -166,7 +166,7 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: const table_view& input, const table_view& target, const column_view& boolean_mask, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -174,14 +174,14 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: const vector[reference_wrapper[constscalar]]& input, const table_view& target, const column_view& boolean_mask, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[scalar] get_element ( const column_view& input, size_type index, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/datetime.pxd b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd index a14932f8910..7db66dc1070 100644 --- 
a/python/pylibcudf/pylibcudf/libcudf/datetime.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd @@ -7,7 +7,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport scalar -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -27,7 +27,7 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: cdef unique_ptr[column] extract_datetime_component( const column_view& column, datetime_component component, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -42,54 +42,54 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: cdef unique_ptr[column] ceil_datetimes( const column_view& column, rounding_frequency freq, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] floor_datetimes( const column_view& column, rounding_frequency freq, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] round_datetimes( const column_view& column, rounding_frequency freq, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] add_calendrical_months( const column_view& timestamps, const column_view& months, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] add_calendrical_months( const column_view& timestamps, const scalar& months, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef 
unique_ptr[column] day_of_year( const column_view& column, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] is_leap_year( const column_view& column, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] last_day_of_month( const column_view& column, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] extract_quarter( const column_view& column, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] days_in_month( const column_view& column, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/detail/utilities/stream_pool.pxd b/python/pylibcudf/pylibcudf/libcudf/detail/utilities/stream_pool.pxd index 7aea4aafcd1..399a868db71 100644 --- a/python/pylibcudf/pylibcudf/libcudf/detail/utilities/stream_pool.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/detail/utilities/stream_pool.pxd @@ -1,14 +1,31 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 +from cuda.bindings.cyruntime cimport cudaStream_t from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.utilities.span cimport host_span -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +ctypedef const cudaStream_t const_cudaStream_t -cdef extern from "cudf/detail/utilities/stream_pool.hpp" namespace "cudf::detail" nogil: - cdef void join_streams( - host_span[const cuda_stream_view] streams, - cuda_stream_view stream +cdef extern from * nogil: + """ + #include <cudf/detail/utilities/stream_pool.hpp> + #include <cudf/utilities/span.hpp> + #include <rmm/cuda_stream_view.hpp> + #include <vector> + + namespace { + void join_streams_wrapper( + cudf::host_span<cudaStream_t const> streams, + cudaStream_t stream + ) { + std::vector<rmm::cuda_stream_view> stream_views(streams.begin(), streams.end()); + cudf::detail::join_streams(stream_views, stream); + } + } + """ + cdef void join_streams "join_streams_wrapper"( + host_span[const_cudaStream_t] streams, + cudaStream_t stream ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/distinct_count.pxd b/python/pylibcudf/pylibcudf/libcudf/distinct_count.pxd index 5707f34f578..2cbf79c0c17 100644 --- a/python/pylibcudf/pylibcudf/libcudf/distinct_count.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/distinct_count.pxd @@ -9,7 +9,7 @@ from pylibcudf.libcudf.types cimport ( null_policy, size_type, ) -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t cdef extern from "cudf/reduction/distinct_count.hpp" namespace "cudf" nogil: @@ -17,9 +17,9 @@ cdef extern from "cudf/reduction/distinct_count.hpp" namespace "cudf" nogil: column_view column, null_policy null_handling, nan_policy nan_handling, - cuda_stream_view stream) except +libcudf_exception_handler + cudaStream_t stream) except +libcudf_exception_handler cdef size_type distinct_count( table_view source_table, null_equality nulls_equal, - cuda_stream_view stream) except +libcudf_exception_handler + cudaStream_t stream) except +libcudf_exception_handler diff 
--git a/python/pylibcudf/pylibcudf/libcudf/filling.pxd b/python/pylibcudf/pylibcudf/libcudf/filling.pxd index ac969cb8822..e9470a828a7 100644 --- a/python/pylibcudf/pylibcudf/libcudf/filling.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/filling.pxd @@ -12,7 +12,7 @@ from pylibcudf.libcudf.scalar.scalar cimport scalar from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -22,7 +22,7 @@ cdef extern from "cudf/filling.hpp" namespace "cudf" nogil: size_type begin, size_type end, const scalar & value, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -31,20 +31,20 @@ cdef extern from "cudf/filling.hpp" namespace "cudf" nogil: size_type begin, size_type end, const scalar & value, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler cdef unique_ptr[table] repeat( const table_view & input, const column_view & count, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[table] repeat( const table_view & input, size_type count, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -52,7 +52,7 @@ cdef extern from "cudf/filling.hpp" namespace "cudf" nogil: size_type size, const scalar & init, const scalar & step, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -60,6 +60,6 @@ cdef extern from "cudf/filling.hpp" namespace "cudf" nogil: size_type n, const scalar& init, size_type months, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler 
diff --git a/python/pylibcudf/pylibcudf/libcudf/groupby.pxd b/python/pylibcudf/pylibcudf/libcudf/groupby.pxd index 5ba69a12290..b5ba1031813 100644 --- a/python/pylibcudf/pylibcudf/libcudf/groupby.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/groupby.pxd @@ -24,7 +24,7 @@ from pylibcudf.libcudf.types cimport ( sorted, ) -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref # workaround for https://github.com/cython/cython/issues/3885 @@ -67,7 +67,7 @@ cdef extern from "cudf/groupby.hpp" \ vector[aggregation_result] ] aggregate( const vector[aggregation_request]& requests, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -76,7 +76,7 @@ cdef extern from "cudf/groupby.hpp" \ vector[aggregation_result] ] scan( const vector[scan_request]& requests, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -87,19 +87,19 @@ cdef extern from "cudf/groupby.hpp" \ const table_view values, const vector[size_type] offset, const vector[reference_wrapper[constscalar]] fill_values, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler groups get_groups( table_view values, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler pair[unique_ptr[table], unique_ptr[table]] replace_nulls( const table_view& values, const vector[replace_policy] replace_policy, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/hash.pxd b/python/pylibcudf/pylibcudf/libcudf/hash.pxd index 380afc96c58..9610fa2a09f 100644 --- a/python/pylibcudf/pylibcudf/libcudf/hash.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/hash.pxd @@ -7,7 +7,7 @@ 
from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -15,64 +15,64 @@ cdef extern from "cudf/hashing.hpp" namespace "cudf::hashing" nogil: cdef unique_ptr[column] murmurhash3_x86_32( const table_view& input, const uint32_t seed, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[table] murmurhash3_x64_128( const table_view& input, const uint64_t seed, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] md5( const table_view& input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] sha1( const table_view& input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] sha224( const table_view& input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] sha256( const table_view& input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] sha384( const table_view& input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] sha512( const table_view& input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] xxhash_32( const table_view& input, const 
uint32_t seed, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] xxhash_64( const table_view& input, const uint64_t seed, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/interop.pxd b/python/pylibcudf/pylibcudf/libcudf/interop.pxd index b09524a257b..78fc455dd35 100644 --- a/python/pylibcudf/pylibcudf/libcudf/interop.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/interop.pxd @@ -12,7 +12,7 @@ from pylibcudf.libcudf.scalar.scalar cimport scalar from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -40,13 +40,13 @@ cdef extern from "cudf/interop.hpp" namespace "cudf" \ nogil: cdef unique_ptr[table] from_dlpack( const DLManagedTensor* managed_tensor, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler DLManagedTensor* to_dlpack( const table_view& input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -65,18 +65,18 @@ cdef extern from "cudf/interop.hpp" namespace "cudf::interop" \ arrow_column( ArrowSchema&& schema, ArrowArray&& array, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler arrow_column( ArrowSchema&& schema, ArrowDeviceArray&& array, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler arrow_column( ArrowArrayStream&& stream, - cuda_stream_view cuda_stream, + cudaStream_t cuda_stream, device_async_resource_ref mr ) except +libcudf_exception_handler column_view view() except 
+libcudf_exception_handler @@ -84,13 +84,13 @@ cdef extern from "cudf/interop.hpp" namespace "cudf::interop" \ cdef cppclass arrow_table: arrow_table( ArrowArrayStream&& stream, - cuda_stream_view cuda_stream, + cudaStream_t cuda_stream, device_async_resource_ref mr ) except +libcudf_exception_handler arrow_table( ArrowSchema&& schema, ArrowDeviceArray&& array, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler table_view view() except +libcudf_exception_handler @@ -135,7 +135,7 @@ cdef extern from *: template <typename ViewType> ArrowArray* to_arrow_host_raw( ViewType const& obj, - rmm::cuda_stream_view stream, + cudaStream_t stream, rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) { ArrowArray *arr = new ArrowArray(); auto device_arr = cudf::to_arrow_host(obj, stream, mr); @@ -175,7 +175,7 @@ cdef extern from *: ArrowDeviceArray* to_arrow_device_raw( ViewType const& obj, PyObject* owner, - rmm::cuda_stream_view stream = cudf::get_default_stream(), + cudaStream_t stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) { auto tmp = cudf::to_arrow_device(obj, stream, mr); @@ -222,11 +222,11 @@ cdef extern from *: ) except +libcudf_exception_handler nogil cdef ArrowArray* to_arrow_host_raw( const table_view& tbl, - cuda_stream_view stream, + cudaStream_t stream, ) except +libcudf_exception_handler nogil cdef ArrowArray* to_arrow_host_raw( const column_view& tbl, - cuda_stream_view stream, + cudaStream_t stream, ) except +libcudf_exception_handler nogil cdef void release_arrow_array_raw( ArrowArray * diff --git a/python/pylibcudf/pylibcudf/libcudf/io/avro.pxd b/python/pylibcudf/pylibcudf/libcudf/io/avro.pxd index ff84ad922fc..521147218bf 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/avro.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/avro.pxd @@ -5,7 +5,7 @@ from libcpp.string cimport string from libcpp.vector cimport vector from 
pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -49,6 +49,6 @@ cdef extern from "cudf/io/avro.hpp" namespace "cudf::io" nogil: cdef cudf_io_types.table_with_metadata read_avro( avro_reader_options &options, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/io/csv.pxd b/python/pylibcudf/pylibcudf/libcudf/io/csv.pxd index 31f626b7d9d..45987fbedcd 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/csv.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/csv.pxd @@ -10,7 +10,7 @@ from libcpp.string cimport string from libcpp.vector cimport vector from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.types cimport data_type, size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref cdef extern from "cudf/io/csv.hpp" \ @@ -263,7 +263,7 @@ cdef extern from "cudf/io/csv.hpp" \ cdef cudf_io_types.table_with_metadata read_csv( csv_reader_options &options, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -337,7 +337,7 @@ cdef extern from "cudf/io/csv.hpp" \ cdef void write_csv( csv_writer_options args, - cuda_stream_view stream, + cudaStream_t stream, ) except +libcudf_exception_handler cdef bool is_supported_write_csv( diff --git a/python/pylibcudf/pylibcudf/libcudf/io/hybrid_scan.pxd b/python/pylibcudf/pylibcudf/libcudf/io/hybrid_scan.pxd index 9f7462f6b86..8578908fc43 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/hybrid_scan.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/hybrid_scan.pxd @@ -15,7 
+15,7 @@ from pylibcudf.libcudf.io.text cimport byte_range_info from pylibcudf.libcudf.io.types cimport table_with_metadata from pylibcudf.libcudf.types cimport size_type from pylibcudf.libcudf.utilities.span cimport device_span, host_span -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref ctypedef const uint8_t const_uint8_t @@ -61,7 +61,7 @@ cdef extern from "cudf/io/experimental/hybrid_scan.hpp" \ vector[size_type] filter_row_groups_with_stats( host_span[const_size_type] row_group_indices, const parquet_reader_options& options, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler pair[ @@ -75,20 +75,20 @@ cdef extern from "cudf/io/experimental/hybrid_scan.hpp" \ host_span[const_device_span_const_uint8_t] dictionary_page_data, host_span[const_size_type] row_group_indices, const parquet_reader_options& options, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler vector[size_type] filter_row_groups_with_bloom_filters( host_span[const_device_span_const_uint8_t] bloom_filter_data, host_span[const_size_type] row_group_indices, const parquet_reader_options& options, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler unique_ptr[column] build_row_mask_with_page_index_stats( host_span[const_size_type] row_group_indices, const parquet_reader_options& options, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -103,7 +103,7 @@ cdef extern from "cudf/io/experimental/hybrid_scan.hpp" \ mutable_column_view& row_mask, use_data_page_mask mask_data_pages, const parquet_reader_options& options, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -118,7 +118,7 @@ cdef extern from "cudf/io/experimental/hybrid_scan.hpp" \ const 
column_view& row_mask, use_data_page_mask mask_data_pages, const parquet_reader_options& options, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -131,7 +131,7 @@ cdef extern from "cudf/io/experimental/hybrid_scan.hpp" \ host_span[const_size_type] row_group_indices, host_span[const_device_span_const_uint8_t] column_chunk_data, const parquet_reader_options& options, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -143,7 +143,7 @@ cdef extern from "cudf/io/experimental/hybrid_scan.hpp" \ use_data_page_mask mask_data_pages, host_span[const_device_span_const_uint8_t] column_chunk_data, const parquet_reader_options& options, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -159,7 +159,7 @@ cdef extern from "cudf/io/experimental/hybrid_scan.hpp" \ use_data_page_mask mask_data_pages, host_span[const_device_span_const_uint8_t] column_chunk_data, const parquet_reader_options& options, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/io/json.pxd b/python/pylibcudf/pylibcudf/libcudf/io/json.pxd index 6d5a506d18a..af3b1e59bd1 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/json.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/json.pxd @@ -11,7 +11,7 @@ from libcpp.string cimport string from libcpp.vector cimport vector from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.types cimport data_type, size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -158,7 +158,7 @@ cdef extern from "cudf/io/json.hpp" namespace "cudf::io" nogil: cdef cudf_io_types.table_with_metadata read_json( 
json_reader_options &options, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -240,7 +240,7 @@ cdef extern from "cudf/io/json.hpp" namespace "cudf::io" nogil: cdef cudf_io_types.table_with_metadata write_json( json_writer_options &options, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler cdef bool is_supported_write_json( diff --git a/python/pylibcudf/pylibcudf/libcudf/io/orc.pxd b/python/pylibcudf/pylibcudf/libcudf/io/orc.pxd index 0455c0fa1b1..bea5c1e06f0 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/orc.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/orc.pxd @@ -11,7 +11,7 @@ from libcpp.string cimport string from libcpp.vector cimport vector from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.types cimport data_type, size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -80,7 +80,7 @@ cdef extern from "cudf/io/orc.hpp" namespace "cudf::io" nogil: cdef cudf_io_types.table_with_metadata read_orc( orc_reader_options opts, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr, ) except +libcudf_exception_handler @@ -150,7 +150,7 @@ cdef extern from "cudf/io/orc.hpp" namespace "cudf::io" nogil: cdef void write_orc( orc_writer_options options, - cuda_stream_view stream, + cudaStream_t stream, ) except +libcudf_exception_handler cdef bool is_supported_read_orc( @@ -228,7 +228,7 @@ cdef extern from "cudf/io/orc.hpp" namespace "cudf::io" nogil: orc_chunked_writer() except +libcudf_exception_handler orc_chunked_writer( chunked_orc_writer_options args, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler orc_chunked_writer& write( cudf_table_view.table_view table_, diff --git a/python/pylibcudf/pylibcudf/libcudf/io/orc_metadata.pxd 
b/python/pylibcudf/pylibcudf/libcudf/io/orc_metadata.pxd index e0c67e14e1d..f365a45b34a 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/orc_metadata.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/orc_metadata.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libc.stdint cimport int32_t, int64_t, uint32_t, uint64_t from libcpp cimport bool @@ -8,7 +8,7 @@ from libcpp.vector cimport vector from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.io cimport types as cudf_io_types from pylibcudf.variant cimport monostate, variant -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t cdef extern from "cudf/io/orc_metadata.hpp" \ @@ -71,5 +71,5 @@ cdef extern from "cudf/io/orc_metadata.hpp" \ cdef parsed_orc_statistics read_parsed_orc_statistics( const cudf_io_types.source_info& src_info, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/io/parquet.pxd b/python/pylibcudf/pylibcudf/libcudf/io/parquet.pxd index dc0dff818a3..00b62e55514 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/parquet.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/parquet.pxd @@ -22,7 +22,7 @@ from pylibcudf.libcudf.io.types cimport ( ) from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport data_type, size_type, type_id -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -124,7 +124,7 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil: cdef table_with_metadata read_parquet( parquet_reader_options args, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except 
+libcudf_exception_handler @@ -256,7 +256,7 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil: cdef unique_ptr[vector[uint8_t]] write_parquet( parquet_writer_options options, - cuda_stream_view stream, + cudaStream_t stream, ) except +libcudf_exception_handler cdef bool is_supported_read_parquet( @@ -288,7 +288,7 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil: chunked_parquet_writer() except +libcudf_exception_handler chunked_parquet_writer( const chunked_parquet_writer_options& args, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler chunked_parquet_writer& write( const table_view& table_, @@ -303,14 +303,14 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil: chunked_parquet_reader( size_t chunk_read_limit, const parquet_reader_options& options, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler chunked_parquet_reader( size_t chunk_read_limit, size_t pass_read_limit, const parquet_reader_options& options, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler bool has_next() except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/io/text.pxd b/python/pylibcudf/pylibcudf/libcudf/io/text.pxd index 77552a80cfd..7152e5d0afb 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/text.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/text.pxd @@ -6,7 +6,7 @@ from libcpp.memory cimport unique_ptr from libcpp.string cimport string from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -63,6 +63,6 @@ cdef extern from "cudf/io/text/multibyte_split.hpp" \ data_chunk_source source, string delimiter, 
parse_options options, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/io/timezone.pxd b/python/pylibcudf/pylibcudf/libcudf/io/timezone.pxd index 557e8856b28..45cfb4f15da 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/timezone.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/timezone.pxd @@ -6,7 +6,7 @@ from libcpp.optional cimport optional from libcpp.string cimport string from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.table.table cimport table -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -14,6 +14,6 @@ cdef extern from "cudf/timezone.hpp" namespace "cudf" nogil: unique_ptr[table] make_timezone_transition_table( optional[string] tzif_dir, string timezone_name, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/join.pxd b/python/pylibcudf/pylibcudf/libcudf/join.pxd index 06a7d497ad5..d13bf245119 100644 --- a/python/pylibcudf/pylibcudf/libcudf/join.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/join.pxd @@ -13,7 +13,7 @@ from pylibcudf.libcudf.expressions cimport expression from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport null_equality, size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref from rmm.librmm.device_uvector cimport device_uvector @@ -28,7 +28,7 @@ cdef extern from "cudf/join/join.hpp" namespace "cudf" nogil: const table_view left_keys, const table_view right_keys, null_equality nulls_equal, - cuda_stream_view stream, + cudaStream_t 
stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -36,7 +36,7 @@ cdef extern from "cudf/join/join.hpp" namespace "cudf" nogil: const table_view left_keys, const table_view right_keys, null_equality nulls_equal, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -44,7 +44,7 @@ cdef extern from "cudf/join/join.hpp" namespace "cudf" nogil: const table_view left_keys, const table_view right_keys, null_equality nulls_equal, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -52,7 +52,7 @@ cdef extern from "cudf/join/join.hpp" namespace "cudf" nogil: const table_view left_keys, const table_view right_keys, null_equality nulls_equal, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -60,7 +60,7 @@ cdef extern from "cudf/join/join.hpp" namespace "cudf" nogil: const table_view left_keys, const table_view right_keys, null_equality nulls_equal, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -68,14 +68,14 @@ cdef extern from "cudf/join/join.hpp" namespace "cudf" nogil: const table_view left_keys, const table_view right_keys, null_equality nulls_equal, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[table] cross_join( const table_view left, const table_view right, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -84,7 +84,7 @@ cdef extern from "cudf/join/conditional_join.hpp" namespace "cudf" nogil: const table_view left, const table_view right, const expression binary_predicate, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -93,7 +93,7 @@ cdef extern from 
"cudf/join/conditional_join.hpp" namespace "cudf" nogil: const table_view right, const expression binary_predicate, optional[size_t] output_size, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -101,7 +101,7 @@ cdef extern from "cudf/join/conditional_join.hpp" namespace "cudf" nogil: const table_view left, const table_view right, const expression binary_predicate, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -110,7 +110,7 @@ cdef extern from "cudf/join/conditional_join.hpp" namespace "cudf" nogil: const table_view right, const expression binary_predicate, optional[size_t] output_size, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -118,7 +118,7 @@ cdef extern from "cudf/join/conditional_join.hpp" namespace "cudf" nogil: const table_view left, const table_view right, const expression binary_predicate, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -126,7 +126,7 @@ cdef extern from "cudf/join/conditional_join.hpp" namespace "cudf" nogil: const table_view left, const table_view right, const expression binary_predicate, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -135,7 +135,7 @@ cdef extern from "cudf/join/conditional_join.hpp" namespace "cudf" nogil: const table_view right, const expression binary_predicate, optional[size_t] output_size, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -143,7 +143,7 @@ cdef extern from "cudf/join/conditional_join.hpp" namespace "cudf" nogil: const table_view left, const table_view right, const expression binary_predicate, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except 
+libcudf_exception_handler @@ -152,7 +152,7 @@ cdef extern from "cudf/join/conditional_join.hpp" namespace "cudf" nogil: const table_view right, const expression binary_predicate, optional[size_t] output_size, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -165,7 +165,7 @@ cdef extern from "cudf/join/mixed_join.hpp" namespace "cudf" nogil: const expression binary_predicate, null_equality compare_nulls, output_size_data_type output_size_data, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -177,7 +177,7 @@ cdef extern from "cudf/join/mixed_join.hpp" namespace "cudf" nogil: const expression binary_predicate, null_equality compare_nulls, output_size_data_type output_size_data, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -189,7 +189,7 @@ cdef extern from "cudf/join/mixed_join.hpp" namespace "cudf" nogil: const expression binary_predicate, null_equality compare_nulls, output_size_data_type output_size_data, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -200,7 +200,7 @@ cdef extern from "cudf/join/mixed_join.hpp" namespace "cudf" nogil: const table_view right_conditional, const expression binary_predicate, null_equality compare_nulls, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -211,7 +211,7 @@ cdef extern from "cudf/join/mixed_join.hpp" namespace "cudf" nogil: const table_view right_conditional, const expression binary_predicate, null_equality compare_nulls, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -225,21 +225,21 @@ cdef extern from "cudf/join/filtered_join.hpp" namespace "cudf" nogil: filtered_join( const table_view build, null_equality 
compare_nulls, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler filtered_join( const table_view build, null_equality compare_nulls, double load_factor, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler gather_map_type semi_join( const table_view probe, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler gather_map_type anti_join( const table_view probe, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/json.pxd b/python/pylibcudf/pylibcudf/libcudf/json.pxd index 39899490cac..bb606b86b33 100644 --- a/python/pylibcudf/pylibcudf/libcudf/json.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/json.pxd @@ -8,7 +8,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport scalar, string_scalar -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -30,6 +30,6 @@ cdef extern from "cudf/json/json.hpp" namespace "cudf" nogil: column_view col, string_scalar json_path, get_json_object_options options, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/labeling.pxd b/python/pylibcudf/pylibcudf/libcudf/labeling.pxd index ad9611511dd..0b2c1651714 100644 --- a/python/pylibcudf/pylibcudf/libcudf/labeling.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/labeling.pxd @@ -6,7 +6,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view 
+from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -21,6 +21,6 @@ cdef extern from "cudf/labeling/label_bins.hpp" namespace "cudf" nogil: inclusive left_inclusive, const column_view &right_edges, inclusive right_inclusive, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/combine.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/combine.pxd index 66e90dcd66a..310d166df59 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/combine.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/combine.pxd @@ -7,7 +7,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.table.table_view cimport table_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -21,19 +21,19 @@ cdef extern from "cudf/lists/combine.hpp" namespace \ cdef unique_ptr[column] concatenate_rows( const table_view input_table, concatenate_null_policy null_policy, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] concatenate_list_elements( const table_view input_table, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] concatenate_list_elements( const column_view input_table, concatenate_null_policy null_policy, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/contains.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/contains.pxd index efb2d760366..3736e42b32d 
100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/contains.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/contains.pxd @@ -7,7 +7,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view from pylibcudf.libcudf.scalar.scalar cimport scalar -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -20,20 +20,20 @@ cdef extern from "cudf/lists/contains.hpp" namespace "cudf::lists" nogil: cdef unique_ptr[column] contains( const lists_column_view& lists, const scalar& search_key, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] contains( const lists_column_view& lists, const column_view& search_keys, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] contains_nulls( const lists_column_view& lists, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -41,7 +41,7 @@ cdef extern from "cudf/lists/contains.hpp" namespace "cudf::lists" nogil: const lists_column_view& lists, const scalar& search_key, duplicate_find_option find_option, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -49,6 +49,6 @@ cdef extern from "cudf/lists/contains.hpp" namespace "cudf::lists" nogil: const lists_column_view& lists, const column_view& search_keys, duplicate_find_option find_option, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/count_elements.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/count_elements.pxd 
index 6203bafdc38..6fa64c8b291 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/count_elements.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/count_elements.pxd @@ -4,13 +4,13 @@ from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref cdef extern from "cudf/lists/count_elements.hpp" namespace "cudf::lists" nogil: cdef unique_ptr[column] count_elements( const lists_column_view&, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/explode.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/explode.pxd index b31d3a7cdca..fa15fb1eeef 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/explode.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/explode.pxd @@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -13,6 +13,6 @@ cdef extern from "cudf/lists/explode.hpp" namespace "cudf" nogil: cdef unique_ptr[table] explode_outer( const table_view, size_type explode_column_idx, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/extract.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/extract.pxd index c82a9029311..66a07f41e38 100644 --- 
a/python/pylibcudf/pylibcudf/libcudf/lists/extract.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/extract.pxd @@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column, column_view from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -13,12 +13,12 @@ cdef extern from "cudf/lists/extract.hpp" namespace "cudf::lists" nogil: cdef unique_ptr[column] extract_list_element( const lists_column_view&, size_type, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] extract_list_element( const lists_column_view&, const column_view&, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/filling.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/filling.pxd index 11cc19b86f9..1e55916d299 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/filling.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/filling.pxd @@ -4,7 +4,7 @@ from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -12,7 +12,7 @@ cdef extern from "cudf/lists/filling.hpp" namespace "cudf::lists" nogil: cdef unique_ptr[column] sequences( const column_view& starts, const column_view& sizes, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref 
mr ) except +libcudf_exception_handler @@ -20,6 +20,6 @@ cdef extern from "cudf/lists/filling.hpp" namespace "cudf::lists" nogil: const column_view& starts, const column_view& steps, const column_view& sizes, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/gather.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/gather.pxd index bae67a96b0d..b7212bea51e 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/gather.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/gather.pxd @@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.copying cimport out_of_bounds_policy from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref cdef extern from "cudf/lists/gather.hpp" namespace "cudf::lists" nogil: @@ -13,6 +13,6 @@ cdef extern from "cudf/lists/gather.hpp" namespace "cudf::lists" nogil: const lists_column_view& source_column, const lists_column_view& gather_map_list, out_of_bounds_policy bounds_policy, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/lists_column_view.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/lists_column_view.pxd index fe1630c1728..69a6c80f242 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/lists_column_view.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/lists_column_view.pxd @@ -1,6 +1,6 @@ # SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column_view cimport ( @@ -26,7 +26,7 @@ cdef extern from "cudf/lists/lists_column_view.hpp" namespace "cudf" nogil: column_view offsets() except +libcudf_exception_handler column_view child() except +libcudf_exception_handler column_view get_sliced_child( - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler cdef enum: diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/reverse.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/reverse.pxd index f831024ec82..e60c8acbb38 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/reverse.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/reverse.pxd @@ -4,13 +4,13 @@ from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref cdef extern from "cudf/lists/reverse.hpp" namespace "cudf::lists" nogil: cdef unique_ptr[column] reverse( const lists_column_view& lists_column, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/set_operations.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/set_operations.pxd index 5e02d11d95a..b56caa9adb5 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/set_operations.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/set_operations.pxd @@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column 
from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view from pylibcudf.libcudf.types cimport nan_equality, null_equality -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -15,7 +15,7 @@ cdef extern from "cudf/lists/set_operations.hpp" namespace "cudf::lists" nogil: const lists_column_view& rhs, null_equality nulls_equal, nan_equality nans_equal, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -24,7 +24,7 @@ cdef extern from "cudf/lists/set_operations.hpp" namespace "cudf::lists" nogil: const lists_column_view& rhs, null_equality nulls_equal, nan_equality nans_equal, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -33,7 +33,7 @@ cdef extern from "cudf/lists/set_operations.hpp" namespace "cudf::lists" nogil: const lists_column_view& rhs, null_equality nulls_equal, nan_equality nans_equal, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -42,6 +42,6 @@ cdef extern from "cudf/lists/set_operations.hpp" namespace "cudf::lists" nogil: const lists_column_view& rhs, null_equality nulls_equal, nan_equality nans_equal, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/sorting.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/sorting.pxd index 4036ccec6c5..9899591d6d1 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/sorting.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/sorting.pxd @@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view from 
pylibcudf.libcudf.types cimport null_order, order -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -14,7 +14,7 @@ cdef extern from "cudf/lists/sorting.hpp" namespace "cudf::lists" nogil: const lists_column_view source_column, order column_order, null_order null_precedence, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -22,6 +22,6 @@ cdef extern from "cudf/lists/sorting.hpp" namespace "cudf::lists" nogil: const lists_column_view source_column, order column_order, null_order null_precedence, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd index dec32027402..0187642e0c7 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd @@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view from pylibcudf.libcudf.stream_compaction cimport duplicate_keep_option from pylibcudf.libcudf.types cimport nan_equality, null_equality -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -15,7 +15,7 @@ cdef extern from "cudf/lists/stream_compaction.hpp" \ cdef unique_ptr[column] apply_boolean_mask( const lists_column_view& lists_column, const lists_column_view& boolean_mask, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -24,6 +24,6 @@ cdef extern from "cudf/lists/stream_compaction.hpp" \ null_equality nulls_equal, 
nan_equality nans_equal, duplicate_keep_option keep_option, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/merge.pxd b/python/pylibcudf/pylibcudf/libcudf/merge.pxd index 860e4263c1c..f4389ac991a 100644 --- a/python/pylibcudf/pylibcudf/libcudf/merge.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/merge.pxd @@ -7,7 +7,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -17,6 +17,6 @@ cdef extern from "cudf/merge.hpp" namespace "cudf" nogil: vector[libcudf_types.size_type] key_cols, vector[libcudf_types.order] column_order, vector[libcudf_types.null_order] null_precedence, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/null_mask.pxd b/python/pylibcudf/pylibcudf/libcudf/null_mask.pxd index 1b1b3001981..330c69f0579 100644 --- a/python/pylibcudf/pylibcudf/libcudf/null_mask.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/null_mask.pxd @@ -8,14 +8,14 @@ from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport bitmask_type, mask_state, size_type from rmm.librmm.device_buffer cimport device_buffer -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref cdef extern from "cudf/null_mask.hpp" namespace "cudf" nogil: cdef device_buffer copy_bitmask ( column_view view, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -23,7 +23,7 @@ 
cdef extern from "cudf/null_mask.hpp" namespace "cudf" nogil: const bitmask_type* null_mask, size_type begin_bit, size_type end_bit, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -35,19 +35,19 @@ cdef extern from "cudf/null_mask.hpp" namespace "cudf" nogil: cdef device_buffer create_null_mask ( size_type size, mask_state state, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef pair[device_buffer, size_type] bitmask_and( table_view view, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) cdef pair[device_buffer, size_type] bitmask_or( table_view view, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) @@ -55,12 +55,12 @@ cdef extern from "cudf/null_mask.hpp" namespace "cudf" nogil: const bitmask_type * bitmask, size_type start, size_type stop, - cuda_stream_view stream + cudaStream_t stream ) cdef size_type index_of_first_set_bit( const bitmask_type * bitmask, size_type start, size_type stop, - cuda_stream_view stream + cudaStream_t stream ) diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/byte_pair_encode.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/byte_pair_encode.pxd index eca30faa630..94a7fe3db9d 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/byte_pair_encode.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/byte_pair_encode.pxd @@ -6,7 +6,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -17,7 +17,7 @@ cdef extern from "nvtext/byte_pair_encoding.hpp" namespace "nvtext" nogil: 
cdef unique_ptr[bpe_merge_pairs] load_merge_pairs( const column_view &merge_pairs, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -25,6 +25,6 @@ cdef extern from "nvtext/byte_pair_encoding.hpp" namespace "nvtext" nogil: const column_view &strings, const bpe_merge_pairs &merge_pairs, const string_scalar &separator, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/deduplicate.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/deduplicate.pxd index 26e39c963d2..82a8581ea0a 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/deduplicate.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/deduplicate.pxd @@ -7,7 +7,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref from rmm.librmm.device_uvector cimport device_uvector @@ -19,7 +19,7 @@ cdef extern from "nvtext/deduplicate.hpp" namespace "nvtext" nogil: cdef suffix_array_type build_suffix_array( column_view source_strings, size_type min_width, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -27,7 +27,7 @@ cdef extern from "nvtext/deduplicate.hpp" namespace "nvtext" nogil: column_view source_strings, column_view indices, size_type min_width, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -37,6 +37,6 @@ cdef extern from "nvtext/deduplicate.hpp" namespace "nvtext" nogil: column_view input2, column_view indices2, size_type min_width, - 
cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/edit_distance.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/edit_distance.pxd index b7f3e97a4b0..f3c10c11abf 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/edit_distance.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/edit_distance.pxd @@ -6,7 +6,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -15,12 +15,12 @@ cdef extern from "nvtext/edit_distance.hpp" namespace "nvtext" nogil: cdef unique_ptr[column] edit_distance( const column_view & strings, const column_view & targets, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] edit_distance_matrix( const column_view & strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/generate_ngrams.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/generate_ngrams.pxd index 43619d356f6..3d97aaf93b1 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/generate_ngrams.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/generate_ngrams.pxd @@ -7,7 +7,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport 
device_async_resource_ref @@ -17,14 +17,14 @@ cdef extern from "nvtext/generate_ngrams.hpp" namespace "nvtext" nogil: const column_view &strings, size_type ngrams, const string_scalar & separator, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] generate_character_ngrams( const column_view &strings, size_type ngrams, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -32,6 +32,6 @@ cdef extern from "nvtext/generate_ngrams.hpp" namespace "nvtext" nogil: const column_view &strings, size_type ngrams, uint32_t seed, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/jaccard.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/jaccard.pxd index de45913fbb5..0a3ba52a3d5 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/jaccard.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/jaccard.pxd @@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -15,6 +15,6 @@ cdef extern from "nvtext/jaccard.hpp" namespace "nvtext" nogil: const column_view &input1, const column_view &input2, size_type width, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/minhash.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/minhash.pxd index eaf0b8c63b1..94083fbafd3 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/minhash.pxd +++ 
b/python/pylibcudf/pylibcudf/libcudf/nvtext/minhash.pxd @@ -7,7 +7,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport numeric_scalar from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -19,7 +19,7 @@ cdef extern from "nvtext/minhash.hpp" namespace "nvtext" nogil: const column_view &a, const column_view &b, const size_type width, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -29,7 +29,7 @@ cdef extern from "nvtext/minhash.hpp" namespace "nvtext" nogil: const column_view &a, const column_view &b, const size_type width, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -39,7 +39,7 @@ cdef extern from "nvtext/minhash.hpp" namespace "nvtext" nogil: const uint32_t seed, const column_view &a, const column_view &b, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -49,6 +49,6 @@ cdef extern from "nvtext/minhash.hpp" namespace "nvtext" nogil: const uint64_t seed, const column_view &a, const column_view &b, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/ngrams_tokenize.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/ngrams_tokenize.pxd index 41d153b99a0..6e4cc18e17f 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/ngrams_tokenize.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/ngrams_tokenize.pxd @@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from 
pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -17,6 +17,6 @@ cdef extern from "nvtext/ngrams_tokenize.hpp" namespace "nvtext" nogil: size_type ngrams, const string_scalar & delimiter, const string_scalar & separator, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/normalize.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/normalize.pxd index 25678d12091..0184c1d8785 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/normalize.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/normalize.pxd @@ -5,7 +5,7 @@ from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -13,7 +13,7 @@ cdef extern from "nvtext/normalize.hpp" namespace "nvtext" nogil: cdef unique_ptr[column] normalize_spaces( const column_view & strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -23,13 +23,13 @@ cdef extern from "nvtext/normalize.hpp" namespace "nvtext" nogil: cdef unique_ptr[character_normalizer] create_character_normalizer( bool do_lower_case, const column_view & strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] normalize_characters( const column_view & strings, const character_normalizer & normalizer, - cuda_stream_view stream, + 
cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/replace.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/replace.pxd index d14ce40b168..628181b3f89 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/replace.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/replace.pxd @@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -17,7 +17,7 @@ cdef extern from "nvtext/replace.hpp" namespace "nvtext" nogil: const column_view & targets, const column_view & replacements, const string_scalar & delimiter, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -26,6 +26,6 @@ cdef extern from "nvtext/replace.hpp" namespace "nvtext" nogil: size_type min_token_length, const string_scalar & replacement, const string_scalar & delimiter, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/stemmer.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/stemmer.pxd index e6e2866008b..2088440749a 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/stemmer.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/stemmer.pxd @@ -7,7 +7,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t 
from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -18,7 +18,7 @@ cdef extern from "nvtext/stemmer.hpp" namespace "nvtext" nogil: cdef unique_ptr[column] porter_stemmer_measure( const column_view & strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -26,12 +26,12 @@ cdef extern from "nvtext/stemmer.hpp" namespace "nvtext" nogil: column_view source_strings, letter_type ltype, size_type character_index, - cuda_stream_view stream) except +libcudf_exception_handler + cudaStream_t stream) except +libcudf_exception_handler cdef unique_ptr[column] is_letter( column_view source_strings, letter_type ltype, column_view indices, - cuda_stream_view stream) except +libcudf_exception_handler + cudaStream_t stream) except +libcudf_exception_handler ctypedef int32_t underlying_type_t_letter_type diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/tokenize.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/tokenize.pxd index 3b7ae2e9b6f..1c6eccb0476 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/tokenize.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/tokenize.pxd @@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -15,34 +15,34 @@ cdef extern from "nvtext/tokenize.hpp" namespace "nvtext" nogil: cdef unique_ptr[column] tokenize( const column_view & strings, const string_scalar & delimiter, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] tokenize( const column_view & strings, const column_view & delimiters, - cuda_stream_view stream, 
+ cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] count_tokens( const column_view & strings, const string_scalar & delimiter, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] count_tokens( const column_view & strings, const column_view & delimiters, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] character_tokenize( const column_view & strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -50,7 +50,7 @@ cdef extern from "nvtext/tokenize.hpp" namespace "nvtext" nogil: const column_view & strings, const column_view & row_indices, const string_scalar & separator, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -59,7 +59,7 @@ cdef extern from "nvtext/tokenize.hpp" namespace "nvtext" nogil: cdef unique_ptr[tokenize_vocabulary] load_vocabulary( const column_view & strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -68,6 +68,6 @@ cdef extern from "nvtext/tokenize.hpp" namespace "nvtext" nogil: const tokenize_vocabulary & vocabulary, const string_scalar & delimiter, size_type default_id, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/wordpiece_tokenize.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/wordpiece_tokenize.pxd index a4bcde47f80..0c43f0d21ff 100644 --- a/python/pylibcudf/pylibcudf/libcudf/nvtext/wordpiece_tokenize.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/wordpiece_tokenize.pxd @@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from 
pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -16,7 +16,7 @@ cdef extern from "nvtext/wordpiece_tokenize.hpp" namespace "nvtext" nogil: cdef unique_ptr[wordpiece_vocabulary] load_wordpiece_vocabulary( const column_view & strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -24,6 +24,6 @@ cdef extern from "nvtext/wordpiece_tokenize.hpp" namespace "nvtext" nogil: const column_view & strings, const wordpiece_vocabulary & vocabulary, size_type max_tokens_per_row, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd b/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd index e7c0f496de8..2e0c978f77d 100644 --- a/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd @@ -11,7 +11,7 @@ from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.hash cimport DEFAULT_HASH_SEED from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref cdef extern from "cudf/partitioning.hpp" namespace "cudf" nogil: @@ -28,7 +28,7 @@ cdef extern from "cudf/partitioning.hpp" namespace "cudf" nogil: int num_partitions, hash_id hash_function, uint32_t seed, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -39,7 +39,7 @@ cdef extern from 
"cudf/partitioning.hpp" namespace "cudf" nogil: int num_partitions, hash_id hash_function, uint32_t seed, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -48,7 +48,7 @@ cdef extern from "cudf/partitioning.hpp" namespace "cudf" nogil: const table_view& t, const column_view& partition_map, int num_partitions, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -57,6 +57,6 @@ cdef extern from "cudf/partitioning.hpp" namespace "cudf" nogil: const table_view& input, int num_partitions, int start_partition, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/quantiles.pxd b/python/pylibcudf/pylibcudf/libcudf/quantiles.pxd index 823bd34e4a7..8bc636da998 100644 --- a/python/pylibcudf/pylibcudf/libcudf/quantiles.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/quantiles.pxd @@ -15,7 +15,7 @@ from pylibcudf.libcudf.types cimport ( order_info, sorted, ) -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -27,7 +27,7 @@ cdef extern from "cudf/quantiles.hpp" namespace "cudf" nogil: interpolation interp, column_view ordered_indices, bool exact, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -38,6 +38,6 @@ cdef extern from "cudf/quantiles.hpp" namespace "cudf" nogil: sorted is_input_sorted, vector[order] column_order, vector[null_order] null_precedence, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/reduce.pxd b/python/pylibcudf/pylibcudf/libcudf/reduce.pxd index 9da4159d0c1..5fb383149a7 100644 --- 
a/python/pylibcudf/pylibcudf/libcudf/reduce.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/reduce.pxd @@ -11,7 +11,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport scalar from pylibcudf.libcudf.types cimport data_type, null_policy -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref ctypedef const scalar constscalar @@ -22,7 +22,7 @@ cdef extern from "cudf/reduction.hpp" namespace "cudf" nogil: const reduce_aggregation& agg, data_type output_type, optional[reference_wrapper[constscalar]] init, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -35,13 +35,13 @@ cdef extern from "cudf/reduction.hpp" namespace "cudf" nogil: const scan_aggregation& agg, scan_type inclusive, null_policy null_handling, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef pair[unique_ptr[scalar], unique_ptr[scalar]] minmax( const column_view& col, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/replace.pxd b/python/pylibcudf/pylibcudf/libcudf/replace.pxd index 35078b64ee3..4821a13924c 100644 --- a/python/pylibcudf/pylibcudf/libcudf/replace.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/replace.pxd @@ -9,7 +9,7 @@ from pylibcudf.libcudf.column.column_view cimport ( mutable_column_view, ) from pylibcudf.libcudf.scalar.scalar cimport scalar -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -22,47 +22,47 @@ cdef extern from "cudf/replace.hpp" namespace "cudf" nogil: cdef unique_ptr[column] 
replace_nulls( column_view source_column, column_view replacement_column, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] replace_nulls( column_view source_column, scalar replacement, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] replace_nulls( column_view source_column, replace_policy replace_policy, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] find_and_replace_all( column_view source_column, column_view values_to_replace, column_view replacement_values, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] clamp( column_view source_column, scalar lo, scalar lo_replace, scalar hi, scalar hi_replace, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] clamp( column_view source_column, scalar lo, scalar hi, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] normalize_nans_and_zeros( column_view source_column, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef void normalize_nans_and_zeros( mutable_column_view source_column, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/reshape.pxd b/python/pylibcudf/pylibcudf/libcudf/reshape.pxd index 598e148d643..beda4ec09fc 100644 --- a/python/pylibcudf/pylibcudf/libcudf/reshape.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/reshape.pxd @@ -8,7 +8,7 @@ from pylibcudf.libcudf.table.table_view cimport table_view from 
pylibcudf.libcudf.types cimport size_type, data_type from pylibcudf.libcudf.utilities.span cimport device_span -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref cdef extern from "cuda/functional" namespace "cuda::std": @@ -19,17 +19,17 @@ cdef extern from "cuda/functional" namespace "cuda::std": cdef extern from "cudf/reshape.hpp" namespace "cudf" nogil: cdef unique_ptr[column] interleave_columns( table_view source_table, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[table] tile( table_view source_table, size_type count, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef void table_to_array( table_view input_table, device_span[byte] output, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/rolling.pxd b/python/pylibcudf/pylibcudf/libcudf/rolling.pxd index 6ea400f92d3..69cdbd6f396 100644 --- a/python/pylibcudf/pylibcudf/libcudf/rolling.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/rolling.pxd @@ -12,7 +12,7 @@ from pylibcudf.libcudf.scalar.scalar cimport scalar from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport data_type, null_order, order, size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -44,7 +44,7 @@ cdef extern from "cudf/rolling.hpp" namespace "cudf" nogil: range_window_type preceding, range_window_type following, vector[rolling_request]& requests, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -54,7 +54,7 
@@ cdef extern from "cudf/rolling.hpp" namespace "cudf" nogil: column_view following_window, size_type min_periods, rolling_aggregation& agg, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] rolling_window( @@ -63,7 +63,7 @@ cdef extern from "cudf/rolling.hpp" namespace "cudf" nogil: size_type following_window, size_type min_periods, rolling_aggregation& agg, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef pair[unique_ptr[column], unique_ptr[column]] make_range_windows( @@ -73,7 +73,7 @@ cdef extern from "cudf/rolling.hpp" namespace "cudf" nogil: null_order null_order, range_window_type preceding, range_window_type following, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/round.pxd b/python/pylibcudf/pylibcudf/libcudf/round.pxd index 39965d025c6..f21987844f3 100644 --- a/python/pylibcudf/pylibcudf/libcudf/round.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/round.pxd @@ -6,7 +6,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -20,7 +20,7 @@ cdef extern from "cudf/round.hpp" namespace "cudf" nogil: const column_view& input, int32_t decimal_places, rounding_method method, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -28,6 +28,6 @@ cdef extern from "cudf/round.hpp" namespace "cudf" nogil: const column_view& input, int32_t decimal_places, rounding_method method, - cuda_stream_view stream, + cudaStream_t stream, 
device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/scalar/scalar.pxd b/python/pylibcudf/pylibcudf/libcudf/scalar/scalar.pxd index 6c3dc71e019..10d3a42c572 100644 --- a/python/pylibcudf/pylibcudf/libcudf/scalar/scalar.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/scalar/scalar.pxd @@ -8,7 +8,7 @@ from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.fixed_point.fixed_point cimport scale_type from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport data_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -18,31 +18,31 @@ cdef extern from "cudf/scalar/scalar.hpp" namespace "cudf" nogil: scalar(scalar other) except +libcudf_exception_handler data_type type() except +libcudf_exception_handler void set_valid_async( - bool is_valid, cuda_stream_view stream + bool is_valid, cudaStream_t stream ) except +libcudf_exception_handler - bool is_valid(cuda_stream_view stream) except +libcudf_exception_handler + bool is_valid(cudaStream_t stream) except +libcudf_exception_handler cdef cppclass numeric_scalar[T](scalar): void set_value( T value, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler - T value(cuda_stream_view stream) except +libcudf_exception_handler + T value(cudaStream_t stream) except +libcudf_exception_handler cdef cppclass timestamp_scalar[T](scalar): void set_value( T value, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler cdef cppclass duration_scalar[T](scalar): void set_value( T value, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler cdef cppclass string_scalar(scalar): - string to_string(cuda_stream_view stream) except +libcudf_exception_handler + string to_string(cudaStream_t stream) except 
+libcudf_exception_handler cdef cppclass list_scalar(scalar): pass @@ -57,4 +57,4 @@ cdef extern from "cudf/scalar/scalar.hpp" namespace "cudf" nogil: scale_type scale, bool is_valid ) except +libcudf_exception_handler - T value(cuda_stream_view stream) except +libcudf_exception_handler + T value(cudaStream_t stream) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/scalar/scalar_factories.pxd b/python/pylibcudf/pylibcudf/libcudf/scalar/scalar_factories.pxd index 6034b2ecc08..6b1329962cd 100644 --- a/python/pylibcudf/pylibcudf/libcudf/scalar/scalar_factories.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/scalar/scalar_factories.pxd @@ -9,49 +9,49 @@ from pylibcudf.libcudf.scalar.scalar cimport scalar from pylibcudf.libcudf.fixed_point.fixed_point cimport scale_type from pylibcudf.libcudf.types cimport int128 as int128_t -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref cdef extern from "cudf/scalar/scalar_factories.hpp" namespace "cudf" nogil: cdef unique_ptr[scalar] make_string_scalar( const string & _string, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[scalar] make_fixed_width_scalar[T]( T value, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[scalar] make_fixed_point_scalar[T]( int128_t value, scale_type scale, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[scalar] make_numeric_scalar( data_type type_, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[scalar] make_timestamp_scalar( data_type type_, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) 
except +libcudf_exception_handler cdef unique_ptr[scalar] make_empty_scalar_like( const column_view &, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[scalar] make_duration_scalar( data_type type_, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[scalar] make_default_constructed_scalar( data_type type_, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/search.pxd b/python/pylibcudf/pylibcudf/libcudf/search.pxd index b369ec05392..c1e41893d2e 100644 --- a/python/pylibcudf/pylibcudf/libcudf/search.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/search.pxd @@ -7,7 +7,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.table.table_view cimport table_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -18,7 +18,7 @@ cdef extern from "cudf/search.hpp" namespace "cudf" nogil: table_view needles, vector[libcudf_types.order] column_order, vector[libcudf_types.null_order] null_precedence, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -27,13 +27,13 @@ cdef extern from "cudf/search.hpp" namespace "cudf" nogil: table_view needles, vector[libcudf_types.order] column_order, vector[libcudf_types.null_order] null_precedence, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] contains( column_view haystack, column_view needles, - cuda_stream_view stream, + cudaStream_t 
stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/sorting.pxd b/python/pylibcudf/pylibcudf/libcudf/sorting.pxd index 97822e2c374..c8e252ced2c 100644 --- a/python/pylibcudf/pylibcudf/libcudf/sorting.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/sorting.pxd @@ -17,7 +17,7 @@ from pylibcudf.libcudf.types cimport ( null_order, size_type ) -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -26,7 +26,7 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: table_view source_table, vector[order] column_order, vector[null_order] null_precedence, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -34,7 +34,7 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: table_view source_table, vector[order] column_order, vector[null_order] null_precedence, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -45,7 +45,7 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: null_policy null_handling, null_order null_precedence, bool percentage, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -53,7 +53,7 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: const table_view& table, vector[order] column_order, vector[null_order] null_precedence, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler cdef unique_ptr[table] segmented_sort_by_key( @@ -62,7 +62,7 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: const column_view& segment_offsets, vector[order] column_order, vector[null_order] null_precedence, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except 
+libcudf_exception_handler @@ -72,7 +72,7 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: const column_view& segment_offsets, vector[order] column_order, vector[null_order] null_precedence, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -81,7 +81,7 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: const table_view& keys, vector[order] column_order, vector[null_order] null_precedence, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -90,7 +90,7 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: const table_view& keys, vector[order] column_order, vector[null_order] null_precedence, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -98,7 +98,7 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: table_view source_table, vector[order] column_order, vector[null_order] null_precedence, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -106,7 +106,7 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: table_view source_table, vector[order] column_order, vector[null_order] null_precedence, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -114,7 +114,7 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: const column_view& col, size_type k, order sort_order, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -122,6 +122,6 @@ cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: const column_view& col, size_type k, order sort_order, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git 
a/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd b/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd index 0358aa4068c..9f8686da472 100644 --- a/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd @@ -14,7 +14,7 @@ from pylibcudf.libcudf.types cimport ( null_equality, size_type, ) -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -29,7 +29,7 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil: table_view source_table, vector[size_type] keys, size_type keep_threshold, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -37,14 +37,14 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil: table_view source_table, vector[size_type] keys, size_type keep_threshold, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[table] apply_boolean_mask( table_view source_table, column_view boolean_mask, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -53,7 +53,7 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil: vector[size_type] keys, duplicate_keep_option keep, null_equality nulls_equal, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -63,7 +63,7 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil: duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equals, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -72,7 +72,7 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil: duplicate_keep_option keep, 
null_equality nulls_equal, nan_equality nans_equal, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -82,7 +82,7 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil: duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equal, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -90,6 +90,6 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil: table_view predicate_table, const expression& predicate_expr, table_view filter_table, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/attributes.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/attributes.pxd index 06e95c95870..0cee9e43346 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/attributes.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/attributes.pxd @@ -4,7 +4,7 @@ from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -12,15 +12,15 @@ cdef extern from "cudf/strings/attributes.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[column] count_characters( column_view source_strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] count_bytes( column_view source_strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] code_points( column_view source_strings, - cuda_stream_view 
stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/capitalize.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/capitalize.pxd index b615cd984db..7b8ac094311 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/capitalize.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/capitalize.pxd @@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.libcudf.strings.char_types cimport string_character_types -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -14,18 +14,18 @@ cdef extern from "cudf/strings/capitalize.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[column] capitalize( const column_view & strings, const string_scalar & delimiters, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] title( const column_view & strings, string_character_types sequence_type, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] is_title( const column_view & strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/case.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/case.pxd index 463586d9f37..a056f1b4737 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/case.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/case.pxd @@ -4,22 +4,22 @@ from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from 
pylibcudf.libcudf.column.column_view cimport column_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref cdef extern from "cudf/strings/case.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[column] to_lower( const column_view & strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] to_upper( const column_view & strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] swapcase( const column_view & strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/char_types.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/char_types.pxd index 7706498eceb..c6af0fb73d2 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/char_types.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/char_types.pxd @@ -6,7 +6,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -29,7 +29,7 @@ cdef extern from "cudf/strings/char_types/char_types.hpp" \ column_view source_strings, string_character_types types, string_character_types verify_types, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] filter_characters_of_type( @@ -37,5 +37,5 @@ cdef extern from "cudf/strings/char_types/char_types.hpp" \ 
string_character_types types_to_remove, string_scalar replacement, string_character_types types_to_keep, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/combine.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/combine.pxd index ef831d3b167..2e2b6656797 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/combine.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/combine.pxd @@ -8,7 +8,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.libcudf.table.table_view cimport table_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -27,7 +27,7 @@ cdef extern from "cudf/strings/combine.hpp" namespace "cudf::strings" nogil: string_scalar separator, string_scalar narep, separator_on_nulls separate_nulls, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] concatenate( @@ -36,14 +36,14 @@ cdef extern from "cudf/strings/combine.hpp" namespace "cudf::strings" nogil: string_scalar separator_narep, string_scalar col_narep, separator_on_nulls separate_nulls, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] join_strings( column_view input, string_scalar separator, string_scalar narep, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] join_list_elements( @@ -53,7 +53,7 @@ cdef extern from "cudf/strings/combine.hpp" namespace "cudf::strings" nogil: string_scalar string_narep, separator_on_nulls separate_nulls, output_if_empty_list 
empty_list_policy, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] join_list_elements( @@ -62,5 +62,5 @@ cdef extern from "cudf/strings/combine.hpp" namespace "cudf::strings" nogil: string_scalar narep, separator_on_nulls separate_nulls, output_if_empty_list empty_list_policy, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/contains.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/contains.pxd index f60782e93b7..cc9a7c6835d 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/contains.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/contains.pxd @@ -7,7 +7,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.libcudf.strings.regex_program cimport regex_program -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -16,31 +16,31 @@ cdef extern from "cudf/strings/contains.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[column] contains_re( column_view source_strings, regex_program, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] count_re( column_view source_strings, regex_program, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] matches_re( column_view source_strings, regex_program, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] like( column_view source_strings, string pattern, string escape_character, - cuda_stream_view 
stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] like( column_view source_strings, column_view patterns, string_scalar escape_character, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_booleans.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_booleans.pxd index b5b837878f9..8875bc62ed5 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_booleans.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_booleans.pxd @@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -15,12 +15,12 @@ cdef extern from "cudf/strings/convert/convert_booleans.hpp" namespace \ cdef unique_ptr[column] to_booleans( column_view input, string_scalar true_string, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] from_booleans( column_view booleans, string_scalar true_string, string_scalar false_string, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_datetime.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_datetime.pxd index 5779839a685..92983f9dc49 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_datetime.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_datetime.pxd @@ -7,7 +7,7 @@ from pylibcudf.libcudf.column.column cimport column from 
pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport data_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -17,18 +17,18 @@ cdef extern from "cudf/strings/convert/convert_datetime.hpp" namespace \ column_view input, data_type timestamp_type, string format, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] from_timestamps( column_view timestamps, string format, column_view names, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] is_timestamp( column_view input_col, string format, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd index 2eae8b987b9..4f22b715ef9 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd @@ -7,7 +7,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport data_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -17,11 +17,11 @@ cdef extern from "cudf/strings/convert/convert_durations.hpp" namespace \ const column_view & input, data_type duration_type, const string & format, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] from_durations( const 
column_view & durations, const string & format, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd index e5f512c331f..8aaa0ebf4c7 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd @@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport data_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -15,17 +15,17 @@ cdef extern from "cudf/strings/convert/convert_fixed_point.hpp" namespace \ cdef unique_ptr[column] to_fixed_point( column_view input, data_type output_type, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] from_fixed_point( column_view input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] is_fixed_point( column_view input, data_type decimal_type, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_floats.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_floats.pxd index 4ea1cd527f4..5a111c1979d 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_floats.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_floats.pxd @@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view 
cimport column_view from pylibcudf.libcudf.types cimport data_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -15,16 +15,16 @@ cdef extern from "cudf/strings/convert/convert_floats.hpp" namespace \ cdef unique_ptr[column] to_floats( column_view strings, data_type output_type, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] from_floats( column_view floats, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] is_float( column_view input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_integers.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_integers.pxd index 306c4b66758..4d3f4ff758a 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_integers.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_integers.pxd @@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport data_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -15,40 +15,40 @@ cdef extern from "cudf/strings/convert/convert_integers.hpp" namespace \ cdef unique_ptr[column] to_integers( column_view input, data_type output_type, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] from_integers( column_view integers, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) 
except +libcudf_exception_handler cdef unique_ptr[column] is_integer( column_view input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] is_integer( column_view input, data_type int_type, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] hex_to_integers( column_view input, data_type output_type, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] is_hex( column_view input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] integers_to_hex( column_view input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd index d12f3992d85..00a64787957 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd @@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -13,16 +13,16 @@ cdef extern from "cudf/strings/convert/convert_ipv4.hpp" namespace \ "cudf::strings" nogil: cdef unique_ptr[column] ipv4_to_integers( column_view input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] integers_to_ipv4( column_view integers, - 
cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] is_ipv4( column_view input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_lists.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_lists.pxd index 8ed381e87da..bfae49bae4b 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_lists.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_lists.pxd @@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -17,5 +17,5 @@ cdef extern from "cudf/strings/convert/convert_lists.hpp" namespace \ column_view input, string_scalar na_rep, column_view separators, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_urls.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_urls.pxd index b20c03f976b..db2d4f4efc0 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_urls.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_urls.pxd @@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -13,10 +13,10 @@ cdef extern from 
"cudf/strings/convert/convert_urls.hpp" namespace \ "cudf::strings" nogil: cdef unique_ptr[column] url_encode( column_view input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] url_decode( column_view input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/extract.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/extract.pxd index 845de206dbf..d3e0d0fd35a 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/extract.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/extract.pxd @@ -7,7 +7,7 @@ from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.strings.regex_program cimport regex_program from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -16,18 +16,18 @@ cdef extern from "cudf/strings/extract.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[table] extract( column_view input, regex_program prog, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] extract_all_record( column_view input, regex_program prog, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] extract_single( column_view input, regex_program prog, size_type group, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/find.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/find.pxd index b8934aeb7fe..42752152de8 100644 --- 
a/python/pylibcudf/pylibcudf/libcudf/strings/find.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/find.pxd @@ -7,7 +7,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -16,37 +16,37 @@ cdef extern from "cudf/strings/find.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[column] contains( column_view source_strings, string_scalar target, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] contains( column_view source_strings, column_view target_strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] ends_with( column_view source_strings, string_scalar target, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] ends_with( column_view source_strings, column_view target_strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] starts_with( column_view source_strings, string_scalar target, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] starts_with( column_view source_strings, column_view target_strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] find( @@ -54,14 +54,14 @@ cdef extern from "cudf/strings/find.hpp" namespace "cudf::strings" nogil: string_scalar target, size_type start, 
size_type stop, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] find( column_view source_strings, column_view target, size_type start, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] rfind( @@ -69,5 +69,5 @@ cdef extern from "cudf/strings/find.hpp" namespace "cudf::strings" nogil: string_scalar target, size_type start, size_type stop, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/find_multiple.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/find_multiple.pxd index da751990053..1e42a476c13 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/find_multiple.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/find_multiple.pxd @@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.table.table cimport table -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -15,11 +15,11 @@ cdef extern from "cudf/strings/find_multiple.hpp" namespace "cudf::strings" \ cdef unique_ptr[table] contains_multiple( column_view input, column_view targets, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] find_multiple( column_view input, column_view targets, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/findall.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/findall.pxd index 
02ecbef7095..d72ffd09d8e 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/findall.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/findall.pxd @@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.strings.regex_program cimport regex_program -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -14,11 +14,11 @@ cdef extern from "cudf/strings/findall.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[column] findall( column_view input, regex_program prog, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] find_re( column_view input, regex_program prog, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/padding.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/padding.pxd index 5e3e5c43f61..8b291a22a05 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/padding.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/padding.pxd @@ -9,7 +9,7 @@ from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.libcudf.strings.side_type cimport side_type from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -20,17 +20,17 @@ cdef extern from "cudf/strings/padding.hpp" namespace "cudf::strings" nogil: size_type width, side_type side, string fill_char, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) 
except +libcudf_exception_handler cdef unique_ptr[column] zfill( column_view input, size_type width, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] zfill_by_widths( column_view input, column_view widths, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/repeat.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/repeat.pxd index 05a2954af35..86519de0b90 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/repeat.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/repeat.pxd @@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -16,13 +16,13 @@ cdef extern from "cudf/strings/repeat_strings.hpp" namespace "cudf::strings" \ cdef unique_ptr[column] repeat_strings( column_view input, size_type repeat_times, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] repeat_strings( column_view input, column_view repeat_times, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/replace.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/replace.pxd index 263b91475b8..cf2573af5ed 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/replace.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/replace.pxd @@ -8,7 +8,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport 
string_scalar from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -18,7 +18,7 @@ cdef extern from "cudf/strings/replace.hpp" namespace "cudf::strings" nogil: string_scalar repl, size_type start, size_type stop, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] replace( @@ -26,12 +26,12 @@ cdef extern from "cudf/strings/replace.hpp" namespace "cudf::strings" nogil: string_scalar target, string_scalar repl, int32_t maxrepl, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] replace_multiple( column_view source_strings, column_view target_strings, column_view repl_strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/replace_re.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/replace_re.pxd index 5f5cbaeaf55..d3e958841ab 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/replace_re.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/replace_re.pxd @@ -11,7 +11,7 @@ from pylibcudf.libcudf.strings.regex_flags cimport regex_flags from pylibcudf.libcudf.strings.regex_program cimport regex_program from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -22,7 +22,7 @@ cdef extern from "cudf/strings/replace_re.hpp" namespace "cudf::strings" nogil: regex_program prog, string_scalar replacement, size_type max_replace_count, - cuda_stream_view stream, + cudaStream_t stream, 
device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] replace_re( @@ -30,12 +30,12 @@ cdef extern from "cudf/strings/replace_re.hpp" namespace "cudf::strings" nogil: vector[string] patterns, column_view replacements, regex_flags flags, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] replace_with_backrefs( column_view input, regex_program prog, string replacement, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/reverse.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/reverse.pxd index 6e6fc2acac4..39a3ac4b769 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/reverse.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/reverse.pxd @@ -4,12 +4,12 @@ from libcpp.memory cimport unique_ptr from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref cdef extern from "cudf/strings/reverse.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[column] reverse( column_view source_strings, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/split/partition.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/split/partition.pxd index 0c99455ea33..6c9031482ca 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/split/partition.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/split/partition.pxd @@ -7,7 +7,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport 
column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.libcudf.table.table cimport table -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -17,11 +17,11 @@ cdef extern from "cudf/strings/split/partition.hpp" namespace \ cdef unique_ptr[table] partition( column_view input, string_scalar delimiter, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[table] rpartition( column_view input, string_scalar delimiter, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/split/split.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/split/split.pxd index 9ed741b608a..5d14fefdb1b 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/split/split.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/split/split.pxd @@ -9,7 +9,7 @@ from pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.libcudf.strings.regex_program cimport regex_program from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -20,35 +20,35 @@ cdef extern from "cudf/strings/split/split.hpp" namespace \ column_view strings_column, string_scalar delimiter, size_type maxsplit, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[table] rsplit( column_view strings_column, string_scalar delimiter, size_type maxsplit, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] 
split_record( column_view strings, string_scalar delimiter, size_type maxsplit, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] rsplit_record( column_view strings, string_scalar delimiter, size_type maxsplit, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] split_part( column_view strings, string_scalar delimiter, size_type index, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler @@ -59,26 +59,26 @@ cdef extern from "cudf/strings/split/split_re.hpp" namespace \ const column_view& input, regex_program prog, size_type maxsplit, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[table] rsplit_re( const column_view& input, regex_program prog, size_type maxsplit, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] split_record_re( const column_view& input, regex_program prog, size_type maxsplit, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef unique_ptr[column] rsplit_record_re( const column_view& input, regex_program prog, size_type maxsplit, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/strings_column_view.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/strings_column_view.pxd index 8c72fed7219..5fa0dfb4289 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/strings_column_view.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/strings_column_view.pxd @@ -1,13 +1,13 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. 
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libc.stdint cimport int64_t from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column_view cimport column_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t cdef extern from "cudf/strings/strings_column_view.hpp" namespace "cudf" nogil: cdef cppclass strings_column_view: strings_column_view(column_view) except +libcudf_exception_handler - int64_t chars_size(cuda_stream_view) except +libcudf_exception_handler + int64_t chars_size(cudaStream_t) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/strip.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/strip.pxd index 13e017c33f7..4d56b2de5d3 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/strip.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/strip.pxd @@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.libcudf.strings.side_type cimport side_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -16,5 +16,5 @@ cdef extern from "cudf/strings/strip.hpp" namespace "cudf::strings" nogil: column_view input, side_type side, string_scalar to_strip, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/substring.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/substring.pxd index 21c2fe4a77b..d0b4f192307 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/substring.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/substring.pxd @@ -7,7 +7,7 @@ from 
pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport numeric_scalar from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -17,7 +17,7 @@ cdef extern from "cudf/strings/slice.hpp" namespace "cudf::strings" nogil: numeric_scalar[size_type] start, numeric_scalar[size_type] end, numeric_scalar[size_type] step, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -25,6 +25,6 @@ cdef extern from "cudf/strings/slice.hpp" namespace "cudf::strings" nogil: column_view source_strings, column_view starts, column_view stops, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/translate.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/translate.pxd index 9bdc0489a89..dcf5aa20948 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/translate.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/translate.pxd @@ -9,7 +9,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.scalar.scalar cimport string_scalar from pylibcudf.libcudf.types cimport char_utf8 -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -18,7 +18,7 @@ cdef extern from "cudf/strings/translate.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[column] translate( column_view input, vector[pair[char_utf8, char_utf8]] chars_table, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -31,5 +31,5 @@ cdef extern from 
"cudf/strings/translate.hpp" namespace "cudf::strings" nogil: vector[pair[char_utf8, char_utf8]] characters_to_filter, filter_type keep_characters, string_scalar replacement, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/wrap.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/wrap.pxd index 8aa5631a12e..2ddd924df48 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/wrap.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/wrap.pxd @@ -5,7 +5,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -14,5 +14,5 @@ cdef extern from "cudf/strings/wrap.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[column] wrap( column_view input, size_type width, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/structs/structs_column_view.pxd b/python/pylibcudf/pylibcudf/libcudf/structs/structs_column_view.pxd index 7b339782295..d51a51dfb13 100644 --- a/python/pylibcudf/pylibcudf/libcudf/structs/structs_column_view.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/structs/structs_column_view.pxd @@ -1,6 +1,6 @@ # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column_view cimport column_view @@ -22,5 +22,5 @@ cdef extern from "cudf/structs/structs_column_view.hpp" namespace "cudf" nogil: column_view parent() except +libcudf_exception_handler column_view get_sliced_child( size_type index, - cuda_stream_view stream + cudaStream_t stream ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/table/table.pxd b/python/pylibcudf/pylibcudf/libcudf/table/table.pxd index 230131d5520..dcfc046a904 100644 --- a/python/pylibcudf/pylibcudf/libcudf/table/table.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/table/table.pxd @@ -6,7 +6,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.table.table_view cimport mutable_table_view, table_view from pylibcudf.libcudf.types cimport size_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -14,12 +14,12 @@ cdef extern from "cudf/table/table.hpp" namespace "cudf" nogil: cdef cppclass table: table( const table&, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler table( table_view, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler size_type num_columns() except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/transform.pxd b/python/pylibcudf/pylibcudf/libcudf/transform.pxd index 9b2ace2d940..ebc9d8bfa1d 100644 --- a/python/pylibcudf/pylibcudf/libcudf/transform.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/transform.pxd @@ -16,14 +16,14 @@ from 
pylibcudf.libcudf.types cimport bitmask_type, data_type, size_type from pylibcudf.libcudf.types cimport null_aware, output_nullability from rmm.librmm.device_buffer cimport device_buffer -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref cdef extern from "cudf/transform.hpp" namespace "cudf" nogil: cdef pair[unique_ptr[device_buffer], size_type] bools_to_mask ( const column_view& input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -31,19 +31,19 @@ cdef extern from "cudf/transform.hpp" namespace "cudf" nogil: const bitmask_type* bitmask, size_type begin_bit, size_type end_bit, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef pair[unique_ptr[device_buffer], size_type] nans_to_nulls( const column_view& input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] column_nans_to_nulls( const column_view& input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler @@ -55,33 +55,33 @@ cdef extern from "cudf/transform.hpp" namespace "cudf" nogil: optional[void *] user_data, null_aware is_null_aware, output_nullability null_policy, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef pair[unique_ptr[table], unique_ptr[column]] encode( table_view input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef pair[unique_ptr[column], table_view] one_hot_encode( column_view input_column, column_view categories, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef 
unique_ptr[column] compute_column( const table_view table, const expression& expr, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef unique_ptr[column] compute_column_jit( const table_view table, const expression& expr, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/transpose.pxd b/python/pylibcudf/pylibcudf/libcudf/transpose.pxd index 2345ab5a2d9..0ce2048ba0f 100644 --- a/python/pylibcudf/pylibcudf/libcudf/transpose.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/transpose.pxd @@ -6,7 +6,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.table.table_view cimport table_view -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -16,6 +16,6 @@ cdef extern from "cudf/transpose.hpp" namespace "cudf" nogil: table_view ] transpose( table_view input_table, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/unary.pxd b/python/pylibcudf/pylibcudf/libcudf/unary.pxd index d3fd2f2f976..6f59ff8d5e0 100644 --- a/python/pylibcudf/pylibcudf/libcudf/unary.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/unary.pxd @@ -7,7 +7,7 @@ from pylibcudf.exception_handler cimport libcudf_exception_handler from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport data_type -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t from rmm.librmm.memory_resource cimport device_async_resource_ref @@ -42,32 +42,32 @@ cdef extern from "cudf/unary.hpp" 
namespace "cudf" nogil: cdef extern unique_ptr[column] unary_operation( column_view input, unary_operator op, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef extern unique_ptr[column] is_null( column_view input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef extern unique_ptr[column] is_valid( column_view input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef extern unique_ptr[column] cast( column_view input, data_type out_type, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr) except +libcudf_exception_handler cdef extern bool is_supported_cast(data_type from_, data_type to) noexcept cdef extern unique_ptr[column] is_nan( column_view input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler cdef extern unique_ptr[column] is_not_nan( column_view input, - cuda_stream_view stream, + cudaStream_t stream, device_async_resource_ref mr ) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/unique_count.pxd b/python/pylibcudf/pylibcudf/libcudf/unique_count.pxd index 5954dace85e..04001f5a064 100644 --- a/python/pylibcudf/pylibcudf/libcudf/unique_count.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/unique_count.pxd @@ -9,7 +9,7 @@ from pylibcudf.libcudf.types cimport ( null_policy, size_type, ) -from rmm.librmm.cuda_stream_view cimport cuda_stream_view +from cuda.bindings.cyruntime cimport cudaStream_t cdef extern from "cudf/reduction/unique_count.hpp" namespace "cudf" nogil: @@ -17,9 +17,9 @@ cdef extern from "cudf/reduction/unique_count.hpp" namespace "cudf" nogil: column_view column, null_policy null_handling, nan_policy nan_handling, - cuda_stream_view stream) except +libcudf_exception_handler + cudaStream_t stream) except 
+libcudf_exception_handler cdef size_type unique_count( table_view source_table, null_equality nulls_equal, - cuda_stream_view stream) except +libcudf_exception_handler + cudaStream_t stream) except +libcudf_exception_handler diff --git a/python/pylibcudf/pylibcudf/libcudf/utilities/default_stream.pxd b/python/pylibcudf/pylibcudf/libcudf/utilities/default_stream.pxd index a9569f11706..661db24f5aa 100644 --- a/python/pylibcudf/pylibcudf/libcudf/utilities/default_stream.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/utilities/default_stream.pxd @@ -1,10 +1,9 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 +from cuda.bindings.cyruntime cimport cudaStream_t from libcpp cimport bool -from rmm.librmm.cuda_stream_view cimport cuda_stream_view - cdef extern from "cudf/utilities/default_stream.hpp" namespace "cudf" nogil: cdef bool is_ptds_enabled() - cdef cuda_stream_view get_default_stream() + cdef cudaStream_t get_default_stream() diff --git a/python/pylibcudf/pylibcudf/lists.pxd b/python/pylibcudf/pylibcudf/lists.pxd index be47db18a59..88b09c01531 100644 --- a/python/pylibcudf/pylibcudf/lists.pxd +++ b/python/pylibcudf/pylibcudf/lists.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool @@ -9,7 +9,6 @@ from pylibcudf.libcudf.copying cimport out_of_bounds_policy from pylibcudf.libcudf.lists.combine cimport concatenate_null_policy from pylibcudf.libcudf.lists.contains cimport duplicate_find_option from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .scalar cimport Scalar @@ -26,33 +25,33 @@ ctypedef fused ColumnOrSizeType: cpdef Table explode_outer( Table, size_type explode_column_idx, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column concatenate_rows( Table, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column concatenate_list_elements( Column, concatenate_null_policy null_policy, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column contains( Column, ColumnOrScalar, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column contains_nulls( Column, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -60,13 +59,13 @@ cpdef Column index_of( Column, ColumnOrScalar, duplicate_find_option, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column reverse( Column, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -74,20 +73,20 @@ cpdef Column segmented_gather( Column, Column, out_of_bounds_policy bounds_policy=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column extract_list_element( Column, ColumnOrSizeType, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column count_elements( Column, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -95,7 +94,7 @@ cpdef Column sequences( Column, Column, Column steps = *, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -104,7 +103,7 @@ cpdef Column sort_lists( order, null_order, bool stable = *, 
- Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -113,7 +112,7 @@ cpdef Column difference_distinct( Column, null_equality nulls_equal=*, nan_equality nans_equal=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -122,7 +121,7 @@ cpdef Column have_overlap( Column, null_equality nulls_equal=*, nan_equality nans_equal=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -131,7 +130,7 @@ cpdef Column intersect_distinct( Column, null_equality nulls_equal=*, nan_equality nans_equal=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -140,14 +139,14 @@ cpdef Column union_distinct( Column, null_equality nulls_equal=*, nan_equality nans_equal=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column apply_boolean_mask( Column, Column, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -155,6 +154,6 @@ cpdef Column distinct( Column, null_equality, nan_equality, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/lists.pyi b/python/pylibcudf/pylibcudf/lists.pyi index a3bcf9f76d6..1e418b59726 100644 --- a/python/pylibcudf/pylibcudf/lists.pyi +++ b/python/pylibcudf/pylibcudf/lists.pyi @@ -1,16 +1,16 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from enum import IntEnum from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.copying import OutOfBoundsPolicy from pylibcudf.scalar import Scalar from pylibcudf.table import Table from pylibcudf.types import NanEquality, NullEquality, NullOrder, Order +from pylibcudf.utils import CudaStreamLike class ConcatenateNullPolicy(IntEnum): IGNORE = ... 
@@ -23,66 +23,66 @@ class DuplicateFindOption(IntEnum): def explode_outer( input: Table, explode_column_idx: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def concatenate_rows( input: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def concatenate_list_elements( input: Column, null_policy: ConcatenateNullPolicy, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def contains( input: Column, search_key: Column | Scalar, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def contains_nulls( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def index_of( input: Column, search_key: Column | Scalar, find_option: DuplicateFindOption, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def reverse( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def segmented_gather( input: Column, gather_map_list: Column, bounds_policy: OutOfBoundsPolicy = OutOfBoundsPolicy.DONT_CHECK, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def extract_list_element( input: Column, index: Column | int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def count_elements( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
def sequences( starts: Column, sizes: Column, steps: Column | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def sort_lists( @@ -90,7 +90,7 @@ def sort_lists( sort_order: Order, na_position: NullOrder, stable: bool = False, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def difference_distinct( @@ -98,7 +98,7 @@ def difference_distinct( rhs: Column, nulls_equal: NullEquality = NullEquality.EQUAL, nans_equal: NanEquality = NanEquality.ALL_EQUAL, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def have_overlap( @@ -106,7 +106,7 @@ def have_overlap( rhs: Column, nulls_equal: NullEquality = NullEquality.EQUAL, nans_equal: NanEquality = NanEquality.ALL_EQUAL, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def intersect_distinct( @@ -114,7 +114,7 @@ def intersect_distinct( rhs: Column, nulls_equal: NullEquality = NullEquality.EQUAL, nans_equal: NanEquality = NanEquality.ALL_EQUAL, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def union_distinct( @@ -122,19 +122,19 @@ def union_distinct( rhs: Column, nulls_equal: NullEquality = NullEquality.EQUAL, nans_equal: NanEquality = NanEquality.ALL_EQUAL, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def apply_boolean_mask( input: Column, boolean_mask: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
def distinct( input: Column, nulls_equal: NullEquality, nans_equal: NanEquality, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/lists.pyx b/python/pylibcudf/pylibcudf/lists.pyx index 0076f7da677..fd05242e44f 100644 --- a/python/pylibcudf/pylibcudf/lists.pyx +++ b/python/pylibcudf/pylibcudf/lists.pyx @@ -55,6 +55,7 @@ from .column cimport Column, ListsColumnView from .scalar cimport Scalar from .table cimport Table from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "ConcatenateNullPolicy", @@ -82,7 +83,7 @@ __all__ = [ cpdef Table explode_outer( Table input, size_type explode_column_idx, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Explode a column of lists into rows. @@ -105,20 +106,21 @@ cpdef Table explode_outer( """ cdef unique_ptr[table] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_explode.explode_outer( - input.view(), explode_column_idx, stream.view(), mr.get_mr() + input.view(), explode_column_idx, _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Column concatenate_rows( Table input, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Concatenate multiple lists columns into a single lists column row-wise. 
@@ -139,21 +141,22 @@ cpdef Column concatenate_rows( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_concatenate_rows( - input.view(), concatenate_null_policy.IGNORE, stream.view(), mr.get_mr() + input.view(), concatenate_null_policy.IGNORE, _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column concatenate_list_elements( Column input, concatenate_null_policy null_policy, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Concatenate multiple lists on the same row into a single list. @@ -174,21 +177,22 @@ cpdef Column concatenate_list_elements( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_concatenate_list_elements( - input.view(), null_policy, stream.view(), mr.get_mr() + input.view(), null_policy, _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column contains( Column input, ColumnOrScalar search_key, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create a column of bool values indicating whether @@ -218,7 +222,8 @@ cpdef Column contains( cdef unique_ptr[column] c_result cdef ListsColumnView list_view = input.list_view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if not isinstance(search_key, (Column, Scalar)): @@ -230,15 +235,15 @@ cpdef Column contains( search_key.view() if ColumnOrScalar is Column else dereference( search_key.get() ), - stream.view(), + _cs, mr.get_mr(), ) - 
return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column contains_nulls( Column input, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create a column of bool values indicating whether @@ -262,21 +267,22 @@ cpdef Column contains_nulls( cdef unique_ptr[column] c_result cdef ListsColumnView list_view = input.list_view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_contains.contains_nulls( - list_view.view(), stream.view(), mr.get_mr() + list_view.view(), _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column index_of( Column input, ColumnOrScalar search_key, duplicate_find_option find_option, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create a column of index values indicating the position of a search @@ -307,7 +313,8 @@ cpdef Column index_of( cdef unique_ptr[column] c_result cdef ListsColumnView list_view = input.list_view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -317,15 +324,15 @@ cpdef Column index_of( search_key.get() ), find_option, - stream.view(), + _cs, mr.get_mr(), ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column reverse( Column input, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Reverse the element order within each list of the input column. 
@@ -347,19 +354,20 @@ cpdef Column reverse( cdef unique_ptr[column] c_result cdef ListsColumnView list_view = input.list_view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_reverse.reverse(list_view.view(), stream.view(), mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + c_result = cpp_reverse.reverse(list_view.view(), _cs, mr.get_mr()) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column segmented_gather( Column input, Column gather_map_list, out_of_bounds_policy bounds_policy=out_of_bounds_policy.DONT_CHECK, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create a column with elements gathered based on the indices in gather_map_list @@ -394,7 +402,8 @@ cpdef Column segmented_gather( cdef ListsColumnView list_view1 = input.list_view() cdef ListsColumnView list_view2 = gather_map_list.list_view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -402,16 +411,16 @@ cpdef Column segmented_gather( list_view1.view(), list_view2.view(), bounds_policy, - stream.view(), + _cs, mr.get_mr(), ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column extract_list_element( Column input, ColumnOrSizeType index, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create a column of extracted list elements. 
@@ -433,22 +442,23 @@ cpdef Column extract_list_element( cdef unique_ptr[column] c_result cdef ListsColumnView list_view = input.list_view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_extract_list_element( list_view.view(), index.view() if ColumnOrSizeType is Column else index, - stream.view(), + _cs, mr.get_mr(), ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column count_elements( Column input, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Count the number of rows in each @@ -472,20 +482,21 @@ cpdef Column count_elements( cdef ListsColumnView list_view = input.list_view() cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_count_elements(list_view.view(), stream.view(), mr.get_mr()) + c_result = cpp_count_elements(list_view.view(), _cs, mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column sequences( Column starts, Column sizes, Column steps = None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create a lists column in which each row contains a sequence of @@ -509,7 +520,8 @@ cpdef Column sequences( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if steps is not None: @@ -518,22 +530,22 @@ cpdef Column sequences( starts.view(), steps.view(), sizes.view(), - stream.view(), + _cs, mr.get_mr(), ) else: with nogil: c_result = cpp_filling.sequences( - starts.view(), sizes.view(), stream.view(), 
mr.get_mr() + starts.view(), sizes.view(), _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column sort_lists( Column input, order sort_order, null_order na_position, bool stable = False, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Sort the elements within a list in each row of a list column. @@ -561,7 +573,8 @@ cpdef Column sort_lists( cdef unique_ptr[column] c_result cdef ListsColumnView list_view = input.list_view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -570,7 +583,7 @@ cpdef Column sort_lists( list_view.view(), sort_order, na_position, - stream.view(), + _cs, mr.get_mr(), ) else: @@ -578,10 +591,10 @@ cpdef Column sort_lists( list_view.view(), sort_order, na_position, - stream.view(), + _cs, mr.get_mr(), ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column difference_distinct( @@ -589,7 +602,7 @@ cpdef Column difference_distinct( Column rhs, null_equality nulls_equal=null_equality.EQUAL, nan_equality nans_equal=nan_equality.ALL_EQUAL, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create a column of index values indicating the position of a search @@ -617,7 +630,8 @@ cpdef Column difference_distinct( cdef ListsColumnView lhs_view = lhs.list_view() cdef ListsColumnView rhs_view = rhs.list_view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -626,10 +640,10 @@ cpdef Column difference_distinct( rhs_view.view(), nulls_equal, nans_equal, - stream.view(), + _cs, mr.get_mr(), ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, 
mr) cpdef Column have_overlap( @@ -637,7 +651,7 @@ cpdef Column have_overlap( Column rhs, null_equality nulls_equal=null_equality.EQUAL, nan_equality nans_equal=nan_equality.ALL_EQUAL, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Check if lists at each row of the given lists columns overlap. @@ -664,7 +678,8 @@ cpdef Column have_overlap( cdef ListsColumnView lhs_view = lhs.list_view() cdef ListsColumnView rhs_view = rhs.list_view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -673,10 +688,10 @@ cpdef Column have_overlap( rhs_view.view(), nulls_equal, nans_equal, - stream.view(), + _cs, mr.get_mr(), ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column intersect_distinct( @@ -684,7 +699,7 @@ cpdef Column intersect_distinct( Column rhs, null_equality nulls_equal=null_equality.EQUAL, nan_equality nans_equal=nan_equality.ALL_EQUAL, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create a lists column of distinct elements common to two input lists columns. 
@@ -711,7 +726,8 @@ cpdef Column intersect_distinct( cdef ListsColumnView lhs_view = lhs.list_view() cdef ListsColumnView rhs_view = rhs.list_view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -720,10 +736,10 @@ cpdef Column intersect_distinct( rhs_view.view(), nulls_equal, nans_equal, - stream.view(), + _cs, mr.get_mr(), ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column union_distinct( @@ -731,7 +747,7 @@ cpdef Column union_distinct( Column rhs, null_equality nulls_equal=null_equality.EQUAL, nan_equality nans_equal=nan_equality.ALL_EQUAL, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create a lists column of distinct elements found in @@ -759,7 +775,8 @@ cpdef Column union_distinct( cdef ListsColumnView lhs_view = lhs.list_view() cdef ListsColumnView rhs_view = rhs.list_view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -768,16 +785,16 @@ cpdef Column union_distinct( rhs_view.view(), nulls_equal, nans_equal, - stream.view(), + _cs, mr.get_mr(), ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column apply_boolean_mask( Column input, Column boolean_mask, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Filters elements in each row of the input lists column using a boolean mask @@ -802,24 +819,25 @@ cpdef Column apply_boolean_mask( cdef ListsColumnView list_view = input.list_view() cdef ListsColumnView mask_view = boolean_mask.list_view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: 
c_result = cpp_apply_boolean_mask( list_view.view(), mask_view.view(), - stream.view(), + _cs, mr.get_mr(), ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column distinct( Column input, null_equality nulls_equal, nan_equality nans_equal, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create a new list column without duplicate elements in each list. @@ -843,7 +861,8 @@ cpdef Column distinct( cdef unique_ptr[column] c_result cdef ListsColumnView list_view = input.list_view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -852,10 +871,10 @@ cpdef Column distinct( nulls_equal, nans_equal, duplicate_keep_option.KEEP_ANY, - stream.view(), + _cs, mr.get_mr(), ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) ConcatenateNullPolicy.__str__ = ConcatenateNullPolicy.__repr__ DuplicateFindOption.__str__ = DuplicateFindOption.__repr__ diff --git a/python/pylibcudf/pylibcudf/merge.pxd b/python/pylibcudf/pylibcudf/merge.pxd index aed9dda7479..07624852289 100644 --- a/python/pylibcudf/pylibcudf/merge.pxd +++ b/python/pylibcudf/pylibcudf/merge.pxd @@ -1,9 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from .table cimport Table -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource @@ -12,6 +11,6 @@ cpdef Table merge ( list key_cols, list column_order, list null_precedence, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/merge.pyi b/python/pylibcudf/pylibcudf/merge.pyi index f96e1d8534e..50e87d5bffa 100644 --- a/python/pylibcudf/pylibcudf/merge.pyi +++ b/python/pylibcudf/pylibcudf/merge.pyi @@ -1,17 +1,17 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.table import Table from pylibcudf.types import NullOrder, Order +from pylibcudf.utils import CudaStreamLike def merge( tables_to_merge: list[Table], key_cols: list[int], column_order: list[Order], null_precedence: list[NullOrder], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... diff --git a/python/pylibcudf/pylibcudf/merge.pyx b/python/pylibcudf/pylibcudf/merge.pyx index a6cbaf81051..3c0cd93a342 100644 --- a/python/pylibcudf/pylibcudf/merge.pyx +++ b/python/pylibcudf/pylibcudf/merge.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -14,6 +14,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .table cimport Table from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["merge"] @@ -22,7 +23,7 @@ cpdef Table merge ( list key_cols, list column_order, list null_precedence, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Merge a set of sorted tables. @@ -58,7 +59,8 @@ cpdef Table merge ( c_tables_to_merge.push_back(( tbl).view()) cdef unique_ptr[table] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -67,7 +69,7 @@ cpdef Table merge ( c_key_cols, c_column_order, c_null_precedence, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/null_mask.pxd b/python/pylibcudf/pylibcudf/null_mask.pxd index 6eb10eddb2e..e7fa70e23ae 100644 --- a/python/pylibcudf/pylibcudf/null_mask.pxd +++ b/python/pylibcudf/pylibcudf/null_mask.pxd @@ -5,18 +5,19 @@ from pylibcudf.libcudf.types cimport mask_state, size_type from rmm.pylibrmm.device_buffer cimport DeviceBuffer from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream from .column cimport Column -cpdef DeviceBuffer copy_bitmask(Column col, Stream stream=*, DeviceMemoryResource mr=*) +cpdef DeviceBuffer copy_bitmask( + Column col, object stream = *, DeviceMemoryResource mr=* +) cpdef DeviceBuffer copy_bitmask_from_bitmask( object bitmask, size_type begin_bit, size_type end_bit, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) @@ -25,24 +26,24 @@ cpdef size_t bitmask_allocation_size_bytes(size_type number_of_bits) cpdef DeviceBuffer create_null_mask( 
size_type size, mask_state state=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) -cpdef tuple bitmask_and(list columns, Stream stream=*, DeviceMemoryResource mr=*) +cpdef tuple bitmask_and(list columns, object stream = *, DeviceMemoryResource mr=*) -cpdef tuple bitmask_or(list columns, Stream stream=*, DeviceMemoryResource mr=*) +cpdef tuple bitmask_or(list columns, object stream = *, DeviceMemoryResource mr=*) cpdef size_type null_count( object bitmask, size_type start, size_type stop, - Stream stream=* + object stream = * ) cpdef size_type index_of_first_set_bit( object bitmask, size_type start, size_type stop, - Stream stream=* + object stream = * ) diff --git a/python/pylibcudf/pylibcudf/null_mask.pyi b/python/pylibcudf/pylibcudf/null_mask.pyi index 98f6e60fb0d..45e130b704e 100644 --- a/python/pylibcudf/pylibcudf/null_mask.pyi +++ b/python/pylibcudf/pylibcudf/null_mask.pyi @@ -3,44 +3,44 @@ from rmm.pylibrmm.device_buffer import DeviceBuffer from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.span import Span from pylibcudf.types import MaskState +from pylibcudf.utils import CudaStreamLike def copy_bitmask( col: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> DeviceBuffer: ... def copy_bitmask_from_bitmask( bitmask: Span, begin_bit: int, end_bit: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> DeviceBuffer: ... def bitmask_allocation_size_bytes(number_of_bits: int) -> int: ... def create_null_mask( size: int, state: MaskState = MaskState.UNINITIALIZED, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> DeviceBuffer: ... 
def bitmask_and( columns: list[Column], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[DeviceBuffer, int]: ... def bitmask_or( columns: list[Column], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[DeviceBuffer, int]: ... def null_count( - bitmask: Span, start: int, stop: int, stream: Stream | None = None + bitmask: Span, start: int, stop: int, stream: CudaStreamLike | None = None ) -> int: ... def index_of_first_set_bit( - bitmask: Span, start: int, stop: int, stream: Stream | None = None + bitmask: Span, start: int, stop: int, stream: CudaStreamLike | None = None ) -> int: ... diff --git a/python/pylibcudf/pylibcudf/null_mask.pyx b/python/pylibcudf/pylibcudf/null_mask.pyx index 176e73047e2..164c51aca9f 100644 --- a/python/pylibcudf/pylibcudf/null_mask.pyx +++ b/python/pylibcudf/pylibcudf/null_mask.pyx @@ -19,6 +19,7 @@ from .span import is_span as py_is_span from .column cimport Column from .table cimport Table from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "bitmask_allocation_size_bytes", @@ -31,7 +32,7 @@ __all__ = [ ] cdef DeviceBuffer buffer_to_python( - device_buffer buf, Stream stream, DeviceMemoryResource mr + device_buffer buf, object stream, DeviceMemoryResource mr ): return DeviceBuffer.c_from_unique_ptr( make_unique[device_buffer](move(buf)), stream, mr @@ -40,7 +41,7 @@ cdef DeviceBuffer buffer_to_python( cpdef DeviceBuffer copy_bitmask( Column col, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Copies ``col``'s bitmask into a ``DeviceBuffer``. 
@@ -63,20 +64,21 @@ cpdef DeviceBuffer copy_bitmask( ``DeviceBuffer`` if ``col`` is not nullable """ cdef device_buffer db - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - db = cpp_null_mask.copy_bitmask(col.view(), stream.view(), mr.get_mr()) + db = cpp_null_mask.copy_bitmask(col.view(), _cs, mr.get_mr()) - return buffer_to_python(move(db), stream, mr) + return buffer_to_python(move(db), _stream, mr) cpdef DeviceBuffer copy_bitmask_from_bitmask( object bitmask, size_type begin_bit, size_type end_bit, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Copies a portion of a bitmask into a ``DeviceBuffer``. @@ -108,7 +110,8 @@ cpdef DeviceBuffer copy_bitmask_from_bitmask( f"got {type(bitmask).__name__}" ) cdef device_buffer db - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) cdef uintptr_t ptr = bitmask.ptr @@ -117,11 +120,11 @@ cpdef DeviceBuffer copy_bitmask_from_bitmask( ptr, begin_bit, end_bit, - stream.view(), + _cs, mr.get_mr() ) - return buffer_to_python(move(db), stream, mr) + return buffer_to_python(move(db), _stream, mr) cpdef size_t bitmask_allocation_size_bytes(size_type number_of_bits): @@ -148,7 +151,7 @@ cpdef size_t bitmask_allocation_size_bytes(size_type number_of_bits): cpdef DeviceBuffer create_null_mask( size_type size, mask_state state = mask_state.UNINITIALIZED, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Creates a ``DeviceBuffer`` for use as a null value indicator bitmask of a @@ -176,16 +179,17 @@ cpdef DeviceBuffer create_null_mask( state """ cdef device_buffer db - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - db = 
cpp_null_mask.create_null_mask(size, state, stream.view(), mr.get_mr()) + db = cpp_null_mask.create_null_mask(size, state, _cs, mr.get_mr()) - return buffer_to_python(move(db), stream, mr) + return buffer_to_python(move(db), _stream, mr) -cpdef tuple bitmask_and(list columns, Stream stream=None, DeviceMemoryResource mr=None): +cpdef tuple bitmask_and(list columns, object stream=None, DeviceMemoryResource mr=None): """Performs bitwise AND of the bitmasks of a list of columns. For details, see :cpp:func:`bitmask_and`. @@ -206,16 +210,19 @@ cpdef tuple bitmask_and(list columns, Stream stream=None, DeviceMemoryResource m """ cdef Table c_table = Table(columns) cdef pair[device_buffer, size_type] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_null_mask.bitmask_and(c_table.view(), stream.view(), mr.get_mr()) + c_result = cpp_null_mask.bitmask_and( + c_table.view(), _cs, mr.get_mr() + ) - return buffer_to_python(move(c_result.first), stream, mr), c_result.second + return buffer_to_python(move(c_result.first), _stream, mr), c_result.second -cpdef tuple bitmask_or(list columns, Stream stream=None, DeviceMemoryResource mr=None): +cpdef tuple bitmask_or(list columns, object stream=None, DeviceMemoryResource mr=None): """Performs bitwise OR of the bitmasks of a list of columns. For details, see :cpp:func:`bitmask_or`. 
@@ -236,20 +243,21 @@ cpdef tuple bitmask_or(list columns, Stream stream=None, DeviceMemoryResource mr """ cdef Table c_table = Table(columns) cdef pair[device_buffer, size_type] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_null_mask.bitmask_or(c_table.view(), stream.view(), mr.get_mr()) + c_result = cpp_null_mask.bitmask_or(c_table.view(), _cs, mr.get_mr()) - return buffer_to_python(move(c_result.first), stream, mr), c_result.second + return buffer_to_python(move(c_result.first), _stream, mr), c_result.second cpdef size_type null_count( object bitmask, size_type start, size_type stop, - Stream stream=None + object stream=None ): """Given a validity bitmask, counts the number of null elements. @@ -277,20 +285,21 @@ cpdef size_type null_count( f"got {type(bitmask).__name__}" ) cdef uintptr_t ptr = bitmask.ptr - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() with nogil: return cpp_null_mask.null_count( ptr, start, stop, - stream.view() + _cs ) cpdef size_type index_of_first_set_bit( object bitmask, size_type start, size_type stop, - Stream stream=None + object stream=None ): """Given a validity bitmask, returns the index of the first valid element relative to ``start``. 
@@ -319,11 +328,12 @@ cpdef size_type index_of_first_set_bit( f"got {type(bitmask).__name__}" ) cdef uintptr_t ptr = bitmask.ptr - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() with nogil: return cpp_null_mask.index_of_first_set_bit( ptr, start, stop, - stream.view() + _cs ) diff --git a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pxd b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pxd index 8cd73fe41ad..2bc3f75b174 100644 --- a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -6,7 +6,6 @@ from pylibcudf.column cimport Column from pylibcudf.libcudf.nvtext.byte_pair_encode cimport bpe_merge_pairs from pylibcudf.scalar cimport Scalar from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cdef class BPEMergePairs: @@ -16,6 +15,6 @@ cpdef Column byte_pair_encoding( Column input, BPEMergePairs merge_pairs, Scalar separator=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi index 4abf1f52b4d..7ee48f72209 100644 --- a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi +++ b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyi @@ -1,17 +1,17 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar +from pylibcudf.utils import CudaStreamLike class BPEMergePairs: def __init__( self, merge_pairs: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ): ... @@ -19,6 +19,6 @@ def byte_pair_encoding( input: Column, merge_pairs: BPEMergePairs, separator: Scalar | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx index 001b9dfca1e..023e00a1169 100644 --- a/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/byte_pair_encode.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from cython.operator cimport dereference @@ -19,6 +19,7 @@ from pylibcudf.scalar cimport Scalar from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["BPEMergePairs", "byte_pair_encoding"] @@ -30,14 +31,17 @@ cdef class BPEMergePairs: def __cinit__( self, Column merge_pairs, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): cdef column_view c_pairs = merge_pairs.view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - self.c_obj = move(cpp_load_merge_pairs(c_pairs, stream.view(), mr.get_mr())) + self.c_obj = move( + cpp_load_merge_pairs(c_pairs, _cs, mr.get_mr()) + ) __hash__ = None @@ -45,7 +49,7 @@ cpdef Column byte_pair_encoding( Column input, BPEMergePairs merge_pairs, Scalar separator=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -70,12 +74,13 @@ cpdef Column byte_pair_encoding( An encoded column of strings. 
""" cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if separator is None: separator = Scalar.from_libcudf( - cpp_make_string_scalar(" ".encode(), stream.view(), mr.get_mr()) + cpp_make_string_scalar(" ".encode(), _stream.view().value(), mr.get_mr()) ) with nogil: @@ -84,9 +89,9 @@ cpdef Column byte_pair_encoding( input.view(), dereference(merge_pairs.c_obj.get()), dereference(separator.c_obj.get()), - stream.view(), + _cs, mr.get_mr() ) ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/nvtext/deduplicate.pxd b/python/pylibcudf/pylibcudf/nvtext/deduplicate.pxd index ecca0a495a1..d038d4a3e27 100644 --- a/python/pylibcudf/pylibcudf/nvtext/deduplicate.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/deduplicate.pxd @@ -1,22 +1,21 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.libcudf.types cimport size_type from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column build_suffix_array( Column input, size_type min_width, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) cpdef Column resolve_duplicates( Column input, Column indices, size_type min_width, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) cpdef Column resolve_duplicates_pair( @@ -25,6 +24,6 @@ cpdef Column resolve_duplicates_pair( Column input2, Column indices2, size_type min_width, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/nvtext/deduplicate.pyi b/python/pylibcudf/pylibcudf/nvtext/deduplicate.pyi index 6e3d6883df4..653ee588f61 100644 --- a/python/pylibcudf/pylibcudf/nvtext/deduplicate.pyi +++ b/python/pylibcudf/pylibcudf/nvtext/deduplicate.pyi @@ -1,22 +1,22 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike def build_suffix_array( input: Column, min_width: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def resolve_duplicates( input: Column, indices: Column, min_width: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
def resolve_duplicates_pair( @@ -25,6 +25,6 @@ def resolve_duplicates_pair( input2: Column, indices2: Column, min_width: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/nvtext/deduplicate.pyx b/python/pylibcudf/pylibcudf/nvtext/deduplicate.pyx index c71ae479674..e679841a792 100644 --- a/python/pylibcudf/pylibcudf/nvtext/deduplicate.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/deduplicate.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from cython.operator import dereference @@ -18,6 +18,7 @@ from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.librmm.device_buffer cimport device_buffer from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "build_suffix_array", @@ -36,14 +37,12 @@ cdef Column _column_from_suffix_array( device_buffer(), 0 ) - ), - stream, - mr + ), stream, mr ) cpdef Column build_suffix_array( - Column input, size_type min_width, Stream stream=None, DeviceMemoryResource mr=None + Column input, size_type min_width, object stream=None, DeviceMemoryResource mr=None ): """ Builds a suffix array for the input strings column. 
@@ -68,22 +67,23 @@ cpdef Column build_suffix_array( New column of suffix array """ cdef cpp_suffix_array_type c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_build_suffix_array( - input.view(), min_width, stream.view(), mr.get_mr() + input.view(), min_width, _cs, mr.get_mr() ) - return _column_from_suffix_array(move(c_result), stream, mr) + return _column_from_suffix_array(move(c_result), _stream, mr) cpdef Column resolve_duplicates( Column input, Column indices, size_type min_width, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -111,15 +111,16 @@ cpdef Column resolve_duplicates( New column of duplicate strings """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_resolve_duplicates( - input.view(), indices.view(), min_width, stream.view(), mr.get_mr() + input.view(), indices.view(), min_width, _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column resolve_duplicates_pair( @@ -128,7 +129,7 @@ cpdef Column resolve_duplicates_pair( Column input2, Column indices2, size_type min_width, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -161,7 +162,8 @@ cpdef Column resolve_duplicates_pair( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -171,8 +173,8 @@ cpdef Column resolve_duplicates_pair( input2.view(), indices2.view(), min_width, - stream.view(), + _cs, mr.get_mr(), ) - return Column.from_libcudf(move(c_result), stream, mr) + return 
Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pxd b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pxd index aca87ac4882..c0297ebd887 100644 --- a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pxd @@ -1,20 +1,19 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column edit_distance( Column input, Column targets, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column edit_distance_matrix( Column input, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyi b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyi index 8c0e97b9951..5a6bde4cb66 100644 --- a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyi +++ b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyi @@ -1,19 +1,19 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike def edit_distance( input: Column, targets: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def edit_distance_matrix( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyx b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyx index 14d3b4539dc..4b9d3f6bcc3 100644 --- a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyx @@ -17,13 +17,14 @@ from rmm.pylibrmm.stream cimport Stream from ..column cimport Column from ..utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["edit_distance", "edit_distance_matrix"] cpdef Column edit_distance( Column input, Column targets, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -48,18 +49,19 @@ cpdef Column edit_distance( cdef column_view c_strings = input.view() cdef column_view c_targets = targets.view() cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_edit_distance(c_strings, c_targets, stream.view(), mr.get_mr()) + c_result = cpp_edit_distance(c_strings, c_targets, _cs, mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column edit_distance_matrix( Column input, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -88,10 +90,11 @@ cpdef Column edit_distance_matrix( ) cdef column_view c_strings = input.view() cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_edit_distance_matrix(c_strings, stream.view(), mr.get_mr()) + c_result = cpp_edit_distance_matrix(c_strings, _cs, mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git 
a/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pxd b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pxd index 1eb55f1fcf6..85477223954 100644 --- a/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libc.stdint cimport uint32_t @@ -6,21 +6,20 @@ from pylibcudf.column cimport Column from pylibcudf.libcudf.types cimport size_type from pylibcudf.scalar cimport Scalar from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column generate_ngrams( Column input, size_type ngrams, Scalar separator, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column generate_character_ngrams( Column input, size_type ngrams=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -28,6 +27,6 @@ cpdef Column hash_character_ngrams( Column input, size_type ngrams, uint32_t seed, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyi b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyi index 7a522acc5a9..317fdb9ee73 100644 --- a/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyi +++ b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyi @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from typing import Any @@ -6,28 +6,28 @@ from typing import Any import numpy as np from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar +from pylibcudf.utils import CudaStreamLike def generate_ngrams( input: Column, ngrams: int, separator: Scalar, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def generate_character_ngrams( input: Column, ngrams: int = 2, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def hash_character_ngrams( input: Column, ngrams: int, seed: int | np.unsignedinteger[Any], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyx b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyx index ca8a21c279c..6d70751a5a0 100644 --- a/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libc.stdint cimport uint32_t @@ -18,6 +18,7 @@ from pylibcudf.scalar cimport Scalar from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "generate_ngrams", @@ -29,7 +30,7 @@ cpdef Column generate_ngrams( Column input, size_type ngrams, Scalar separator, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -56,7 +57,8 @@ cpdef Column generate_ngrams( cdef column_view c_strings = input.view() cdef const string_scalar* c_separator = separator.c_obj.get() cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -64,16 +66,16 @@ cpdef Column generate_ngrams( c_strings, ngrams, c_separator[0], - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column generate_character_ngrams( Column input, size_type ngrams = 2, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -97,24 +99,25 @@ cpdef Column generate_character_ngrams( """ cdef column_view c_strings = input.view() cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_generate_character_ngrams( c_strings, ngrams, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column hash_character_ngrams( Column input, size_type ngrams, uint32_t seed, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -140,7 +143,8 
@@ cpdef Column hash_character_ngrams( """ cdef column_view c_strings = input.view() cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -148,7 +152,7 @@ cpdef Column hash_character_ngrams( c_strings, ngrams, seed, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/nvtext/jaccard.pxd b/python/pylibcudf/pylibcudf/nvtext/jaccard.pxd index fbf8e99ac55..1e3a26454a1 100644 --- a/python/pylibcudf/pylibcudf/nvtext/jaccard.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/jaccard.pxd @@ -1,16 +1,15 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.libcudf.types cimport size_type from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column jaccard_index( Column input1, Column input2, size_type width, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/nvtext/jaccard.pyi b/python/pylibcudf/pylibcudf/nvtext/jaccard.pyi index abc86597c0e..355d2d7a92f 100644 --- a/python/pylibcudf/pylibcudf/nvtext/jaccard.pyi +++ b/python/pylibcudf/pylibcudf/nvtext/jaccard.pyi @@ -1,15 +1,15 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike def jaccard_index( input1: Column, input2: Column, width: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/nvtext/jaccard.pyx b/python/pylibcudf/pylibcudf/nvtext/jaccard.pyx index 4089853ca77..24a343e4508 100644 --- a/python/pylibcudf/pylibcudf/nvtext/jaccard.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/jaccard.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -13,6 +13,7 @@ from pylibcudf.libcudf.types cimport size_type from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["jaccard_index"] @@ -20,7 +21,7 @@ cpdef Column jaccard_index( Column input1, Column input2, size_type width, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -49,7 +50,8 @@ cpdef Column jaccard_index( cdef column_view c_input1 = input1.view() cdef column_view c_input2 = input2.view() cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -57,8 +59,8 @@ cpdef Column jaccard_index( c_input1, c_input2, width, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/nvtext/minhash.pxd 
b/python/pylibcudf/pylibcudf/nvtext/minhash.pxd index 0647337324d..f26b1e30245 100644 --- a/python/pylibcudf/pylibcudf/nvtext/minhash.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/minhash.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libc.stdint cimport uint32_t, uint64_t @@ -6,7 +6,6 @@ from pylibcudf.column cimport Column from pylibcudf.libcudf.types cimport size_type from pylibcudf.scalar cimport Scalar from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream ctypedef fused ColumnOrScalar: Column @@ -18,7 +17,7 @@ cpdef Column minhash( Column a, Column b, size_type width, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) @@ -28,7 +27,7 @@ cpdef Column minhash64( Column a, Column b, size_type width, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) @@ -38,7 +37,7 @@ cpdef Column minhash_ngrams( uint32_t seed, Column a, Column b, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) @@ -48,6 +47,6 @@ cpdef Column minhash64_ngrams( uint64_t seed, Column a, Column b, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/nvtext/minhash.pyi b/python/pylibcudf/pylibcudf/nvtext/minhash.pyi index ee924f8d7aa..5bce73dc991 100644 --- a/python/pylibcudf/pylibcudf/nvtext/minhash.pyi +++ b/python/pylibcudf/pylibcudf/nvtext/minhash.pyi @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from typing import Any @@ -6,9 +6,9 @@ from typing import Any import numpy as np from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike def minhash( input: Column, @@ -16,7 +16,7 @@ def minhash( a: Column, b: Column, width: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def minhash64( @@ -25,7 +25,7 @@ def minhash64( a: Column, b: Column, width: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def minhash_ngrams( @@ -34,7 +34,7 @@ def minhash_ngrams( seed: int | np.unsignedinteger[Any], a: Column, b: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def minhash64_ngrams( @@ -43,6 +43,6 @@ def minhash64_ngrams( seed: int | np.unsignedinteger[Any], a: Column, b: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/nvtext/minhash.pyx b/python/pylibcudf/pylibcudf/nvtext/minhash.pyx index 1329d88060c..3029ed54c50 100644 --- a/python/pylibcudf/pylibcudf/nvtext/minhash.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/minhash.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libc.stdint cimport uint32_t, uint64_t @@ -16,6 +16,7 @@ from pylibcudf.libcudf.types cimport size_type from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "minhash", @@ -30,7 +31,7 @@ cpdef Column minhash( Column a, Column b, size_type width, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """ @@ -58,7 +59,8 @@ cpdef Column minhash( List column of minhash values for each string per seed """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -68,11 +70,11 @@ cpdef Column minhash( a.view(), b.view(), width, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column minhash64( Column input, @@ -80,7 +82,7 @@ cpdef Column minhash64( Column a, Column b, size_type width, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """ @@ -110,7 +112,8 @@ cpdef Column minhash64( List column of minhash values for each string per seed """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -120,11 +123,11 @@ cpdef Column minhash64( a.view(), b.view(), width, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column minhash_ngrams( Column input, @@ -132,7 +135,7 @@ cpdef Column minhash_ngrams( uint32_t seed, Column a, Column b, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """ @@ 
-163,7 +166,8 @@ cpdef Column minhash_ngrams( value in columns a and b. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -173,11 +177,11 @@ cpdef Column minhash_ngrams( seed, a.view(), b.view(), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column minhash64_ngrams( Column input, @@ -185,7 +189,7 @@ cpdef Column minhash64_ngrams( uint64_t seed, Column a, Column b, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """ @@ -216,7 +220,8 @@ cpdef Column minhash64_ngrams( value in columns a and b. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -226,8 +231,8 @@ cpdef Column minhash64_ngrams( seed, a.view(), b.view(), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pxd b/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pxd index f410d778cb1..5deaa45c73f 100644 --- a/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pxd @@ -1,11 +1,10 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.libcudf.types cimport size_type from pylibcudf.scalar cimport Scalar from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column ngrams_tokenize( @@ -13,6 +12,6 @@ cpdef Column ngrams_tokenize( size_type ngrams, Scalar delimiter, Scalar separator, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyi b/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyi index 1347b7e7087..99c309a21ff 100644 --- a/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyi +++ b/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyi @@ -1,17 +1,17 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar +from pylibcudf.utils import CudaStreamLike def ngrams_tokenize( input: Column, ngrams: int, delimiter: Scalar, separator: Scalar, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyx b/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyx index f9f36244a1d..959c47d595d 100644 --- a/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/ngrams_tokenize.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from cython.operator cimport dereference @@ -15,6 +15,7 @@ from pylibcudf.scalar cimport Scalar from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["ngrams_tokenize"] @@ -23,7 +24,7 @@ cpdef Column ngrams_tokenize( size_type ngrams, Scalar delimiter, Scalar separator, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -52,7 +53,8 @@ cpdef Column ngrams_tokenize( New strings columns of tokens """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -61,7 +63,7 @@ cpdef Column ngrams_tokenize( ngrams, dereference(delimiter.get()), dereference(separator.get()), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/nvtext/normalize.pxd b/python/pylibcudf/pylibcudf/nvtext/normalize.pxd index 8c8623e07a3..30e459f75a5 100644 --- a/python/pylibcudf/pylibcudf/nvtext/normalize.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/normalize.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool @@ -6,16 +6,17 @@ from libcpp.memory cimport unique_ptr from pylibcudf.column cimport Column from pylibcudf.libcudf.nvtext.normalize cimport character_normalizer from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cdef class CharacterNormalizer: cdef unique_ptr[character_normalizer] c_obj -cpdef Column normalize_spaces(Column input, Stream stream=*, DeviceMemoryResource mr=*) +cpdef Column normalize_spaces( + Column input, object stream = *, DeviceMemoryResource mr=* +) cpdef Column normalize_characters( Column input, CharacterNormalizer normalizer, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/nvtext/normalize.pyi b/python/pylibcudf/pylibcudf/nvtext/normalize.pyi index 958adb10ada..0fbd2e7e725 100644 --- a/python/pylibcudf/pylibcudf/nvtext/normalize.pyi +++ b/python/pylibcudf/pylibcudf/nvtext/normalize.pyi @@ -1,28 +1,28 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike class CharacterNormalizer: def __init__( self, do_lower_case: bool, special_tokens: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ): ... def normalize_spaces( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def normalize_characters( input: Column, normalizer: CharacterNormalizer, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/nvtext/normalize.pyx b/python/pylibcudf/pylibcudf/nvtext/normalize.pyx index 5f62189f2f5..8e29aad9121 100644 --- a/python/pylibcudf/pylibcudf/nvtext/normalize.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/normalize.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from cython.operator cimport dereference @@ -12,6 +12,7 @@ from pylibcudf.libcudf.nvtext cimport normalize as cpp_normalize from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "CharacterNormalizer" @@ -28,18 +29,19 @@ cdef class CharacterNormalizer: self, bool do_lower_case, Column tokens, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): cdef column_view c_tokens = tokens.view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: self.c_obj = move( cpp_normalize.create_character_normalizer( do_lower_case, c_tokens, - stream.view(), + _cs, mr.get_mr() ) ) @@ -47,7 +49,7 @@ cdef class CharacterNormalizer: __hash__ = None cpdef Column normalize_spaces( - Column input, Stream stream=None, DeviceMemoryResource mr=None + Column input, object stream=None, DeviceMemoryResource mr=None ): """ Returns a new strings column by normalizing the whitespace in @@ -68,21 +70,22 @@ cpdef Column normalize_spaces( New strings columns of normalized strings. 
""" cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_normalize.normalize_spaces( - input.view(), stream.view(), mr.get_mr() + input.view(), _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column normalize_characters( Column input, CharacterNormalizer normalizer, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -105,15 +108,16 @@ cpdef Column normalize_characters( Normalized strings column """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_normalize.normalize_characters( input.view(), dereference(normalizer.c_obj.get()), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/nvtext/replace.pxd b/python/pylibcudf/pylibcudf/nvtext/replace.pxd index c6a9ed5ba67..1265f75a514 100644 --- a/python/pylibcudf/pylibcudf/nvtext/replace.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/replace.pxd @@ -1,11 +1,10 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.libcudf.types cimport size_type from pylibcudf.scalar cimport Scalar from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column replace_tokens( @@ -13,7 +12,7 @@ cpdef Column replace_tokens( Column targets, Column replacements, Scalar delimiter=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -22,6 +21,6 @@ cpdef Column filter_tokens( size_type min_token_length, Scalar replacement=*, Scalar delimiter=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/nvtext/replace.pyi b/python/pylibcudf/pylibcudf/nvtext/replace.pyi index 09187c1edf1..a5e451cdb16 100644 --- a/python/pylibcudf/pylibcudf/nvtext/replace.pyi +++ b/python/pylibcudf/pylibcudf/nvtext/replace.pyi @@ -1,18 +1,18 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar +from pylibcudf.utils import CudaStreamLike def replace_tokens( input: Column, targets: Column, replacements: Column, delimiter: Scalar | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def filter_tokens( @@ -20,6 +20,6 @@ def filter_tokens( min_token_length: int, replacement: Scalar | None = None, delimiter: Scalar | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/nvtext/replace.pyx b/python/pylibcudf/pylibcudf/nvtext/replace.pyx index db375e6993f..4b00d76bd64 100644 --- a/python/pylibcudf/pylibcudf/nvtext/replace.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/replace.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from cython.operator cimport dereference @@ -19,6 +19,7 @@ from pylibcudf.scalar cimport Scalar from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["filter_tokens", "replace_tokens"] @@ -27,7 +28,7 @@ cpdef Column replace_tokens( Column targets, Column replacements, Scalar delimiter=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -55,11 +56,12 @@ cpdef Column replace_tokens( New strings column with replaced strings """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if delimiter is None: delimiter = Scalar.from_libcudf( - cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr()) + cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr()) ) with nogil: c_result = cpp_replace_tokens( @@ -67,10 +69,10 @@ cpdef Column replace_tokens( targets.view(), replacements.view(), dereference(delimiter.get()), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column filter_tokens( @@ -78,7 +80,7 @@ cpdef Column filter_tokens( size_type min_token_length, Scalar replacement=None, Scalar delimiter=None, - Stream stream=None, + object stream=None, DeviceMemoryResource 
mr=None, ): """ @@ -107,15 +109,16 @@ cpdef Column filter_tokens( New strings column of filtered strings """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if delimiter is None: delimiter = Scalar.from_libcudf( - cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr()) + cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr()) ) if replacement is None: replacement = Scalar.from_libcudf( - cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr()) + cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr()) ) with nogil: @@ -124,8 +127,8 @@ cpdef Column filter_tokens( min_token_length, dereference(replacement.get()), dereference(delimiter.get()), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/nvtext/stemmer.pxd b/python/pylibcudf/pylibcudf/nvtext/stemmer.pxd index 0b19c699ea8..d9f9ef1549c 100644 --- a/python/pylibcudf/pylibcudf/nvtext/stemmer.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/stemmer.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool @@ -6,7 +6,6 @@ from pylibcudf.column cimport Column from pylibcudf.libcudf.nvtext.stemmer cimport letter_type from pylibcudf.libcudf.types cimport size_type from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream ctypedef fused ColumnOrSize: Column @@ -16,10 +15,10 @@ cpdef Column is_letter( Column input, bool check_vowels, ColumnOrSize indices, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column porter_stemmer_measure( - Column input, Stream stream=*, DeviceMemoryResource mr=* + Column input, object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/nvtext/stemmer.pyi b/python/pylibcudf/pylibcudf/nvtext/stemmer.pyi index ae53ce887a4..5fef689a895 100644 --- a/python/pylibcudf/pylibcudf/nvtext/stemmer.pyi +++ b/python/pylibcudf/pylibcudf/nvtext/stemmer.pyi @@ -1,20 +1,20 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike def is_letter( input: Column, check_vowels: bool, indices: Column | int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def porter_stemmer_measure( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/nvtext/stemmer.pyx b/python/pylibcudf/pylibcudf/nvtext/stemmer.pyx index 44dc6be5c60..e038cd03fb2 100644 --- a/python/pylibcudf/pylibcudf/nvtext/stemmer.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/stemmer.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool @@ -18,6 +18,7 @@ from pylibcudf.utils cimport _get_stream, _get_memory_resource from pylibcudf.libcudf.nvtext.stemmer import letter_type as LetterType # no-cython-lint from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["is_letter", "porter_stemmer_measure", "LetterType"] @@ -25,7 +26,7 @@ cpdef Column is_letter( Column input, bool check_vowels, ColumnOrSize indices, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -55,7 +56,8 @@ cpdef Column is_letter( New boolean column. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -63,14 +65,14 @@ cpdef Column is_letter( input.view(), letter_type.VOWEL if check_vowels else letter_type.CONSONANT, indices if ColumnOrSize is size_type else indices.view(), - stream.view() + _cs ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column porter_stemmer_measure( - Column input, Stream stream=None, DeviceMemoryResource mr=None + Column input, object stream=None, DeviceMemoryResource mr=None ): """ Returns the Porter Stemmer measurements of a strings column. 
@@ -92,12 +94,13 @@ cpdef Column porter_stemmer_measure( New column of measure values """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_porter_stemmer_measure(input.view(), stream.view(), mr.get_mr()) + c_result = cpp_porter_stemmer_measure(input.view(), _cs, mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) LetterType.__str__ = LetterType.__repr__ diff --git a/python/pylibcudf/pylibcudf/nvtext/tokenize.pxd b/python/pylibcudf/pylibcudf/nvtext/tokenize.pxd index 2ad694d1eca..8346d420440 100644 --- a/python/pylibcudf/pylibcudf/nvtext/tokenize.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/tokenize.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -7,36 +7,35 @@ from pylibcudf.libcudf.nvtext.tokenize cimport tokenize_vocabulary from pylibcudf.libcudf.types cimport size_type from pylibcudf.scalar cimport Scalar from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cdef class TokenizeVocabulary: cdef unique_ptr[tokenize_vocabulary] c_obj cpdef Column tokenize_scalar( - Column input, Scalar delimiter=*, Stream stream=*, DeviceMemoryResource mr=* + Column input, Scalar delimiter=*, object stream = *, DeviceMemoryResource mr=* ) cpdef Column tokenize_column( - Column input, Column delimiters, Stream stream=*, DeviceMemoryResource mr=* + Column input, Column delimiters, object stream = *, DeviceMemoryResource mr=* ) cpdef Column count_tokens_scalar( - Column input, Scalar delimiter=*, Stream stream=*, DeviceMemoryResource mr=* + Column input, Scalar delimiter=*, object stream = *, DeviceMemoryResource mr=* ) cpdef Column count_tokens_column( - Column input, Column delimiters, Stream stream=*, DeviceMemoryResource mr=* + Column input, Column delimiters, object stream = *, DeviceMemoryResource mr=* ) cpdef Column character_tokenize( - Column input, Stream stream=*, DeviceMemoryResource mr=* + Column input, object stream = *, DeviceMemoryResource mr=* ) cpdef Column detokenize( Column input, Column row_indices, Scalar separator=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -45,6 +44,6 @@ cpdef Column tokenize_with_vocabulary( TokenizeVocabulary vocabulary, Scalar delimiter, size_type default_id=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/nvtext/tokenize.pyi b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyi index c6e2d4cfcb4..72a5209902e 100644 --- a/python/pylibcudf/pylibcudf/nvtext/tokenize.pyi +++ b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyi @@ -1,54 +1,54 @@ -# SPDX-FileCopyrightText: 
Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar +from pylibcudf.utils import CudaStreamLike class TokenizeVocabulary: def __init__( self, vocab: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ): ... def tokenize_scalar( input: Column, delimiter: Scalar | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def tokenize_column( input: Column, delimiters: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def count_tokens_scalar( input: Column, delimiter: Scalar | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def count_tokens_column( input: Column, delimiters: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def character_tokenize( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def detokenize( input: Column, row_indices: Column, separator: Scalar | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def tokenize_with_vocabulary( @@ -56,6 +56,6 @@ def tokenize_with_vocabulary( vocabulary: TokenizeVocabulary, delimiter: Scalar, default_id: int = -1, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx index e296ea38a58..4e44d781cc4 100644 --- a/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/tokenize.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from cython.operator cimport dereference @@ -24,6 +24,7 @@ from pylibcudf.scalar cimport Scalar from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "TokenizeVocabulary", @@ -41,19 +42,20 @@ cdef class TokenizeVocabulary: For details, see :cpp:class:`cudf::nvtext::tokenize_vocabulary`. """ - def __cinit__(self, Column vocab, Stream stream=None, DeviceMemoryResource mr=None): + def __cinit__(self, Column vocab, object stream=None, DeviceMemoryResource mr=None): cdef column_view c_vocab = vocab.view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - self.c_obj = move(cpp_load_vocabulary(c_vocab, stream.view(), mr.get_mr())) + self.c_obj = move(cpp_load_vocabulary(c_vocab, _cs, mr.get_mr())) __hash__ = None cpdef Column tokenize_scalar( Column input, Scalar delimiter=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -77,26 +79,27 @@ cpdef Column tokenize_scalar( New strings columns of tokens """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if delimiter is None: delimiter = Scalar.from_libcudf( - cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr()) 
+ cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr()) ) with nogil: c_result = cpp_tokenize( input.view(), dereference(delimiter.c_obj.get()), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column tokenize_column( - Column input, Column delimiters, Stream stream=None, DeviceMemoryResource mr=None + Column input, Column delimiters, object stream=None, DeviceMemoryResource mr=None ): """ Returns a single column of strings by tokenizing the input @@ -119,23 +122,24 @@ cpdef Column tokenize_column( New strings columns of tokens """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_tokenize( input.view(), delimiters.view(), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column count_tokens_scalar( Column input, Scalar delimiter=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -159,26 +163,27 @@ cpdef Column count_tokens_scalar( New column of token counts """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if delimiter is None: delimiter = Scalar.from_libcudf( - cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr()) + cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr()) ) with nogil: c_result = cpp_count_tokens( input.view(), dereference(delimiter.c_obj.get()), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column count_tokens_column( - Column input, 
Column delimiters, Stream stream=None, DeviceMemoryResource mr=None + Column input, Column delimiters, object stream=None, DeviceMemoryResource mr=None ): """ Returns the number of tokens in each string of a strings column @@ -201,21 +206,22 @@ cpdef Column count_tokens_column( New column of token counts """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_count_tokens( input.view(), delimiters.view(), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column character_tokenize( - Column input, Stream stream=None, DeviceMemoryResource mr=None + Column input, object stream=None, DeviceMemoryResource mr=None ): """ Returns a single column of strings by converting @@ -236,18 +242,19 @@ cpdef Column character_tokenize( New strings columns of tokens """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_character_tokenize(input.view(), stream.view(), mr.get_mr()) + c_result = cpp_character_tokenize(input.view(), _cs, mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column detokenize( Column input, Column row_indices, Scalar separator=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -273,12 +280,13 @@ cpdef Column detokenize( New strings columns of tokens """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if separator is None: separator = Scalar.from_libcudf( - 
cpp_make_string_scalar(" ".encode(), stream.view(), mr.get_mr()) + cpp_make_string_scalar(" ".encode(), _stream.view().value(), mr.get_mr()) ) with nogil: @@ -286,18 +294,18 @@ cpdef Column detokenize( input.view(), row_indices.view(), dereference(separator.c_obj.get()), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column tokenize_with_vocabulary( Column input, TokenizeVocabulary vocabulary, Scalar delimiter, size_type default_id=-1, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -325,7 +333,8 @@ cpdef Column tokenize_with_vocabulary( Lists column of token ids """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -334,8 +343,8 @@ cpdef Column tokenize_with_vocabulary( dereference(vocabulary.c_obj.get()), dereference(delimiter.c_obj.get()), default_id, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pxd b/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pxd index 3f7685903e0..604a566c701 100644 --- a/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -6,7 +6,6 @@ from pylibcudf.column cimport Column from pylibcudf.libcudf.nvtext.wordpiece_tokenize cimport wordpiece_vocabulary from pylibcudf.libcudf.types cimport size_type from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cdef class WordPieceVocabulary: cdef unique_ptr[wordpiece_vocabulary] c_obj @@ -15,6 +14,6 @@ cpdef Column wordpiece_tokenize( Column input, WordPieceVocabulary vocabulary, size_type max_words_per_row, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pyi b/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pyi index e91cfc8f21e..e77a8c86a69 100644 --- a/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pyi +++ b/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pyi @@ -1,16 +1,16 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike class WordPieceVocabulary: def __init__( self, vocab: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ): ... @@ -18,6 +18,6 @@ def wordpiece_tokenize( input: Column, vocabulary: WordPieceVocabulary, max_words_per_row: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pyx b/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pyx index b6c516cf739..dfdb563087d 100644 --- a/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pyx +++ b/python/pylibcudf/pylibcudf/nvtext/wordpiece_tokenize.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from cython.operator cimport dereference @@ -15,6 +15,7 @@ from pylibcudf.libcudf.types cimport size_type from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "WordPieceVocabulary", @@ -29,15 +30,16 @@ cdef class WordPieceVocabulary: def __cinit__( self, Column vocab, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): cdef column_view c_vocab = vocab.view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: self.c_obj = move(cpp_load_wordpiece_vocabulary( - c_vocab, stream.view(), mr.get_mr() + c_vocab, _cs, mr.get_mr() )) __hash__ = None @@ -46,7 +48,7 @@ cpdef Column wordpiece_tokenize( Column input, WordPieceVocabulary vocabulary, size_type max_words_per_row, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -73,7 +75,8 @@ cpdef Column wordpiece_tokenize( Lists column of token ids """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -81,8 +84,8 @@ cpdef Column wordpiece_tokenize( input.view(), dereference(vocabulary.c_obj.get()), max_words_per_row, - stream.view(), + _cs, mr.get_mr() ) - 
return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/partitioning.pxd b/python/pylibcudf/pylibcudf/partitioning.pxd index 096b4eb99e8..84c9b647691 100644 --- a/python/pylibcudf/pylibcudf/partitioning.pxd +++ b/python/pylibcudf/pylibcudf/partitioning.pxd @@ -1,7 +1,6 @@ # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from libc.stdint cimport uint32_t @@ -20,7 +19,7 @@ cpdef tuple[Table, list] hash_partition( int num_partitions, hash_id hash_function = *, uint32_t seed = *, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -28,7 +27,7 @@ cpdef tuple[Table, list] partition( Table t, Column partition_map, int num_partitions, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -36,6 +35,6 @@ cpdef tuple[Table, list] round_robin_partition( Table input, int num_partitions, int start_partition=*, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) diff --git a/python/pylibcudf/pylibcudf/partitioning.pyi b/python/pylibcudf/pylibcudf/partitioning.pyi index 081ee53731f..971346421ea 100644 --- a/python/pylibcudf/pylibcudf/partitioning.pyi +++ b/python/pylibcudf/pylibcudf/partitioning.pyi @@ -4,10 +4,10 @@ from enum import IntEnum from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.table import Table +from pylibcudf.utils import CudaStreamLike class HashId(IntEnum): HASH_IDENTITY = ... @@ -19,20 +19,20 @@ def hash_partition( num_partitions: int, hash_function: HashId = ..., seed: int = ..., - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Table, list[int]]: ... 
def partition( t: Table, partition_map: Column, num_partitions: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Table, list[int]]: ... def round_robin_partition( input: Table, num_partitions: int, start_partition: int = 0, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Table, list[int]]: ... diff --git a/python/pylibcudf/pylibcudf/partitioning.pyx b/python/pylibcudf/pylibcudf/partitioning.pyx index b8da9249656..62e35ab9cca 100644 --- a/python/pylibcudf/pylibcudf/partitioning.pyx +++ b/python/pylibcudf/pylibcudf/partitioning.pyx @@ -15,6 +15,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .column cimport Column from .table cimport Table from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ @@ -29,7 +30,7 @@ cpdef tuple[Table, list] hash_partition( int num_partitions, cpp_partitioning.hash_id hash_function = cpp_partitioning.hash_id.HASH_MURMUR3, uint32_t seed = cpp_partitioning.DEFAULT_HASH_SEED, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -63,7 +64,8 @@ cpdef tuple[Table, list] hash_partition( cdef pair[unique_ptr[table], vector[libcudf_types.size_type]] c_result cdef int c_num_partitions = num_partitions cdef vector[libcudf_types.size_type] columns_to_hash - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if TableOrList is Table: with nogil: @@ -73,7 +75,7 @@ cpdef tuple[Table, list] hash_partition( c_num_partitions, hash_function, seed, - stream.view(), + _cs, mr.get_mr() ) else: @@ -85,17 +87,17 @@ cpdef tuple[Table, list] hash_partition( c_num_partitions, hash_function, seed, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result.first), stream, mr), 
list(c_result.second) + return Table.from_libcudf(move(c_result.first), _stream, mr), list(c_result.second) cpdef tuple[Table, list] partition( Table t, Column partition_map, int num_partitions, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -126,7 +128,8 @@ cpdef tuple[Table, list] partition( cdef pair[unique_ptr[table], vector[libcudf_types.size_type]] c_result cdef int c_num_partitions = num_partitions - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -134,18 +137,18 @@ cpdef tuple[Table, list] partition( t.view(), partition_map.view(), c_num_partitions, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result.first), stream, mr), list(c_result.second) + return Table.from_libcudf(move(c_result.first), _stream, mr), list(c_result.second) cpdef tuple[Table, list] round_robin_partition( Table input, int num_partitions, int start_partition=0, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -176,7 +179,8 @@ cpdef tuple[Table, list] round_robin_partition( cdef int c_num_partitions = num_partitions cdef int c_start_partition = start_partition - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -184,8 +188,8 @@ cpdef tuple[Table, list] round_robin_partition( input.view(), c_num_partitions, c_start_partition, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result.first), stream, mr), list(c_result.second) + return Table.from_libcudf(move(c_result.first), _stream, mr), list(c_result.second) diff --git a/python/pylibcudf/pylibcudf/quantiles.pxd b/python/pylibcudf/pylibcudf/quantiles.pxd index 9492ef8ce38..668e8015688 100644 --- a/python/pylibcudf/pylibcudf/quantiles.pxd +++ b/python/pylibcudf/pylibcudf/quantiles.pxd @@ 
-1,9 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.vector cimport vector from pylibcudf.libcudf.types cimport interpolation, sorted from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .table cimport Table @@ -15,7 +14,7 @@ cpdef Column quantile( interpolation interp = *, Column ordered_indices = *, bint exact = *, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -26,6 +25,6 @@ cpdef Table quantiles( sorted is_input_sorted = *, list column_order = *, list null_precedence = *, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) diff --git a/python/pylibcudf/pylibcudf/quantiles.pyi b/python/pylibcudf/pylibcudf/quantiles.pyi index 2e414357651..9af646407ab 100644 --- a/python/pylibcudf/pylibcudf/quantiles.pyi +++ b/python/pylibcudf/pylibcudf/quantiles.pyi @@ -1,14 +1,14 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from collections.abc import Iterable from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.table import Table from pylibcudf.types import Interpolation, NullOrder, Order, Sorted +from pylibcudf.utils import CudaStreamLike def quantile( input: Column, @@ -16,7 +16,7 @@ def quantile( interp: Interpolation = Interpolation.LINEAR, ordered_indices: Column | None = None, exact: bool = True, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
def quantiles( @@ -26,6 +26,6 @@ def quantiles( is_input_sorted: Sorted = Sorted.NO, column_order: list[Order] | None = None, null_precedence: list[NullOrder] | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... diff --git a/python/pylibcudf/pylibcudf/quantiles.pyx b/python/pylibcudf/pylibcudf/quantiles.pyx index de1ee3344d3..f02643754cb 100644 --- a/python/pylibcudf/pylibcudf/quantiles.pyx +++ b/python/pylibcudf/pylibcudf/quantiles.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool @@ -20,6 +20,7 @@ from .column cimport Column from .table cimport Table from .types cimport interpolation from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["quantile", "quantiles"] @@ -29,7 +30,7 @@ cpdef Column quantile( interpolation interp = interpolation.LINEAR, Column ordered_indices = None, bool exact=True, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Computes quantiles with interpolation. 
@@ -74,7 +75,8 @@ cpdef Column quantile( else: ordered_indices_view = ordered_indices.view() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -84,11 +86,11 @@ cpdef Column quantile( interp, ordered_indices_view, exact, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Table quantiles( @@ -98,7 +100,7 @@ cpdef Table quantiles( sorted is_input_sorted = sorted.NO, list column_order = None, list null_precedence = None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Computes row quantiles with interpolation. @@ -156,7 +158,8 @@ cpdef Table quantiles( if null_precedence is not None: null_precedence_vec = null_precedence - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -167,8 +170,8 @@ cpdef Table quantiles( is_input_sorted, column_order_vec, null_precedence_vec, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/reduce.pxd b/python/pylibcudf/pylibcudf/reduce.pxd index e9acd2aaed5..dc33d7053f4 100644 --- a/python/pylibcudf/pylibcudf/reduce.pxd +++ b/python/pylibcudf/pylibcudf/reduce.pxd @@ -4,7 +4,6 @@ from libcpp cimport bool from pylibcudf.libcudf.reduce cimport scan_type from pylibcudf.libcudf.types cimport nan_policy, null_policy, size_type -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .aggregation cimport Aggregation @@ -18,7 +17,7 @@ cpdef Scalar reduce( Aggregation agg, DataType data_type, Scalar init = *, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -26,11 
+25,11 @@ cpdef Column scan( Column col, Aggregation agg, scan_type inclusive, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) -cpdef tuple minmax(Column col, Stream stream = *, DeviceMemoryResource mr = *) +cpdef tuple minmax(Column col, object stream = *, DeviceMemoryResource mr = *) cpdef bool is_valid_reduce_aggregation(DataType source, Aggregation agg) @@ -38,12 +37,12 @@ cpdef size_type unique_count( Column source, null_policy null_handling, nan_policy nan_handling, - Stream stream = * + object stream = * ) cpdef size_type distinct_count( Column source, null_policy null_handling, nan_policy nan_handling, - Stream stream = * + object stream = * ) diff --git a/python/pylibcudf/pylibcudf/reduce.pyi b/python/pylibcudf/pylibcudf/reduce.pyi index 5956b93661c..9e1c643b0cd 100644 --- a/python/pylibcudf/pylibcudf/reduce.pyi +++ b/python/pylibcudf/pylibcudf/reduce.pyi @@ -4,12 +4,12 @@ from enum import IntEnum from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.aggregation import Aggregation from pylibcudf.column import Column from pylibcudf.scalar import Scalar from pylibcudf.types import DataType, NanPolicy, NullPolicy +from pylibcudf.utils import CudaStreamLike class ScanType(IntEnum): INCLUSIVE = ... @@ -19,19 +19,19 @@ def reduce( col: Column, agg: Aggregation, data_type: DataType, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Scalar: ... def scan( col: Column, agg: Aggregation, inclusive: ScanType, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def minmax( col: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Scalar, Scalar]: ... 
def is_valid_reduce_aggregation( @@ -41,11 +41,11 @@ def unique_count( source: Column, null_handling: NullPolicy, nan_handling: NanPolicy, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> int: ... def distinct_count( source: Column, null_handling: NullPolicy, nan_handling: NanPolicy, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> int: ... diff --git a/python/pylibcudf/pylibcudf/reduce.pyx b/python/pylibcudf/pylibcudf/reduce.pyx index 54036b73e85..95c3555d021 100644 --- a/python/pylibcudf/pylibcudf/reduce.pyx +++ b/python/pylibcudf/pylibcudf/reduce.pyx @@ -31,6 +31,7 @@ from .types cimport DataType from .utils cimport _get_stream, _get_memory_resource from pylibcudf.libcudf.reduce import scan_type as ScanType # no-cython-lint +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "ScanType", @@ -47,7 +48,7 @@ cpdef Scalar reduce( Aggregation agg, DataType data_type, Scalar init=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a reduction on a column @@ -79,7 +80,8 @@ cpdef Scalar reduce( cdef optional[reference_wrapper[constscalar]] c_init cdef const scalar* c_init_ptr - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if init is not None: @@ -96,7 +98,7 @@ cpdef Scalar reduce( dereference(c_agg), data_type.c_obj, c_init, - stream.view(), + _cs, mr.get_mr() ) return Scalar.from_libcudf(move(result)) @@ -106,7 +108,7 @@ cpdef Column scan( Column col, Aggregation agg, scan_type inclusive, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a scan on a column @@ -134,7 +136,8 @@ cpdef Column scan( cdef unique_ptr[column] result cdef const scan_aggregation *c_agg = agg.view_underlying_as_scan() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = 
_get_memory_resource(mr) with nogil: @@ -143,13 +146,13 @@ cpdef Column scan( dereference(c_agg), inclusive, null_policy.EXCLUDE, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) -cpdef tuple minmax(Column col, Stream stream=None, DeviceMemoryResource mr=None): +cpdef tuple minmax(Column col, object stream=None, DeviceMemoryResource mr=None): """Compute the minimum and maximum of a column For details, see ``cudf::minmax`` documentation. @@ -173,11 +176,12 @@ cpdef tuple minmax(Column col, Stream stream=None, DeviceMemoryResource mr=None) cdef Scalar min_scalar cdef Scalar max_scalar - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - result = cpp_minmax(col.view(), stream.view(), mr.get_mr()) + result = cpp_minmax(col.view(), _cs, mr.get_mr()) min_scalar = Scalar.from_libcudf(move(result.first)) max_scalar = Scalar.from_libcudf(move(result.second)) @@ -206,7 +210,7 @@ cpdef size_type unique_count( Column source, null_policy null_handling, nan_policy nan_handling, - Stream stream=None + object stream=None ): """Returns the number of unique consecutive elements in the input column. @@ -231,10 +235,10 @@ cpdef size_type unique_count( If the input column is sorted, then unique_count can produce the same result as distinct_count, but faster. """ - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) return cpp_unique_count.unique_count( - source.view(), null_handling, nan_handling, stream.view() + source.view(), null_handling, nan_handling, _stream.view().value() ) @@ -242,7 +246,7 @@ cpdef size_type distinct_count( Column source, null_policy null_handling, nan_policy nan_handling, - Stream stream=None + object stream=None ): """Returns the number of distinct elements in the input column. 
@@ -262,10 +266,10 @@ cpdef size_type distinct_count( size_type The number of distinct elements in the input column. """ - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) return cpp_distinct_count.distinct_count( - source.view(), null_handling, nan_handling, stream.view() + source.view(), null_handling, nan_handling, _stream.view().value() ) diff --git a/python/pylibcudf/pylibcudf/replace.pxd b/python/pylibcudf/pylibcudf/replace.pxd index 49b57753eb1..7e78e92d514 100644 --- a/python/pylibcudf/pylibcudf/replace.pxd +++ b/python/pylibcudf/pylibcudf/replace.pxd @@ -1,9 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool from pylibcudf.libcudf.replace cimport replace_policy -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .column cimport Column @@ -22,7 +21,7 @@ ctypedef fused ReplacementType: cpdef Column replace_nulls( Column source_column, ReplacementType replacement, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -30,7 +29,7 @@ cpdef Column find_and_replace_all( Column source_column, Column values_to_replace, Column replacement_values, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -40,13 +39,13 @@ cpdef Column clamp( Scalar hi, Scalar lo_replace=*, Scalar hi_replace=*, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) cpdef Column normalize_nans_and_zeros( Column source_column, bool inplace=*, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) diff --git a/python/pylibcudf/pylibcudf/replace.pyi b/python/pylibcudf/pylibcudf/replace.pyi index d7a35721769..f74e06c3909 100644 --- a/python/pylibcudf/pylibcudf/replace.pyi +++ b/python/pylibcudf/pylibcudf/replace.pyi @@ -1,13 +1,13 @@ -# SPDX-FileCopyrightText: Copyright (c) 
2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from enum import IntEnum from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar +from pylibcudf.utils import CudaStreamLike class ReplacePolicy(IntEnum): PRECEDING = ... @@ -16,14 +16,14 @@ class ReplacePolicy(IntEnum): def replace_nulls( source_column: Column, replacement: Column | Scalar | ReplacePolicy, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def find_and_replace_all( source_column: Column, values_to_replace: Column, replacement_values: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def clamp( @@ -32,12 +32,12 @@ def clamp( hi: Scalar, lo_replace: Scalar | None = None, hi_replace: Scalar | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def normalize_nans_and_zeros( source_column: Column, inplace: bool = False, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/replace.pyx b/python/pylibcudf/pylibcudf/replace.pyx index c3730e3971f..4a5cc162551 100644 --- a/python/pylibcudf/pylibcudf/replace.pyx +++ b/python/pylibcudf/pylibcudf/replace.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 @@ -18,6 +18,7 @@ from pylibcudf.libcudf.replace import \ from .column cimport Column from .scalar cimport Scalar from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "ReplacePolicy", @@ -31,7 +32,7 @@ __all__ = [ cpdef Column replace_nulls( Column source_column, ReplacementType replacement, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Replace nulls in source_column. @@ -70,7 +71,8 @@ cpdef Column replace_nulls( cdef unique_ptr[column] c_result cdef replace_policy policy - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) # Due to https://github.com/cython/cython/issues/5984, if this function is @@ -84,10 +86,10 @@ cpdef Column replace_nulls( c_result = cpp_replace.replace_nulls( source_column.view(), policy, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) else: raise TypeError("replacement must be a Column, Scalar, or replace_policy") @@ -96,33 +98,33 @@ cpdef Column replace_nulls( c_result = cpp_replace.replace_nulls( source_column.view(), replacement.view(), - stream.view(), + _cs, mr.get_mr() ) elif ReplacementType is Scalar: c_result = cpp_replace.replace_nulls( source_column.view(), dereference(replacement.c_obj), - stream.view(), + _cs, mr.get_mr() ) elif ReplacementType is replace_policy: c_result = cpp_replace.replace_nulls( source_column.view(), replacement, - stream.view(), + _cs, mr.get_mr() ) else: assert False, "Internal error. 
Please contact pylibcudf developers" - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column find_and_replace_all( Column source_column, Column values_to_replace, Column replacement_values, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Replace all occurrences of values_to_replace with replacement_values. @@ -150,7 +152,8 @@ cpdef Column find_and_replace_all( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -158,10 +161,10 @@ cpdef Column find_and_replace_all( source_column.view(), values_to_replace.view(), replacement_values.view(), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column clamp( @@ -170,7 +173,7 @@ cpdef Column clamp( Scalar hi, Scalar lo_replace=None, Scalar hi_replace=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Clamp the values in source_column to the range [lo, hi]. 
@@ -206,7 +209,8 @@ cpdef Column clamp( cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -215,7 +219,7 @@ cpdef Column clamp( source_column.view(), dereference(lo.c_obj), dereference(hi.c_obj), - stream.view(), + _cs, mr.get_mr() ) else: @@ -225,16 +229,16 @@ cpdef Column clamp( dereference(lo_replace.c_obj), dereference(hi.c_obj), dereference(hi_replace.c_obj), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column normalize_nans_and_zeros( Column source_column, bool inplace=False, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Normalize NaNs and zeros in source_column. @@ -260,24 +264,25 @@ cpdef Column normalize_nans_and_zeros( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: if inplace: cpp_replace.normalize_nans_and_zeros( source_column.mutable_view(), - stream.view(), + _cs, mr.get_mr() ) else: c_result = cpp_replace.normalize_nans_and_zeros( source_column.view(), - stream.view(), + _cs, mr.get_mr() ) if not inplace: - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) ReplacePolicy.__str__ = ReplacePolicy.__repr__ diff --git a/python/pylibcudf/pylibcudf/reshape.pxd b/python/pylibcudf/pylibcudf/reshape.pxd index fd2eb9f31ec..09a111770b5 100644 --- a/python/pylibcudf/pylibcudf/reshape.pxd +++ b/python/pylibcudf/pylibcudf/reshape.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libc.stddef cimport size_t @@ -6,7 +6,6 @@ from libc.stdint cimport uintptr_t from pylibcudf.libcudf.types cimport size_type -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.device_buffer cimport DeviceBuffer from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource @@ -17,17 +16,17 @@ from .types cimport DataType cpdef Column interleave_columns( - Table source_table, Stream stream=*, DeviceMemoryResource mr=* + Table source_table, object stream = *, DeviceMemoryResource mr=* ) cpdef Table tile( Table source_table, size_type count, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) cpdef void table_to_array( Table input_table, uintptr_t ptr, size_t size, - Stream stream=* + object stream = * ) diff --git a/python/pylibcudf/pylibcudf/reshape.pyi b/python/pylibcudf/pylibcudf/reshape.pyi index c8ca83be981..03acda18353 100644 --- a/python/pylibcudf/pylibcudf/reshape.pyi +++ b/python/pylibcudf/pylibcudf/reshape.pyi @@ -1,26 +1,26 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.table import Table +from pylibcudf.utils import CudaStreamLike def interleave_columns( source_table: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def tile( source_table: Table, count: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def table_to_array( input_table: Table, ptr: int, size: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> None: ... 
diff --git a/python/pylibcudf/pylibcudf/reshape.pyx b/python/pylibcudf/pylibcudf/reshape.pyx index b001b289794..a81dadf62ce 100644 --- a/python/pylibcudf/pylibcudf/reshape.pyx +++ b/python/pylibcudf/pylibcudf/reshape.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libc.stddef cimport size_t @@ -24,11 +24,12 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .column cimport Column from .table cimport Table from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["interleave_columns", "tile", "table_to_array"] cpdef Column interleave_columns( - Table source_table, Stream stream=None, DeviceMemoryResource mr=None + Table source_table, object stream=None, DeviceMemoryResource mr=None ): """Interleave columns of a table into a single column. @@ -55,21 +56,22 @@ cpdef Column interleave_columns( A new column which is the result of interleaving the input columns """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_interleave_columns( - source_table.view(), stream.view(), mr.get_mr() + source_table.view(), _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Table tile( Table source_table, size_type count, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Repeats the rows from input table count times to form a new table. 
@@ -93,22 +95,23 @@ cpdef Table tile( The table containing the tiled "rows" """ cdef unique_ptr[table] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_tile( - source_table.view(), count, stream.view(), mr.get_mr() + source_table.view(), count, _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef void table_to_array( Table input_table, uintptr_t ptr, size_t size, - Stream stream=None + object stream=None ): """ Copy a table into a preallocated column-major device array. @@ -129,7 +132,8 @@ cpdef void table_to_array( raise ValueError( "Size exceeds the size_t limit." ) - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() cdef device_span[byte] span = device_span[byte]( ptr, size @@ -139,5 +143,5 @@ cpdef void table_to_array( cpp_table_to_array( input_table.view(), span, - stream.view() + _cs ) diff --git a/python/pylibcudf/pylibcudf/rolling.pxd b/python/pylibcudf/pylibcudf/rolling.pxd index 5ea7dc747f4..94a6a8a6d89 100644 --- a/python/pylibcudf/pylibcudf/rolling.pxd +++ b/python/pylibcudf/pylibcudf/rolling.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool @@ -8,7 +8,6 @@ from pylibcudf.libcudf.rolling cimport ( bounded_closed, bounded_open, current_row, rolling_request, unbounded ) from pylibcudf.libcudf.types cimport null_order, order, size_type -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .aggregation cimport Aggregation @@ -63,7 +62,7 @@ cpdef Table grouped_range_rolling_window( PrecedingRangeWindowType preceding, FollowingRangeWindowType following, list requests, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -73,7 +72,7 @@ cpdef Column rolling_window( WindowType following_window, size_type min_periods, Aggregation agg, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -86,6 +85,6 @@ cpdef tuple make_range_windows( null_order null_order, PrecedingRangeWindowType preceding, FollowingRangeWindowType following, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) diff --git a/python/pylibcudf/pylibcudf/rolling.pyi b/python/pylibcudf/pylibcudf/rolling.pyi index 239ce9ddbd8..883f62d0d3f 100644 --- a/python/pylibcudf/pylibcudf/rolling.pyi +++ b/python/pylibcudf/pylibcudf/rolling.pyi @@ -1,14 +1,14 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.aggregation import Aggregation from pylibcudf.column import Column from pylibcudf.scalar import Scalar from pylibcudf.table import Table from pylibcudf.types import DataType, NullOrder, Order +from pylibcudf.utils import CudaStreamLike class Unbounded: ... class CurrentRow: ... 
@@ -36,7 +36,7 @@ def grouped_range_rolling_window( preceding: RangeWindowType, following: RangeWindowType, requests: list[RollingRequest], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def rolling_window[WindowType: (Column, int)]( @@ -45,7 +45,7 @@ def rolling_window[WindowType: (Column, int)]( following_window: WindowType, min_periods: int, agg: Aggregation, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def is_valid_rolling_aggregation( @@ -58,6 +58,6 @@ def make_range_windows( null_order: NullOrder, preceding: RangeWindowType, following: RangeWindowType, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Column, Column]: ... diff --git a/python/pylibcudf/pylibcudf/rolling.pyx b/python/pylibcudf/pylibcudf/rolling.pyx index 73c10e53d57..ae9d7665d69 100644 --- a/python/pylibcudf/pylibcudf/rolling.pyx +++ b/python/pylibcudf/pylibcudf/rolling.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from cython.operator cimport dereference @@ -20,6 +20,7 @@ from .column cimport Column from .scalar cimport Scalar from .types cimport DataType from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ @@ -125,7 +126,7 @@ cpdef Table grouped_range_rolling_window( PrecedingRangeWindowType preceding, FollowingRangeWindowType following, list requests, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -164,7 +165,8 @@ cpdef Table grouped_range_rolling_window( for req in requests: crequests.push_back(move((req).view())) - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -176,10 +178,10 @@ cpdef Table grouped_range_rolling_window( dereference(preceding.c_obj.get()), dereference(following.c_obj.get()), crequests, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(result), stream, mr) + return Table.from_libcudf(move(result), _stream, mr) cpdef Column rolling_window( @@ -188,7 +190,7 @@ cpdef Column rolling_window( WindowType following_window, size_type min_periods, Aggregation agg, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a rolling window operation on a column @@ -224,7 +226,8 @@ cpdef Column rolling_window( # reclaim the GIL internally for just the necessary scope like column.view() cdef const rolling_aggregation *c_agg = agg.view_underlying_as_rolling() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if WindowType is Column: @@ -235,7 +238,7 @@ cpdef Column rolling_window( following_window.view(), min_periods, dereference(c_agg), - stream.view(), + _cs, mr.get_mr() ) else: @@ -246,11 +249,11 @@ cpdef Column rolling_window( following_window, min_periods, 
dereference(c_agg), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef bool is_valid_rolling_aggregation(DataType source, Aggregation agg): @@ -278,7 +281,7 @@ cpdef tuple make_range_windows( null_order null_order, PrecedingRangeWindowType preceding, FollowingRangeWindowType following, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -308,7 +311,8 @@ cpdef tuple make_range_windows( """ cdef pair[unique_ptr[column], unique_ptr[column]] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -319,10 +323,10 @@ cpdef tuple make_range_windows( null_order, dereference(preceding.c_obj.get()), dereference(following.c_obj.get()), - stream.view(), + _cs, mr.get_mr() ) return ( - Column.from_libcudf(move(result.first), stream, mr), - Column.from_libcudf(move(result.second), stream, mr) + Column.from_libcudf(move(result.first), _stream, mr), + Column.from_libcudf(move(result.second), _stream, mr) ) diff --git a/python/pylibcudf/pylibcudf/round.pxd b/python/pylibcudf/pylibcudf/round.pxd index ecd72c62c0a..0ac0c22346f 100644 --- a/python/pylibcudf/pylibcudf/round.pxd +++ b/python/pylibcudf/pylibcudf/round.pxd @@ -5,7 +5,6 @@ from pylibcudf.libcudf.round cimport rounding_method from .column cimport Column -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource @@ -13,7 +12,7 @@ cpdef Column round( Column source, int32_t decimal_places = *, rounding_method round_method = *, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = * ) @@ -21,6 +20,6 @@ cpdef Column round_decimal( Column source, int32_t decimal_places = *, rounding_method round_method = *, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = * ) diff --git 
a/python/pylibcudf/pylibcudf/round.pyi b/python/pylibcudf/pylibcudf/round.pyi index 848e43aeda7..30d08f234d5 100644 --- a/python/pylibcudf/pylibcudf/round.pyi +++ b/python/pylibcudf/pylibcudf/round.pyi @@ -4,9 +4,9 @@ from enum import IntEnum from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike class RoundingMethod(IntEnum): HALF_UP = ... @@ -16,13 +16,13 @@ def round( source: Column, decimal_places: int = 0, round_method: RoundingMethod = RoundingMethod.HALF_UP, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def round_decimal( source: Column, decimal_places: int = 0, round_method: RoundingMethod = RoundingMethod.HALF_UP, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/round.pyx b/python/pylibcudf/pylibcudf/round.pyx index 84a7ba6dbdf..f5baa6bbd23 100644 --- a/python/pylibcudf/pylibcudf/round.pyx +++ b/python/pylibcudf/pylibcudf/round.pyx @@ -19,6 +19,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .column cimport Column from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["RoundingMethod", "round"] @@ -26,7 +27,7 @@ cpdef Column round( Column source, int32_t decimal_places = 0, rounding_method round_method = rounding_method.HALF_UP, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Rounds all the values in a column to the specified number of decimal places. 
@@ -58,7 +59,8 @@ cpdef Column round( A Column with values rounded """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -66,18 +68,18 @@ cpdef Column round( source.view(), decimal_places, round_method, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column round_decimal( Column source, int32_t decimal_places = 0, rounding_method round_method = rounding_method.HALF_UP, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """Rounds all the values in a column to the specified number of decimal places. @@ -106,7 +108,8 @@ cpdef Column round_decimal( A Column with values rounded """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -114,10 +117,10 @@ cpdef Column round_decimal( source.view(), decimal_places, round_method, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) RoundingMethod.__str__ = RoundingMethod.__repr__ diff --git a/python/pylibcudf/pylibcudf/scalar.pxd b/python/pylibcudf/pylibcudf/scalar.pxd index 5230c0316be..b628b9185a6 100644 --- a/python/pylibcudf/pylibcudf/scalar.pxd +++ b/python/pylibcudf/pylibcudf/scalar.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool @@ -6,7 +6,6 @@ from libcpp.memory cimport unique_ptr from pylibcudf.libcudf.scalar.scalar cimport scalar from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .types cimport DataType @@ -24,10 +23,10 @@ cdef class Scalar: cdef const scalar* get(self) noexcept nogil cpdef DataType type(self) - cpdef bool is_valid(self, Stream stream=*) + cpdef bool is_valid(self, object stream = *) @staticmethod - cdef Scalar empty_like(Column column, Stream stream, DeviceMemoryResource mr) + cdef Scalar empty_like(Column column, object stream, DeviceMemoryResource mr) @staticmethod cdef Scalar from_libcudf(unique_ptr[scalar] libcudf_scalar, dtype=*) diff --git a/python/pylibcudf/pylibcudf/scalar.pyi b/python/pylibcudf/pylibcudf/scalar.pyi index ef940d8c021..a204894afd8 100644 --- a/python/pylibcudf/pylibcudf/scalar.pyi +++ b/python/pylibcudf/pylibcudf/scalar.pyi @@ -3,11 +3,10 @@ from typing import Any -from rmm.pylibrmm.stream import Stream - from pylibcudf._interop_helpers import ColumnMetadata from pylibcudf.column import Column from pylibcudf.types import DataType +from pylibcudf.utils import CudaStreamLike NpGeneric = type[Any] @@ -16,31 +15,33 @@ PaScalar = type[Any] class Scalar: def __init__(self): ... def type(self) -> DataType: ... - def is_valid(self, stream: Stream) -> bool: ... + def is_valid(self, stream: CudaStreamLike) -> bool: ... @staticmethod - def empty_like(column: Column, stream: Stream | None = None) -> Scalar: ... + def empty_like( + column: Column, stream: CudaStreamLike | None = None + ) -> Scalar: ... def to_arrow( self, metadata: ColumnMetadata | str | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> PaScalar: ... 
@staticmethod def from_arrow( pa_val: Any, dtype: DataType | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> Scalar: ... @classmethod def from_py( cls, py_val: Any, dtype: DataType | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> Scalar: ... @classmethod def from_numpy( - cls, np_val: NpGeneric, stream: Stream | None = None + cls, np_val: NpGeneric, stream: CudaStreamLike | None = None ) -> Scalar: ... def to_py( - self, stream: Stream | None = None + self, stream: CudaStreamLike | None = None ) -> None | int | float | str | bool: ... diff --git a/python/pylibcudf/pylibcudf/scalar.pyx b/python/pylibcudf/pylibcudf/scalar.pyx index 8771b4a75fd..54e088787a5 100644 --- a/python/pylibcudf/pylibcudf/scalar.pyx +++ b/python/pylibcudf/pylibcudf/scalar.pyx @@ -57,6 +57,7 @@ from rmm.pylibrmm.memory_resource cimport ( get_current_device_resource, ) from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t from .column cimport Column from .traits cimport is_floating_point @@ -151,10 +152,11 @@ cdef class Scalar: """The type of data in the column.""" return self._data_type - cpdef bool is_valid(self, Stream stream = None): + cpdef bool is_valid(self, object stream = None): """True if the scalar is valid, false if not""" - stream = _get_stream(stream) - return self.get().is_valid(stream.view()) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() + return self.get().is_valid(_cs) def to_arrow( self, @@ -176,7 +178,9 @@ cdef class Scalar: """ # Note that metadata for scalars is primarily important for preserving # information on nested types since names are otherwise irrelevant. 
- return Column.from_scalar(self, 1, stream).to_arrow(metadata=metadata)[0] + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() + return Column.from_scalar(self, 1, _stream).to_arrow(metadata=metadata)[0] @staticmethod def from_arrow( @@ -205,7 +209,7 @@ cdef class Scalar: return _from_arrow(pa_val, dtype, stream) @staticmethod - cdef Scalar empty_like(Column column, Stream stream, DeviceMemoryResource mr): + cdef Scalar empty_like(Column column, object stream, DeviceMemoryResource mr): """Construct a null scalar with the same type as column. Parameters @@ -221,8 +225,10 @@ cdef class Scalar: ------- New empty (null) scalar of the given type. """ + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() return Scalar.from_libcudf( - move(make_empty_scalar_like(column.view(), stream.view(), mr.get_mr())) + move(make_empty_scalar_like(column.view(), _cs, mr.get_mr())) ) @staticmethod @@ -266,9 +272,10 @@ cdef class Scalar: Scalar New pylibcudf.Scalar """ - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) - return _from_py(py_val, dtype, stream, mr) + return _from_py(py_val, dtype, _stream, mr) @classmethod def from_numpy( @@ -294,9 +301,10 @@ cdef class Scalar: Scalar New pylibcudf.Scalar """ - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) - return _from_numpy(np_val, stream, mr) + return _from_numpy(np_val, _stream, mr) def to_py(self, stream: Stream | None = None): """ @@ -312,39 +320,40 @@ cdef class Scalar: Python scalar A Python scalar associated with the type of the Scalar. 
""" - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() if not self.is_valid(stream): return None cdef type_id tid = self.type().id() cdef const scalar* slr = self.c_obj.get() if tid == type_id.BOOL8: - return (slr).value(stream.view()) + return (slr).value(_cs) elif tid == type_id.STRING: - return (slr).to_string(stream.view()).decode() + return (slr).to_string(_cs).decode() elif tid == type_id.FLOAT32: - return (slr).value(stream.view()) + return (slr).value(_cs) elif tid == type_id.FLOAT64: - return (slr).value(stream.view()) + return (slr).value(_cs) elif tid == type_id.INT8: - return (slr).value(stream.view()) + return (slr).value(_cs) elif tid == type_id.INT16: - return (slr).value(stream.view()) + return (slr).value(_cs) elif tid == type_id.INT32: - return (slr).value(stream.view()) + return (slr).value(_cs) elif tid == type_id.INT64: - return (slr).value(stream.view()) + return (slr).value(_cs) elif tid == type_id.UINT8: - return (slr).value(stream.view()) + return (slr).value(_cs) elif tid == type_id.UINT16: - return (slr).value(stream.view()) + return (slr).value(_cs) elif tid == type_id.UINT32: - return (slr).value(stream.view()) + return (slr).value(_cs) elif tid == type_id.UINT64: - return (slr).value(stream.view()) + return (slr).value(_cs) elif tid == type_id.DECIMAL128: return decimal.Decimal( - (slr).value(stream.view()).value() + (slr).value(_cs).value() ).scaleb( (slr).type().scale() ) @@ -375,6 +384,8 @@ def _from_py( def _( py_val, dtype: DataType | None, stream: Stream, mr: DeviceMemoryResource ): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() cdef DataType c_dtype if dtype is None: raise ValueError("Must specify a dtype for a None value.") @@ -382,7 +393,7 @@ def _( c_dtype = dtype cdef unique_ptr[scalar] c_obj = make_default_constructed_scalar( c_dtype.c_obj, - stream.view(), + _cs, mr.get_mr() ) return _new_scalar(move(c_obj), dtype) @@ 
-402,6 +413,8 @@ def _( def _( py_val, dtype: DataType | None, stream: Stream, mr: DeviceMemoryResource ): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() cdef unique_ptr[scalar] c_obj cdef DataType c_dtype if dtype is None: @@ -414,11 +427,11 @@ def _( if tid == type_id.FLOAT32: if abs(py_val) > numeric_limits[float].max(): raise OverflowError(f"{py_val} out of range for FLOAT32 scalar") - c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) - (c_obj.get()).set_value(py_val, stream.view()) + c_obj = make_numeric_scalar(c_dtype.c_obj, _cs, mr.get_mr()) + (c_obj.get()).set_value(py_val, _cs) elif tid == type_id.FLOAT64: - c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) - (c_obj.get()).set_value(py_val, stream.view()) + c_obj = make_numeric_scalar(c_dtype.c_obj, _cs, mr.get_mr()) + (c_obj.get()).set_value(py_val, _cs) else: typ = c_dtype.id() raise TypeError(f"Cannot convert float to Scalar with dtype {typ.name}") @@ -430,6 +443,8 @@ def _( def _( py_val, dtype: DataType | None, stream: Stream, mr: DeviceMemoryResource ): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() cdef unique_ptr[scalar] c_obj cdef DataType c_dtype cdef duration_ns c_duration_ns @@ -440,7 +455,7 @@ def _( if dtype is None: c_dtype = dtype = DataType(type_id.INT64) elif is_floating_point(dtype): - return _from_py(float(py_val), dtype, stream, mr) + return _from_py(float(py_val), dtype, _stream, mr) else: c_dtype = dtype cdef type_id tid = c_dtype.id() @@ -450,80 +465,80 @@ def _( numeric_limits[int8_t].min() <= py_val <= numeric_limits[int8_t].max() ): raise OverflowError(f"{py_val} out of range for INT8 scalar") - c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) - (c_obj.get()).set_value(py_val, stream.view()) + c_obj = make_numeric_scalar(c_dtype.c_obj, _cs, mr.get_mr()) + (c_obj.get()).set_value(py_val, _cs) elif tid == type_id.INT16: if not ( 
numeric_limits[int16_t].min() <= py_val <= numeric_limits[int16_t].max() ): raise OverflowError(f"{py_val} out of range for INT16 scalar") - c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) - (c_obj.get()).set_value(py_val, stream.view()) + c_obj = make_numeric_scalar(c_dtype.c_obj, _cs, mr.get_mr()) + (c_obj.get()).set_value(py_val, _cs) elif tid == type_id.INT32: if not ( numeric_limits[int32_t].min() <= py_val <= numeric_limits[int32_t].max() ): raise OverflowError(f"{py_val} out of range for INT32 scalar") - c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) - (c_obj.get()).set_value(py_val, stream.view()) + c_obj = make_numeric_scalar(c_dtype.c_obj, _cs, mr.get_mr()) + (c_obj.get()).set_value(py_val, _cs) elif tid == type_id.INT64: if not ( numeric_limits[int64_t].min() <= py_val <= numeric_limits[int64_t].max() ): raise OverflowError(f"{py_val} out of range for INT64 scalar") - c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) - (c_obj.get()).set_value(py_val, stream.view()) + c_obj = make_numeric_scalar(c_dtype.c_obj, _cs, mr.get_mr()) + (c_obj.get()).set_value(py_val, _cs) elif tid == type_id.UINT8: if py_val < 0: raise ValueError("Cannot assign negative value to UINT8 scalar") if py_val > numeric_limits[uint8_t].max(): raise OverflowError(f"{py_val} out of range for UINT8 scalar") - c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) - (c_obj.get()).set_value(py_val, stream.view()) + c_obj = make_numeric_scalar(c_dtype.c_obj, _cs, mr.get_mr()) + (c_obj.get()).set_value(py_val, _cs) elif tid == type_id.UINT16: if py_val < 0: raise ValueError("Cannot assign negative value to UINT16 scalar") if py_val > numeric_limits[uint16_t].max(): raise OverflowError(f"{py_val} out of range for UINT16 scalar") - c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) - (c_obj.get()).set_value(py_val, stream.view()) + c_obj = make_numeric_scalar(c_dtype.c_obj, _cs, mr.get_mr()) 
+ (c_obj.get()).set_value(py_val, _cs) elif tid == type_id.UINT32: if py_val < 0: raise ValueError("Cannot assign negative value to UINT32 scalar") if py_val > numeric_limits[uint32_t].max(): raise OverflowError(f"{py_val} out of range for UINT32 scalar") - c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) - (c_obj.get()).set_value(py_val, stream.view()) + c_obj = make_numeric_scalar(c_dtype.c_obj, _cs, mr.get_mr()) + (c_obj.get()).set_value(py_val, _cs) elif tid == type_id.UINT64: if py_val < 0: raise ValueError("Cannot assign negative value to UINT64 scalar") if py_val > numeric_limits[uint64_t].max(): raise OverflowError(f"{py_val} out of range for UINT64 scalar") - c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) - (c_obj.get()).set_value(py_val, stream.view()) + c_obj = make_numeric_scalar(c_dtype.c_obj, _cs, mr.get_mr()) + (c_obj.get()).set_value(py_val, _cs) elif tid == type_id.BOOL8: if py_val not in (0, 1): raise ValueError(f"Cannot convert {py_val} to BOOL8 scalar") - c_obj = make_numeric_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) - (c_obj.get()).set_value(py_val != 0, stream.view()) + c_obj = make_numeric_scalar(c_dtype.c_obj, _cs, mr.get_mr()) + (c_obj.get()).set_value(py_val != 0, _cs) elif tid == type_id.DURATION_NANOSECONDS: if py_val > numeric_limits[int64_t].max(): raise OverflowError( f"{py_val} nanoseconds out of range for INT64 limit." ) - c_obj = make_duration_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) + c_obj = make_duration_scalar(c_dtype.c_obj, _cs, mr.get_mr()) c_duration_ns = duration_ns(py_val) (c_obj.get()).set_value( - c_duration_ns, stream.view() + c_duration_ns, _cs ) elif tid == type_id.DURATION_MICROSECONDS: @@ -531,10 +546,10 @@ def _( raise OverflowError( f"{py_val} microseconds out of range for INT64 limit." 
) - c_obj = make_duration_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) + c_obj = make_duration_scalar(c_dtype.c_obj, _cs, mr.get_mr()) c_duration_us = duration_us(py_val) (c_obj.get()).set_value( - c_duration_us, stream.view() + c_duration_us, _cs ) elif tid == type_id.DURATION_MILLISECONDS: @@ -542,10 +557,10 @@ def _( raise OverflowError( f"{py_val} milliseconds out of range for INT64 limit." ) - c_obj = make_duration_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) + c_obj = make_duration_scalar(c_dtype.c_obj, _cs, mr.get_mr()) c_duration_ms = duration_ms(py_val) (c_obj.get()).set_value( - c_duration_ms, stream.view() + c_duration_ms, _cs ) elif tid == type_id.DURATION_SECONDS: @@ -553,10 +568,10 @@ def _( raise OverflowError( f"{py_val} seconds out of range for INT64 limit." ) - c_obj = make_duration_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) + c_obj = make_duration_scalar(c_dtype.c_obj, _cs, mr.get_mr()) c_duration_s = duration_s(py_val) (c_obj.get()).set_value( - c_duration_s, stream.view() + c_duration_s, _cs ) elif tid == type_id.DURATION_DAYS: @@ -564,10 +579,10 @@ def _( raise OverflowError( f"{py_val} days out of range for INT32 limit." 
) - c_obj = make_duration_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) + c_obj = make_duration_scalar(c_dtype.c_obj, _cs, mr.get_mr()) c_duration_D = duration_D(py_val) (c_obj.get()).set_value( - c_duration_D, stream.view() + c_duration_D, _cs ) else: @@ -581,6 +596,8 @@ def _( def _( py_val, dtype: DataType | None, stream: Stream, mr: DeviceMemoryResource ): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() if dtype is None: dtype = DataType(type_id.BOOL8) elif dtype.id() != type_id.BOOL8: @@ -591,10 +608,10 @@ def _( cdef unique_ptr[scalar] c_obj = make_numeric_scalar( (dtype).c_obj, - stream.view(), + _cs, mr.get_mr() ) - (c_obj.get()).set_value(py_val, stream.view()) + (c_obj.get()).set_value(py_val, _cs) return _new_scalar(move(c_obj), dtype) @@ -602,6 +619,8 @@ def _( def _( py_val, dtype: DataType | None, stream: Stream, mr: DeviceMemoryResource ): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() if dtype is None: dtype = DataType(type_id.STRING) elif dtype.id() != type_id.STRING: @@ -610,7 +629,7 @@ def _( f"Cannot convert str to Scalar with dtype {tid.name}" ) cdef unique_ptr[scalar] c_obj = make_string_scalar( - py_val.encode(), stream.view(), mr.get_mr() + py_val.encode(), _cs, mr.get_mr() ) return _new_scalar(move(c_obj), dtype) @@ -619,6 +638,8 @@ def _( def _( py_val, dtype: DataType | None, stream: Stream, mr: DeviceMemoryResource ): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() cdef unique_ptr[scalar] c_obj cdef duration_us c_duration_us cdef duration_ns c_duration_ns @@ -637,10 +658,10 @@ def _( raise OverflowError( f"{total_nanoseconds} nanoseconds out of range for INT64 limit." 
) - c_obj = make_duration_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) + c_obj = make_duration_scalar(c_dtype.c_obj, _cs, mr.get_mr()) c_duration_ns = duration_ns(total_nanoseconds) (c_obj.get()).set_value( - c_duration_ns, stream.view() + c_duration_ns, _cs ) elif tid == type_id.DURATION_MICROSECONDS: total_microseconds = int(total_seconds * 1_000_000) @@ -648,10 +669,10 @@ def _( raise OverflowError( f"{total_microseconds} microseconds out of range for INT64 limit." ) - c_obj = make_duration_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) + c_obj = make_duration_scalar(c_dtype.c_obj, _cs, mr.get_mr()) c_duration_us = duration_us(total_microseconds) (c_obj.get()).set_value( - c_duration_us, stream.view() + c_duration_us, _cs ) elif tid == type_id.DURATION_MILLISECONDS: total_milliseconds = int(total_seconds * 1_000) @@ -659,10 +680,10 @@ def _( raise OverflowError( f"{total_milliseconds} milliseconds out of range for INT64 limit." ) - c_obj = make_duration_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) + c_obj = make_duration_scalar(c_dtype.c_obj, _cs, mr.get_mr()) c_duration_ms = duration_ms(total_milliseconds) (c_obj.get()).set_value( - c_duration_ms, stream.view() + c_duration_ms, _cs ) elif tid == type_id.DURATION_SECONDS: total_seconds = int(total_seconds) @@ -670,10 +691,10 @@ def _( raise OverflowError( f"{total_seconds} seconds out of range for INT64 limit." ) - c_obj = make_duration_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) + c_obj = make_duration_scalar(c_dtype.c_obj, _cs, mr.get_mr()) c_duration_s = duration_s(total_seconds) (c_obj.get()).set_value( - c_duration_s, stream.view() + c_duration_s, _cs ) elif tid == type_id.DURATION_DAYS: total_days = int(total_seconds // 86400) @@ -681,10 +702,10 @@ def _( raise OverflowError( f"{total_days} days out of range for INT32 limit." 
) - c_obj = make_duration_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) + c_obj = make_duration_scalar(c_dtype.c_obj, _cs, mr.get_mr()) c_duration_D = duration_D(total_days) (c_obj.get()).set_value( - c_duration_D, stream.view() + c_duration_D, _cs ) else: typ = c_dtype.id() @@ -696,6 +717,8 @@ def _( def _( py_val, dtype: DataType | None, stream: Stream, mr: DeviceMemoryResource ): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() cdef unique_ptr[scalar] c_obj cdef duration_us c_duration_us cdef duration_ns c_duration_ns @@ -727,11 +750,11 @@ def _( raise OverflowError( f"{epoch_nanoseconds} nanoseconds out of range for INT64 limit." ) - c_obj = make_timestamp_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) + c_obj = make_timestamp_scalar(c_dtype.c_obj, _cs, mr.get_mr()) c_duration_ns = duration_ns(epoch_nanoseconds) c_timestamp_ns = timestamp_ns(c_duration_ns) (c_obj.get()).set_value( - c_timestamp_ns, stream.view() + c_timestamp_ns, _cs ) elif tid == type_id.TIMESTAMP_MICROSECONDS: epoch_microseconds = int(epoch_seconds * 1_000_000) @@ -739,11 +762,11 @@ def _( raise OverflowError( f"{epoch_microseconds} microseconds out of range for INT64 limit." ) - c_obj = make_timestamp_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) + c_obj = make_timestamp_scalar(c_dtype.c_obj, _cs, mr.get_mr()) c_duration_us = duration_us(epoch_microseconds) c_timestamp_us = timestamp_us(c_duration_us) (c_obj.get()).set_value( - c_timestamp_us, stream.view() + c_timestamp_us, _cs ) elif tid == type_id.TIMESTAMP_MILLISECONDS: epoch_milliseconds = int(epoch_seconds * 1_000) @@ -751,11 +774,11 @@ def _( raise OverflowError( f"{epoch_milliseconds} milliseconds out of range for INT64 limit." 
) - c_obj = make_timestamp_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) + c_obj = make_timestamp_scalar(c_dtype.c_obj, _cs, mr.get_mr()) c_duration_ms = duration_ms(epoch_milliseconds) c_timestamp_ms = timestamp_ms(c_duration_ms) (c_obj.get()).set_value( - c_timestamp_ms, stream.view() + c_timestamp_ms, _cs ) elif tid == type_id.TIMESTAMP_SECONDS: epoch_seconds = int(epoch_seconds) @@ -763,11 +786,11 @@ def _( raise OverflowError( f"{epoch_seconds} seconds out of range for INT64 limit." ) - c_obj = make_timestamp_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) + c_obj = make_timestamp_scalar(c_dtype.c_obj, _cs, mr.get_mr()) c_duration_s = duration_s(epoch_seconds) c_timestamp_s = timestamp_s(c_duration_s) (c_obj.get()).set_value( - c_timestamp_s, stream.view() + c_timestamp_s, _cs ) elif tid == type_id.TIMESTAMP_DAYS: epoch_days = int(epoch_seconds // 86400) @@ -775,11 +798,11 @@ def _( raise OverflowError( f"{epoch_days} days out of range for INT32 limit." ) - c_obj = make_timestamp_scalar(c_dtype.c_obj, stream.view(), mr.get_mr()) + c_obj = make_timestamp_scalar(c_dtype.c_obj, _cs, mr.get_mr()) c_duration_D = duration_D(epoch_days) c_timestamp_D = timestamp_D(c_duration_D) (c_obj.get()).set_value( - c_timestamp_D, stream.view() + c_timestamp_D, _cs ) else: typ = c_dtype.id() @@ -791,6 +814,8 @@ def _( def _( py_val, dtype: DataType | None, stream: Stream, mr: DeviceMemoryResource ): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() scale = py_val.as_tuple().exponent as_int = int(py_val.scaleb(-scale)) @@ -804,7 +829,7 @@ def _( cdef unique_ptr[scalar] c_obj = make_fixed_point_scalar[decimal128]( val, scale_type(scale), - stream.view(), + _cs, mr.get_mr() ) return _new_scalar(move(c_obj), dtype) @@ -829,21 +854,25 @@ if np is not None: @_from_numpy.register(np.bool_) def _(np_val, stream: Stream, mr: DeviceMemoryResource): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() cdef DataType dtype = 
DataType(type_id.BOOL8) cdef unique_ptr[scalar] c_obj = make_numeric_scalar( - dtype.c_obj, stream.view(), mr.get_mr() + dtype.c_obj, _cs, mr.get_mr() ) cdef cbool c_val = np_val - (c_obj.get()).set_value(c_val, stream.view()) + (c_obj.get()).set_value(c_val, _cs) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr @_from_numpy.register(np.str_) def _(np_val, stream: Stream, mr: DeviceMemoryResource): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() cdef DataType dtype = DataType(type_id.STRING) cdef unique_ptr[scalar] c_obj = make_string_scalar( np_val.item().encode(), - stream.view(), + _cs, mr.get_mr() ) cdef Scalar slr = _new_scalar(move(c_obj), dtype) @@ -851,101 +880,121 @@ if np is not None: @_from_numpy.register(np.int8) def _(np_val, stream: Stream, mr: DeviceMemoryResource): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() dtype = DataType(type_id.INT8) cdef unique_ptr[scalar] c_obj = make_numeric_scalar( - dtype.c_obj, stream.view(), mr.get_mr() + dtype.c_obj, _cs, mr.get_mr() ) - (c_obj.get()).set_value(np_val, stream.view()) + (c_obj.get()).set_value(np_val, _cs) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr @_from_numpy.register(np.int16) def _(np_val, stream: Stream, mr: DeviceMemoryResource): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() dtype = DataType(type_id.INT16) cdef unique_ptr[scalar] c_obj = make_numeric_scalar( - dtype.c_obj, stream.view(), mr.get_mr() + dtype.c_obj, _cs, mr.get_mr() ) - (c_obj.get()).set_value(np_val, stream.view()) + (c_obj.get()).set_value(np_val, _cs) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr @_from_numpy.register(np.int32) def _(np_val, stream: Stream, mr: DeviceMemoryResource): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() dtype = DataType(type_id.INT32) cdef unique_ptr[scalar] c_obj = make_numeric_scalar( - dtype.c_obj, stream.view(), mr.get_mr() 
+ dtype.c_obj, _cs, mr.get_mr() ) - (c_obj.get()).set_value(np_val, stream.view()) + (c_obj.get()).set_value(np_val, _cs) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr @_from_numpy.register(np.int64) def _(np_val, stream: Stream, mr: DeviceMemoryResource): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() dtype = DataType(type_id.INT64) cdef unique_ptr[scalar] c_obj = make_numeric_scalar( - dtype.c_obj, stream.view(), mr.get_mr() + dtype.c_obj, _cs, mr.get_mr() ) - (c_obj.get()).set_value(np_val, stream.view()) + (c_obj.get()).set_value(np_val, _cs) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr @_from_numpy.register(np.uint8) def _(np_val, stream: Stream, mr: DeviceMemoryResource): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() dtype = DataType(type_id.UINT8) cdef unique_ptr[scalar] c_obj = make_numeric_scalar( - dtype.c_obj, stream.view(), mr.get_mr() + dtype.c_obj, _cs, mr.get_mr() ) - (c_obj.get()).set_value(np_val, stream.view()) + (c_obj.get()).set_value(np_val, _cs) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr @_from_numpy.register(np.uint16) def _(np_val, stream: Stream, mr: DeviceMemoryResource): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() dtype = DataType(type_id.UINT16) cdef unique_ptr[scalar] c_obj = make_numeric_scalar( - dtype.c_obj, stream.view(), mr.get_mr() + dtype.c_obj, _cs, mr.get_mr() ) - (c_obj.get()).set_value(np_val, stream.view()) + (c_obj.get()).set_value(np_val, _cs) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr @_from_numpy.register(np.uint32) def _(np_val, stream: Stream, mr: DeviceMemoryResource): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() dtype = DataType(type_id.UINT32) cdef unique_ptr[scalar] c_obj = make_numeric_scalar( - dtype.c_obj, stream.view(), mr.get_mr() + dtype.c_obj, _cs, mr.get_mr() ) - (c_obj.get()).set_value(np_val, 
stream.view()) + (c_obj.get()).set_value(np_val, _cs) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr @_from_numpy.register(np.uint64) def _(np_val, stream: Stream, mr: DeviceMemoryResource): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() dtype = DataType(type_id.UINT64) cdef unique_ptr[scalar] c_obj = make_numeric_scalar( - dtype.c_obj, stream.view(), mr.get_mr() + dtype.c_obj, _cs, mr.get_mr() ) - (c_obj.get()).set_value(np_val, stream.view()) + (c_obj.get()).set_value(np_val, _cs) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr @_from_numpy.register(np.float32) def _(np_val, stream: Stream, mr: DeviceMemoryResource): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() dtype = DataType(type_id.FLOAT32) cdef unique_ptr[scalar] c_obj = make_numeric_scalar( - dtype.c_obj, stream.view(), mr.get_mr() + dtype.c_obj, _cs, mr.get_mr() ) - (c_obj.get()).set_value(np_val, stream.view()) + (c_obj.get()).set_value(np_val, _cs) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr @_from_numpy.register(np.float64) def _(np_val, stream: Stream, mr: DeviceMemoryResource): + cdef Stream _stream = stream + cdef cudaStream_t _cs = _stream.view().value() dtype = DataType(type_id.FLOAT64) cdef unique_ptr[scalar] c_obj = make_numeric_scalar( - dtype.c_obj, stream.view(), mr.get_mr() + dtype.c_obj, _cs, mr.get_mr() ) - (c_obj.get()).set_value(np_val, stream.view()) + (c_obj.get()).set_value(np_val, _cs) cdef Scalar slr = _new_scalar(move(c_obj), dtype) return slr diff --git a/python/pylibcudf/pylibcudf/search.pxd b/python/pylibcudf/pylibcudf/search.pxd index 7b0725bf60b..c26a6689240 100644 --- a/python/pylibcudf/pylibcudf/search.pxd +++ b/python/pylibcudf/pylibcudf/search.pxd @@ -1,7 +1,6 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .column cimport Column @@ -13,7 +12,7 @@ cpdef Column lower_bound( Table needles, list column_order, list null_precedence, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -22,10 +21,10 @@ cpdef Column upper_bound( Table needles, list column_order, list null_precedence, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) cpdef Column contains( - Column haystack, Column needles, Stream stream = *, DeviceMemoryResource mr = * + Column haystack, Column needles, object stream = *, DeviceMemoryResource mr = * ) diff --git a/python/pylibcudf/pylibcudf/search.pyi b/python/pylibcudf/pylibcudf/search.pyi index eaec283a32a..6cc58946f56 100644 --- a/python/pylibcudf/pylibcudf/search.pyi +++ b/python/pylibcudf/pylibcudf/search.pyi @@ -1,19 +1,19 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.table import Table from pylibcudf.types import NullOrder, Order +from pylibcudf.utils import CudaStreamLike def lower_bound( haystack: Table, needles: Table, column_order: list[Order], null_precedence: list[NullOrder], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def upper_bound( @@ -21,12 +21,12 @@ def upper_bound( needles: Table, column_order: list[Order], null_precedence: list[NullOrder], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
def contains( haystack: Column, needles: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/search.pyx b/python/pylibcudf/pylibcudf/search.pyx index 4915b1b8be9..885d25f2d49 100644 --- a/python/pylibcudf/pylibcudf/search.pyx +++ b/python/pylibcudf/pylibcudf/search.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -13,6 +13,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .column cimport Column from .table cimport Table from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["contains", "lower_bound", "upper_bound"] @@ -21,7 +22,7 @@ cpdef Column lower_bound( Table needles, list column_order, list null_precedence, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Find smallest indices in haystack where needles may be inserted to retain order. 
@@ -52,7 +53,8 @@ cpdef Column lower_bound( cdef vector[order] c_orders = column_order cdef vector[null_order] c_null_precedence = null_precedence - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -61,10 +63,10 @@ cpdef Column lower_bound( needles.view(), c_orders, c_null_precedence, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column upper_bound( @@ -72,7 +74,7 @@ cpdef Column upper_bound( Table needles, list column_order, list null_precedence, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Find largest indices in haystack where needles may be inserted to retain order. @@ -103,7 +105,8 @@ cpdef Column upper_bound( cdef vector[order] c_orders = column_order cdef vector[null_order] c_null_precedence = null_precedence - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -112,14 +115,14 @@ cpdef Column upper_bound( needles.view(), c_orders, c_null_precedence, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column contains( - Column haystack, Column needles, Stream stream=None, DeviceMemoryResource mr=None + Column haystack, Column needles, object stream=None, DeviceMemoryResource mr=None ): """Check whether needles are present in haystack. 
@@ -143,14 +146,15 @@ cpdef Column contains( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_search.contains( haystack.view(), needles.view(), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/sorting.pxd b/python/pylibcudf/pylibcudf/sorting.pxd index 701b6803c34..a081ece747a 100644 --- a/python/pylibcudf/pylibcudf/sorting.pxd +++ b/python/pylibcudf/pylibcudf/sorting.pxd @@ -1,11 +1,10 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool from pylibcudf.libcudf.aggregation cimport rank_method from pylibcudf.libcudf.types cimport null_order, null_policy, order, size_type from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .table cimport Table @@ -15,7 +14,7 @@ cpdef Column sorted_order( Table source_table, list column_order, list null_precedence, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -23,7 +22,7 @@ cpdef Column stable_sorted_order( Table source_table, list column_order, list null_precedence, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -34,12 +33,12 @@ cpdef Column rank( null_policy null_handling, null_order null_precedence, bool percentage, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef bool is_sorted( - Table table, list column_order, list null_precedence, Stream stream=* + Table table, list column_order, list null_precedence, object stream = * ) cpdef Table segmented_sort_by_key( @@ -48,7 +47,7 @@ cpdef Table segmented_sort_by_key( 
Column segment_offsets, list column_order, list null_precedence, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -58,7 +57,7 @@ cpdef Table stable_segmented_sort_by_key( Column segment_offsets, list column_order, list null_precedence, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -67,7 +66,7 @@ cpdef Table sort_by_key( Table keys, list column_order, list null_precedence, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -76,7 +75,7 @@ cpdef Table stable_sort_by_key( Table keys, list column_order, list null_precedence, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -84,7 +83,7 @@ cpdef Table sort( Table source_table, list column_order, list null_precedence, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -92,7 +91,7 @@ cpdef Table stable_sort( Table source_table, list column_order, list null_precedence, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -100,7 +99,7 @@ cpdef Column top_k( Column col, size_type k, order sort_order=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -108,6 +107,6 @@ cpdef Column top_k_order( Column col, size_type k, order sort_order=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/sorting.pyi b/python/pylibcudf/pylibcudf/sorting.pyi index 8f00fcade6e..a06586a8f39 100644 --- a/python/pylibcudf/pylibcudf/sorting.pyi +++ b/python/pylibcudf/pylibcudf/sorting.pyi @@ -1,26 +1,26 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.aggregation import RankMethod from pylibcudf.column import Column from pylibcudf.table import Table from pylibcudf.types import NullOrder, NullPolicy, Order +from pylibcudf.utils import CudaStreamLike def sorted_order( source_table: Table, column_order: list[Order], null_precedence: list[NullOrder], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def stable_sorted_order( source_table: Table, column_order: list[Order], null_precedence: list[NullOrder], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def rank( @@ -30,14 +30,14 @@ def rank( null_handling: NullPolicy, null_precedence: NullOrder, percentage: bool, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def is_sorted( tbl: Table, column_order: list[Order], null_precedence: list[NullOrder], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> bool: ... def segmented_sort_by_key( values: Table, @@ -45,7 +45,7 @@ def segmented_sort_by_key( segment_offsets: Column, column_order: list[Order], null_precedence: list[NullOrder], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def stable_segmented_sort_by_key( @@ -54,7 +54,7 @@ def stable_segmented_sort_by_key( segment_offsets: Column, column_order: list[Order], null_precedence: list[NullOrder], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... 
def sort_by_key( @@ -62,7 +62,7 @@ def sort_by_key( keys: Table, column_order: list[Order], null_precedence: list[NullOrder], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def stable_sort_by_key( @@ -70,34 +70,34 @@ def stable_sort_by_key( keys: Table, column_order: list[Order], null_precedence: list[NullOrder], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def sort( source_table: Table, column_order: list[Order], null_precedence: list[NullOrder], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def stable_sort( source_table: Table, column_order: list[Order], null_precedence: list[NullOrder], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def top_k( col: Column, k: int, sort_order: Order = Order.DESCENDING, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def top_k_order( col: Column, k: int, sort_order: Order = Order.DESCENDING, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/sorting.pyx b/python/pylibcudf/pylibcudf/sorting.pyx index be668ff2526..fa0ed78b709 100644 --- a/python/pylibcudf/pylibcudf/sorting.pyx +++ b/python/pylibcudf/pylibcudf/sorting.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -15,6 +15,7 @@ from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .table cimport Table from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "is_sorted", @@ -33,7 +34,7 @@ cpdef Column sorted_order( Table source_table, list column_order, list null_precedence, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Computes the row indices required to sort the table. @@ -58,7 +59,8 @@ cpdef Column sorted_order( cdef vector[order] c_orders = column_order cdef vector[null_order] c_null_precedence = null_precedence - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -66,17 +68,17 @@ cpdef Column sorted_order( source_table.view(), c_orders, c_null_precedence, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column stable_sorted_order( Table source_table, list column_order, list null_precedence, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Computes the row indices required to sort the table, @@ -102,7 +104,8 @@ cpdef Column stable_sorted_order( cdef vector[order] c_orders = column_order cdef vector[null_order] c_null_precedence = null_precedence - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -110,10 +113,10 @@ cpdef Column stable_sorted_order( source_table.view(), c_orders, c_null_precedence, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column rank( @@ -123,7 +126,7 @@ cpdef Column rank( 
null_policy null_handling, null_order null_precedence, bool percentage, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Computes the rank of each element in the column. @@ -152,7 +155,8 @@ cpdef Column rank( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -163,14 +167,14 @@ cpdef Column rank( null_handling, null_precedence, percentage, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef bool is_sorted( - Table tbl, list column_order, list null_precedence, Stream stream=None + Table tbl, list column_order, list null_precedence, object stream=None ): """Checks if the table is sorted. @@ -194,14 +198,15 @@ cpdef bool is_sorted( cdef vector[order] c_orders = column_order cdef vector[null_order] c_null_precedence = null_precedence - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() with nogil: c_result = cpp_sorting.is_sorted( tbl.view(), c_orders, c_null_precedence, - stream.view() + _cs ) return c_result @@ -212,7 +217,7 @@ cpdef Table segmented_sort_by_key( Column segment_offsets, list column_order, list null_precedence, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Sorts the table by key, within segments. 
@@ -241,7 +246,8 @@ cpdef Table segmented_sort_by_key( cdef vector[order] c_orders = column_order cdef vector[null_order] c_null_precedence = null_precedence - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -251,10 +257,10 @@ cpdef Table segmented_sort_by_key( segment_offsets.view(), c_orders, c_null_precedence, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Table stable_segmented_sort_by_key( @@ -263,7 +269,7 @@ cpdef Table stable_segmented_sort_by_key( Column segment_offsets, list column_order, list null_precedence, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Sorts the table by key preserving order of equal elements, @@ -293,7 +299,8 @@ cpdef Table stable_segmented_sort_by_key( cdef vector[order] c_orders = column_order cdef vector[null_order] c_null_precedence = null_precedence - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -303,10 +310,10 @@ cpdef Table stable_segmented_sort_by_key( segment_offsets.view(), c_orders, c_null_precedence, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Table sort_by_key( @@ -314,7 +321,7 @@ cpdef Table sort_by_key( Table keys, list column_order, list null_precedence, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Sorts the table by key. 
@@ -341,7 +348,8 @@ cpdef Table sort_by_key( cdef vector[order] c_orders = column_order cdef vector[null_order] c_null_precedence = null_precedence - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -350,10 +358,10 @@ cpdef Table sort_by_key( keys.view(), c_orders, c_null_precedence, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Table stable_sort_by_key( @@ -361,7 +369,7 @@ cpdef Table stable_sort_by_key( Table keys, list column_order, list null_precedence, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Sorts the table by key preserving order of equal elements. @@ -388,7 +396,8 @@ cpdef Table stable_sort_by_key( cdef vector[order] c_orders = column_order cdef vector[null_order] c_null_precedence = null_precedence - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -397,17 +406,17 @@ cpdef Table stable_sort_by_key( keys.view(), c_orders, c_null_precedence, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Table sort( Table source_table, list column_order, list null_precedence, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Sorts the table. 
@@ -432,7 +441,8 @@ cpdef Table sort( cdef vector[order] c_orders = column_order cdef vector[null_order] c_null_precedence = null_precedence - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -440,17 +450,17 @@ cpdef Table sort( source_table.view(), c_orders, c_null_precedence, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Table stable_sort( Table source_table, list column_order, list null_precedence, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Sorts the table preserving order of equal elements. @@ -475,7 +485,8 @@ cpdef Table stable_sort( cdef vector[order] c_orders = column_order cdef vector[null_order] c_null_precedence = null_precedence - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -483,17 +494,17 @@ cpdef Table stable_sort( source_table.view(), c_orders, c_null_precedence, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Column top_k( Column col, size_type k, order sort_order = order.DESCENDING, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -518,7 +529,8 @@ cpdef Column top_k( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -526,17 +538,17 @@ cpdef Column top_k( col.view(), k, sort_order, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column top_k_order( Column col, 
size_type k, order sort_order = order.DESCENDING, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -564,7 +576,8 @@ cpdef Column top_k_order( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -572,7 +585,7 @@ cpdef Column top_k_order( col.view(), k, sort_order, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/stream_compaction.pxd b/python/pylibcudf/pylibcudf/stream_compaction.pxd index 03b463f5f3a..6e904e11ce1 100644 --- a/python/pylibcudf/pylibcudf/stream_compaction.pxd +++ b/python/pylibcudf/pylibcudf/stream_compaction.pxd @@ -8,7 +8,6 @@ from pylibcudf.libcudf.types cimport ( size_type, ) from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream from .column cimport Column from .expressions cimport Expression @@ -19,7 +18,7 @@ cpdef Table drop_nulls( Table source_table, list keys, size_type keep_threshold, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -27,14 +26,14 @@ cpdef Table drop_nans( Table source_table, list keys, size_type keep_threshold, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) cpdef Table apply_boolean_mask( Table source_table, Column boolean_mask, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -43,7 +42,7 @@ cpdef Table unique( list keys, duplicate_keep_option keep, null_equality nulls_equal, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -53,7 +52,7 @@ cpdef Table distinct( duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equal, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -62,7 +61,7 @@ cpdef Column 
distinct_indices( duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equal, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -72,7 +71,7 @@ cpdef Table stable_distinct( duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equal, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -80,6 +79,6 @@ cpdef Table filter( Table predicate_table, Expression predicate_expr, Table filter_table, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) diff --git a/python/pylibcudf/pylibcudf/stream_compaction.pyi b/python/pylibcudf/pylibcudf/stream_compaction.pyi index 49c44f82486..afdd692dde2 100644 --- a/python/pylibcudf/pylibcudf/stream_compaction.pyi +++ b/python/pylibcudf/pylibcudf/stream_compaction.pyi @@ -4,12 +4,12 @@ from enum import IntEnum from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.expressions import Expression from pylibcudf.table import Table from pylibcudf.types import NanEquality, NullEquality +from pylibcudf.utils import CudaStreamLike class DuplicateKeepOption(IntEnum): KEEP_ANY = ... @@ -21,20 +21,20 @@ def drop_nulls( source_table: Table, keys: list[int], keep_threshold: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def drop_nans( source_table: Table, keys: list[int], keep_threshold: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def apply_boolean_mask( source_table: Table, boolean_mask: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... 
def unique( @@ -42,7 +42,7 @@ def unique( keys: list[int], keep: DuplicateKeepOption, nulls_equal: NullEquality, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def distinct( @@ -51,7 +51,7 @@ def distinct( keep: DuplicateKeepOption, nulls_equal: NullEquality, nans_equal: NanEquality, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def distinct_indices( @@ -59,7 +59,7 @@ def distinct_indices( keep: DuplicateKeepOption, nulls_equal: NullEquality, nans_equal: NanEquality, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def stable_distinct( @@ -68,13 +68,13 @@ def stable_distinct( keep: DuplicateKeepOption, nulls_equal: NullEquality, nans_equal: NanEquality, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def filter( predicate_table: Table, predicate_expr: Expression, filter_table: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... 
diff --git a/python/pylibcudf/pylibcudf/stream_compaction.pyx b/python/pylibcudf/pylibcudf/stream_compaction.pyx index 4e676602cf8..b4751078acb 100644 --- a/python/pylibcudf/pylibcudf/stream_compaction.pyx +++ b/python/pylibcudf/pylibcudf/stream_compaction.pyx @@ -24,6 +24,7 @@ from .column cimport Column from .expressions cimport Expression from .table cimport Table from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "DuplicateKeepOption", @@ -41,7 +42,7 @@ cpdef Table drop_nulls( Table source_table, list keys, size_type keep_threshold, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Filters out rows from the input table based on the presence of nulls. @@ -65,21 +66,22 @@ cpdef Table drop_nulls( cdef unique_ptr[table] c_result cdef vector[size_type] c_keys = keys - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_stream_compaction.drop_nulls( - source_table.view(), c_keys, keep_threshold, stream.view(), mr.get_mr() + source_table.view(), c_keys, keep_threshold, _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Table drop_nans( Table source_table, list keys, size_type keep_threshold, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Filters out rows from the input table based on the presence of NaNs. 
@@ -103,20 +105,21 @@ cpdef Table drop_nans( cdef unique_ptr[table] c_result cdef vector[size_type] c_keys = keys - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_stream_compaction.drop_nans( - source_table.view(), c_keys, keep_threshold, stream.view(), mr.get_mr() + source_table.view(), c_keys, keep_threshold, _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Table apply_boolean_mask( Table source_table, Column boolean_mask, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Filters out rows from the input table based on a boolean mask. @@ -137,14 +140,15 @@ cpdef Table apply_boolean_mask( """ cdef unique_ptr[table] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_stream_compaction.apply_boolean_mask( - source_table.view(), boolean_mask.view(), stream.view(), mr.get_mr() + source_table.view(), boolean_mask.view(), _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Table unique( @@ -152,7 +156,7 @@ cpdef Table unique( list keys, duplicate_keep_option keep, null_equality nulls_equal, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Filter duplicate consecutive rows from the input table. 
@@ -184,14 +188,15 @@ cpdef Table unique( cdef unique_ptr[table] c_result cdef vector[size_type] c_keys = keys - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_stream_compaction.unique( - input.view(), c_keys, keep, nulls_equal, stream.view(), mr.get_mr() + input.view(), c_keys, keep, nulls_equal, _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Table distinct( @@ -200,7 +205,7 @@ cpdef Table distinct( duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equal, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Get the distinct rows from the input table. @@ -229,15 +234,16 @@ cpdef Table distinct( cdef unique_ptr[table] c_result cdef vector[size_type] c_keys = keys - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_stream_compaction.distinct( - input.view(), c_keys, keep, nulls_equal, nans_equal, stream.view(), + input.view(), c_keys, keep, nulls_equal, nans_equal, _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Column distinct_indices( @@ -245,7 +251,7 @@ cpdef Column distinct_indices( duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equal, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Get the indices of the distinct rows from the input table. 
@@ -270,14 +276,15 @@ cpdef Column distinct_indices( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_stream_compaction.distinct_indices( - input.view(), keep, nulls_equal, nans_equal, stream.view(), mr.get_mr() + input.view(), keep, nulls_equal, nans_equal, _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Table stable_distinct( @@ -286,7 +293,7 @@ cpdef Table stable_distinct( duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equal, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Get the distinct rows from the input table, preserving input order. @@ -315,22 +322,23 @@ cpdef Table stable_distinct( cdef unique_ptr[table] c_result cdef vector[size_type] c_keys = keys - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_stream_compaction.stable_distinct( - input.view(), c_keys, keep, nulls_equal, nans_equal, stream.view(), + input.view(), c_keys, keep, nulls_equal, nans_equal, _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Table filter( Table predicate_table, Expression predicate_expr, Table filter_table, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Filters a table using a predicate expression. 
@@ -353,7 +361,8 @@ cpdef Table filter( """ cdef unique_ptr[table] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -361,9 +370,9 @@ cpdef Table filter( predicate_table.view(), dereference(predicate_expr.c_obj.get()), filter_table.view(), - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) DuplicateKeepOption.__str__ = DuplicateKeepOption.__repr__ diff --git a/python/pylibcudf/pylibcudf/strings/attributes.pxd b/python/pylibcudf/pylibcudf/strings/attributes.pxd index 68b1ce9b5a0..64533b1ce3d 100644 --- a/python/pylibcudf/pylibcudf/strings/attributes.pxd +++ b/python/pylibcudf/pylibcudf/strings/attributes.pxd @@ -1,19 +1,18 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column count_characters( - Column source_strings, Stream stream=*, DeviceMemoryResource mr=* + Column source_strings, object stream = *, DeviceMemoryResource mr=* ) cpdef Column count_bytes( - Column source_strings, Stream stream=*, DeviceMemoryResource mr=* + Column source_strings, object stream = *, DeviceMemoryResource mr=* ) cpdef Column code_points( - Column source_strings, Stream stream=*, DeviceMemoryResource mr=* + Column source_strings, object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/attributes.pyi b/python/pylibcudf/pylibcudf/strings/attributes.pyi index 06b76e669d3..2e28fb9f186 100644 --- a/python/pylibcudf/pylibcudf/strings/attributes.pyi +++ b/python/pylibcudf/pylibcudf/strings/attributes.pyi @@ -1,23 +1,23 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike def count_characters( source_strings: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def count_bytes( source_strings: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def code_points( source_strings: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/strings/attributes.pyx b/python/pylibcudf/pylibcudf/strings/attributes.pyx index 2449d51122f..334270ea834 100644 --- a/python/pylibcudf/pylibcudf/strings/attributes.pyx +++ b/python/pylibcudf/pylibcudf/strings/attributes.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -9,11 +9,12 @@ from pylibcudf.libcudf.strings cimport attributes as cpp_attributes from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["code_points", "count_bytes", "count_characters"] cpdef Column count_characters( - Column source_strings, Stream stream=None, DeviceMemoryResource mr=None + Column source_strings, object stream=None, DeviceMemoryResource mr=None ): """ Returns a column containing character lengths of each string @@ -32,19 +33,20 @@ cpdef Column count_characters( New column with lengths for each string """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_attributes.count_characters( - source_strings.view(), stream.view(), mr.get_mr() + source_strings.view(), _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column count_bytes( - Column source_strings, Stream stream=None, DeviceMemoryResource mr=None + Column source_strings, object stream=None, DeviceMemoryResource mr=None ): """ Returns a column containing byte lengths of each string @@ -63,19 +65,20 @@ cpdef Column count_bytes( New column with the number of bytes for each string """ 
cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_attributes.count_bytes( - source_strings.view(), stream.view(), mr.get_mr() + source_strings.view(), _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column code_points( - Column source_strings, Stream stream=None, DeviceMemoryResource mr=None + Column source_strings, object stream=None, DeviceMemoryResource mr=None ): """ Creates a numeric column with code point values (integers) @@ -94,12 +97,13 @@ cpdef Column code_points( New column with code point integer values for each character """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_attributes.code_points( - source_strings.view(), stream.view(), mr.get_mr() + source_strings.view(), _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/capitalize.pxd b/python/pylibcudf/pylibcudf/strings/capitalize.pxd index ccbe15b3794..1a68c29e05c 100644 --- a/python/pylibcudf/pylibcudf/strings/capitalize.pxd +++ b/python/pylibcudf/pylibcudf/strings/capitalize.pxd @@ -1,20 +1,19 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.scalar cimport Scalar from pylibcudf.libcudf.strings.char_types cimport string_character_types from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column capitalize( - Column input, Scalar delimiters=*, Stream stream=*, DeviceMemoryResource mr=* + Column input, Scalar delimiters=*, object stream = *, DeviceMemoryResource mr=* ) cpdef Column title( Column input, string_character_types sequence_type=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) -cpdef Column is_title(Column input, Stream stream=*, DeviceMemoryResource mr=*) +cpdef Column is_title(Column input, object stream = *, DeviceMemoryResource mr=*) diff --git a/python/pylibcudf/pylibcudf/strings/capitalize.pyi b/python/pylibcudf/pylibcudf/strings/capitalize.pyi index 35554e6fff3..031d244bf25 100644 --- a/python/pylibcudf/pylibcudf/strings/capitalize.pyi +++ b/python/pylibcudf/pylibcudf/strings/capitalize.pyi @@ -1,27 +1,27 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar from pylibcudf.strings.char_types import StringCharacterTypes +from pylibcudf.utils import CudaStreamLike def capitalize( input: Column, delimiters: Scalar | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def title( input: Column, sequence_type: StringCharacterTypes = StringCharacterTypes.ALPHA, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
def is_title( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/capitalize.pyx b/python/pylibcudf/pylibcudf/strings/capitalize.pyx index 11291bd1243..be8c52a59b5 100644 --- a/python/pylibcudf/pylibcudf/strings/capitalize.pyx +++ b/python/pylibcudf/pylibcudf/strings/capitalize.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -17,13 +17,14 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream from cython.operator import dereference +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["capitalize", "is_title", "title"] cpdef Column capitalize( Column input, Scalar delimiters=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, # TODO: default scalar values # https://github.com/rapidsai/cudf/issues/15505 @@ -45,12 +46,13 @@ cpdef Column capitalize( Column of strings capitalized from the input column """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if delimiters is None: delimiters = Scalar.from_libcudf( - cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr()) + cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr()) ) cdef const string_scalar* cpp_delimiters = ( @@ -61,17 +63,17 @@ cpdef Column capitalize( c_result = cpp_capitalize.capitalize( input.view(), dereference(cpp_delimiters), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column title( Column input, 
string_character_types sequence_type=string_character_types.ALPHA, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Modifies first character of each word to upper-case and lower-cases @@ -92,17 +94,18 @@ cpdef Column title( Column of titled strings """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_capitalize.title( - input.view(), sequence_type, stream.view(), mr.get_mr() + input.view(), sequence_type, _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) -cpdef Column is_title(Column input, Stream stream=None, DeviceMemoryResource mr=None): +cpdef Column is_title(Column input, object stream=None, DeviceMemoryResource mr=None): """Checks if the strings in the input column are title formatted. For details, see :cpp:func:`is_title`. @@ -118,9 +121,10 @@ cpdef Column is_title(Column input, Stream stream=None, DeviceMemoryResource mr= Column of type BOOL8 """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_capitalize.is_title(input.view(), stream.view(), mr.get_mr()) + c_result = cpp_capitalize.is_title(input.view(), _cs, mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/case.pxd b/python/pylibcudf/pylibcudf/strings/case.pxd index 8a959fb61d5..fea9f68e95e 100644 --- a/python/pylibcudf/pylibcudf/strings/case.pxd +++ b/python/pylibcudf/pylibcudf/strings/case.pxd @@ -1,11 +1,10 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. 
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream -cpdef Column to_lower(Column input, Stream stream=*, DeviceMemoryResource mr=*) -cpdef Column to_upper(Column input, Stream stream=*, DeviceMemoryResource mr=*) -cpdef Column swapcase(Column input, Stream stream=*, DeviceMemoryResource mr=*) +cpdef Column to_lower(Column input, object stream = *, DeviceMemoryResource mr=*) +cpdef Column to_upper(Column input, object stream = *, DeviceMemoryResource mr=*) +cpdef Column swapcase(Column input, object stream = *, DeviceMemoryResource mr=*) diff --git a/python/pylibcudf/pylibcudf/strings/case.pyi b/python/pylibcudf/pylibcudf/strings/case.pyi index ecdb614fcd7..1337e7df5a9 100644 --- a/python/pylibcudf/pylibcudf/strings/case.pyi +++ b/python/pylibcudf/pylibcudf/strings/case.pyi @@ -1,23 +1,23 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike def to_lower( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def to_upper( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def swapcase( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/strings/case.pyx b/python/pylibcudf/pylibcudf/strings/case.pyx index 5e7d20f01f8..ec6539f42e1 100644 --- a/python/pylibcudf/pylibcudf/strings/case.pyx +++ b/python/pylibcudf/pylibcudf/strings/case.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -9,10 +9,11 @@ from pylibcudf.libcudf.strings cimport case as cpp_case from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["swapcase", "to_lower", "to_upper"] -cpdef Column to_lower(Column input, Stream stream=None, DeviceMemoryResource mr=None): +cpdef Column to_lower(Column input, object stream=None, DeviceMemoryResource mr=None): """Returns a column of lowercased strings. For details, see :cpp:func:`to_lower`. @@ -32,14 +33,15 @@ cpdef Column to_lower(Column input, Stream stream=None, DeviceMemoryResource mr= Column of strings lowercased from the input column """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_case.to_lower(input.view(), stream.view(), mr.get_mr()) + c_result = cpp_case.to_lower(input.view(), _cs, mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) -cpdef Column to_upper(Column input, Stream stream=None, DeviceMemoryResource mr=None): +cpdef Column to_upper(Column input, object stream=None, DeviceMemoryResource mr=None): """Returns a column of uppercased strings. For details, see :cpp:func:`to_upper`. 
@@ -59,14 +61,15 @@ cpdef Column to_upper(Column input, Stream stream=None, DeviceMemoryResource mr= Column of strings uppercased from the input column """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_case.to_upper(input.view(), stream.view(), mr.get_mr()) + c_result = cpp_case.to_upper(input.view(), _cs, mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) -cpdef Column swapcase(Column input, Stream stream=None, DeviceMemoryResource mr=None): +cpdef Column swapcase(Column input, object stream=None, DeviceMemoryResource mr=None): """Returns a column of strings where the lowercase characters are converted to uppercase and the uppercase characters are converted to lowercase. @@ -88,9 +91,10 @@ cpdef Column swapcase(Column input, Stream stream=None, DeviceMemoryResource mr= Column of strings """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_case.swapcase(input.view(), stream.view(), mr.get_mr()) + c_result = cpp_case.swapcase(input.view(), _cs, mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/char_types.pxd b/python/pylibcudf/pylibcudf/strings/char_types.pxd index 009886f3e9f..59c045dba15 100644 --- a/python/pylibcudf/pylibcudf/strings/char_types.pxd +++ b/python/pylibcudf/pylibcudf/strings/char_types.pxd @@ -1,18 +1,17 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.libcudf.strings.char_types cimport string_character_types from pylibcudf.scalar cimport Scalar from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column all_characters_of_type( Column source_strings, string_character_types types, string_character_types verify_types, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) @@ -21,6 +20,6 @@ cpdef Column filter_characters_of_type( string_character_types types_to_remove, Scalar replacement, string_character_types types_to_keep, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/char_types.pyi b/python/pylibcudf/pylibcudf/strings/char_types.pyi index 12749d79f6d..1740a67eb00 100644 --- a/python/pylibcudf/pylibcudf/strings/char_types.pyi +++ b/python/pylibcudf/pylibcudf/strings/char_types.pyi @@ -1,13 +1,13 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from enum import IntEnum from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar +from pylibcudf.utils import CudaStreamLike class StringCharacterTypes(IntEnum): DECIMAL = ... @@ -25,7 +25,7 @@ def all_characters_of_type( source_strings: Column, types: StringCharacterTypes, verify_types: StringCharacterTypes, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
def filter_characters_of_type( @@ -33,6 +33,6 @@ def filter_characters_of_type( types_to_remove: StringCharacterTypes, replacement: Scalar, types_to_keep: StringCharacterTypes, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/char_types.pyx b/python/pylibcudf/pylibcudf/strings/char_types.pyx index 5cb5025798e..2567ab8ee4b 100644 --- a/python/pylibcudf/pylibcudf/strings/char_types.pyx +++ b/python/pylibcudf/pylibcudf/strings/char_types.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -14,6 +14,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream from cython.operator import dereference +from cuda.bindings.cyruntime cimport cudaStream_t from pylibcudf.libcudf.strings.char_types import \ string_character_types as StringCharacterTypes # no-cython-lint @@ -27,7 +28,7 @@ cpdef Column all_characters_of_type( Column source_strings, string_character_types types, string_character_types verify_types, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -50,7 +51,8 @@ cpdef Column all_characters_of_type( New column of boolean results for each string """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -58,18 +60,18 @@ cpdef Column all_characters_of_type( source_strings.view(), types, verify_types, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column filter_characters_of_type( Column source_strings, 
string_character_types types_to_remove, Scalar replacement, string_character_types types_to_keep, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -99,7 +101,8 @@ cpdef Column filter_characters_of_type( replacement.c_obj.get() ) cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -108,10 +111,10 @@ cpdef Column filter_characters_of_type( types_to_remove, dereference(c_replacement), types_to_keep, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) StringCharacterTypes.__str__ = StringCharacterTypes.__repr__ diff --git a/python/pylibcudf/pylibcudf/strings/combine.pxd b/python/pylibcudf/pylibcudf/strings/combine.pxd index b889169c7c7..32a58abdc23 100644 --- a/python/pylibcudf/pylibcudf/strings/combine.pxd +++ b/python/pylibcudf/pylibcudf/strings/combine.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column @@ -9,7 +9,6 @@ from pylibcudf.libcudf.strings.combine cimport ( from pylibcudf.scalar cimport Scalar from pylibcudf.table cimport Table from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream ctypedef fused ColumnOrScalar: Column @@ -21,7 +20,7 @@ cpdef Column concatenate( Scalar narep=*, Scalar col_narep=*, separator_on_nulls separate_nulls=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -29,7 +28,7 @@ cpdef Column join_strings( Column input, Scalar separator, Scalar narep, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -40,6 +39,6 @@ cpdef Column join_list_elements( Scalar string_narep, separator_on_nulls separate_nulls, output_if_empty_list empty_list_policy, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/strings/combine.pyi b/python/pylibcudf/pylibcudf/strings/combine.pyi index fa568046fa8..3186709996f 100644 --- a/python/pylibcudf/pylibcudf/strings/combine.pyi +++ b/python/pylibcudf/pylibcudf/strings/combine.pyi @@ -1,14 +1,14 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from enum import IntEnum from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar from pylibcudf.table import Table +from pylibcudf.utils import CudaStreamLike class SeparatorOnNulls(IntEnum): YES = ... @@ -24,14 +24,14 @@ def concatenate( narep: Scalar | None = None, col_narep: Scalar | None = None, separate_nulls: SeparatorOnNulls = SeparatorOnNulls.YES, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
def join_strings( input: Column, separator: Scalar, narep: Scalar, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def join_list_elements( @@ -41,6 +41,6 @@ def join_list_elements( string_narep: Scalar, separate_nulls: SeparatorOnNulls, empty_list_policy: OutputIfEmptyList, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/combine.pyx b/python/pylibcudf/pylibcudf/strings/combine.pyx index e570a18c585..82903002907 100644 --- a/python/pylibcudf/pylibcudf/strings/combine.pyx +++ b/python/pylibcudf/pylibcudf/strings/combine.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -16,6 +16,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream from cython.operator import dereference +from cuda.bindings.cyruntime cimport cudaStream_t from pylibcudf.libcudf.strings.combine import \ output_if_empty_list as OutputIfEmptyList # no-cython-lint from pylibcudf.libcudf.strings.combine import \ @@ -35,7 +36,7 @@ cpdef Column concatenate( Scalar narep=None, Scalar col_narep=None, separator_on_nulls separate_nulls=separator_on_nulls.YES, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -68,12 +69,13 @@ cpdef Column concatenate( cdef unique_ptr[column] c_result cdef const string_scalar* c_col_narep cdef const string_scalar* c_separator - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if narep is None: narep = Scalar.from_libcudf( - cpp_make_string_scalar("".encode(), 
stream.view(), mr.get_mr()) + cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr()) ) cdef const string_scalar* c_narep = ( narep.c_obj.get() @@ -82,7 +84,7 @@ cpdef Column concatenate( if ColumnOrScalar is Column: if col_narep is None: col_narep = Scalar.from_libcudf( - cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr()) + cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr()) ) c_col_narep = ( col_narep.c_obj.get() @@ -95,7 +97,7 @@ cpdef Column concatenate( dereference(c_narep), dereference(c_col_narep), separate_nulls, - stream.view(), + _cs, mr.get_mr() ) ) @@ -112,20 +114,20 @@ cpdef Column concatenate( dereference(c_separator), dereference(c_narep), separate_nulls, - stream.view(), + _cs, mr.get_mr() ) ) else: raise ValueError("separator must be a Column or a Scalar") - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column join_strings( Column input, Scalar separator, Scalar narep, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -149,7 +151,8 @@ cpdef Column join_strings( New column containing one string """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) cdef const string_scalar* c_separator = ( separator.c_obj.get() @@ -163,12 +166,12 @@ cpdef Column join_strings( input.view(), dereference(c_separator), dereference(c_narep), - stream.view(), + _cs, mr.get_mr() ) ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column join_list_elements( @@ -178,7 +181,7 @@ cpdef Column join_list_elements( Scalar string_narep, separator_on_nulls separate_nulls, output_if_empty_list empty_list_policy, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -217,7 +220,8 @@ cpdef 
Column join_list_elements( New strings column with concatenated results """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) cdef const string_scalar* c_separator_narep = ( separator_narep.c_obj.get() @@ -237,7 +241,7 @@ cpdef Column join_list_elements( dereference(c_string_narep), separate_nulls, empty_list_policy, - stream.view(), + _cs, mr.get_mr() ) ) @@ -251,13 +255,13 @@ cpdef Column join_list_elements( dereference(c_separator_narep), separate_nulls, empty_list_policy, - stream.view(), + _cs, mr.get_mr() ) ) else: raise ValueError("separator must be a Column or a Scalar") - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) OutputIfEmptyList.__str__ = OutputIfEmptyList.__repr__ SeparatorOnNulls.__str__ = SeparatorOnNulls.__repr__ diff --git a/python/pylibcudf/pylibcudf/strings/contains.pxd b/python/pylibcudf/pylibcudf/strings/contains.pxd index b3b0f06efb5..585f2fac1ff 100644 --- a/python/pylibcudf/pylibcudf/strings/contains.pxd +++ b/python/pylibcudf/pylibcudf/strings/contains.pxd @@ -1,28 +1,27 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.strings.regex_program cimport RegexProgram from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column contains_re( - Column input, RegexProgram prog, Stream stream=*, DeviceMemoryResource mr=* + Column input, RegexProgram prog, object stream = *, DeviceMemoryResource mr=* ) cpdef Column count_re( - Column input, RegexProgram prog, Stream stream=*, DeviceMemoryResource mr=* + Column input, RegexProgram prog, object stream = *, DeviceMemoryResource mr=* ) cpdef Column matches_re( - Column input, RegexProgram prog, Stream stream=*, DeviceMemoryResource mr=* + Column input, RegexProgram prog, object stream = *, DeviceMemoryResource mr=* ) cpdef Column like( Column input, str pattern, str escape_character=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/strings/contains.pyi b/python/pylibcudf/pylibcudf/strings/contains.pyi index 3685cf5345a..b751ef0b24c 100644 --- a/python/pylibcudf/pylibcudf/strings/contains.pyi +++ b/python/pylibcudf/pylibcudf/strings/contains.pyi @@ -1,34 +1,34 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.strings.regex_program import RegexProgram +from pylibcudf.utils import CudaStreamLike def contains_re( input: Column, prog: RegexProgram, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def count_re( input: Column, prog: RegexProgram, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
def matches_re( input: Column, prog: RegexProgram, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def like( input: Column, pattern: str, escape_character: str | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/contains.pyx b/python/pylibcudf/pylibcudf/strings/contains.pyx index 8fe74228854..495d1637d8a 100644 --- a/python/pylibcudf/pylibcudf/strings/contains.pyx +++ b/python/pylibcudf/pylibcudf/strings/contains.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -11,13 +11,14 @@ from pylibcudf.strings.regex_program cimport RegexProgram from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["contains_re", "count_re", "like", "matches_re"] cpdef Column contains_re( Column input, RegexProgram prog, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Returns a boolean column identifying rows which match the given @@ -39,24 +40,27 @@ cpdef Column contains_re( """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() + if _stream is None: + _stream = _get_stream(None) mr = _get_memory_resource(mr) with nogil: result = cpp_contains.contains_re( input.view(), prog.c_obj.get()[0], - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column 
count_re( Column input, RegexProgram prog, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Returns the number of times the given regex_program's pattern @@ -78,24 +82,25 @@ cpdef Column count_re( """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: result = cpp_contains.count_re( input.view(), prog.c_obj.get()[0], - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column matches_re( Column input, RegexProgram prog, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Returns a boolean column identifying rows which @@ -118,25 +123,26 @@ cpdef Column matches_re( """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: result = cpp_contains.matches_re( input.view(), prog.c_obj.get()[0], - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column like( Column input, str pattern, str escape_character=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -161,7 +167,8 @@ cpdef Column like( New column of boolean results for each string """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if escape_character is None: @@ -175,9 +182,9 @@ cpdef Column like( input.view(), c_pattern, c_escape_character, - stream.view(), + _cs, mr.get_mr() ) - stream.synchronize() + _stream.synchronize() - return Column.from_libcudf(move(result), stream, mr) + return 
Column.from_libcudf(move(result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pxd index cc1206cf29b..0929544287f 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pxd +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pxd @@ -1,20 +1,19 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.scalar cimport Scalar from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column to_booleans( - Column input, Scalar true_string, Stream stream=*, DeviceMemoryResource mr=* + Column input, Scalar true_string, object stream = *, DeviceMemoryResource mr=* ) cpdef Column from_booleans( Column booleans, Scalar true_string, Scalar false_string, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyi index 608b47bad8c..10c7b96bfc0 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyi +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyi @@ -1,22 +1,22 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar +from pylibcudf.utils import CudaStreamLike def to_booleans( input: Column, true_string: Scalar, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def from_booleans( booleans: Column, true_string: Scalar, false_string: Scalar, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyx index 6f7965f8a3b..e8f963cf0f3 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -15,11 +15,12 @@ from pylibcudf.utils cimport _get_stream, _get_memory_resource from cython.operator import dereference from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["from_booleans", "to_booleans"] cpdef Column to_booleans( - Column input, Scalar true_string, Stream stream=None, DeviceMemoryResource mr=None + Column input, Scalar true_string, object stream=None, DeviceMemoryResource mr=None ): """ Returns a new bool column by parsing boolean values from the strings @@ -47,24 +48,25 @@ cpdef Column to_booleans( cdef const string_scalar* c_true_string = ( true_string.c_obj.get() ) - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_convert_booleans.to_booleans( input.view(), dereference(c_true_string), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column from_booleans( Column booleans, Scalar true_string, Scalar false_string, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -99,7 +101,8 @@ cpdef Column from_booleans( cdef const string_scalar* c_false_string = ( false_string.c_obj.get() ) - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -107,8 +110,8 @@ cpdef Column from_booleans( booleans.view(), dereference(c_true_string), dereference(c_false_string), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pxd 
b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pxd index 407eb06ce6a..d0a5d2fc829 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pxd +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pxd @@ -1,18 +1,17 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.string cimport string from pylibcudf.column cimport Column from pylibcudf.types cimport DataType from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column to_timestamps( Column input, DataType timestamp_type, str format, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) @@ -20,13 +19,13 @@ cpdef Column from_timestamps( Column timestamps, str format, Column input_strings_names, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) cpdef Column is_timestamp( Column input, str format, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyi index 5fdc863705d..99f067ecb04 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyi +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyi @@ -1,29 +1,29 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.types import DataType +from pylibcudf.utils import CudaStreamLike def to_timestamps( input: Column, timestamp_type: DataType, format: str, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def from_timestamps( timestamps: Column, format: str, input_strings_names: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def is_timestamp( input: Column, format: str, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyx index 07b35de7c54..633445a7383 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -14,6 +14,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream from pylibcudf.types import DataType +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["from_timestamps", "is_timestamp", "to_timestamps"] @@ -21,7 +22,7 @@ cpdef Column to_timestamps( Column input, DataType timestamp_type, str format, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -51,24 +52,25 @@ cpdef Column to_timestamps( """ cdef unique_ptr[column] c_result cdef string c_format = format.encode() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_convert_datetime.to_timestamps( input.view(), timestamp_type.c_obj, c_format, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column from_timestamps( Column timestamps, str format, Column input_strings_names, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -98,23 +100,24 @@ cpdef Column from_timestamps( """ cdef unique_ptr[column] c_result cdef string c_format = format.encode() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_convert_datetime.from_timestamps( timestamps.view(), c_format, input_strings_names.view(), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column is_timestamp( Column input, str format, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -141,14 +144,15 @@ cpdef Column is_timestamp( """ cdef unique_ptr[column] 
c_result cdef string c_format = format.encode() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_convert_datetime.is_timestamp( input.view(), c_format, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pxd index 62b372d0af4..a912d939a83 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pxd +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pxd @@ -1,24 +1,23 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.string cimport string from pylibcudf.column cimport Column from pylibcudf.types cimport DataType from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column to_durations( Column input, DataType duration_type, str format, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) cpdef Column from_durations( Column durations, str format=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyi index 95ba392ec94..ac9fd9825dc 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyi +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyi @@ -1,22 +1,22 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.types import DataType +from pylibcudf.utils import CudaStreamLike def to_durations( input: Column, duration_type: DataType, format: str, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def from_durations( durations: Column, format: str | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx index 9bf8eb96009..548df7398b4 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -14,6 +14,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream from pylibcudf.types import DataType +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["from_durations", "to_durations"] @@ -21,7 +22,7 @@ cpdef Column to_durations( Column input, DataType duration_type, str format, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """ @@ -51,7 +52,8 @@ cpdef Column to_durations( """ cdef unique_ptr[column] c_result cdef string c_format = format.encode() - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -59,16 +61,16 @@ cpdef Column to_durations( input.view(), duration_type.c_obj, c_format, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column from_durations( Column durations, str format=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ): """ @@ -95,7 +97,8 @@ cpdef Column from_durations( New strings column with formatted durations. 
""" cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if format is None: @@ -106,8 +109,8 @@ cpdef Column from_durations( c_result = cpp_convert_durations.from_durations( durations.view(), c_format, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pxd index 046556db181..439f8884008 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pxd +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pxd @@ -1,26 +1,25 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.types cimport DataType from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column to_fixed_point( Column input, DataType output_type, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) cpdef Column from_fixed_point( - Column input, Stream stream=*, DeviceMemoryResource mr=* + Column input, object stream = *, DeviceMemoryResource mr=* ) cpdef Column is_fixed_point( Column input, DataType decimal_type=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyi index 7269f970069..a9d4a0eac98 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyi +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyi @@ -1,26 +1,26 @@ -# 
SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.types import DataType +from pylibcudf.utils import CudaStreamLike def to_fixed_point( input: Column, output_type: DataType, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def from_fixed_point( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def is_fixed_point( input: Column, decimal_type: DataType | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx index 13020a5ee73..059373790c5 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -12,12 +12,13 @@ from pylibcudf.types cimport DataType, type_id from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["from_fixed_point", "is_fixed_point", "to_fixed_point"] cpdef Column to_fixed_point( - Column input, DataType output_type, Stream stream=None, DeviceMemoryResource mr=None + Column input, DataType output_type, object stream=None, DeviceMemoryResource mr=None ): """ Returns a new fixed-point column parsing decimal values from the @@ -42,21 +43,22 @@ cpdef Column to_fixed_point( New column of output_type. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_fixed_point.to_fixed_point( input.view(), output_type.c_obj, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column from_fixed_point( - Column input, Stream stream=None, DeviceMemoryResource mr=None + Column input, object stream=None, DeviceMemoryResource mr=None ): """ Returns a new strings column converting the fixed-point values @@ -78,20 +80,21 @@ cpdef Column from_fixed_point( New strings column. 
""" cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_fixed_point.from_fixed_point( - input.view(), stream.view(), mr.get_mr() + input.view(), _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column is_fixed_point( Column input, DataType decimal_type=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -118,7 +121,8 @@ cpdef Column is_fixed_point( New column of boolean results for each string. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if decimal_type is None: @@ -128,8 +132,8 @@ cpdef Column is_fixed_point( c_result = cpp_fixed_point.is_fixed_point( input.view(), decimal_type.c_obj, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pxd index a2b98fa0b74..0d394fa1fe7 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pxd +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pxd @@ -1,16 +1,15 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.types cimport DataType from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column to_floats( - Column strings, DataType output_type, Stream stream=*, DeviceMemoryResource mr=* + Column strings, DataType output_type, object stream = *, DeviceMemoryResource mr=* ) -cpdef Column from_floats(Column floats, Stream stream=*, DeviceMemoryResource mr=*) +cpdef Column from_floats(Column floats, object stream = *, DeviceMemoryResource mr=*) -cpdef Column is_float(Column input, Stream stream=*, DeviceMemoryResource mr=*) +cpdef Column is_float(Column input, object stream = *, DeviceMemoryResource mr=*) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyi index b5c8d7e7497..b334dfef9c7 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyi +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyi @@ -1,25 +1,25 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.types import DataType +from pylibcudf.utils import CudaStreamLike def to_floats( strings: Column, output_type: DataType, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def from_floats( floats: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def is_float( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyx index 59ac17a3e1c..d4901ce7be6 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -12,13 +12,14 @@ from pylibcudf.types cimport DataType from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["from_floats", "is_float", "to_floats"] cpdef Column to_floats( Column strings, DataType output_type, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -44,22 +45,23 @@ cpdef Column to_floats( New column with floats converted from strings. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_convert_floats.to_floats( strings.view(), output_type.c_obj, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column from_floats( - Column floats, Stream stream=None, DeviceMemoryResource mr=None + Column floats, object stream=None, DeviceMemoryResource mr=None ): """ Returns a new strings column converting the float values from the @@ -81,18 +83,19 @@ cpdef Column from_floats( New strings column with floats as strings. 
""" cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_convert_floats.from_floats( - floats.view(), stream.view(), mr.get_mr() + floats.view(), _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) -cpdef Column is_float(Column input, Stream stream=None, DeviceMemoryResource mr=None): +cpdef Column is_float(Column input, object stream=None, DeviceMemoryResource mr=None): """ Returns a boolean column identifying strings in which all characters are valid for conversion to floats. @@ -113,10 +116,13 @@ cpdef Column is_float(Column input, Stream stream=None, DeviceMemoryResource mr= New column of boolean results for each string. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_convert_floats.is_float(input.view(), stream.view(), mr.get_mr()) + c_result = cpp_convert_floats.is_float( + input.view(), _cs, mr.get_mr() + ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pxd index 376081e9b20..059e8c31f19 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pxd +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pxd @@ -1,32 +1,31 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.types cimport DataType from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column to_integers( - Column input, DataType output_type, Stream stream=*, DeviceMemoryResource mr=* + Column input, DataType output_type, object stream = *, DeviceMemoryResource mr=* ) cpdef Column from_integers( - Column integers, Stream stream=*, DeviceMemoryResource mr=* + Column integers, object stream = *, DeviceMemoryResource mr=* ) cpdef Column is_integer( - Column input, DataType int_type=*, Stream stream=*, DeviceMemoryResource mr=* + Column input, DataType int_type=*, object stream = *, DeviceMemoryResource mr=* ) cpdef Column hex_to_integers( - Column input, DataType output_type, Stream stream=*, DeviceMemoryResource mr=* + Column input, DataType output_type, object stream = *, DeviceMemoryResource mr=* ) cpdef Column is_hex( - Column input, Stream stream=*, DeviceMemoryResource mr=* + Column input, object stream = *, DeviceMemoryResource mr=* ) cpdef Column integers_to_hex( - Column input, Stream stream=*, DeviceMemoryResource mr=* + Column input, object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyi index 4625ee5e883..88a66350466 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyi +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyi @@ -1,42 +1,42 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.types import DataType +from pylibcudf.utils import CudaStreamLike def to_integers( input: Column, output_type: DataType, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def from_integers( integers: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def is_integer( input: Column, int_type: DataType | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def hex_to_integers( input: Column, output_type: DataType, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def is_hex( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def integers_to_hex( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyx index c5945e5e1e5..b717ddbbcda 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_integers.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -12,6 +12,7 @@ from pylibcudf.types cimport DataType from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "from_integers", @@ -23,7 +24,7 @@ __all__ = [ ] cpdef Column to_integers( - Column input, DataType output_type, Stream stream=None, DeviceMemoryResource mr=None + Column input, DataType output_type, object stream=None, DeviceMemoryResource mr=None ): """ Returns a new integer numeric column parsing integer values from the @@ -48,7 +49,8 @@ cpdef Column to_integers( New column with integers converted from strings. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -56,16 +58,16 @@ cpdef Column to_integers( cpp_convert_integers.to_integers( input.view(), output_type.c_obj, - stream.view(), + _cs, mr.get_mr() ) ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column from_integers( - Column integers, Stream stream=None, DeviceMemoryResource mr=None + Column integers, object stream=None, DeviceMemoryResource mr=None ): """ Returns a new strings column converting the integer values from the @@ -87,25 +89,26 @@ cpdef Column from_integers( New strings column with integers as strings. 
""" cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = move( cpp_convert_integers.from_integers( integers.view(), - stream.view(), + _cs, mr.get_mr() ) ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column is_integer( Column input, DataType int_type=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -133,7 +136,8 @@ cpdef Column is_integer( New column of boolean results for each string. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if int_type is None: @@ -141,7 +145,7 @@ cpdef Column is_integer( c_result = move( cpp_convert_integers.is_integer( input.view(), - stream.view(), + _cs, mr.get_mr() ) ) @@ -151,16 +155,16 @@ cpdef Column is_integer( cpp_convert_integers.is_integer( input.view(), int_type.c_obj, - stream.view(), + _cs, mr.get_mr() ) ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column hex_to_integers( - Column input, DataType output_type, Stream stream=None, DeviceMemoryResource mr=None + Column input, DataType output_type, object stream=None, DeviceMemoryResource mr=None ): """ Returns a new integer numeric column parsing hexadecimal values @@ -185,7 +189,8 @@ cpdef Column hex_to_integers( New column with integers converted from strings. 
""" cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -193,15 +198,15 @@ cpdef Column hex_to_integers( cpp_convert_integers.hex_to_integers( input.view(), output_type.c_obj, - stream.view(), + _cs, mr.get_mr() ) ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) -cpdef Column is_hex(Column input, Stream stream=None, DeviceMemoryResource mr=None): +cpdef Column is_hex(Column input, object stream=None, DeviceMemoryResource mr=None): """ Returns a boolean column identifying strings in which all characters are valid for conversion to integers from hex. @@ -222,23 +227,24 @@ cpdef Column is_hex(Column input, Stream stream=None, DeviceMemoryResource mr=No New column of boolean results for each string. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = move( cpp_convert_integers.is_hex( input.view(), - stream.view(), + _cs, mr.get_mr() ) ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column integers_to_hex( - Column input, Stream stream=None, DeviceMemoryResource mr=None + Column input, object stream=None, DeviceMemoryResource mr=None ): """ Returns a new strings column converting integer columns to hexadecimal @@ -260,16 +266,17 @@ cpdef Column integers_to_hex( New strings column with hexadecimal characters. 
""" cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = move( cpp_convert_integers.integers_to_hex( input.view(), - stream.view(), + _cs, mr.get_mr() ) ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pxd index 53a3927af41..04df2862c31 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pxd +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pxd @@ -1,19 +1,18 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column ipv4_to_integers( - Column input, Stream stream=*, DeviceMemoryResource mr=* + Column input, object stream = *, DeviceMemoryResource mr=* ) cpdef Column integers_to_ipv4( - Column integers, Stream stream=*, DeviceMemoryResource mr=* + Column integers, object stream = *, DeviceMemoryResource mr=* ) cpdef Column is_ipv4( - Column input, Stream stream=*, DeviceMemoryResource mr=* + Column input, object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyi index 86a969a4021..16e4d8d990a 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyi +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyi @@ -1,23 +1,23 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike def ipv4_to_integers( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def integers_to_ipv4( integers: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def is_ipv4( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx index 72021e85a9d..45b98190aa7 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -9,11 +9,12 @@ from pylibcudf.libcudf.strings.convert cimport convert_ipv4 as cpp_convert_ipv4 from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["integers_to_ipv4", "ipv4_to_integers", "is_ipv4"] cpdef Column ipv4_to_integers( - Column input, Stream stream=None, DeviceMemoryResource mr=None + Column input, object stream=None, DeviceMemoryResource mr=None ): """ Converts IPv4 addresses into integers. @@ -34,19 +35,20 @@ cpdef Column ipv4_to_integers( New uint32 column converted from strings. 
""" cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_convert_ipv4.ipv4_to_integers( - input.view(), stream.view(), mr.get_mr() + input.view(), _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column integers_to_ipv4( - Column integers, Stream stream=None, DeviceMemoryResource mr=None + Column integers, object stream=None, DeviceMemoryResource mr=None ): """ Converts integers into IPv4 addresses as strings. @@ -67,18 +69,19 @@ cpdef Column integers_to_ipv4( New strings column. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_convert_ipv4.integers_to_ipv4( - integers.view(), stream.view(), mr.get_mr() + integers.view(), _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) -cpdef Column is_ipv4(Column input, Stream stream=None, DeviceMemoryResource mr=None): +cpdef Column is_ipv4(Column input, object stream=None, DeviceMemoryResource mr=None): """ Returns a boolean column identifying strings in which all characters are valid for conversion to integers from IPv4 format. @@ -99,10 +102,11 @@ cpdef Column is_ipv4(Column input, Stream stream=None, DeviceMemoryResource mr=N New column of boolean results for each string. 
""" cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_convert_ipv4.is_ipv4(input.view(), stream.view(), mr.get_mr()) + c_result = cpp_convert_ipv4.is_ipv4(input.view(), _cs, mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pxd index a2dcc15dacd..c25cf9d7146 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pxd +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pxd @@ -1,16 +1,15 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.scalar cimport Scalar from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column format_list_column( Column input, Scalar na_rep=*, Column separators=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyi index cf301dd9a1b..29f94a30123 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyi +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyi @@ -1,16 +1,16 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar +from pylibcudf.utils import CudaStreamLike def format_list_column( input: Column, na_rep: Scalar | None = None, separators: Column | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyx index 79648efcc3f..9c8f9d7b02e 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -20,6 +20,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream from cython.operator import dereference +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["format_list_column"] @@ -27,7 +28,7 @@ cpdef Column format_list_column( Column input, Scalar na_rep=None, Column separators=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -58,12 +59,13 @@ cpdef Column format_list_column( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if na_rep is None: na_rep = Scalar.from_libcudf( - cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr()) + cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr()) ) cdef const string_scalar* c_na_rep = ( @@ -78,8 +80,8 @@ cpdef Column format_list_column( 
input.view(), dereference(c_na_rep), separators.view(), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pxd index dce44f5e547..56b1f803d38 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pxd +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pxd @@ -1,15 +1,14 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column url_encode( - Column Input, Stream stream=*, DeviceMemoryResource mr=* + Column Input, object stream = *, DeviceMemoryResource mr=* ) cpdef Column url_decode( - Column Input, Stream stream=*, DeviceMemoryResource mr=* + Column Input, object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi index 6a248cdc974..8707da953b5 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyi @@ -1,18 +1,18 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike def url_encode( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
def url_decode( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyx index 30ca51f27f7..efe009e6c02 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -10,10 +10,11 @@ from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["url_decode", "url_encode"] -cpdef Column url_encode(Column input, Stream stream=None, DeviceMemoryResource mr=None): +cpdef Column url_encode(Column input, object stream=None, DeviceMemoryResource mr=None): """ Encodes each string using URL encoding. @@ -33,16 +34,19 @@ cpdef Column url_encode(Column input, Stream stream=None, DeviceMemoryResource m New strings column. 
""" cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_convert_urls.url_encode(input.view(), stream.view(), mr.get_mr()) + c_result = cpp_convert_urls.url_encode( + input.view(), _cs, mr.get_mr() + ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) -cpdef Column url_decode(Column input, Stream stream=None, DeviceMemoryResource mr=None): +cpdef Column url_decode(Column input, object stream=None, DeviceMemoryResource mr=None): """ Decodes each string using URL encoding. @@ -62,10 +66,13 @@ cpdef Column url_decode(Column input, Stream stream=None, DeviceMemoryResource m New strings column. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_convert_urls.url_decode(input.view(), stream.view(), mr.get_mr()) + c_result = cpp_convert_urls.url_decode( + input.view(), _cs, mr.get_mr() + ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/extract.pxd b/python/pylibcudf/pylibcudf/strings/extract.pxd index c8fcb900d2b..85f722970c8 100644 --- a/python/pylibcudf/pylibcudf/strings/extract.pxd +++ b/python/pylibcudf/pylibcudf/strings/extract.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column @@ -6,21 +6,20 @@ from pylibcudf.strings.regex_program cimport RegexProgram from pylibcudf.table cimport Table from pylibcudf.libcudf.types cimport size_type from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Table extract( - Column input, RegexProgram prog, Stream stream=*, DeviceMemoryResource mr=* + Column input, RegexProgram prog, object stream = *, DeviceMemoryResource mr=* ) cpdef Column extract_all_record( - Column input, RegexProgram prog, Stream stream=*, DeviceMemoryResource mr=* + Column input, RegexProgram prog, object stream = *, DeviceMemoryResource mr=* ) cpdef Column extract_single( Column input, RegexProgram prog, size_type group, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/strings/extract.pyi b/python/pylibcudf/pylibcudf/strings/extract.pyi index 853420a8091..a9607266bbc 100644 --- a/python/pylibcudf/pylibcudf/strings/extract.pyi +++ b/python/pylibcudf/pylibcudf/strings/extract.pyi @@ -1,29 +1,29 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.strings.regex_program import RegexProgram from pylibcudf.table import Table +from pylibcudf.utils import CudaStreamLike def extract( input: Column, prog: RegexProgram, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def extract_all_record( input: Column, prog: RegexProgram, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
def extract_single( input: Column, prog: RegexProgram, group: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/extract.pyx b/python/pylibcudf/pylibcudf/strings/extract.pyx index bac20c2cd15..c670b226e84 100644 --- a/python/pylibcudf/pylibcudf/strings/extract.pyx +++ b/python/pylibcudf/pylibcudf/strings/extract.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -13,11 +13,12 @@ from pylibcudf.libcudf.types cimport size_type from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["extract", "extract_all_record", "extract_single"] cpdef Table extract( - Column input, RegexProgram prog, Stream stream=None, DeviceMemoryResource mr=None + Column input, RegexProgram prog, object stream=None, DeviceMemoryResource mr=None ): """ Returns a table of strings columns where each column @@ -41,22 +42,23 @@ cpdef Table extract( Columns of strings extracted from the input column. 
""" cdef unique_ptr[table] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_extract.extract( input.view(), prog.c_obj.get()[0], - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Column extract_all_record( - Column input, RegexProgram prog, Stream stream=None, DeviceMemoryResource mr=None + Column input, RegexProgram prog, object stream=None, DeviceMemoryResource mr=None ): """ Returns a lists column of strings where each string column @@ -80,25 +82,26 @@ cpdef Column extract_all_record( Lists column containing strings extracted from the input column """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_extract.extract_all_record( input.view(), prog.c_obj.get()[0], - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column extract_single( Column input, RegexProgram prog, size_type group, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -124,7 +127,8 @@ cpdef Column extract_single( Column of strings extracted from the input column """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -132,8 +136,8 @@ cpdef Column extract_single( input.view(), prog.c_obj.get()[0], group, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git 
a/python/pylibcudf/pylibcudf/strings/find.pxd b/python/pylibcudf/pylibcudf/strings/find.pxd index 3ec32563c5a..1a04cf4eca2 100644 --- a/python/pylibcudf/pylibcudf/strings/find.pxd +++ b/python/pylibcudf/pylibcudf/strings/find.pxd @@ -1,11 +1,10 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.libcudf.types cimport size_type from pylibcudf.scalar cimport Scalar from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream ctypedef fused ColumnOrScalar: Column @@ -16,7 +15,7 @@ cpdef Column find( ColumnOrScalar target, size_type start=*, size_type stop=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) @@ -25,27 +24,27 @@ cpdef Column rfind( Scalar target, size_type start=*, size_type stop=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column contains( Column input, ColumnOrScalar target, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column starts_with( Column input, ColumnOrScalar target, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column ends_with( Column input, ColumnOrScalar target, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/strings/find.pyi b/python/pylibcudf/pylibcudf/strings/find.pyi index a566fbdd72a..a8b3ca1da7c 100644 --- a/python/pylibcudf/pylibcudf/strings/find.pyi +++ b/python/pylibcudf/pylibcudf/strings/find.pyi @@ -1,18 +1,18 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar +from pylibcudf.utils import CudaStreamLike def find( input: Column, target: Column | Scalar, start: int = 0, stop: int = -1, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def rfind( @@ -20,24 +20,24 @@ def rfind( target: Scalar, start: int = 0, stop: int = -1, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def contains( input: Column, target: Column | Scalar, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def starts_with( input: Column, target: Column | Scalar, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def ends_with( input: Column, target: Column | Scalar, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/find.pyx b/python/pylibcudf/pylibcudf/strings/find.pyx index 7323a924342..102a8787651 100644 --- a/python/pylibcudf/pylibcudf/strings/find.pyx +++ b/python/pylibcudf/pylibcudf/strings/find.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -14,6 +14,7 @@ from rmm.pylibrmm.stream cimport Stream from cython.operator import dereference from pylibcudf.libcudf.scalar.scalar cimport string_scalar +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["contains", "ends_with", "find", "rfind", "starts_with"] @@ -22,7 +23,7 @@ cpdef Column find( ColumnOrScalar target, size_type start=0, size_type stop=-1, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Returns a column of character position values where the target string is @@ -58,7 +59,8 @@ cpdef Column find( New integer column with character position values """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if ColumnOrScalar is Column: with nogil: @@ -66,7 +68,7 @@ cpdef Column find( input.view(), target.view(), start, - stream.view(), + _cs, mr.get_mr() ) elif ColumnOrScalar is Scalar: @@ -76,13 +78,13 @@ cpdef Column find( dereference((target.c_obj.get())), start, stop, - stream.view(), + _cs, mr.get_mr() ) else: raise ValueError(f"Invalid target {target}") - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column rfind( @@ -90,7 +92,7 @@ cpdef Column rfind( Scalar target, size_type start=0, size_type stop=-1, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -119,7 +121,8 @@ cpdef Column rfind( New integer column with character position values """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: result = cpp_find.rfind( @@ -127,16 +130,16 @@ cpdef Column rfind( dereference((target.c_obj.get())), start, stop, - stream.view(), + 
_cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column contains( Column input, ColumnOrScalar target, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -169,14 +172,15 @@ cpdef Column contains( New boolean column with True for each string that contains the target """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if ColumnOrScalar is Column: with nogil: result = cpp_find.contains( input.view(), target.view(), - stream.view(), + _cs, mr.get_mr() ) elif ColumnOrScalar is Scalar: @@ -184,19 +188,19 @@ cpdef Column contains( result = cpp_find.contains( input.view(), dereference((target.c_obj.get())), - stream.view(), + _cs, mr.get_mr() ) else: raise ValueError(f"Invalid target {target}") - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column starts_with( Column input, ColumnOrScalar target, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -229,7 +233,8 @@ cpdef Column starts_with( New boolean column with True for each string that starts with the target """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if ColumnOrScalar is Column: @@ -237,7 +242,7 @@ cpdef Column starts_with( result = cpp_find.starts_with( input.view(), target.view(), - stream.view(), + _cs, mr.get_mr() ) elif ColumnOrScalar is Scalar: @@ -245,18 +250,18 @@ cpdef Column starts_with( result = cpp_find.starts_with( input.view(), dereference((target.c_obj.get())), - stream.view(), + _cs, mr.get_mr() ) else: raise ValueError(f"Invalid target {target}") - return Column.from_libcudf(move(result), stream, mr) + 
return Column.from_libcudf(move(result), _stream, mr) cpdef Column ends_with( Column input, ColumnOrScalar target, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -288,14 +293,15 @@ cpdef Column ends_with( New boolean column with True for each string that ends with the target """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if ColumnOrScalar is Column: with nogil: result = cpp_find.ends_with( input.view(), target.view(), - stream.view(), + _cs, mr.get_mr() ) elif ColumnOrScalar is Scalar: @@ -303,10 +309,10 @@ cpdef Column ends_with( result = cpp_find.ends_with( input.view(), dereference((target.c_obj.get())), - stream.view(), + _cs, mr.get_mr() ) else: raise ValueError(f"Invalid target {target}") - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/find_multiple.pxd b/python/pylibcudf/pylibcudf/strings/find_multiple.pxd index f6677607c5e..e01cb33fdb8 100644 --- a/python/pylibcudf/pylibcudf/strings/find_multiple.pxd +++ b/python/pylibcudf/pylibcudf/strings/find_multiple.pxd @@ -1,21 +1,20 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.table cimport Table from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column find_multiple( Column input, Column targets, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Table contains_multiple( Column input, Column targets, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/strings/find_multiple.pyi b/python/pylibcudf/pylibcudf/strings/find_multiple.pyi index 48de0eac0e1..76115cd7496 100644 --- a/python/pylibcudf/pylibcudf/strings/find_multiple.pyi +++ b/python/pylibcudf/pylibcudf/strings/find_multiple.pyi @@ -1,21 +1,21 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.table import Table +from pylibcudf.utils import CudaStreamLike def find_multiple( input: Column, targets: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def contains_multiple( input: Column, targets: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... diff --git a/python/pylibcudf/pylibcudf/strings/find_multiple.pyx b/python/pylibcudf/pylibcudf/strings/find_multiple.pyx index e18b178f803..ed5f0d78506 100644 --- a/python/pylibcudf/pylibcudf/strings/find_multiple.pyx +++ b/python/pylibcudf/pylibcudf/strings/find_multiple.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -11,13 +11,14 @@ from pylibcudf.table cimport Table from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["find_multiple", "contains_multiple"] cpdef Column find_multiple( Column input, Column targets, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -41,24 +42,25 @@ cpdef Column find_multiple( Lists column with character position values """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_find_multiple.find_multiple( input.view(), targets.view(), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Table contains_multiple( Column input, Column targets, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -82,15 +84,16 @@ cpdef Table contains_multiple( Columns of booleans """ cdef unique_ptr[table] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_find_multiple.contains_multiple( input.view(), targets.view(), - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/findall.pxd b/python/pylibcudf/pylibcudf/strings/findall.pxd index 2dc75fa6d34..ec7e01f7539 100644 --- a/python/pylibcudf/pylibcudf/strings/findall.pxd +++ b/python/pylibcudf/pylibcudf/strings/findall.pxd @@ -1,15 +1,14 @@ -# SPDX-FileCopyrightText: 
Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.strings.regex_program cimport RegexProgram from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column find_re( - Column input, RegexProgram pattern, Stream stream=*, DeviceMemoryResource mr=* + Column input, RegexProgram pattern, object stream = *, DeviceMemoryResource mr=* ) cpdef Column findall( - Column input, RegexProgram pattern, Stream stream=*, DeviceMemoryResource mr=* + Column input, RegexProgram pattern, object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/findall.pyi b/python/pylibcudf/pylibcudf/strings/findall.pyi index 5677a99d325..f72e786cf1d 100644 --- a/python/pylibcudf/pylibcudf/strings/findall.pyi +++ b/python/pylibcudf/pylibcudf/strings/findall.pyi @@ -1,21 +1,21 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.strings.regex_program import RegexProgram +from pylibcudf.utils import CudaStreamLike def find_re( input: Column, pattern: RegexProgram, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def findall( input: Column, pattern: RegexProgram, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/strings/findall.pyx b/python/pylibcudf/pylibcudf/strings/findall.pyx index 881664faced..5647a791ef1 100644 --- a/python/pylibcudf/pylibcudf/strings/findall.pyx +++ b/python/pylibcudf/pylibcudf/strings/findall.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -10,11 +10,12 @@ from pylibcudf.strings.regex_program cimport RegexProgram from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["findall", "find_re"] cpdef Column findall( - Column input, RegexProgram pattern, Stream stream=None, DeviceMemoryResource mr=None + Column input, RegexProgram pattern, object stream=None, DeviceMemoryResource mr=None ): """ Returns a lists column of strings for each matching occurrence using @@ -37,22 +38,23 @@ cpdef Column findall( New lists column of strings """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_findall.findall( input.view(), pattern.c_obj.get()[0], - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column find_re( - Column input, RegexProgram pattern, Stream stream=None, DeviceMemoryResource mr=None + Column input, RegexProgram pattern, object stream=None, DeviceMemoryResource mr=None ): """ Returns character positions where the pattern first matches @@ -75,15 +77,16 @@ cpdef Column find_re( New column of integers """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream 
_stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_findall.find_re( input.view(), pattern.c_obj.get()[0], - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/padding.pxd b/python/pylibcudf/pylibcudf/strings/padding.pxd index 1dfbbd9950f..61dcaf7cba9 100644 --- a/python/pylibcudf/pylibcudf/strings/padding.pxd +++ b/python/pylibcudf/pylibcudf/strings/padding.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.string cimport string @@ -6,7 +6,6 @@ from pylibcudf.column cimport Column from pylibcudf.libcudf.strings.side_type cimport side_type from pylibcudf.libcudf.types cimport size_type from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column pad( @@ -14,14 +13,14 @@ cpdef Column pad( size_type width, side_type side, str fill_char, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column zfill( - Column input, size_type width, Stream stream=*, DeviceMemoryResource mr=* + Column input, size_type width, object stream = *, DeviceMemoryResource mr=* ) cpdef Column zfill_by_widths( - Column input, Column widths, Stream stream=*, DeviceMemoryResource mr=* + Column input, Column widths, object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/padding.pyi b/python/pylibcudf/pylibcudf/strings/padding.pyi index 26af5429acb..904b0022317 100644 --- a/python/pylibcudf/pylibcudf/strings/padding.pyi +++ b/python/pylibcudf/pylibcudf/strings/padding.pyi @@ -1,29 +1,29 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. 
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.strings.side_type import SideType +from pylibcudf.utils import CudaStreamLike def pad( input: Column, width: int, side: SideType, fill_char: str, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def zfill( input: Column, width: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def zfill_by_widths( input: Column, widths: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/padding.pyx b/python/pylibcudf/pylibcudf/strings/padding.pyx index 9409970b075..d8eb4f1da4a 100644 --- a/python/pylibcudf/pylibcudf/strings/padding.pyx +++ b/python/pylibcudf/pylibcudf/strings/padding.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -10,6 +10,7 @@ from pylibcudf.libcudf.types cimport size_type from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["pad", "zfill", "zfill_by_widths"] @@ -18,7 +19,7 @@ cpdef Column pad( size_type width, side_type side, str fill_char, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -46,7 +47,8 @@ cpdef Column pad( """ cdef unique_ptr[column] c_result cdef string c_fill_char = fill_char.encode("utf-8") - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -55,14 +57,14 @@ cpdef Column pad( width, side, c_fill_char, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column zfill( - Column input, size_type width, Stream stream=None, DeviceMemoryResource mr=None + Column input, size_type width, object stream=None, DeviceMemoryResource mr=None ): """ Add '0' as padding to the left of each string. @@ -84,21 +86,22 @@ cpdef Column zfill( New column of strings. 
""" cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_padding.zfill( input.view(), width, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column zfill_by_widths( - Column input, Column widths, Stream stream=None, DeviceMemoryResource mr=None + Column input, Column widths, object stream=None, DeviceMemoryResource mr=None ): """ Add '0' as padding to the left of each string. @@ -120,15 +123,16 @@ cpdef Column zfill_by_widths( New column of strings. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_padding.zfill_by_widths( input.view(), widths.view(), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/repeat.pxd b/python/pylibcudf/pylibcudf/strings/repeat.pxd index f1abe23ce59..60725aa688e 100644 --- a/python/pylibcudf/pylibcudf/strings/repeat.pxd +++ b/python/pylibcudf/pylibcudf/strings/repeat.pxd @@ -1,10 +1,9 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.libcudf.types cimport size_type from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream ctypedef fused ColumnorSizeType: Column @@ -13,6 +12,6 @@ ctypedef fused ColumnorSizeType: cpdef Column repeat_strings( Column input, ColumnorSizeType repeat_times, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/strings/repeat.pyi b/python/pylibcudf/pylibcudf/strings/repeat.pyi index 5b47213e956..fedb7dee76c 100644 --- a/python/pylibcudf/pylibcudf/strings/repeat.pyi +++ b/python/pylibcudf/pylibcudf/strings/repeat.pyi @@ -1,14 +1,14 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike def repeat_strings( input: Column, repeat_times: Column | int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/repeat.pyx b/python/pylibcudf/pylibcudf/strings/repeat.pyx index 84a305bf866..7a9c5285d02 100644 --- a/python/pylibcudf/pylibcudf/strings/repeat.pyx +++ b/python/pylibcudf/pylibcudf/strings/repeat.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -11,13 +11,14 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream from ..utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["repeat_strings"] cpdef Column repeat_strings( Column input, ColumnorSizeType repeat_times, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -44,7 +45,8 @@ cpdef Column repeat_strings( New column containing the repeated strings. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if ColumnorSizeType is Column: @@ -52,7 +54,7 @@ cpdef Column repeat_strings( c_result = cpp_repeat.repeat_strings( input.view(), repeat_times.view(), - stream.view(), + _cs, mr.get_mr() ) elif ColumnorSizeType is size_type: @@ -60,10 +62,10 @@ cpdef Column repeat_strings( c_result = cpp_repeat.repeat_strings( input.view(), repeat_times, - stream.view(), + _cs, mr.get_mr() ) else: raise ValueError("repeat_times must be size_type or integer") - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/replace.pxd b/python/pylibcudf/pylibcudf/strings/replace.pxd index a486869aada..aea2296b5f9 100644 --- a/python/pylibcudf/pylibcudf/strings/replace.pxd +++ b/python/pylibcudf/pylibcudf/strings/replace.pxd @@ -1,11 +1,10 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.libcudf.types cimport size_type from pylibcudf.scalar cimport Scalar from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column replace( @@ -13,7 +12,7 @@ cpdef Column replace( Scalar target, Scalar repl, size_type maxrepl=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column replace_multiple( @@ -21,7 +20,7 @@ cpdef Column replace_multiple( Column target, Column repl, size_type maxrepl=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) cpdef Column replace_slice( @@ -29,6 +28,6 @@ cpdef Column replace_slice( Scalar repl=*, size_type start=*, size_type stop=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/strings/replace.pyi b/python/pylibcudf/pylibcudf/strings/replace.pyi index 3e62a76d2bf..0e76eb402f7 100644 --- a/python/pylibcudf/pylibcudf/strings/replace.pyi +++ b/python/pylibcudf/pylibcudf/strings/replace.pyi @@ -1,18 +1,18 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar +from pylibcudf.utils import CudaStreamLike def replace( input: Column, target: Scalar, repl: Scalar, maxrepl: int = -1, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def replace_multiple( @@ -20,7 +20,7 @@ def replace_multiple( target: Column, repl: Column, maxrepl: int = -1, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
def replace_slice( @@ -28,6 +28,6 @@ def replace_slice( repl: Scalar | None = None, start: int = 0, stop: int = -1, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/replace.pyx b/python/pylibcudf/pylibcudf/strings/replace.pyx index e1d88fed464..ccd6c924441 100644 --- a/python/pylibcudf/pylibcudf/strings/replace.pyx +++ b/python/pylibcudf/pylibcudf/strings/replace.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -19,6 +19,7 @@ from pylibcudf.scalar cimport Scalar from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["replace", "replace_multiple", "replace_slice"] @@ -27,7 +28,7 @@ cpdef Column replace( Scalar target, Scalar repl, size_type maxrepl=-1, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Replaces target string within each string with the specified replacement string. 
@@ -60,7 +61,8 @@ cpdef Column replace( target_str = (target.c_obj.get()) repl_str = (repl.c_obj.get()) - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -69,11 +71,11 @@ cpdef Column replace( target_str[0], repl_str[0], maxrepl, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column replace_multiple( @@ -81,7 +83,7 @@ cpdef Column replace_multiple( Column target, Column repl, size_type maxrepl=-1, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Replaces target string within each string with the specified replacement string. @@ -109,7 +111,8 @@ cpdef Column replace_multiple( New string column with target replaced. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -117,11 +120,11 @@ cpdef Column replace_multiple( input.view(), target.view(), repl.view(), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column replace_slice( @@ -131,7 +134,7 @@ cpdef Column replace_slice( Scalar repl=None, size_type start=0, size_type stop=-1, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Replaces each string in the column with the provided repl string @@ -162,12 +165,13 @@ cpdef Column replace_slice( New string column """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if repl is None: repl = Scalar.from_libcudf( - cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr()) + 
cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr()) ) cdef const string_scalar* scalar_str = (repl.c_obj.get()) @@ -178,8 +182,8 @@ cpdef Column replace_slice( scalar_str[0], start, stop, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/replace_re.pxd b/python/pylibcudf/pylibcudf/strings/replace_re.pxd index fc833a61045..0d360f8de6f 100644 --- a/python/pylibcudf/pylibcudf/strings/replace_re.pxd +++ b/python/pylibcudf/pylibcudf/strings/replace_re.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column @@ -7,7 +7,6 @@ from pylibcudf.scalar cimport Scalar from pylibcudf.strings.regex_flags cimport regex_flags from pylibcudf.strings.regex_program cimport RegexProgram from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream ctypedef fused Replacement: Column @@ -24,7 +23,7 @@ cpdef Column replace_re( Replacement replacement=*, size_type max_replace_count=*, regex_flags flags=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) @@ -32,6 +31,6 @@ cpdef Column replace_with_backrefs( Column input, RegexProgram prog, str replacement, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/replace_re.pyi b/python/pylibcudf/pylibcudf/strings/replace_re.pyi index 29f8ddfe925..64970928323 100644 --- a/python/pylibcudf/pylibcudf/strings/replace_re.pyi +++ b/python/pylibcudf/pylibcudf/strings/replace_re.pyi @@ -1,15 +1,15 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from typing import overload from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar from pylibcudf.strings.regex_flags import RegexFlags from pylibcudf.strings.regex_program import RegexProgram +from pylibcudf.utils import CudaStreamLike @overload def replace_re( @@ -17,7 +17,7 @@ def replace_re( pattern: RegexProgram, replacement: Scalar, max_replace_count: int = -1, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... @overload @@ -27,13 +27,13 @@ def replace_re( replacement: Column, max_replace_count: int = -1, flags: RegexFlags = RegexFlags.DEFAULT, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def replace_with_backrefs( input: Column, prog: RegexProgram, replacement: str, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/replace_re.pyx b/python/pylibcudf/pylibcudf/strings/replace_re.pyx index 1819dd0ba2b..60e9c4c1666 100644 --- a/python/pylibcudf/pylibcudf/strings/replace_re.pyx +++ b/python/pylibcudf/pylibcudf/strings/replace_re.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from cython.operator cimport dereference from libcpp.memory cimport unique_ptr @@ -19,6 +19,7 @@ from pylibcudf.strings.regex_program cimport RegexProgram from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["replace_re", "replace_with_backrefs"] @@ -28,7 +29,7 @@ cpdef Column replace_re( Replacement replacement=None, size_type max_replace_count=-1, regex_flags flags=regex_flags.DEFAULT, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -64,13 +65,14 @@ cpdef Column replace_re( """ cdef unique_ptr[column] c_result cdef vector[string] c_patterns - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if Patterns is RegexProgram and Replacement is Scalar: if replacement is None: replacement = Scalar.from_libcudf( - cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr()) + cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr()) ) with nogil: c_result = move( @@ -79,12 +81,12 @@ cpdef Column replace_re( patterns.c_obj.get()[0], dereference((replacement.get())), max_replace_count, - stream.view(), + _cs, mr.get_mr() ) ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) elif Patterns is list and Replacement is Column: c_patterns.reserve(len(patterns)) for pattern in patterns: @@ -97,12 +99,12 @@ cpdef Column replace_re( c_patterns, replacement.view(), flags, - stream.view(), + _cs, mr.get_mr() ) ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) else: raise TypeError("Must pass either a RegexProgram and a Scalar or a list") @@ -111,7 +113,7 @@ cpdef Column replace_with_backrefs( 
Column input, RegexProgram prog, str replacement, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -137,7 +139,8 @@ cpdef Column replace_with_backrefs( New strings column. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) cdef string c_replacement = replacement.encode() @@ -146,8 +149,8 @@ cpdef Column replace_with_backrefs( input.view(), prog.c_obj.get()[0], c_replacement, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/reverse.pyi b/python/pylibcudf/pylibcudf/strings/reverse.pyi index 182f4768825..48c602e2d28 100644 --- a/python/pylibcudf/pylibcudf/strings/reverse.pyi +++ b/python/pylibcudf/pylibcudf/strings/reverse.pyi @@ -1,13 +1,13 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike def reverse( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/reverse.pyx b/python/pylibcudf/pylibcudf/strings/reverse.pyx index 49792b5661b..f1d06248523 100644 --- a/python/pylibcudf/pylibcudf/strings/reverse.pyx +++ b/python/pylibcudf/pylibcudf/strings/reverse.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -9,10 +9,11 @@ from pylibcudf.libcudf.strings cimport reverse as cpp_reverse from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["reverse"] -cpdef Column reverse(Column input, Stream stream=None, DeviceMemoryResource mr=None): +cpdef Column reverse(Column input, object stream=None, DeviceMemoryResource mr=None): """Reverses the characters within each string. Any null string entries return corresponding null output column entries. @@ -32,9 +33,10 @@ cpdef Column reverse(Column input, Stream stream=None, DeviceMemoryResource mr=N New strings column """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_reverse.reverse(input.view(), stream.view(), mr.get_mr()) + c_result = cpp_reverse.reverse(input.view(), _cs, mr.get_mr()) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/slice.pxd b/python/pylibcudf/pylibcudf/strings/slice.pxd index 6bb5a8d3611..9612ead3108 100644 --- a/python/pylibcudf/pylibcudf/strings/slice.pxd +++ b/python/pylibcudf/pylibcudf/strings/slice.pxd @@ -1,10 +1,9 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.scalar cimport Scalar from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream ctypedef fused ColumnOrScalar: Column @@ -15,6 +14,6 @@ cpdef Column slice_strings( ColumnOrScalar start=*, ColumnOrScalar stop=*, Scalar step=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/slice.pyi b/python/pylibcudf/pylibcudf/strings/slice.pyi index 73ee8c31b5b..ac2e4d12f1f 100644 --- a/python/pylibcudf/pylibcudf/strings/slice.pyi +++ b/python/pylibcudf/pylibcudf/strings/slice.pyi @@ -1,17 +1,17 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar +from pylibcudf.utils import CudaStreamLike def slice_strings( input: Column, start: Column | Scalar | None = None, stop: Column | Scalar | None = None, step: Scalar | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/slice.pyx b/python/pylibcudf/pylibcudf/strings/slice.pyx index 2b5bbf2f621..b3ac2cd8bfe 100644 --- a/python/pylibcudf/pylibcudf/strings/slice.pyx +++ b/python/pylibcudf/pylibcudf/strings/slice.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -18,6 +18,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream from ..utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["slice_strings"] @@ -26,7 +27,7 @@ cpdef Column slice_strings( ColumnOrScalar start=None, ColumnOrScalar stop=None, Scalar step=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Perform a slice operation on a strings column. @@ -60,7 +61,8 @@ cpdef Column slice_strings( cdef numeric_scalar[size_type]* cpp_start cdef numeric_scalar[size_type]* cpp_stop cdef numeric_scalar[size_type]* cpp_step - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if input is None: @@ -80,22 +82,22 @@ cpdef Column slice_strings( input.view(), start.view(), stop.view(), - stream.view(), + _cs, mr.get_mr() ) elif ColumnOrScalar is Scalar: if start is None: start = Scalar.from_libcudf( - cpp_make_fixed_width_scalar(0, stream.view(), mr.get_mr()) + cpp_make_fixed_width_scalar(0, _stream.view().value(), mr.get_mr()) ) if stop is None: stop = Scalar.from_libcudf( - cpp_make_fixed_width_scalar(0, stream.view(), mr.get_mr()) + cpp_make_fixed_width_scalar(0, _stream.view().value(), mr.get_mr()) ) if step is None: step = Scalar.from_libcudf( - cpp_make_fixed_width_scalar(1, stream.view(), mr.get_mr()) + cpp_make_fixed_width_scalar(1, _stream.view().value(), mr.get_mr()) ) cpp_start = start.c_obj.get() @@ -108,10 +110,10 @@ cpdef Column slice_strings( dereference(cpp_start), dereference(cpp_stop), dereference(cpp_step), - stream.view(), + _cs, mr.get_mr() ) else: raise ValueError("start, stop, and step must be either Column or Scalar") - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff 
--git a/python/pylibcudf/pylibcudf/strings/split/partition.pxd b/python/pylibcudf/pylibcudf/strings/split/partition.pxd index d8001682b32..e3da533c90c 100644 --- a/python/pylibcudf/pylibcudf/strings/split/partition.pxd +++ b/python/pylibcudf/pylibcudf/strings/split/partition.pxd @@ -1,17 +1,16 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.scalar cimport Scalar from pylibcudf.table cimport Table from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Table partition( - Column input, Scalar delimiter=*, Stream stream=*, DeviceMemoryResource mr=* + Column input, Scalar delimiter=*, object stream = *, DeviceMemoryResource mr=* ) cpdef Table rpartition( - Column input, Scalar delimiter=*, Stream stream=*, DeviceMemoryResource mr=* + Column input, Scalar delimiter=*, object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/split/partition.pyi b/python/pylibcudf/pylibcudf/strings/split/partition.pyi index d919b68153c..cef2d16aea6 100644 --- a/python/pylibcudf/pylibcudf/strings/split/partition.pyi +++ b/python/pylibcudf/pylibcudf/strings/split/partition.pyi @@ -1,22 +1,22 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar from pylibcudf.table import Table +from pylibcudf.utils import CudaStreamLike def partition( input: Column, delimiter: Scalar | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... 
def rpartition( input: Column, delimiter: Scalar | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... diff --git a/python/pylibcudf/pylibcudf/strings/split/partition.pyx b/python/pylibcudf/pylibcudf/strings/split/partition.pyx index 728d7b9975d..ce813c10bba 100644 --- a/python/pylibcudf/pylibcudf/strings/split/partition.pyx +++ b/python/pylibcudf/pylibcudf/strings/split/partition.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -16,13 +16,14 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream from cython.operator import dereference +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["partition", "rpartition"] cpdef Table partition( Column input, Scalar delimiter=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -46,12 +47,13 @@ cpdef Table partition( """ cdef unique_ptr[table] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if delimiter is None: delimiter = Scalar.from_libcudf( - cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr()) + cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr()) ) cdef const string_scalar* c_delimiter = ( @@ -62,16 +64,16 @@ cpdef Table partition( c_result = cpp_partition.partition( input.view(), dereference(c_delimiter), - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Table rpartition( Column input, Scalar delimiter=None, - Stream stream=None, + object stream=None, 
DeviceMemoryResource mr=None, ): """ @@ -95,12 +97,13 @@ cpdef Table rpartition( """ cdef unique_ptr[table] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if delimiter is None: delimiter = Scalar.from_libcudf( - cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr()) + cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr()) ) cdef const string_scalar* c_delimiter = ( @@ -111,8 +114,8 @@ cpdef Table rpartition( c_result = cpp_partition.rpartition( input.view(), dereference(c_delimiter), - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/split/split.pxd b/python/pylibcudf/pylibcudf/strings/split/split.pxd index 06b77154b18..2372a177944 100644 --- a/python/pylibcudf/pylibcudf/strings/split/split.pxd +++ b/python/pylibcudf/pylibcudf/strings/split/split.pxd @@ -7,50 +7,49 @@ from pylibcudf.scalar cimport Scalar from pylibcudf.strings.regex_program cimport RegexProgram from pylibcudf.table cimport Table from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Table split( - Column strings_column, Scalar delimiter, size_type maxsplit, Stream stream=*, + Column strings_column, Scalar delimiter, size_type maxsplit, object stream = *, DeviceMemoryResource mr=*, ) cpdef Table rsplit( - Column strings_column, Scalar delimiter, size_type maxsplit, Stream stream=*, + Column strings_column, Scalar delimiter, size_type maxsplit, object stream = *, DeviceMemoryResource mr=*, ) cpdef Column split_record( - Column strings, Scalar delimiter, size_type maxsplit, Stream stream=*, + Column strings, Scalar delimiter, size_type maxsplit, object stream = *, DeviceMemoryResource mr=*, ) cpdef Column rsplit_record( - Column strings, Scalar delimiter, 
size_type maxsplit, Stream stream=*, + Column strings, Scalar delimiter, size_type maxsplit, object stream = *, DeviceMemoryResource mr=*, ) cpdef Table split_re( - Column input, RegexProgram prog, size_type maxsplit, Stream stream=*, + Column input, RegexProgram prog, size_type maxsplit, object stream = *, DeviceMemoryResource mr=*, ) cpdef Table rsplit_re( - Column input, RegexProgram prog, size_type maxsplit, Stream stream=*, + Column input, RegexProgram prog, size_type maxsplit, object stream = *, DeviceMemoryResource mr=*, ) cpdef Column split_record_re( - Column input, RegexProgram prog, size_type maxsplit, Stream stream=*, + Column input, RegexProgram prog, size_type maxsplit, object stream = *, DeviceMemoryResource mr=*, ) cpdef Column rsplit_record_re( - Column input, RegexProgram prog, size_type maxsplit, Stream stream=*, + Column input, RegexProgram prog, size_type maxsplit, object stream = *, DeviceMemoryResource mr=*, ) cpdef Column split_part( - Column input, Scalar delimiter, size_type index, Stream stream=*, + Column input, Scalar delimiter, size_type index, object stream = *, DeviceMemoryResource mr=*, ) diff --git a/python/pylibcudf/pylibcudf/strings/split/split.pyi b/python/pylibcudf/pylibcudf/strings/split/split.pyi index ae64e300b63..7a775bd960c 100644 --- a/python/pylibcudf/pylibcudf/strings/split/split.pyi +++ b/python/pylibcudf/pylibcudf/strings/split/split.pyi @@ -2,73 +2,73 @@ # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar from pylibcudf.strings.regex_program import RegexProgram from pylibcudf.table import Table +from pylibcudf.utils import CudaStreamLike def split( strings_column: Column, delimiter: Scalar, maxsplit: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... 
def rsplit( strings_column: Column, delimiter: Scalar, maxsplit: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def split_record( strings: Column, delimiter: Scalar, maxsplit: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def rsplit_record( strings: Column, delimiter: Scalar, maxsplit: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def split_re( input: Column, prog: RegexProgram, maxsplit: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def rsplit_re( input: Column, prog: RegexProgram, maxsplit: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def split_record_re( input: Column, prog: RegexProgram, maxsplit: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def rsplit_record_re( input: Column, prog: RegexProgram, maxsplit: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def split_part( input: Column, delimiter: Scalar, index: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/strings/split/split.pyx b/python/pylibcudf/pylibcudf/strings/split/split.pyx index 0635df87e13..52803b08eb0 100644 --- a/python/pylibcudf/pylibcudf/strings/split/split.pyx +++ b/python/pylibcudf/pylibcudf/strings/split/split.pyx @@ -16,6 +16,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream from cython.operator import dereference +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "rsplit", @@ -32,7 +33,7 @@ cpdef Table split( Column strings_column, Scalar delimiter, size_type maxsplit, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -65,7 +66,8 @@ cpdef Table split( cdef const string_scalar* c_delimiter = ( delimiter.c_obj.get() ) - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -73,18 +75,18 @@ cpdef Table split( strings_column.view(), dereference(c_delimiter), maxsplit, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Table rsplit( Column strings_column, Scalar delimiter, size_type maxsplit, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -117,7 +119,8 @@ cpdef Table rsplit( cdef const string_scalar* c_delimiter = ( delimiter.c_obj.get() ) - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -125,17 +128,17 @@ cpdef Table rsplit( strings_column.view(), dereference(c_delimiter), maxsplit, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Column split_record( Column strings, Scalar delimiter, size_type maxsplit, - Stream stream=None, + object 
stream=None, DeviceMemoryResource mr=None, ): """ @@ -164,7 +167,8 @@ cpdef Column split_record( cdef const string_scalar* c_delimiter = ( delimiter.c_obj.get() ) - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -172,18 +176,18 @@ cpdef Column split_record( strings.view(), dereference(c_delimiter), maxsplit, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column rsplit_record( Column strings, Scalar delimiter, size_type maxsplit, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -213,7 +217,8 @@ cpdef Column rsplit_record( cdef const string_scalar* c_delimiter = ( delimiter.c_obj.get() ) - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -221,18 +226,18 @@ cpdef Column rsplit_record( strings.view(), dereference(c_delimiter), maxsplit, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Table split_re( Column input, RegexProgram prog, size_type maxsplit, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -259,7 +264,8 @@ cpdef Table split_re( A table of columns of strings. 
""" cdef unique_ptr[table] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -267,17 +273,17 @@ cpdef Table split_re( input.view(), prog.c_obj.get()[0], maxsplit, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Table rsplit_re( Column input, RegexProgram prog, size_type maxsplit, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -305,7 +311,8 @@ cpdef Table rsplit_re( A table of columns of strings. """ cdef unique_ptr[table] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -313,17 +320,17 @@ cpdef Table rsplit_re( input.view(), prog.c_obj.get()[0], maxsplit, - stream.view(), + _cs, mr.get_mr() ) - return Table.from_libcudf(move(c_result), stream, mr) + return Table.from_libcudf(move(c_result), _stream, mr) cpdef Column split_record_re( Column input, RegexProgram prog, size_type maxsplit, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -350,7 +357,8 @@ cpdef Column split_record_re( Lists column of strings. 
""" cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -358,14 +366,14 @@ cpdef Column split_record_re( input.view(), prog.c_obj.get()[0], maxsplit, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column rsplit_record_re( - Column input, RegexProgram prog, size_type maxsplit, Stream stream=None, + Column input, RegexProgram prog, size_type maxsplit, object stream=None, DeviceMemoryResource mr=None, ): """ @@ -392,7 +400,8 @@ cpdef Column rsplit_record_re( Lists column of strings. """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -400,22 +409,23 @@ cpdef Column rsplit_record_re( input.view(), prog.c_obj.get()[0], maxsplit, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column split_part( - Column input, Scalar delimiter, size_type index, Stream stream=None, + Column input, Scalar delimiter, size_type index, object stream=None, DeviceMemoryResource mr=None, ): cdef unique_ptr[column] c_result cdef const string_scalar* c_delimiter = ( delimiter.c_obj.get() ) - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -423,8 +433,8 @@ cpdef Column split_part( input.view(), dereference(c_delimiter), index, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/strip.pxd 
b/python/pylibcudf/pylibcudf/strings/strip.pxd index d3f41ce9a5c..a37ac40c523 100644 --- a/python/pylibcudf/pylibcudf/strings/strip.pxd +++ b/python/pylibcudf/pylibcudf/strings/strip.pxd @@ -1,17 +1,16 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.scalar cimport Scalar from pylibcudf.strings.side_type cimport side_type from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column strip( Column input, side_type side=*, Scalar to_strip=*, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/strip.pyi b/python/pylibcudf/pylibcudf/strings/strip.pyi index ecb80b632d7..786079769c7 100644 --- a/python/pylibcudf/pylibcudf/strings/strip.pyi +++ b/python/pylibcudf/pylibcudf/strings/strip.pyi @@ -1,17 +1,17 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar from pylibcudf.strings.side_type import SideType +from pylibcudf.utils import CudaStreamLike def strip( input: Column, side: SideType = SideType.BOTH, to_strip: Scalar | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
diff --git a/python/pylibcudf/pylibcudf/strings/strip.pyx b/python/pylibcudf/pylibcudf/strings/strip.pyx index 3b477fa83ad..607428b6f69 100644 --- a/python/pylibcudf/pylibcudf/strings/strip.pyx +++ b/python/pylibcudf/pylibcudf/strings/strip.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from cython.operator cimport dereference @@ -16,6 +16,7 @@ from pylibcudf.strings.side_type cimport side_type from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["strip"] @@ -23,7 +24,7 @@ cpdef Column strip( Column input, side_type side=side_type.BOTH, Scalar to_strip=None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Removes the specified characters from the beginning @@ -47,12 +48,13 @@ cpdef Column strip( pylibcudf.Column New strings column. 
""" - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if to_strip is None: to_strip = Scalar.from_libcudf( - cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr()) + cpp_make_string_scalar("".encode(), _stream.view().value(), mr.get_mr()) ) cdef unique_ptr[column] c_result @@ -64,8 +66,8 @@ cpdef Column strip( input.view(), side, dereference(cpp_to_strip), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/strings/translate.pxd b/python/pylibcudf/pylibcudf/strings/translate.pxd index 2d74e2f4a2c..d6a80ddfd43 100644 --- a/python/pylibcudf/pylibcudf/strings/translate.pxd +++ b/python/pylibcudf/pylibcudf/strings/translate.pxd @@ -1,14 +1,13 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.libcudf.strings.translate cimport filter_type from pylibcudf.scalar cimport Scalar from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column translate( - Column input, dict chars_table, Stream stream=*, DeviceMemoryResource mr=* + Column input, dict chars_table, object stream = *, DeviceMemoryResource mr=* ) cpdef Column filter_characters( @@ -16,6 +15,6 @@ cpdef Column filter_characters( dict characters_to_filter, filter_type keep_characters, Scalar replacement, - Stream stream=*, + object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/translate.pyi b/python/pylibcudf/pylibcudf/strings/translate.pyi index a01b786fd6f..9e7624e0b17 100644 --- a/python/pylibcudf/pylibcudf/strings/translate.pyi +++ b/python/pylibcudf/pylibcudf/strings/translate.pyi @@ -1,13 +1,13 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from collections.abc import Mapping from enum import IntEnum from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.scalar import Scalar +from pylibcudf.utils import CudaStreamLike class FilterType(IntEnum): KEEP = ... @@ -16,7 +16,7 @@ class FilterType(IntEnum): def translate( input: Column, chars_table: Mapping[int | str, int | str], - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... 
def filter_characters( @@ -24,6 +24,6 @@ def filter_characters( characters_to_filter: Mapping[int | str, int | str], keep_characters: FilterType, replacement: Scalar, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/translate.pyx b/python/pylibcudf/pylibcudf/strings/translate.pyx index 06c772330df..2a60ff881d4 100644 --- a/python/pylibcudf/pylibcudf/strings/translate.pyx +++ b/python/pylibcudf/pylibcudf/strings/translate.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr from libcpp.pair cimport pair @@ -15,6 +15,7 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream from cython.operator import dereference +from cuda.bindings.cyruntime cimport cudaStream_t from pylibcudf.libcudf.strings.translate import \ filter_type as FilterType # no-cython-lint @@ -43,7 +44,7 @@ cdef vector[pair[char_utf8, char_utf8]] _table_to_c_table(dict table): cpdef Column translate( - Column input, dict chars_table, Stream stream=None, DeviceMemoryResource mr=None + Column input, dict chars_table, object stream=None, DeviceMemoryResource mr=None ): """ Translates individual characters within each string. 
@@ -69,17 +70,18 @@ cpdef Column translate( cdef vector[pair[char_utf8, char_utf8]] c_chars_table = _table_to_c_table( chars_table ) - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_translate.translate( input.view(), c_chars_table, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column filter_characters( @@ -87,7 +89,7 @@ cpdef Column filter_characters( dict characters_to_filter, filter_type keep_characters, Scalar replacement, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """ @@ -124,7 +126,8 @@ cpdef Column filter_characters( cdef const string_scalar* c_replacement = ( replacement.c_obj.get() ) - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -133,9 +136,9 @@ cpdef Column filter_characters( c_characters_to_filter, keep_characters, dereference(c_replacement), - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) FilterType.__str__ = FilterType.__repr__ diff --git a/python/pylibcudf/pylibcudf/strings/wrap.pxd b/python/pylibcudf/pylibcudf/strings/wrap.pxd index 62faaff36f0..ea74927498d 100644 --- a/python/pylibcudf/pylibcudf/strings/wrap.pxd +++ b/python/pylibcudf/pylibcudf/strings/wrap.pxd @@ -1,12 +1,11 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from pylibcudf.column cimport Column from pylibcudf.libcudf.types cimport size_type from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -from rmm.pylibrmm.stream cimport Stream cpdef Column wrap( - Column input, size_type width, Stream stream=*, DeviceMemoryResource mr=* + Column input, size_type width, object stream = *, DeviceMemoryResource mr=* ) diff --git a/python/pylibcudf/pylibcudf/strings/wrap.pyi b/python/pylibcudf/pylibcudf/strings/wrap.pyi index 00c939cc420..aa88b64a391 100644 --- a/python/pylibcudf/pylibcudf/strings/wrap.pyi +++ b/python/pylibcudf/pylibcudf/strings/wrap.pyi @@ -1,14 +1,14 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column +from pylibcudf.utils import CudaStreamLike def wrap( input: Column, width: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... diff --git a/python/pylibcudf/pylibcudf/strings/wrap.pyx b/python/pylibcudf/pylibcudf/strings/wrap.pyx index 504c469debc..28bc310b5a4 100644 --- a/python/pylibcudf/pylibcudf/strings/wrap.pyx +++ b/python/pylibcudf/pylibcudf/strings/wrap.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr @@ -10,11 +10,12 @@ from pylibcudf.libcudf.types cimport size_type from pylibcudf.utils cimport _get_stream, _get_memory_resource from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from rmm.pylibrmm.stream cimport Stream +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["wrap"] cpdef Column wrap( - Column input, size_type width, Stream stream=None, DeviceMemoryResource mr=None + Column input, size_type width, object stream=None, DeviceMemoryResource mr=None ): """ Wraps strings onto multiple lines shorter than `width` by @@ -41,15 +42,16 @@ cpdef Column wrap( Column of wrapped strings """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_wrap.wrap( input.view(), width, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) diff --git a/python/pylibcudf/pylibcudf/table.pxd b/python/pylibcudf/pylibcudf/table.pxd index 4a4a963e0de..76c38dacf3f 100644 --- a/python/pylibcudf/pylibcudf/table.pxd +++ b/python/pylibcudf/pylibcudf/table.pxd @@ -4,7 +4,6 @@ from libcpp.memory cimport unique_ptr from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource cdef class Table: @@ -20,7 +19,7 @@ cdef class Table: @staticmethod cdef Table from_libcudf( unique_ptr[table] libcudf_tbl, - Stream stream, + object stream, DeviceMemoryResource mr ) @@ -31,8 +30,8 @@ cdef class Table: cdef Table from_table_view_of_arbitrary( const table_view& tv, object owner, - Stream stream, + object stream, ) cpdef list columns(self) - cpdef Table copy(self, Stream stream=*, DeviceMemoryResource 
mr=*) + cpdef Table copy(self, object stream = *, DeviceMemoryResource mr=*) diff --git a/python/pylibcudf/pylibcudf/table.pyi b/python/pylibcudf/pylibcudf/table.pyi index 0f8de52b132..263bf813c75 100644 --- a/python/pylibcudf/pylibcudf/table.pyi +++ b/python/pylibcudf/pylibcudf/table.pyi @@ -4,11 +4,11 @@ from typing import Any from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf._interop_helpers import ArrowLike, ColumnMetadata from pylibcudf.column import Column from pylibcudf.types import DataType +from pylibcudf.utils import CudaStreamLike class Table: def __init__(self, column: list[Column]): ... @@ -18,22 +18,22 @@ class Table: def columns(self) -> list[Column]: ... def copy( self, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... def to_arrow( self, metadata: list[ColumnMetadata | str] | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, ) -> ArrowLike: ... # Private methods below are included because polars is currently using them, # but we want to remove stubs for these private methods eventually def _to_schema(self, metadata: Any = None) -> Any: ... - def _to_host_array(self, stream: Stream) -> Any: ... + def _to_host_array(self, stream: CudaStreamLike) -> Any: ... @staticmethod def from_arrow( arrow_like: ArrowLike, dtype: DataType | None = None, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... 
diff --git a/python/pylibcudf/pylibcudf/table.pyx b/python/pylibcudf/pylibcudf/table.pyx index 654cf9bb60b..6b62a5428f9 100644 --- a/python/pylibcudf/pylibcudf/table.pyx +++ b/python/pylibcudf/pylibcudf/table.pyx @@ -39,6 +39,7 @@ from pylibcudf._interop_helpers cimport ( _metadata_to_libcudf, ) from ._interop_helpers import ArrowLike, ColumnMetadata, _ObjectWithArrowMetadata +from cuda.bindings.cyruntime cimport cudaStream_t try: import pyarrow as pa @@ -105,7 +106,7 @@ cdef class Table: def from_arrow( obj: ArrowLike, dtype: DataType | None = None, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None ) -> Table: """ @@ -154,7 +155,8 @@ cdef class Table: cdef _ArrowTableHolder result cdef unique_ptr[arrow_table] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) if hasattr(obj, "__arrow_c_device_array__"): @@ -170,7 +172,7 @@ cdef class Table: c_result = make_unique[arrow_table]( move(dereference(c_schema)), move(dereference(c_array)), - stream.view(), + _cs, result.mr.get_mr(), ) result.tbl.swap(c_result) @@ -193,7 +195,7 @@ cdef class Table: with nogil: c_result = make_unique[arrow_table]( move(dereference(c_stream)), - stream.view(), + _cs, result.mr.get_mr(), ) result.tbl.swap(c_result) @@ -233,7 +235,7 @@ cdef class Table: @staticmethod cdef Table from_libcudf( unique_ptr[table] libcudf_tbl, - Stream stream, + object stream, DeviceMemoryResource mr ): """Create a Table from a libcudf table. @@ -275,7 +277,7 @@ cdef class Table: cdef Table from_table_view_of_arbitrary( const table_view& tv, object owner, - Stream stream, + object stream, ): """Create a Table from a libcudf table_view into an arbitrary owner. @@ -292,8 +294,9 @@ cdef class Table: # For efficiency, prohibit calling this overload with a Table owner. 
assert not isinstance(owner, Table) cdef int i + cdef Stream _stream = stream return Table([ - Column.from_column_view_of_arbitrary(tv.column(i), owner, stream) + Column.from_column_view_of_arbitrary(tv.column(i), owner, _stream) for i in range(tv.num_columns()) ]) @@ -315,7 +318,7 @@ cdef class Table: """The shape of this table""" return (self.num_rows(), self.num_columns()) - cpdef Table copy(self, Stream stream=None, DeviceMemoryResource mr=None): + cpdef Table copy(self, object stream=None, DeviceMemoryResource mr=None): """Create a deep copy of the table. Parameters @@ -330,9 +333,9 @@ cdef class Table: Table A new Table with deep copies of all columns. """ - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) mr = _get_memory_resource(mr) - return Table([col.copy(stream, mr) for col in self._columns]) + return Table([col.copy(_stream, mr) for col in self._columns]) def _to_schema(self, metadata=None): """Create an Arrow schema from this table.""" @@ -356,11 +359,13 @@ cdef class Table: return PyCapsule_New(raw_schema_ptr, "arrow_schema", _release_schema) - def _to_host_array(self, Stream stream): + def _to_host_array(self, object stream): cdef ArrowArray* raw_host_array_ptr + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() with nogil: - raw_host_array_ptr = to_arrow_host_raw(self.view(), stream.view()) + raw_host_array_ptr = to_arrow_host_raw(self.view(), _cs) return PyCapsule_New(raw_host_array_ptr, "arrow_array", _release_array) diff --git a/python/pylibcudf/pylibcudf/transform.pxd b/python/pylibcudf/pylibcudf/transform.pxd index a92ffb3f27e..8333abd6df0 100644 --- a/python/pylibcudf/pylibcudf/transform.pxd +++ b/python/pylibcudf/pylibcudf/transform.pxd @@ -3,7 +3,6 @@ from libcpp cimport bool from pylibcudf.libcudf.types cimport bitmask_type, data_type from pylibcudf.libcudf.types cimport null_aware, output_nullability -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource 
cimport DeviceMemoryResource from .column cimport Column @@ -14,30 +13,30 @@ from .types cimport DataType cpdef tuple[gpumemoryview, int] nans_to_nulls( - Column input, Stream stream = *, DeviceMemoryResource mr = * + Column input, object stream = *, DeviceMemoryResource mr = * ) cpdef Column column_nans_to_nulls( - Column input, Stream stream = *, DeviceMemoryResource mr = * + Column input, object stream = *, DeviceMemoryResource mr = * ) cpdef Column compute_column( - Table input, Expression expr, Stream stream = *, DeviceMemoryResource mr = * + Table input, Expression expr, object stream = *, DeviceMemoryResource mr = * ) cpdef Column compute_column_jit( - Table input, Expression expr, Stream stream = *, DeviceMemoryResource mr = * + Table input, Expression expr, object stream = *, DeviceMemoryResource mr = * ) cpdef tuple[gpumemoryview, int] bools_to_mask( - Column input, Stream stream = *, DeviceMemoryResource mr = * + Column input, object stream = *, DeviceMemoryResource mr = * ) cpdef Column mask_to_bools( Py_ssize_t bitmask, int begin_bit, int end_bit, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) @@ -48,17 +47,17 @@ cpdef Column transform( bool is_ptx, null_aware is_null_aware, output_nullability null_policy, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) cpdef tuple[Table, Column] encode( - Table input, Stream stream = *, DeviceMemoryResource mr = * + Table input, object stream = *, DeviceMemoryResource mr = * ) cpdef Table one_hot_encode( Column input_column, Column categories, - Stream stream = *, + object stream = *, DeviceMemoryResource mr = *, ) diff --git a/python/pylibcudf/pylibcudf/transform.pyi b/python/pylibcudf/pylibcudf/transform.pyi index 2d2038f07a0..e979575f590 100644 --- a/python/pylibcudf/pylibcudf/transform.pyi +++ b/python/pylibcudf/pylibcudf/transform.pyi @@ -1,46 +1,46 @@ # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.expressions import Expression from pylibcudf.gpumemoryview import gpumemoryview from pylibcudf.table import Table from pylibcudf.types import DataType, NullAware, OutputNullability +from pylibcudf.utils import CudaStreamLike def nans_to_nulls( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[gpumemoryview, int]: ... def column_nans_to_nulls( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def compute_column( input: Table, expr: Expression, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def compute_column_jit( input: Table, expr: Expression, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def bools_to_mask( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[gpumemoryview, int]: ... def mask_to_bools( bitmask: int, begin_bit: int, end_bit: int, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def transform( @@ -50,17 +50,17 @@ def transform( is_ptx: bool, null_aware: NullAware = NullAware.NO, null_policy: OutputNullability = OutputNullability.PRESERVE, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def encode( input: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> tuple[Table, Column]: ... 
def one_hot_encode( input: Column, categories: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... diff --git a/python/pylibcudf/pylibcudf/transform.pyx b/python/pylibcudf/pylibcudf/transform.pyx index 3baf6c5306e..0025ed7d566 100644 --- a/python/pylibcudf/pylibcudf/transform.pyx +++ b/python/pylibcudf/pylibcudf/transform.pyx @@ -26,6 +26,7 @@ from .expressions cimport Expression from .gpumemoryview cimport gpumemoryview from .types cimport DataType, null_aware, output_nullability from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "bools_to_mask", @@ -41,7 +42,7 @@ __all__ = [ cpdef tuple[gpumemoryview, int] nans_to_nulls( Column input, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create a null mask preserving existing nulls and converting nans to null. @@ -63,21 +64,26 @@ cpdef tuple[gpumemoryview, int] nans_to_nulls( """ cdef pair[unique_ptr[device_buffer], size_type] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_transform.nans_to_nulls(input.view(), stream.view(), mr.get_mr()) + c_result = cpp_transform.nans_to_nulls( + input.view(), _cs, mr.get_mr() + ) return ( - gpumemoryview(DeviceBuffer.c_from_unique_ptr(move(c_result.first), stream, mr)), + gpumemoryview( + DeviceBuffer.c_from_unique_ptr(move(c_result.first), _stream, mr) + ), c_result.second ) cpdef Column column_nans_to_nulls( Column input, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create a column with nans converted to nulls. 
@@ -100,19 +106,20 @@ cpdef Column column_nans_to_nulls( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_transform.column_nans_to_nulls( - input.view(), stream.view(), mr.get_mr() + input.view(), _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column compute_column( - Table input, Expression expr, Stream stream=None, DeviceMemoryResource mr=None + Table input, Expression expr, object stream=None, DeviceMemoryResource mr=None ): """Create a column by evaluating an expression on a table. @@ -135,19 +142,20 @@ cpdef Column compute_column( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_transform.compute_column( - input.view(), dereference(expr.c_obj.get()), stream.view(), mr.get_mr() + input.view(), dereference(expr.c_obj.get()), _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column compute_column_jit( - Table input, Expression expr, Stream stream=None, DeviceMemoryResource mr=None + Table input, Expression expr, object stream=None, DeviceMemoryResource mr=None ): """ Create a column by evaluating an expression on a table @@ -172,20 +180,21 @@ cpdef Column compute_column_jit( """ cdef unique_ptr[column] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_transform.compute_column_jit( - input.view(), dereference(expr.c_obj.get()), stream.view(), mr.get_mr() + input.view(), dereference(expr.c_obj.get()), _cs, 
mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef tuple[gpumemoryview, int] bools_to_mask( Column input, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create a bitmask from a column of boolean elements @@ -206,14 +215,19 @@ cpdef tuple[gpumemoryview, int] bools_to_mask( """ cdef pair[unique_ptr[device_buffer], size_type] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_transform.bools_to_mask(input.view(), stream.view(), mr.get_mr()) + c_result = cpp_transform.bools_to_mask( + input.view(), _cs, mr.get_mr() + ) return ( - gpumemoryview(DeviceBuffer.c_from_unique_ptr(move(c_result.first), stream, mr)), + gpumemoryview( + DeviceBuffer.c_from_unique_ptr(move(c_result.first), _stream, mr) + ), c_result.second ) @@ -222,7 +236,7 @@ cpdef Column mask_to_bools( Py_ssize_t bitmask, int begin_bit, int end_bit, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Creates a boolean column from given bitmask. 
@@ -248,7 +262,8 @@ cpdef Column mask_to_bools( cdef unique_ptr[column] c_result cdef bitmask_type * bitmask_ptr = bitmask - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: @@ -256,11 +271,11 @@ cpdef Column mask_to_bools( bitmask_ptr, begin_bit, end_bit, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef Column transform( @@ -270,7 +285,7 @@ cpdef Column transform( bool is_ptx, null_aware is_null_aware, output_nullability null_policy, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Create a new column by applying a transform function against @@ -312,7 +327,8 @@ cpdef Column transform( cdef output_nullability c_null_policy = null_policy cdef optional[void *] user_data - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) for input in inputs: @@ -327,14 +343,14 @@ cpdef Column transform( user_data, c_is_null_aware, c_null_policy, - stream.view(), + _cs, mr.get_mr() ) - return Column.from_libcudf(move(c_result), stream, mr) + return Column.from_libcudf(move(c_result), _stream, mr) cpdef tuple[Table, Column] encode( - Table input, Stream stream=None, DeviceMemoryResource mr=None + Table input, object stream=None, DeviceMemoryResource mr=None ): """Encode the rows of the given table as integers. 
@@ -355,21 +371,22 @@ cpdef tuple[Table, Column] encode( """ cdef pair[unique_ptr[table], unique_ptr[column]] c_result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - c_result = cpp_transform.encode(input.view(), stream.view(), mr.get_mr()) + c_result = cpp_transform.encode(input.view(), _cs, mr.get_mr()) return ( - Table.from_libcudf(move(c_result.first), stream, mr), - Column.from_libcudf(move(c_result.second), stream, mr) + Table.from_libcudf(move(c_result.first), _stream, mr), + Column.from_libcudf(move(c_result.second), _stream, mr) ) cpdef Table one_hot_encode( Column input, Column categories, - Stream stream=None, + object stream=None, DeviceMemoryResource mr=None, ): """Encodes `input` by generating a new column @@ -395,19 +412,20 @@ cpdef Table one_hot_encode( cdef pair[unique_ptr[column], table_view] c_result cdef Table owner_table - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_transform.one_hot_encode( input.view(), categories.view(), - stream.view(), + _cs, mr.get_mr() ) owner_table = Table( - [Column.from_libcudf(move(c_result.first), stream, mr)] + [Column.from_libcudf(move(c_result.first), _stream, mr)] * c_result.second.num_columns() ) diff --git a/python/pylibcudf/pylibcudf/transpose.pxd b/python/pylibcudf/pylibcudf/transpose.pxd index 6c432a62b5f..a63d52da9e1 100644 --- a/python/pylibcudf/pylibcudf/transpose.pxd +++ b/python/pylibcudf/pylibcudf/transpose.pxd @@ -1,9 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from .table cimport Table -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource -cpdef Table transpose(Table input_table, Stream stream=*, DeviceMemoryResource mr=*) +cpdef Table transpose(Table input_table, object stream = *, DeviceMemoryResource mr=*) diff --git a/python/pylibcudf/pylibcudf/transpose.pyi b/python/pylibcudf/pylibcudf/transpose.pyi index 4487e49feaf..fbf2d3fce2d 100644 --- a/python/pylibcudf/pylibcudf/transpose.pyi +++ b/python/pylibcudf/pylibcudf/transpose.pyi @@ -1,13 +1,13 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.table import Table +from pylibcudf.utils import CudaStreamLike def transpose( input_table: Table, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... diff --git a/python/pylibcudf/pylibcudf/transpose.pyx b/python/pylibcudf/pylibcudf/transpose.pyx index e7cdbe503eb..e15aa45ce77 100644 --- a/python/pylibcudf/pylibcudf/transpose.pyx +++ b/python/pylibcudf/pylibcudf/transpose.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp.memory cimport unique_ptr from libcpp.pair cimport pair @@ -13,11 +13,12 @@ from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .column cimport Column from .table cimport Table from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = ["transpose"] cpdef Table transpose( - Table input_table, Stream stream=None, DeviceMemoryResource mr=None + Table input_table, object stream=None, DeviceMemoryResource mr=None ): """Transpose a Table. @@ -39,16 +40,17 @@ cpdef Table transpose( """ cdef pair[unique_ptr[column], table_view] c_result cdef Table owner_table - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: c_result = cpp_transpose.transpose( - input_table.view(), stream.view(), mr.get_mr() + input_table.view(), _cs, mr.get_mr() ) owner_table = Table( - [Column.from_libcudf(move(c_result.first), stream, mr)] * + [Column.from_libcudf(move(c_result.first), _stream, mr)] * c_result.second.num_columns() ) diff --git a/python/pylibcudf/pylibcudf/unary.pxd b/python/pylibcudf/pylibcudf/unary.pxd index 69ec06ecea6..44a4f796085 100644 --- a/python/pylibcudf/pylibcudf/unary.pxd +++ b/python/pylibcudf/pylibcudf/unary.pxd @@ -1,9 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool from pylibcudf.libcudf.unary cimport unary_operator -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .column cimport Column @@ -11,19 +10,19 @@ from .types cimport DataType cpdef Column unary_operation( - Column input, unary_operator op, Stream stream = *, DeviceMemoryResource mr = * + Column input, unary_operator op, object stream = *, DeviceMemoryResource mr = * ) -cpdef Column is_null(Column input, Stream stream = *, DeviceMemoryResource mr = *) +cpdef Column is_null(Column input, object stream = *, DeviceMemoryResource mr = *) -cpdef Column is_valid(Column input, Stream stream = *, DeviceMemoryResource mr = *) +cpdef Column is_valid(Column input, object stream = *, DeviceMemoryResource mr = *) cpdef Column cast( - Column input, DataType data_type, Stream stream = *, DeviceMemoryResource mr = * + Column input, DataType data_type, object stream = *, DeviceMemoryResource mr = * ) -cpdef Column is_nan(Column input, Stream stream = *, DeviceMemoryResource mr = *) +cpdef Column is_nan(Column input, object stream = *, DeviceMemoryResource mr = *) -cpdef Column is_not_nan(Column input, Stream stream = *, DeviceMemoryResource mr = *) +cpdef Column is_not_nan(Column input, object stream = *, DeviceMemoryResource mr = *) cpdef bool is_supported_cast(DataType from_, DataType to) diff --git a/python/pylibcudf/pylibcudf/unary.pyi b/python/pylibcudf/pylibcudf/unary.pyi index 6a77f7998b9..dd3d42404e7 100644 --- a/python/pylibcudf/pylibcudf/unary.pyi +++ b/python/pylibcudf/pylibcudf/unary.pyi @@ -1,13 +1,13 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from enum import IntEnum from rmm.pylibrmm.memory_resource import DeviceMemoryResource -from rmm.pylibrmm.stream import Stream from pylibcudf.column import Column from pylibcudf.types import DataType +from pylibcudf.utils import CudaStreamLike class UnaryOperator(IntEnum): SIN = ... @@ -38,33 +38,33 @@ class UnaryOperator(IntEnum): def unary_operation( input: Column, op: UnaryOperator, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def is_null( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def is_valid( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def cast( input: Column, data_type: DataType, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def is_nan( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def is_not_nan( input: Column, - stream: Stream | None = None, + stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... def is_supported_cast(from_: DataType, to: DataType) -> bool: ... diff --git a/python/pylibcudf/pylibcudf/unary.pyx b/python/pylibcudf/pylibcudf/unary.pyx index da5b08df685..e0614037012 100644 --- a/python/pylibcudf/pylibcudf/unary.pyx +++ b/python/pylibcudf/pylibcudf/unary.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 from libcpp cimport bool @@ -16,6 +16,7 @@ from pylibcudf.libcudf.unary import \ from .column cimport Column from .types cimport DataType from .utils cimport _get_stream, _get_memory_resource +from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "UnaryOperator", @@ -29,7 +30,7 @@ __all__ = [ ] cpdef Column unary_operation( - Column input, unary_operator op, Stream stream=None, DeviceMemoryResource mr=None + Column input, unary_operator op, object stream=None, DeviceMemoryResource mr=None ): """Perform a unary operation on a column. @@ -53,16 +54,19 @@ cpdef Column unary_operation( """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - result = cpp_unary.unary_operation(input.view(), op, stream.view(), mr.get_mr()) + result = cpp_unary.unary_operation( + input.view(), op, _cs, mr.get_mr() + ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) -cpdef Column is_null(Column input, Stream stream=None, DeviceMemoryResource mr=None): +cpdef Column is_null(Column input, object stream=None, DeviceMemoryResource mr=None): """Check whether elements of a column are null. For details, see :cpp:func:`is_null`. 
@@ -83,16 +87,17 @@ cpdef Column is_null(Column input, Stream stream=None, DeviceMemoryResource mr=N """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - result = cpp_unary.is_null(input.view(), stream.view(), mr.get_mr()) + result = cpp_unary.is_null(input.view(), _cs, mr.get_mr()) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) -cpdef Column is_valid(Column input, Stream stream=None, DeviceMemoryResource mr=None): +cpdef Column is_valid(Column input, object stream=None, DeviceMemoryResource mr=None): """Check whether elements of a column are valid. For details, see :cpp:func:`is_valid`. @@ -113,17 +118,18 @@ cpdef Column is_valid(Column input, Stream stream=None, DeviceMemoryResource mr= """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - result = cpp_unary.is_valid(input.view(), stream.view(), mr.get_mr()) + result = cpp_unary.is_valid(input.view(), _cs, mr.get_mr()) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef Column cast( - Column input, DataType data_type, Stream stream=None, DeviceMemoryResource mr=None + Column input, DataType data_type, object stream=None, DeviceMemoryResource mr=None ): """Cast a column to a different data type. 
@@ -147,18 +153,19 @@ cpdef Column cast( """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: result = cpp_unary.cast( - input.view(), data_type.c_obj, stream.view(), mr.get_mr() + input.view(), data_type.c_obj, _cs, mr.get_mr() ) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) -cpdef Column is_nan(Column input, Stream stream=None, DeviceMemoryResource mr=None): +cpdef Column is_nan(Column input, object stream=None, DeviceMemoryResource mr=None): """Check whether elements of a column are nan. For details, see :cpp:func:`is_nan`. @@ -179,16 +186,17 @@ cpdef Column is_nan(Column input, Stream stream=None, DeviceMemoryResource mr=No """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - result = cpp_unary.is_nan(input.view(), stream.view(), mr.get_mr()) + result = cpp_unary.is_nan(input.view(), _cs, mr.get_mr()) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) -cpdef Column is_not_nan(Column input, Stream stream=None, DeviceMemoryResource mr=None): +cpdef Column is_not_nan(Column input, object stream=None, DeviceMemoryResource mr=None): """Check whether elements of a column are not nan. For details, see :cpp:func:`is_not_nan`. 
@@ -209,13 +217,14 @@ cpdef Column is_not_nan(Column input, Stream stream=None, DeviceMemoryResource m """ cdef unique_ptr[column] result - stream = _get_stream(stream) + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() mr = _get_memory_resource(mr) with nogil: - result = cpp_unary.is_not_nan(input.view(), stream.view(), mr.get_mr()) + result = cpp_unary.is_not_nan(input.view(), _cs, mr.get_mr()) - return Column.from_libcudf(move(result), stream, mr) + return Column.from_libcudf(move(result), _stream, mr) cpdef bool is_supported_cast(DataType from_, DataType to): """Check if a cast between datatypes is supported. diff --git a/python/pylibcudf/pylibcudf/utils.pxd b/python/pylibcudf/pylibcudf/utils.pxd index b3d2928f398..feb82cea18f 100644 --- a/python/pylibcudf/pylibcudf/utils.pxd +++ b/python/pylibcudf/pylibcudf/utils.pxd @@ -1,12 +1,12 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 from libcpp.functional cimport reference_wrapper from libcpp.vector cimport vector from pylibcudf.libcudf.scalar.scalar cimport scalar -from rmm.pylibrmm.stream cimport Stream from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource +from rmm.pylibrmm.stream cimport Stream cdef vector[reference_wrapper[const scalar]] _as_vector(list source) -cpdef Stream _get_stream(Stream stream = *) +cpdef Stream _get_stream(object stream = *) cdef DeviceMemoryResource _get_memory_resource(DeviceMemoryResource mr = *) diff --git a/python/pylibcudf/pylibcudf/utils.pyi b/python/pylibcudf/pylibcudf/utils.pyi index 21f669898ba..cc3cb93e6c0 100644 --- a/python/pylibcudf/pylibcudf/utils.pyi +++ b/python/pylibcudf/pylibcudf/utils.pyi @@ -1,6 +1,13 @@ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 +from typing import Protocol + from rmm.pylibrmm.stream import Stream -def _get_stream(stream: Stream | None = None) -> Stream: ... +class HasCudaStream(Protocol): + def __cuda_stream__(self) -> tuple[int, int]: ... + +CudaStreamLike = Stream | HasCudaStream + +def _get_stream(stream: CudaStreamLike | None = None) -> Stream: ... diff --git a/python/pylibcudf/pylibcudf/utils.pyx b/python/pylibcudf/pylibcudf/utils.pyx index 70460e19481..314e62f7760 100644 --- a/python/pylibcudf/pylibcudf/utils.pyx +++ b/python/pylibcudf/pylibcudf/utils.pyx @@ -47,10 +47,12 @@ cdef vector[reference_wrapper[const scalar]] _as_vector(list source): return c_scalars -cpdef Stream _get_stream(Stream stream = None): +cpdef Stream _get_stream(object stream = None): if stream is None: return CUDF_DEFAULT_STREAM - return stream + if isinstance(stream, Stream): + return stream + return Stream(stream) # Handles __cuda_stream__ protocol cdef DeviceMemoryResource _get_memory_resource(DeviceMemoryResource mr = None): diff --git a/python/pylibcudf/tests/test_experimental.py b/python/pylibcudf/tests/test_experimental.py index eaf06ff62ae..ed180e8db29 100644 --- a/python/pylibcudf/tests/test_experimental.py +++ b/python/pylibcudf/tests/test_experimental.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 import pytest @@ -21,6 +21,7 @@ def test_join_streams(streams: list[Stream], stream: Stream): plc.experimental.join_streams(streams, stream) +@pytest.mark.uses_custom_stream def test_join_streams_type_error(): """Test that join_streams raises appropriate errors for invalid inputs.""" main_stream = Stream() @@ -29,16 +30,10 @@ def test_join_streams_type_error(): with pytest.raises(TypeError): plc.experimental.join_streams(None, main_stream) - # Test with non-Stream in list - with pytest.raises( - TypeError, - match="Cannot convert NoneType to rmm.pylibrmm.stream.Stream", - ): - plc.experimental.join_streams([None], main_stream) - - # Test with non-Stream as main stream - with pytest.raises( - TypeError, - match="Cannot convert NoneType to rmm.pylibrmm.stream.Stream", - ): - plc.experimental.join_streams([Stream()], None) + # Protocol stream should be accepted + class _CudaStreamProto: + def __cuda_stream__(self): + return (0, 0) + + plc.experimental.join_streams([_CudaStreamProto()], main_stream) + plc.experimental.join_streams([Stream()], _CudaStreamProto()) diff --git a/python/pylibcudf/tests/test_stream_protocol.py b/python/pylibcudf/tests/test_stream_protocol.py new file mode 100644 index 00000000000..075c49bd0b3 --- /dev/null +++ b/python/pylibcudf/tests/test_stream_protocol.py @@ -0,0 +1,74 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. 
+# SPDX-License-Identifier: Apache-2.0 + +import pyarrow as pa +import pytest + +from rmm.pylibrmm.stream import Stream + +import pylibcudf as plc + + +class _CudaStreamProto: + """Minimal __cuda_stream__ protocol object for testing.""" + + def __cuda_stream__(self): + return (0, 0) + + +def test_get_stream_none(): + stream = plc.utils._get_stream(None) + assert isinstance(stream, Stream) + + +def test_get_stream_stream_object(): + stream = Stream() + result = plc.utils._get_stream(stream) + assert result is stream + + +def test_get_stream_protocol_object(): + proto = _CudaStreamProto() + result = plc.utils._get_stream(proto) + assert isinstance(result, Stream) + + +@pytest.mark.parametrize("stream", [None, Stream(), _CudaStreamProto()]) +def test_reduce_accepts_stream_protocol(stream): + arr = pa.array([1, 2, 3], type=pa.int32()) + col = plc.Column.from_arrow(arr) + agg = plc.aggregation.sum() + dtype = plc.DataType.from_arrow(pa.int32()) + result = plc.reduce.reduce(col, agg, dtype, stream=stream) + assert result.to_py() == 6 + + +@pytest.mark.parametrize("stream", [None, Stream(), _CudaStreamProto()]) +def test_binary_operation_accepts_stream_protocol(stream): + lhs = plc.Column.from_arrow(pa.array([1, 2, 3], type=pa.int32())) + rhs = plc.Column.from_arrow(pa.array([4, 5, 6], type=pa.int32())) + dtype = plc.DataType.from_arrow(pa.int32()) + result = plc.binaryop.binary_operation( + lhs, + rhs, + plc.binaryop.BinaryOperator.ADD, + dtype, + stream=stream, + ) + expect = pa.array([5, 7, 9], type=pa.int32()) + assert result.to_arrow().equals(expect) + + +@pytest.mark.parametrize("stream", [None, Stream(), _CudaStreamProto()]) +def test_gather_accepts_stream_protocol(stream): + table = plc.Table.from_arrow(pa.table({"a": [1, 2, 3], "b": [4, 5, 6]})) + indices = plc.Column.from_arrow(pa.array([2, 0], type=pa.int32())) + result = plc.copying.gather( + table, + indices, + plc.copying.OutOfBoundsPolicy.DONT_CHECK, + stream=stream, + ) + expected = pa.table({"a": [3, 1], 
"b": [6, 4]}) + got = result.to_arrow().rename_columns(expected.column_names) + assert got.cast(expected.schema).equals(expected) From b45c5aaae0b396de9aead82a2f3daf6ba5ef3b10 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 6 May 2026 16:41:46 -0700 Subject: [PATCH 29/36] Use `language: script` for cudf-polars-ir-signatures pre-commit hook (#22384) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `cudf-polars-ir-signatures` pre-commit hook uses `language: python` but is just a local script (`./ci/check_cudf_polars_ir.py`) that only depends on stdlib modules (`ast`, `argparse`, `sys`, `typing`) and has a `#!/usr/bin/env python3` shebang. With `language: python`, pre-commit unnecessarily creates a virtualenv for this hook. `language: script` is the correct setting — it runs the entry point directly as an executable, relying on the shebang for interpreter selection, with no virtualenv overhead. Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/cudf/pull/22384 --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1fb05425bd3..a51294a8f26 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -136,7 +136,7 @@ repos: name: cudf-polars-ir-signatures description: 'Validate cudf-polars IR.do_evaluate signatures.' 
entry: ./ci/check_cudf_polars_ir.py - language: python + language: script files: ^python/cudf_polars/cudf_polars/(dsl/ir|experimental/(shuffle|io|sort))\.py$ pass_filenames: true verbose: true From 8a0d5f951fee350b9557001639dda2002fb7a150 Mon Sep 17 00:00:00 2001 From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> Date: Wed, 6 May 2026 18:14:35 -0700 Subject: [PATCH 30/36] Fix potential errors in Parquet page header decode (#22274) This PR fixes a potential infinite loop in parquet page header count/decode kernels in case of malformed input. Authors: - Muhammad Haseeb (https://github.com/mhaseeb123) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Paul Mattione (https://github.com/pmattione-nvidia) URL: https://github.com/rapidsai/cudf/pull/22274 --- cpp/src/io/parquet/page_hdr.cu | 132 ++++++++++--------- cpp/src/io/parquet/parquet_gpu.hpp | 2 + cpp/src/io/parquet/reader_impl_preprocess.cu | 8 +- 3 files changed, 81 insertions(+), 61 deletions(-) diff --git a/cpp/src/io/parquet/page_hdr.cu b/cpp/src/io/parquet/page_hdr.cu index 83724dd71e2..8e7a6223447 100644 --- a/cpp/src/io/parquet/page_hdr.cu +++ b/cpp/src/io/parquet/page_hdr.cu @@ -110,8 +110,8 @@ inline __device__ int32_t get_i32(byte_stream_s* bs) */ __device__ void skip_struct_field(byte_stream_s* bs, int field_type) { - int struct_depth = 0; - int rep_cnt = 0; + uint32_t struct_depth = 0; + uint32_t rep_cnt = 0; do { if (rep_cnt != 0) { @@ -138,7 +138,7 @@ __device__ void skip_struct_field(byte_stream_s* bs, int field_type) case FieldType::LIST: case FieldType::SET: { // NOTE: skipping a list of lists is not handled auto const c = getb(bs); - int n = c >> 4; + uint32_t n = c >> 4; if (n == 0xf) { n = get_u32(bs); } field_type = c & 0xf; if (static_cast(field_type) == FieldType::STRUCT) { @@ -543,7 +543,6 @@ void __launch_bounds__(decode_page_headers_block_size) auto const block = cg::this_thread_block(); auto const warp = cg::tiled_partition(block); - auto const lane_id = 
warp.thread_rank(); auto const warp_id = warp.meta_group_rank(); auto const chunk_idx = static_cast((cg::this_grid().block_rank() * num_warps_per_block) + warp_id); @@ -554,20 +553,20 @@ void __launch_bounds__(decode_page_headers_block_size) auto const bs = &bs_g[warp_id]; - if (lane_id == 0) { + cg::invoke_one(warp, [&] { if (chunk_idx < num_chunks) { bs->ck = chunks[chunk_idx]; } error[warp_id] = 0; - } + }); block.sync(); if (chunk_idx < num_chunks) { - if (lane_id == 0) { + cg::invoke_one(warp, [&] { bs->base = bs->cur = bs->ck.compressed_data; bs->end = bs->base + bs->ck.compressed_size; bs->page.chunk_idx = chunk_idx; bs->page.src_col_schema = bs->ck.src_col_schema; zero_out_page_header_info(bs); - } + }); size_t const num_values = bs->ck.num_values; size_t values_found = 0; uint32_t data_page_count = 0; @@ -580,7 +579,7 @@ void __launch_bounds__(decode_page_headers_block_size) while (values_found < num_values and bs->cur < bs->end) { int index_out = -1; - if (lane_id == 0) { + cg::invoke_one(warp, [&] { // this computation is only valid for flat schemas. 
for nested schemas, // they will be recomputed in the preprocess step by examining repetition and // definition levels @@ -593,7 +592,7 @@ void __launch_bounds__(decode_page_headers_block_size) bs->page.num_nulls = 0; bs->page.lvl_bytes[level_type::DEFINITION] = 0; bs->page.lvl_bytes[level_type::REPETITION] = 0; - if (parse_page_header_fn{}(bs) and bs->page.compressed_page_size >= 0) { + if (parse_page_header_fn{}(bs) and bs->page.compressed_page_size > 0) { if (not is_supported_encoding(bs->page.encoding)) { error[warp_id] |= static_cast(decode_error::UNSUPPORTED_ENCODING); @@ -641,11 +640,13 @@ void __launch_bounds__(decode_page_headers_block_size) bs->cur = bs->end; } if (index_out >= 0 and index_out < max_num_pages) { page_info[index_out] = bs->page; } - } + }); values_found = shuffle(values_found); warp.sync(); } - if (lane_id == 0 and error[warp_id] != 0) { set_error(error[warp_id], error_code); } + cg::invoke_one(warp, [&] { + if (error[warp_id] != 0) { set_error(error[warp_id], error_code); } + }); } } @@ -664,7 +665,6 @@ CUDF_KERNEL void __launch_bounds__(count_page_headers_block_size) auto const block = cg::this_thread_block(); auto const warp = cg::tiled_partition(block); - auto const lane_id = warp.thread_rank(); auto const warp_id = warp.meta_group_rank(); auto const chunk_idx = static_cast((cg::this_grid().block_rank() * num_warps_per_block) + warp_id); @@ -675,25 +675,25 @@ CUDF_KERNEL void __launch_bounds__(count_page_headers_block_size) auto const bs = &bs_g[warp_id]; - if (lane_id == 0) { + cg::invoke_one(warp, [&] { if (chunk_idx < num_chunks) { bs->ck = chunks[chunk_idx]; } error[warp_id] = 0; - } + }); block.sync(); if (chunk_idx < num_chunks) { - if (lane_id == 0) { + cg::invoke_one(warp, [&] { bs->base = bs->cur = bs->ck.compressed_data; bs->end = bs->base + bs->ck.compressed_size; - } + }); size_t const num_values = bs->ck.num_values; size_t values_found = 0; uint32_t data_page_count = 0; uint32_t dictionary_page_count = 0; warp.sync(); 
while (values_found < num_values and bs->cur < bs->end) { - if (lane_id == 0) { - if (parse_page_header_fn{}(bs) and bs->page.compressed_page_size >= 0) { + cg::invoke_one(warp, [&] { + if (parse_page_header_fn{}(bs) and bs->page.compressed_page_size > 0) { if (not is_supported_encoding(bs->page.encoding)) { error[warp_id] |= static_cast(decode_error::UNSUPPORTED_ENCODING); @@ -724,15 +724,15 @@ CUDF_KERNEL void __launch_bounds__(count_page_headers_block_size) static_cast(decode_error::INVALID_PAGE_HEADER); bs->cur = bs->end; } - } + }); values_found = shuffle(values_found); warp.sync(); } - if (lane_id == 0) { + cg::invoke_one(warp, [&] { chunks[chunk_idx].num_data_pages = data_page_count; chunks[chunk_idx].num_dict_pages = dictionary_page_count; if (error[warp_id] != 0) { set_error(error[warp_id], error_code); } - } + }); } } @@ -784,8 +784,9 @@ struct decode_page_headers_with_pgidx_fn { // bs.page.chunk_row not computed here and will be filled in later by // `fill_in_page_info()`. - if (not parse_page_header_fn{}(&bs) or bs.page.compressed_page_size < 0) { - set_error(static_cast(decode_error::UNSUPPORTED_ENCODING), + // Parsed page must be valid and not empty + if (not parse_page_header_fn{}(&bs) or bs.page.compressed_page_size <= 0) { + set_error(static_cast(decode_error::INVALID_PAGE_HEADER), error_code); return; } @@ -834,54 +835,64 @@ struct decode_page_headers_with_pgidx_fn { * @param[in] num_chunks Number of column chunks */ CUDF_KERNEL void __launch_bounds__(build_string_dict_index_block_size) - build_string_dictionary_index_kernel(ColumnChunkDesc* chunks, int32_t num_chunks) + build_string_dictionary_index_kernel(ColumnChunkDesc* chunks, + int32_t num_chunks, + kernel_error::pointer error_code) { auto constexpr num_warps_per_block = build_string_dict_index_block_size / cudf::detail::warp_size; __shared__ ColumnChunkDesc chunk_g[num_warps_per_block]; - auto const block = cg::this_thread_block(); - auto const warp = cg::tiled_partition(block); - int const 
lane_id = warp.thread_rank(); - int const chunk = (cg::this_grid().block_rank() * num_warps_per_block) + warp.meta_group_rank(); + auto const block = cg::this_thread_block(); + auto const warp = cg::tiled_partition(block); + int const chunk = (cg::this_grid().block_rank() * num_warps_per_block) + warp.meta_group_rank(); ColumnChunkDesc* const ck = &chunk_g[warp.meta_group_rank()]; - if (chunk < num_chunks and lane_id == 0) *ck = chunks[chunk]; + cg::invoke_one(warp, [&] { + if (chunk < num_chunks) { *ck = chunks[chunk]; } + }); block.sync(); if (chunk >= num_chunks) { return; } - if (!lane_id && ck->num_dict_pages > 0 && ck->str_dict_index) { - // Data type to describe a string - string_index_pair* dict_index = ck->str_dict_index; - uint8_t const* dict = ck->dict_page->page_data; - int dict_size = ck->dict_page->uncompressed_page_size; - int num_entries = ck->dict_page->num_input_values; - int pos = 0, cur = 0; - for (int i = 0; i < num_entries; i++) { - int len = 0; - if (ck->physical_type == Type::FIXED_LEN_BYTE_ARRAY) { - if (cur + ck->type_length <= dict_size) { - len = ck->type_length; - pos = cur; - cur += len; - } else { - cur = dict_size; - } - } else { - if (cur + 4 <= dict_size) { - len = - dict[cur + 0] | (dict[cur + 1] << 8) | (dict[cur + 2] << 16) | (dict[cur + 3] << 24); - if (len >= 0 && cur + 4 + len <= dict_size) { - pos = cur + 4; - cur = pos + len; + cg::invoke_one(warp, [&] { + if (ck->num_dict_pages > 0 && ck->str_dict_index) { + // Data type to describe a string + string_index_pair* dict_index = ck->str_dict_index; + uint8_t const* dict = ck->dict_page->page_data; + int const dict_size = ck->dict_page->uncompressed_page_size; + int32_t const num_entries = ck->dict_page->num_input_values; + if (num_entries < 0 or dict_size < 0) { + set_error(static_cast(decode_error::INVALID_DICT_WIDTH), + error_code); + return; + } + int pos = 0, cur = 0; + for (int i = 0; i < num_entries; i++) { + int len = 0; + if (ck->physical_type == 
Type::FIXED_LEN_BYTE_ARRAY) { + if (cur + ck->type_length <= dict_size) { + len = ck->type_length; + pos = cur; + cur += len; } else { cur = dict_size; } + } else { + if (cur + 4 <= dict_size) { + len = + dict[cur + 0] | (dict[cur + 1] << 8) | (dict[cur + 2] << 16) | (dict[cur + 3] << 24); + if (len >= 0 && cur + 4 + len <= dict_size) { + pos = cur + 4; + cur = pos + len; + } else { + cur = dict_size; + } + } } + // TODO: Could store 8 entries in shared mem, then do a single warp-wide store + dict_index[i].first = reinterpret_cast(dict + pos); + dict_index[i].second = len; } - // TODO: Could store 8 entries in shared mem, then do a single warp-wide store - dict_index[i].first = reinterpret_cast(dict + pos); - dict_index[i].second = len; } - } + }); } } // namespace @@ -942,6 +953,7 @@ void decode_page_headers_with_pgidx(cudf::device_span chu void build_string_dictionary_index(ColumnChunkDesc* chunks, int32_t num_chunks, + kernel_error::pointer error_code, rmm::cuda_stream_view stream) { static_assert( @@ -954,8 +966,8 @@ void build_string_dictionary_index(ColumnChunkDesc* chunks, dim3 dim_block(build_string_dict_index_block_size, 1); dim3 dim_grid(num_blocks, 1); - build_string_dictionary_index_kernel<<>>(chunks, - num_chunks); + build_string_dictionary_index_kernel<<>>( + chunks, num_chunks, error_code); } } // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index 680d11959a1..7d07f39aa38 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -735,10 +735,12 @@ void decode_page_headers_with_pgidx(cudf::device_span chu * * @param[in] chunks List of column chunks * @param[in] num_chunks Number of column chunks + * @param[out] error_code Pointer to the error code for kernel failures * @param[in] stream CUDA stream to use */ void build_string_dictionary_index(ColumnChunkDesc* chunks, int32_t num_chunks, + kernel_error::pointer error_code, rmm::cuda_stream_view 
stream); /** diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 07db8ff0c23..8ebb8879d7e 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -94,8 +94,14 @@ void reader_impl::build_string_dict_indices() set_str_dict_index_ptr{pass.str_dict_index.data(), str_dict_index_offsets, pass.chunks}); // compute the indices - build_string_dictionary_index(pass.chunks.device_ptr(), pass.chunks.size(), _stream); + kernel_error error_code(_stream); + build_string_dictionary_index( + pass.chunks.device_ptr(), pass.chunks.size(), error_code.data(), _stream); pass.chunks.device_to_host(_stream); + auto const error = error_code.value_sync(_stream); + CUDF_EXPECTS( + error == 0, + "Parquet dictionary index construction failed with code(s) " + kernel_error::to_string(error)); } void reader_impl::allocate_nesting_info() From be407805d297b1b84878d021305eef3fe867f3e5 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 6 May 2026 18:55:29 -0700 Subject: [PATCH 31/36] Make RapidsMPF the default runtime for cudf_polars streaming executor (#22281) closes https://github.com/rapidsai/cudf/issues/21466 closes https://github.com/rapidsai/cudf/issues/21767 Waiting for https://github.com/rapidsai/cudf/pull/22212 * Makes rapidsmpf a required dependency of cudf_polars * Removes the following `StreamingExecutor` options as they were "experimental" with associated code paths * `StreamingExecutor.runtime` * `StreamingExecutor.shuffle_method` * `StreamingExecutor.unique_fraction` * `StreamingExecutor.groupby_n_ary` * `StreamingExecutor.rapidsmpf_spill` * Removes the task runtime and associated tests * Some tests we modified to only test 1 specific test configuration because of https://github.com/rapidsai/cudf/issues/22346 to pass these tests for now. 
Planning on revisiting this once rapidsmpf becomes the default Ops-Bot-Merge-Barrier: true Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Mads R. B. Kristensen (https://github.com/madsbk) - Bradley Dice (https://github.com/bdice) - Matthew Murray (https://github.com/Matt711) - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/cudf/pull/22281 --- .devcontainer/Dockerfile | 2 + .devcontainer/README.md | 8 + .../cuda12.9-conda/devcontainer.json | 5 +- .devcontainer/cuda12.9-pip/devcontainer.json | 7 +- .../cuda13.1-conda/devcontainer.json | 5 +- .devcontainer/cuda13.1-pip/devcontainer.json | 7 +- .github/workflows/pr.yaml | 41 +- ci/test_cudf_polars_experimental.sh | 2 +- ci/test_cudf_polars_polars_tests.sh | 2 +- .../all_cuda-129_arch-aarch64.yaml | 1 + .../all_cuda-129_arch-x86_64.yaml | 1 + .../all_cuda-131_arch-aarch64.yaml | 1 + .../all_cuda-131_arch-x86_64.yaml | 1 + conda/recipes/cudf-polars/recipe.yaml | 1 + dependencies.yaml | 56 ++- docs/cudf/source/cudf_polars/api.md | 1 - .../cudf/source/cudf_polars/engine_options.md | 2 +- python/cudf_polars/cudf_polars/callback.py | 5 +- python/cudf_polars/cudf_polars/dsl/expr.py | 2 - .../cudf_polars/dsl/expressions/base.py | 8 +- python/cudf_polars/cudf_polars/dsl/ir.py | 10 +- .../cudf_polars/experimental/base.py | 13 +- .../benchmarks/utils_new_frontends.py | 27 +- .../cudf_polars/experimental/dispatch.py | 39 +- .../cudf_polars/experimental/distinct.py | 77 +--- .../cudf_polars/experimental/explain.py | 5 +- .../cudf_polars/experimental/expressions.py | 21 +- .../cudf_polars/experimental/groupby.py | 63 +-- .../cudf_polars/experimental/io.py | 232 +--------- .../cudf_polars/experimental/join.py | 164 +------ .../cudf_polars/experimental/parallel.py | 125 +----- .../experimental/rapidsmpf/core.py | 4 - .../experimental/rapidsmpf/frontend/core.py | 2 +- .../experimental/rapidsmpf/frontend/dask.py | 2 - .../rapidsmpf/frontend/options.py | 18 - 
.../experimental/rapidsmpf/frontend/ray.py | 6 - .../experimental/rapidsmpf/frontend/spmd.py | 21 +- .../cudf_polars/experimental/repartition.py | 43 +- .../cudf_polars/experimental/scheduler.py | 153 ------- .../cudf_polars/experimental/shuffle.py | 279 +----------- .../cudf_polars/experimental/sort.py | 402 +----------------- .../cudf_polars/experimental/utils.py | 47 +- .../cudf_polars/testing/asserts.py | 5 +- .../cudf_polars/testing/inject_gpu_engine.py | 3 +- .../cudf_polars/cudf_polars/utils/config.py | 214 ++-------- .../cudf_polars/utils/cuda_stream.py | 5 - python/cudf_polars/pyproject.toml | 2 +- python/cudf_polars/tests/conftest.py | 30 +- .../tests/experimental/test_dask.py | 2 - .../tests/experimental/test_explain.py | 4 +- .../tests/experimental/test_groupby.py | 5 +- .../tests/experimental/test_hstack.py | 2 - .../tests/experimental/test_options.py | 5 - .../tests/experimental/test_parallel.py | 41 -- .../tests/experimental/test_ray.py | 4 +- .../tests/experimental/test_sort.py | 4 - .../tests/experimental/test_spmd.py | 3 +- .../tests/experimental/test_unique.py | 35 +- python/cudf_polars/tests/test_config.py | 175 ++------ python/cudf_polars/tests/test_scan.py | 2 +- python/cudf_polars/tests/test_sink.py | 1 + python/cudf_polars/tests/test_tracing.py | 14 +- .../tests/testing/test_engine_utils.py | 5 - 63 files changed, 275 insertions(+), 2197 deletions(-) delete mode 100644 python/cudf_polars/cudf_polars/experimental/scheduler.py diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index b4b2ecb69e0..57ccf6302c5 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -5,9 +5,11 @@ ARG PYTHON_PACKAGE_MANAGER=conda FROM ${BASE} as pip-base +# libnuma-dev is required for pip devcontainers for cucascade from rapidsmpf RUN apt update -y \ && DEBIAN_FRONTEND=noninteractive apt install -y \ librdkafka-dev \ + libnuma-dev \ && rm -rf /tmp/* /var/tmp/* /var/cache/apt/* /var/lib/apt/lists/*; ENV DEFAULT_VIRTUAL_ENV=rapids diff 
--git a/.devcontainer/README.md b/.devcontainer/README.md index 91ee7ef85f7..cc5fac22fde 100644 --- a/.devcontainer/README.md +++ b/.devcontainer/README.md @@ -20,6 +20,7 @@ This container is a turnkey development environment for building and testing the By default, the following directories are bind-mounted into the devcontainer: * `${repo}:/home/coder/cudf` +* `${repo}/../rapidsmpf:/home/coder/rapidsmpf` * `${repo}/../.aws:/home/coder/.aws` * `${repo}/../.local:/home/coder/.local` * `${repo}/../.cache:/home/coder/.cache` @@ -28,6 +29,13 @@ By default, the following directories are bind-mounted into the devcontainer: This ensures caches, configurations, dependencies, and your commits are persisted on the host across container runs. +The [rapidsmpf](https://github.com/rapidsai/rapidsmpf) repository is a required dependency of `cudf_polars` (that also requires `libcudf`) and must be cloned as a sibling directory to the cudf repo before launching the devcontainer: + +``` +# from the parent directory of your cudf clone +git clone https://github.com/rapidsai/rapidsmpf.git +``` + ## Launch a Dev Container To launch a devcontainer from VSCode, open the cuDF repo and select the "Reopen in Container" button in the bottom right:
diff --git a/.devcontainer/cuda12.9-conda/devcontainer.json b/.devcontainer/cuda12.9-conda/devcontainer.json index 9d672bdbde8..272007e7c95 100644 --- a/.devcontainer/cuda12.9-conda/devcontainer.json +++ b/.devcontainer/cuda12.9-conda/devcontainer.json @@ -49,7 +49,7 @@ "initializeCommand": [ "/bin/bash", "-c", - "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.9-envs}" + "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.9-envs} ${localWorkspaceFolder}/../rapidsmpf" ], "postAttachCommand": [ "/bin/bash", @@ -63,7 +63,8 @@ "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.9-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent" + "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.9-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../rapidsmpf,target=/home/coder/rapidsmpf,type=bind,consistency=consistent" ], "customizations": { "vscode": { diff --git a/.devcontainer/cuda12.9-pip/devcontainer.json b/.devcontainer/cuda12.9-pip/devcontainer.json index f99cc4ce5dc..5012dcfa979 100644 --- a/.devcontainer/cuda12.9-pip/devcontainer.json +++ b/.devcontainer/cuda12.9-pip/devcontainer.json @@ -5,7 +5,7 @@ "args": { "CUDA": "12.9", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:26.06-cpp-cuda12.9" + "BASE": "rapidsai/devcontainers:26.06-cpp-cuda12.9-ucx1.19.0-openmpi5.0.10" } }, "runArgs": [ @@ -27,7 +27,7 @@ "initializeCommand": [ "/bin/bash", "-c", - "mkdir -m 0755 -p 
${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.9-venvs}" + "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.9-venvs} ${localWorkspaceFolder}/../rapidsmpf" ], "postAttachCommand": [ "/bin/bash", @@ -40,7 +40,8 @@ "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.9-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent" + "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.9-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../rapidsmpf,target=/home/coder/rapidsmpf,type=bind,consistency=consistent" ], "customizations": { "vscode": { diff --git a/.devcontainer/cuda13.1-conda/devcontainer.json b/.devcontainer/cuda13.1-conda/devcontainer.json index a73953b1989..785302c3c1d 100644 --- a/.devcontainer/cuda13.1-conda/devcontainer.json +++ b/.devcontainer/cuda13.1-conda/devcontainer.json @@ -49,7 +49,7 @@ "initializeCommand": [ "/bin/bash", "-c", - "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda13.1-envs}" + "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda13.1-envs} ${localWorkspaceFolder}/../rapidsmpf" ], "postAttachCommand": [ "/bin/bash", @@ -63,7 +63,8 @@ "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", 
"source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent", - "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda13.1-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent" + "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda13.1-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../rapidsmpf,target=/home/coder/rapidsmpf,type=bind,consistency=consistent" ], "customizations": { "vscode": { diff --git a/.devcontainer/cuda13.1-pip/devcontainer.json b/.devcontainer/cuda13.1-pip/devcontainer.json index 8596ff6b503..730b1c1e8ca 100644 --- a/.devcontainer/cuda13.1-pip/devcontainer.json +++ b/.devcontainer/cuda13.1-pip/devcontainer.json @@ -5,7 +5,7 @@ "args": { "CUDA": "13.1", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:26.06-cpp-cuda13.1" + "BASE": "rapidsai/devcontainers:26.06-cpp-cuda13.1-ucx1.19.0-openmpi5.0.10" } }, "runArgs": [ @@ -27,7 +27,7 @@ "initializeCommand": [ "/bin/bash", "-c", - "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda13.1-venvs}" + "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda13.1-venvs} ${localWorkspaceFolder}/../rapidsmpf" ], "postAttachCommand": [ "/bin/bash", @@ -40,7 +40,8 @@ "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent", "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent", - 
"source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda13.1-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent" + "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda13.1-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent", + "source=${localWorkspaceFolder}/../rapidsmpf,target=/home/coder/rapidsmpf,type=bind,consistency=consistent" ], "customizations": { "vscode": { diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index c20f7f7ea79..a7ef36049b0 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -537,23 +537,30 @@ jobs: matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) build_type: pull-request script: ci/test_wheel_dask_cudf.sh - # devcontainer: - # secrets: inherit - # needs: telemetry-setup - # uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@main - # with: - # arch: '["amd64", "arm64"]' - # cuda: '["13.1"]' - # node_type: "cpu8" - # timeout-minutes: 90 - # env: | - # SCCACHE_DIST_MAX_RETRIES=inf - # SCCACHE_SERVER_LOG=sccache=debug - # SCCACHE_DIST_FALLBACK_TO_LOCAL_COMPILE=false - # build_command: | - # sccache --zero-stats; - # build-all -j0 -DBUILD_BENCHMARKS=ON --verbose 2>&1 | tee telemetry-artifacts/build.log; - # sccache --show-adv-stats | tee telemetry-artifacts/sccache-stats.txt; + devcontainer: + secrets: inherit + needs: telemetry-setup + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@main + with: + arch: '["amd64", "arm64"]' + cuda: '["13.1"]' + node_type: "cpu8" + timeout-minutes: 90 + env: | + SCCACHE_DIST_MAX_RETRIES=inf + SCCACHE_SERVER_LOG=sccache=debug + SCCACHE_DIST_FALLBACK_TO_LOCAL_COMPILE=false + # clone-rapidsmpf to not use the rapidsmpf wheels from cudf_polars dependency + # librapidsmpf-cu13 
wheels brings in a hardcoded libnuma-dev cmake target: https://github.com/NVIDIA/cuCascade/issues/118 + # -DBUILD_TESTS=OFF to match rapidsmpf https://github.com/rapidsai/rapidsmpf/blob/main/.github/workflows/pr.yaml#L351 (leads to compilation errors) + # -DCUDF_BUILD_TESTUTIL=OFF to avoid IMPORTED_GLOBAL promotion errors when cuCascade's find_package(cudf) loads cudf-config.cmake from a CPM subdirectory + build_command: | + sccache --zero-stats; + clone-rapidsmpf -j$(nproc) -v -q --branch "$(cat ~/cudf/RAPIDS_BRANCH)" --clone-upstream --depth 1 --single-branch --shallow-submodules; + if [ "$PYTHON_PACKAGE_MANAGER" = "pip" ]; then rapids-make-pip-env --force; elif [ "$PYTHON_PACKAGE_MANAGER" = "conda" ]; then rapids-make-conda-env --force; fi; + rapids-generate-scripts; + build-all -j0 -DBUILD_BENCHMARKS=OFF -DBUILD_NUMA_SUPPORT=OFF -DBUILD_TESTS=OFF -DCUDF_BUILD_TESTUTIL=OFF --verbose 2>&1 | tee telemetry-artifacts/build.log; + sccache --show-adv-stats | tee telemetry-artifacts/sccache-stats.txt; unit-tests-cudf-pandas: needs: [wheel-build-cudf, changed-files] secrets: inherit diff --git a/ci/test_cudf_polars_experimental.sh b/ci/test_cudf_polars_experimental.sh index 02eab86c0dd..aa3abd66254 100755 --- a/ci/test_cudf_polars_experimental.sh +++ b/ci/test_cudf_polars_experimental.sh @@ -28,7 +28,7 @@ rapids-pip-retry install \ -v \ --prefer-binary \ --constraint "${PIP_CONSTRAINT}" \ - "$(echo "${CUDF_POLARS_WHEELHOUSE}"/cudf_polars_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test,experimental,rapidsmpf]" \ + "$(echo "${CUDF_POLARS_WHEELHOUSE}"/cudf_polars_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test,experimental]" \ "$(echo "${LIBCUDF_WHEELHOUSE}"/libcudf_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)" \ "$(echo "${PYLIBCUDF_WHEELHOUSE}"/pylibcudf_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)" diff --git a/ci/test_cudf_polars_polars_tests.sh b/ci/test_cudf_polars_polars_tests.sh index 802110b18ac..4e19464a895 100755 --- a/ci/test_cudf_polars_polars_tests.sh +++ b/ci/test_cudf_polars_polars_tests.sh @@ -27,7 
+27,7 @@ rapids-logger "Install libcudf, pylibcudf and cudf_polars" rapids-pip-retry install \ -v \ --constraint "${PIP_CONSTRAINT}" \ - "$(echo "${CUDF_POLARS_WHEELHOUSE}"/cudf_polars_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test,rapidsmpf]" \ + "$(echo "${CUDF_POLARS_WHEELHOUSE}"/cudf_polars_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test]" \ "$(echo "${LIBCUDF_WHEELHOUSE}"/libcudf_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)" \ "$(echo "${PYLIBCUDF_WHEELHOUSE}"/pylibcudf_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)" diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index d1e0e59f79b..e0b3aa994f4 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -85,6 +85,7 @@ dependencies: - rapids-build-backend>=0.4.0,<0.5.0 - rapids-dask-dependency==26.6.*,>=0.0.0a0 - rapids-logger==0.2.*,>=0.0.0a0 +- rapidsmpf==26.6.*,>=0.0.0a0 - rich - rmm==26.6.*,>=0.0.0a0 - s3fs>=2022.3.0 diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index c080f732b55..3255ce84837 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -85,6 +85,7 @@ dependencies: - rapids-build-backend>=0.4.0,<0.5.0 - rapids-dask-dependency==26.6.*,>=0.0.0a0 - rapids-logger==0.2.*,>=0.0.0a0 +- rapidsmpf==26.6.*,>=0.0.0a0 - rich - rmm==26.6.*,>=0.0.0a0 - s3fs>=2022.3.0 diff --git a/conda/environments/all_cuda-131_arch-aarch64.yaml b/conda/environments/all_cuda-131_arch-aarch64.yaml index 3194a087894..44314a0544a 100644 --- a/conda/environments/all_cuda-131_arch-aarch64.yaml +++ b/conda/environments/all_cuda-131_arch-aarch64.yaml @@ -85,6 +85,7 @@ dependencies: - rapids-build-backend>=0.4.0,<0.5.0 - rapids-dask-dependency==26.6.*,>=0.0.0a0 - rapids-logger==0.2.*,>=0.0.0a0 +- rapidsmpf==26.6.*,>=0.0.0a0 - rich - rmm==26.6.*,>=0.0.0a0 - s3fs>=2022.3.0 diff --git 
a/conda/environments/all_cuda-131_arch-x86_64.yaml b/conda/environments/all_cuda-131_arch-x86_64.yaml index f81ec9b08d0..949a3602e4f 100644 --- a/conda/environments/all_cuda-131_arch-x86_64.yaml +++ b/conda/environments/all_cuda-131_arch-x86_64.yaml @@ -85,6 +85,7 @@ dependencies: - rapids-build-backend>=0.4.0,<0.5.0 - rapids-dask-dependency==26.6.*,>=0.0.0a0 - rapids-logger==0.2.*,>=0.0.0a0 +- rapidsmpf==26.6.*,>=0.0.0a0 - rich - rmm==26.6.*,>=0.0.0a0 - s3fs>=2022.3.0 diff --git a/conda/recipes/cudf-polars/recipe.yaml b/conda/recipes/cudf-polars/recipe.yaml index e3a21aa1afd..52ac74c7c8b 100644 --- a/conda/recipes/cudf-polars/recipe.yaml +++ b/conda/recipes/cudf-polars/recipe.yaml @@ -36,6 +36,7 @@ requirements: - nvidia-ml-py>=12 - python - pylibcudf =${{ version }} + - rapidsmpf =${{ minor_version }} - polars>=1.30,<1.39 - packaging - ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }} diff --git a/dependencies.yaml b/dependencies.yaml index cae4816eec5..af7dfea460f 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -8,6 +8,51 @@ files: matrix: cuda: ["12.9", "13.1"] arch: [x86_64, aarch64] + includes: + - build_base + - build_all + - build_cpp + - build_python_common + - clang + - cuda + - cuda_version + - cudf_polars_trace + - depends_on_cupy + - depends_on_cuda_python + - depends_on_dask_cuda + - depends_on_libkvikio + - depends_on_librmm + - depends_on_libnvcomp + - depends_on_numba_cuda + - depends_on_rapids_logger + - depends_on_rapidsmpf + - depends_on_rmm + - develop + - docs + - iwyu + - notebooks + - numpy_run + - py_version + - pyarrow_run + - rapids_build_skbuild + - rapids_build_setuptools + - run_common + - run_cudf + - run_cudf_polars + - run_pylibcudf + - run_dask_cudf + - run_custreamz + - test_cpp + - test_python_common + - test_python_cudf + - test_python_cudf_common + - test_python_pylibcudf + - test_python_cudf_pandas + - test_python_cudf_polars + - test_python_s3 + devcontainers: + output: none + # Same as "all", 
excluding depends_on_rapidsmpf (which is built from source) includes: - build_base - build_all @@ -328,6 +373,7 @@ files: table: project includes: - run_cudf_polars + - depends_on_rapidsmpf - depends_on_pylibcudf - depends_on_cuda_python py_run_cudf_polars_experimental: @@ -338,15 +384,6 @@ files: key: experimental includes: - run_cudf_polars_experimental - py_run_cudf_polars_rapidsmpf: - output: pyproject - pyproject_dir: python/cudf_polars - extras: - table: project.optional-dependencies - key: rapidsmpf - includes: - - depends_on_rapidsmpf - - pyarrow_run py_test_cudf_polars: output: pyproject pyproject_dir: python/cudf_polars @@ -360,7 +397,6 @@ files: - test_python_common - test_python_cudf_polars - cudf_polars_trace - - depends_on_rapidsmpf py_trace_cudf_polars: output: pyproject pyproject_dir: python/cudf_polars diff --git a/docs/cudf/source/cudf_polars/api.md b/docs/cudf/source/cudf_polars/api.md index 741b2f6f758..823954a3b08 100644 --- a/docs/cudf/source/cudf_polars/api.md +++ b/docs/cudf/source/cudf_polars/api.md @@ -13,7 +13,6 @@ For the most part, the public API of `cudf-polars` is the polars API. ExecutorType, InMemoryExecutor, ParquetOptions, - ShuffleMethod, StreamingExecutor, StreamingFallbackMode, ``` diff --git a/docs/cudf/source/cudf_polars/engine_options.md b/docs/cudf/source/cudf_polars/engine_options.md index 67e601467d9..ba6085275b8 100644 --- a/docs/cudf/source/cudf_polars/engine_options.md +++ b/docs/cudf/source/cudf_polars/engine_options.md @@ -52,7 +52,7 @@ For example, the environment variable `max_rows_per_partition` to use if it isn't overridden through `executor_options`. -For boolean options, like `rapidsmpf_spill`, the values `{"1", "true", "yes", "y"}` +For boolean options, like `sink_to_directory`, the values `{"1", "true", "yes", "y"}` are considered `True` and `{"0", "false", "no", "n"}` are considered `False`. 
See [Configuration Reference](#cudf-polars-api) for a full list of options, and diff --git a/python/cudf_polars/cudf_polars/callback.py b/python/cudf_polars/cudf_polars/callback.py index fb915784f96..acd0452ae1b 100644 --- a/python/cudf_polars/cudf_polars/callback.py +++ b/python/cudf_polars/cudf_polars/callback.py @@ -159,13 +159,12 @@ def set_memory_resource( """ previous = rmm.mr.get_current_device_resource() if mr is None: - # Use cuda async by default with the rapidsmpf runtime. + # Use cuda async by default with the streaming executor. if ( memory_resource_config is None and executor.name == "streaming" - and executor.runtime == "rapidsmpf" and (device_size := get_total_device_memory()) is not None - ): # pragma: no cover; Requires rapidsmpf runtime. + ): # pragma: no cover memory_resource_config = MemoryResourceConfig( qualname="rmm.mr.CudaAsyncMemoryResource", options={ diff --git a/python/cudf_polars/cudf_polars/dsl/expr.py b/python/cudf_polars/cudf_polars/dsl/expr.py index 5dd8898bde2..b21485ac41e 100644 --- a/python/cudf_polars/cudf_polars/dsl/expr.py +++ b/python/cudf_polars/cudf_polars/dsl/expr.py @@ -15,7 +15,6 @@ from cudf_polars.dsl.expressions.aggregation import Agg from cudf_polars.dsl.expressions.base import ( - AggInfo, Col, ColRef, ErrorExpr, @@ -37,7 +36,6 @@ __all__ = [ "Agg", - "AggInfo", "BinOp", "BooleanFunction", "Cast", diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/base.py b/python/cudf_polars/cudf_polars/dsl/expressions/base.py index 3336ea3fd7c..b97be71b771 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/base.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/base.py @@ -8,7 +8,7 @@ import enum from enum import IntEnum -from typing import TYPE_CHECKING, Any, ClassVar, NamedTuple +from typing import TYPE_CHECKING, Any, ClassVar import pylibcudf as plc @@ -20,11 +20,7 @@ from cudf_polars.containers import Column, DataFrame, DataType -__all__ = ["AggInfo", "Col", "ColRef", "ExecutionContext", "Expr", 
"NamedExpr"] - - -class AggInfo(NamedTuple): - requests: list[tuple[Expr | None, plc.aggregation.Aggregation, Expr]] +__all__ = ["Col", "ColRef", "ExecutionContext", "Expr", "NamedExpr"] class ExecutionContext(IntEnum): diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py index bee30183e1c..1c48f70bb11 100644 --- a/python/cudf_polars/cudf_polars/dsl/ir.py +++ b/python/cudf_polars/cudf_polars/dsl/ir.py @@ -819,11 +819,13 @@ def read_csv_header( # TODO: Nested column names names = chunk.column_names(include_children=False) concatenated_columns = chunk.tbl.columns() - while reader.has_next(): + while reader.has_next(): # pragma: no cover columns = reader.read_chunk().tbl.columns() # Discard columns while concatenating to reduce memory footprint. # Reverse order to avoid O(n^2) list popping cost. - for i in range(len(concatenated_columns) - 1, -1, -1): + for i in range( # pragma: no cover + len(concatenated_columns) - 1, -1, -1 + ): concatenated_columns[i] = plc.concatenate.concatenate( [concatenated_columns[i], columns.pop()], stream=stream ) @@ -840,7 +842,7 @@ def read_csv_header( num_rows=num_rows, ) if include_file_paths is not None: - df = Scan.add_file_paths( + df = Scan.add_file_paths( # pragma: no cover include_file_paths, paths, chunk.num_rows_per_source, df ) else: @@ -1164,7 +1166,7 @@ def _write_parquet( | plc.io.parquet.ParquetWriterOptionsBuilder ) - if ( + if ( # pragma: no cover parquet_options.chunked and parquet_options.n_output_chunks != 1 and df.table.num_rows() != 0 diff --git a/python/cudf_polars/cudf_polars/experimental/base.py b/python/cudf_polars/cudf_polars/experimental/base.py index 73ed9b3dbe1..80ff0dfacbd 100644 --- a/python/cudf_polars/cudf_polars/experimental/base.py +++ b/python/cudf_polars/cudf_polars/experimental/base.py @@ -11,11 +11,10 @@ from cudf_polars.dsl.traversal import traversal if TYPE_CHECKING: - from collections.abc import Generator, Iterator + from collections.abc import 
Generator from cudf_polars.dsl.expr import NamedExpr from cudf_polars.dsl.ir import IR - from cudf_polars.dsl.nodebase import Node class PartitionInfo: @@ -40,22 +39,12 @@ def __init__( self.partitioned_on = partitioned_on self.io_plan = io_plan - def keys(self, node: Node) -> Iterator[tuple[str, int]]: - """Return the partitioned keys for a given node.""" - name = get_key_name(node) - yield from ((name, i) for i in range(self.count)) - def __rich_repr__(self) -> Generator[Any, None, None]: """Formatting for rich.pretty.pprint.""" yield "count", self.count yield "partitioned_on", self.partitioned_on -def get_key_name(node: Node) -> str: - """Generate the key name for a Node.""" - return f"{type(node).__name__.lower()}-{hash(node)}" - - class SerializedDataSourceInfo(TypedDict): """The serialized form of DataSourceInfo.""" diff --git a/python/cudf_polars/cudf_polars/experimental/benchmarks/utils_new_frontends.py b/python/cudf_polars/cudf_polars/experimental/benchmarks/utils_new_frontends.py index d514d4c44e9..74386993737 100644 --- a/python/cudf_polars/cudf_polars/experimental/benchmarks/utils_new_frontends.py +++ b/python/cudf_polars/cudf_polars/experimental/benchmarks/utils_new_frontends.py @@ -636,26 +636,8 @@ def get_executor_options( executor_options: dict[str, Any] = ( run_config.streaming_options.to_executor_options() ) - executor_options["runtime"] = "rapidsmpf" executor_options["max_io_threads"] = run_config.max_io_threads - # PDSHQueries: inject unique_fraction when dynamic planning is explicitly disabled - if ( - benchmark - and benchmark.__name__ == "PDSHQueries" - and run_config.executor == "streaming" - and run_config.streaming_options.dynamic_planning is None - ): - executor_options.setdefault( - "unique_fraction", - { - "c_custkey": 0.05, - "l_orderkey": 1.0, - "l_partkey": 0.1, - "o_custkey": 0.25, - }, - ) - return executor_options @@ -1110,8 +1092,7 @@ def run_polars_spmd( from cudf_polars.experimental.rapidsmpf.frontend.spmd import SPMDEngine 
executor_options = get_executor_options(run_config, benchmark=benchmark) - # "runtime" and "cluster" are reserved — SPMDEngine sets them - executor_options.pop("runtime", None) + # "cluster" is reserved — SPMDEngine sets it executor_options.pop("cluster", None) engine_options = { **run_config.streaming_options.to_engine_options(), @@ -1168,8 +1149,7 @@ def run_polars_ray( from cudf_polars.experimental.rapidsmpf.frontend.ray import RayEngine executor_options = get_executor_options(run_config, benchmark=benchmark) - # "runtime", "cluster" are reserved — RayEngine sets them - executor_options.pop("runtime", None) + # "cluster" is reserved — RayEngine sets it executor_options.pop("cluster", None) engine_options: dict[str, Any] = { **run_config.streaming_options.to_engine_options(), @@ -1218,8 +1198,7 @@ def run_polars_dask( from cudf_polars.experimental.rapidsmpf.frontend.dask import DaskEngine executor_options = get_executor_options(run_config, benchmark=benchmark) - # "runtime", "cluster" are reserved — DaskEngine sets them - executor_options.pop("runtime", None) + # "cluster" is reserved — DaskEngine sets it executor_options.pop("cluster", None) engine_options: dict[str, Any] = { **run_config.streaming_options.to_engine_options(), diff --git a/python/cudf_polars/cudf_polars/experimental/dispatch.py b/python/cudf_polars/cudf_polars/experimental/dispatch.py index 3ac67b6af46..9ff0cc3156b 100644 --- a/python/cudf_polars/cudf_polars/experimental/dispatch.py +++ b/python/cudf_polars/cudf_polars/experimental/dispatch.py @@ -5,7 +5,7 @@ from __future__ import annotations from functools import singledispatch -from typing import TYPE_CHECKING, Any, TypeAlias, TypedDict +from typing import TYPE_CHECKING, TypeAlias, TypedDict from cudf_polars.typing import GenericTransformer @@ -13,7 +13,7 @@ from collections.abc import MutableMapping from cudf_polars.dsl import ir - from cudf_polars.dsl.ir import IR, IRExecutionContext + from cudf_polars.dsl.ir import IR from 
cudf_polars.experimental.base import ( PartitionInfo, StatsCollector, @@ -72,38 +72,3 @@ def lower_ir_node( lower_ir_graph """ raise AssertionError(f"Unhandled type {type(ir)}") # pragma: no cover - - -@singledispatch -def generate_ir_tasks( - ir: IR, - partition_info: MutableMapping[IR, PartitionInfo], - context: IRExecutionContext, -) -> MutableMapping[Any, Any]: - """ - Generate a task graph for evaluation of an IR node. - - Parameters - ---------- - ir - IR node to generate tasks for. - partition_info - Partitioning information, obtained from :func:`lower_ir_graph`. - context - Runtime context for IR node execution. - - Returns - ------- - mapping - A (partial) dask task graph for the evaluation of an ir node. - - Notes - ----- - Task generation should only produce the tasks for the current node, - referring to child tasks by name. - - See Also - -------- - task_graph - """ - raise AssertionError(f"Unhandled type {type(ir)}") # pragma: no cover diff --git a/python/cudf_polars/cudf_polars/experimental/distinct.py b/python/cudf_polars/cudf_polars/experimental/distinct.py index 9ae148f77d3..564fe570919 100644 --- a/python/cudf_polars/cudf_polars/experimental/distinct.py +++ b/python/cudf_polars/cudf_polars/experimental/distinct.py @@ -17,8 +17,6 @@ from cudf_polars.experimental.shuffle import Shuffle from cudf_polars.experimental.utils import ( _dynamic_planning_on, - _fallback_inform, - _get_unique_fractions, _lower_ir_fallback, ) @@ -35,8 +33,6 @@ def lower_distinct( child: IR, partition_info: MutableMapping[IR, PartitionInfo], config_options: ConfigOptions[StreamingExecutor], - *, - unique_fraction: float | None = None, ) -> tuple[IR, MutableMapping[IR, PartitionInfo]]: """ Lower a Distinct IR into partition-wise stages. @@ -56,9 +52,6 @@ def lower_distinct( associated partitioning information. config_options GPUEngine configuration options. - unique_fraction - Fraction of unique values to total values. Used for algorithm selection. 
- A value of `1.0` means the column is unique. Returns ------- @@ -68,69 +61,24 @@ def lower_distinct( A mapping from unique nodes in the new graph to associated partitioning information. """ - subset: frozenset[str] = ir.subset or frozenset(ir.schema) - distinct_keys = tuple( - NamedExpr(name, Col(ir.schema[name], name)) - for name in ir.schema - if name in subset - ) - child_count = partition_info[child].count - shuffled = partition_info[child].partitioned_on == distinct_keys - # Check for ordering requirements (shuffle is not stable) - require_tree_reduction = ir.stable or ir.keep in ( - plc.stream_compaction.DuplicateKeepOption.KEEP_FIRST, - plc.stream_compaction.DuplicateKeepOption.KEEP_LAST, - ) - - output_count = 1 - n_ary = 32 # Arbitrary default (for now) + n_ary = 32 if ir.zlice is not None and ir.zlice[1] is not None: - # Head/tail slice operation has been pushed into Distinct - # (caller ensures only simple slices reach here) n_ary = max(1_000_000 // ir.zlice[1], 2) - elif unique_fraction is not None: - # Use unique_fraction to determine partitioning - n_ary = min(max(int(1.0 / unique_fraction), 2), child_count) - output_count = max(int(unique_fraction * child_count), 1) - - if output_count > 1 and require_tree_reduction: - # Need to reduce down to a single partition even - # if the unique_fraction is large. 
- output_count = 1 - _fallback_inform( - "Unsupported unique options for multiple partitions.", - config_options, - ) # Partition-wise unique count = child_count new_node: IR = ir.reconstruct([child]) partition_info[new_node] = PartitionInfo(count=count) - if shuffled or output_count == 1: - # Tree reduction - while count > output_count: - new_node = Repartition(new_node.schema, new_node) - count = max(math.ceil(count / n_ary), output_count) - partition_info[new_node] = PartitionInfo(count=count) - new_node = ir.reconstruct([new_node]) - partition_info[new_node] = PartitionInfo(count=count) - else: - # Shuffle - new_node = Shuffle( - new_node.schema, - distinct_keys, - config_options.executor.shuffle_method, - new_node, - ) - partition_info[new_node] = PartitionInfo(count=output_count) + # Tree reduction + while count > 1: + new_node = Repartition(new_node.schema, new_node) + count = max(math.ceil(count / n_ary), 1) + partition_info[new_node] = PartitionInfo(count=count) new_node = ir.reconstruct([new_node]) - partition_info[new_node] = PartitionInfo( - count=output_count, - partitioned_on=distinct_keys, - ) + partition_info[new_node] = PartitionInfo(count=count) return new_node, partition_info @@ -172,7 +120,6 @@ def _( child = Shuffle( child.schema, distinct_keys, - config_options.executor.shuffle_method, child, ) partition_info[child] = PartitionInfo( @@ -202,19 +149,9 @@ def _( ) return dynamic_node, partition_info - # Non-dynamic planning: use unique_fraction heuristics - unique_fraction_dict = _get_unique_fractions( - tuple(subset), - config_options.executor.unique_fraction, - ) - unique_fraction = ( - max(unique_fraction_dict.values()) if unique_fraction_dict else None - ) - return lower_distinct( ir, child, partition_info, config_options, - unique_fraction=unique_fraction, ) diff --git a/python/cudf_polars/cudf_polars/experimental/explain.py b/python/cudf_polars/cudf_polars/experimental/explain.py index 82f023b229c..d50d9fae0ae 100644 --- 
a/python/cudf_polars/cudf_polars/experimental/explain.py +++ b/python/cudf_polars/cudf_polars/experimental/explain.py @@ -288,10 +288,7 @@ def _(ir: GroupBy) -> dict[str, Serializable]: @_serialize_properties.register def _(ir: Shuffle) -> dict[str, Serializable]: - return { - "keys": [ne.name for ne in ir.keys], - "shuffle_method": ir.shuffle_method.value, - } + return {"keys": [ne.name for ne in ir.keys]} @_serialize_properties.register diff --git a/python/cudf_polars/cudf_polars/experimental/expressions.py b/python/cudf_polars/cudf_polars/experimental/expressions.py index d2a0070d009..d6df4cae8f9 100644 --- a/python/cudf_polars/cudf_polars/experimental/expressions.py +++ b/python/cudf_polars/cudf_polars/experimental/expressions.py @@ -41,22 +41,18 @@ from cudf_polars.containers import DataType from cudf_polars.dsl.expressions.aggregation import Agg -from cudf_polars.dsl.expressions.base import Col, ExecutionContext, Expr, NamedExpr +from cudf_polars.dsl.expressions.base import Col, ExecutionContext, NamedExpr from cudf_polars.dsl.expressions.binaryop import BinOp from cudf_polars.dsl.expressions.literal import Literal from cudf_polars.dsl.expressions.ternary import Ternary from cudf_polars.dsl.expressions.unary import Cast, Len, UnaryFunction -from cudf_polars.dsl.ir import IR, Distinct, Empty, HConcat, Select +from cudf_polars.dsl.ir import Distinct, Empty, HConcat, Select from cudf_polars.dsl.traversal import ( CachingVisitor, ) from cudf_polars.experimental.base import PartitionInfo from cudf_polars.experimental.repartition import Repartition -from cudf_polars.experimental.utils import ( - _dynamic_planning_on, - _get_unique_fractions, - _leaf_column_names, -) +from cudf_polars.experimental.utils import _dynamic_planning_on if TYPE_CHECKING: from collections.abc import Generator, MutableMapping, Sequence @@ -197,15 +193,6 @@ def _decompose_unique( ) (column,) = columns - unique_fraction_dict = _get_unique_fractions( - _leaf_column_names(child), - 
config_options.executor.unique_fraction, - ) - - unique_fraction = ( - max(unique_fraction_dict.values()) if unique_fraction_dict else None - ) - input_ir, partition_info = lower_distinct( Distinct( {column.name: column.dtype}, @@ -218,7 +205,6 @@ def _decompose_unique( input_ir, partition_info, config_options, - unique_fraction=unique_fraction, ) return column, input_ir, partition_info @@ -344,7 +330,6 @@ def _decompose_agg_node( input_ir = Shuffle( input_ir.schema, shuffle_on, - config_options.executor.shuffle_method, input_ir, ) partition_info[input_ir] = PartitionInfo( diff --git a/python/cudf_polars/cudf_polars/experimental/groupby.py b/python/cudf_polars/cudf_polars/experimental/groupby.py index 898dfdbf03f..6a17b56bfc5 100644 --- a/python/cudf_polars/cudf_polars/experimental/groupby.py +++ b/python/cudf_polars/cudf_polars/experimental/groupby.py @@ -36,7 +36,6 @@ from cudf_polars.experimental.shuffle import Shuffle from cudf_polars.experimental.utils import ( _dynamic_planning_on, - _get_unique_fractions, _lower_ir_fallback, ) @@ -390,7 +389,6 @@ def _( # Check if we are dealing with any high-cardinality columns post_aggregation_count = 1 # Default tree reduction - groupby_key_columns = [ne.name for ne in ir.keys] shuffled = partition_info[child].partitioned_on == ir.keys child_count = partition_info[child].count @@ -421,7 +419,6 @@ def _( child = Shuffle( child.schema, ir.keys, - config_options.executor.shuffle_method, child, ) partition_info[child] = PartitionInfo( @@ -441,14 +438,6 @@ def _( ) return dynamic_node, partition_info - if unique_fraction_dict := _get_unique_fractions( - groupby_key_columns, - config_options.executor.unique_fraction, - ): - # Use unique_fraction to determine output partitioning - unique_fraction = max(unique_fraction_dict.values()) - post_aggregation_count = max(int(unique_fraction * child_count), 1) - # Partition-wise groupby operation pwise_schema = {k.name: k.value.dtype for k in ir.keys} | { k.name: k.value.dtype for k in 
piecewise_exprs @@ -465,46 +454,28 @@ def _( partition_info[gb_pwise] = PartitionInfo(count=child_count) grouped_keys = tuple(NamedExpr(k.name, Col(k.value.dtype, k.name)) for k in ir.keys) - # Reduction - gb_inter: GroupBy | Repartition | Shuffle + # N-ary tree reduction + gb_inter: GroupBy | Repartition reduction_schema = {k.name: k.value.dtype for k in grouped_keys} | { k.name: k.value.dtype for k in reduction_exprs } - if not shuffled and post_aggregation_count > 1: - # Shuffle reduction - if ir.maintain_order: # pragma: no cover - return _lower_ir_fallback( - ir, - rec, - msg="maintain_order not supported for multiple output partitions.", + n_ary = 32 + count = child_count + gb_inter = gb_pwise + while count > post_aggregation_count: + gb_inter = Repartition(gb_inter.schema, gb_inter) + count = max(math.ceil(count / n_ary), post_aggregation_count) + partition_info[gb_inter] = PartitionInfo(count=count) + if count > post_aggregation_count: + gb_inter = GroupBy( + reduction_schema, + grouped_keys, + reduction_exprs, + ir.maintain_order, + None, + gb_inter, ) - - gb_inter = Shuffle( - gb_pwise.schema, - grouped_keys, - config_options.executor.shuffle_method, - gb_pwise, - ) - partition_info[gb_inter] = PartitionInfo(count=post_aggregation_count) - else: - # N-ary tree reduction - n_ary = config_options.executor.groupby_n_ary - count = child_count - gb_inter = gb_pwise - while count > post_aggregation_count: - gb_inter = Repartition(gb_inter.schema, gb_inter) - count = max(math.ceil(count / n_ary), post_aggregation_count) partition_info[gb_inter] = PartitionInfo(count=count) - if count > post_aggregation_count: - gb_inter = GroupBy( - reduction_schema, - grouped_keys, - reduction_exprs, - ir.maintain_order, - None, - gb_inter, - ) - partition_info[gb_inter] = PartitionInfo(count=count) # Final aggregation gb_reduce = GroupBy( diff --git a/python/cudf_polars/cudf_polars/experimental/io.py b/python/cudf_polars/cudf_polars/experimental/io.py index 
f45baa054dd..2cea0274ee6 100644 --- a/python/cudf_polars/cudf_polars/experimental/io.py +++ b/python/cudf_polars/cudf_polars/experimental/io.py @@ -4,13 +4,11 @@ from __future__ import annotations -import dataclasses import functools import itertools import math import statistics from collections import defaultdict -from functools import partial from pathlib import Path from typing import TYPE_CHECKING, Any, Literal, overload @@ -24,16 +22,14 @@ Empty, Scan, Sink, - Union, ) from cudf_polars.experimental.base import ( IOPartitionFlavor, IOPartitionPlan, PartitionInfo, SerializedDataSourceInfo, - get_key_name, ) -from cudf_polars.experimental.dispatch import generate_ir_tasks, lower_ir_node +from cudf_polars.experimental.dispatch import lower_ir_node from cudf_polars.utils.config import Cluster from cudf_polars.utils.cuda_stream import get_cuda_stream from cudf_polars.utils.versions import POLARS_VERSION_LT_137 @@ -62,36 +58,9 @@ def _( ir: DataFrameScan, rec: LowerIRTransformer ) -> tuple[IR, MutableMapping[IR, PartitionInfo]]: - config_options = rec.state["config_options"] + from cudf_polars.experimental.rapidsmpf.io import lower_dataframescan_rapidsmpf - # RapidsMPF runtime: Use rapidsmpf-specific lowering - if ( - config_options.executor.runtime == "rapidsmpf" - ): # pragma: no cover; Requires rapidsmpf runtime - from cudf_polars.experimental.rapidsmpf.io import lower_dataframescan_rapidsmpf - - return lower_dataframescan_rapidsmpf(ir, rec) - - rows_per_partition = config_options.executor.max_rows_per_partition - nrows = max(ir.df.shape()[0], 1) - count = math.ceil(nrows / rows_per_partition) - - if count > 1: - length = math.ceil(nrows / count) - slices = [ - DataFrameScan( - ir.schema, - ir.df.slice(offset, length), - ir.projection, - ) - for offset in range(0, nrows, length) - ] - new_node = Union(ir.schema, None, *slices) - return new_node, {slice: PartitionInfo(count=1) for slice in slices} | { - new_node: PartitionInfo(count=count) - } - - return ir, {ir: 
PartitionInfo(count=1)} + return lower_dataframescan_rapidsmpf(ir, rec) def scan_partition_plan( @@ -285,84 +254,9 @@ def _( def _( ir: Scan, rec: LowerIRTransformer ) -> tuple[IR, MutableMapping[IR, PartitionInfo]]: - partition_info: MutableMapping[IR, PartitionInfo] - config_options = rec.state["config_options"] - - # RapidsMPF runtime: Use rapidsmpf-specific lowering - if ( - config_options.executor.name == "streaming" - and config_options.executor.runtime == "rapidsmpf" - ): # pragma: no cover; Requires rapidsmpf runtime - from cudf_polars.experimental.rapidsmpf.io import lower_scan_rapidsmpf + from cudf_polars.experimental.rapidsmpf.io import lower_scan_rapidsmpf - return lower_scan_rapidsmpf(ir, rec) - - if ( - ir.typ in ("csv", "parquet", "ndjson") - and ir.n_rows == -1 - and ir.skip_rows == 0 - and ir.row_index is None - ): - plan = scan_partition_plan(ir, rec.state["stats"], config_options) - paths = list(ir.paths) - if plan.flavor == IOPartitionFlavor.SPLIT_FILES: - # Disable chunked reader when splitting files - parquet_options = dataclasses.replace( - config_options.parquet_options, - chunked=False, - ) - - slices: list[SplitScan] = [] - for path in paths: - base_scan = Scan( - ir.schema, - ir.typ, - ir.reader_options, - ir.cloud_options, - [path], - ir.with_columns, - ir.skip_rows, - ir.n_rows, - ir.row_index, - ir.include_file_paths, - ir.predicate, - parquet_options, - ) - slices.extend( - SplitScan( - ir.schema, base_scan, sindex, plan.factor, parquet_options - ) - for sindex in range(plan.factor) - ) - new_node = Union(ir.schema, None, *slices) - partition_info = {slice: PartitionInfo(count=1) for slice in slices} | { - new_node: PartitionInfo(count=len(slices)) - } - else: - groups: list[Scan] = [ - Scan( - ir.schema, - ir.typ, - ir.reader_options, - ir.cloud_options, - paths[i : i + plan.factor], - ir.with_columns, - ir.skip_rows, - ir.n_rows, - ir.row_index, - ir.include_file_paths, - ir.predicate, - config_options.parquet_options, - ) - for i 
in range(0, len(paths), plan.factor) - ] - new_node = Union(ir.schema, None, *groups) - partition_info = {group: PartitionInfo(count=1) for group in groups} | { - new_node: PartitionInfo(count=len(groups)) - } - return new_node, partition_info - - return ir, {ir: PartitionInfo(count=1)} # pragma: no cover + return lower_scan_rapidsmpf(ir, rec) class StreamingSink(IR): @@ -441,22 +335,6 @@ def _prepare_sink_directory(path: str) -> None: Path(path).mkdir(parents=True, exist_ok=True) -def _sink_to_directory( - schema: Schema, - kind: str, - path: str, - parquet_options: ParquetOptions, - options: dict[str, Any], - df: DataFrame, - ready: None, - context: IRExecutionContext, -) -> DataFrame: - """Sink a partition to a new file.""" - return Sink.do_evaluate( - schema, kind, path, parquet_options, options, df, context=context - ) - - def _sink_to_parquet_file( path: str, options: dict[str, Any], @@ -545,106 +423,6 @@ def _sink_to_file( return True -def _finalize_file_sink( - kind: str, - writer_state: Any, - df: DataFrame, -) -> DataFrame: - """Finalize the file sink by closing the writer.""" - if kind == "Parquet" and writer_state is not None: - writer_state.close([]) - return df.slice((0, 0)) - - -def _file_sink_graph( - ir: StreamingSink, - partition_info: MutableMapping[IR, PartitionInfo], - context: IRExecutionContext, -) -> MutableMapping[Any, Any]: - """Sink to a single file.""" - name = get_key_name(ir) - count = partition_info[ir].count - child_name = get_key_name(ir.children[0]) - sink = ir.sink - if count == 1: - return { - (name, 0): ( - partial(sink.do_evaluate, context=context), - *sink._non_child_args, - (child_name, 0), - ) - } - - sink_name = get_key_name(sink) - graph: MutableMapping[Any, Any] = { - (sink_name, i): ( - _sink_to_file, - sink.kind, - sink.path, - sink.options, - None if i == 0 else (sink_name, i - 1), # Writer state - (child_name, i), - ) - for i in range(count) - } - - # Finalize task closes the writer after all chunks are written - 
graph[(sink_name, "finalize")] = ( - _finalize_file_sink, - sink.kind, - (sink_name, count - 1), # Writer state from last task - (child_name, count - 1), # Last source df for creating empty result - ) - - # Make sure final tasks point to finalize task - graph.update({(name, i): (sink_name, "finalize") for i in range(count)}) - return graph - - -def _directory_sink_graph( - ir: StreamingSink, - partition_info: MutableMapping[IR, PartitionInfo], - context: IRExecutionContext, -) -> MutableMapping[Any, Any]: - """Sink to a directory of files.""" - name = get_key_name(ir) - count = partition_info[ir].count - child_name = get_key_name(ir.children[0]) - sink = ir.sink - - setup_name = f"setup-{name}" - suffix = sink.kind.lower() - width = math.ceil(math.log10(count)) - graph: MutableMapping[Any, Any] = { - (name, i): ( - _sink_to_directory, - sink.schema, - sink.kind, - f"{sink.path}/part.{str(i).zfill(width)}.{suffix}", - sink.parquet_options, - sink.options, - (child_name, i), - setup_name, - context, - ) - for i in range(count) - } - graph[setup_name] = (_prepare_sink_directory, sink.path) - return graph - - -@generate_ir_tasks.register(StreamingSink) -def _( - ir: StreamingSink, - partition_info: MutableMapping[IR, PartitionInfo], - context: IRExecutionContext, -) -> MutableMapping[Any, Any]: - if ir.sink_to_directory: - return _directory_sink_graph(ir, partition_info, context=context) - else: - return _file_sink_graph(ir, partition_info, context=context) - - class ParquetMetadata: """ Parquet metadata container. 
diff --git a/python/cudf_polars/cudf_polars/experimental/join.py b/python/cudf_polars/cudf_polars/experimental/join.py index 47d0ad90d8e..cd5c514b45a 100644 --- a/python/cudf_polars/cudf_polars/experimental/join.py +++ b/python/cudf_polars/cudf_polars/experimental/join.py @@ -5,16 +5,15 @@ from __future__ import annotations import operator -from functools import partial, reduce -from typing import TYPE_CHECKING, Any +from functools import reduce +from typing import TYPE_CHECKING from cudf_polars.dsl.ir import ConditionalJoin, Join, Slice -from cudf_polars.experimental.base import PartitionInfo, get_key_name -from cudf_polars.experimental.dispatch import generate_ir_tasks, lower_ir_node +from cudf_polars.experimental.base import PartitionInfo +from cudf_polars.experimental.dispatch import lower_ir_node from cudf_polars.experimental.repartition import Repartition -from cudf_polars.experimental.shuffle import Shuffle, _hash_partition_dataframe +from cudf_polars.experimental.shuffle import Shuffle from cudf_polars.experimental.utils import ( - _concat, _dynamic_planning_on, _fallback_inform, _lower_ir_fallback, @@ -24,16 +23,14 @@ from collections.abc import MutableMapping from cudf_polars.dsl.expr import NamedExpr - from cudf_polars.dsl.ir import IR, IRExecutionContext + from cudf_polars.dsl.ir import IR from cudf_polars.experimental.parallel import LowerIRTransformer - from cudf_polars.utils.config import ShuffleMethod def _maybe_shuffle_frame( frame: IR, on: tuple[NamedExpr, ...], partition_info: MutableMapping[IR, PartitionInfo], - shuffle_method: ShuffleMethod, output_count: int, ) -> IR: # Shuffle `frame` if it isn't already shuffled. 
@@ -48,7 +45,6 @@ def _maybe_shuffle_frame( frame = Shuffle( frame.schema, on, - shuffle_method, frame, ) partition_info[frame] = PartitionInfo( @@ -64,21 +60,18 @@ def _make_hash_join( partition_info: MutableMapping[IR, PartitionInfo], left: IR, right: IR, - shuffle_method: ShuffleMethod, ) -> tuple[IR, MutableMapping[IR, PartitionInfo]]: # Shuffle left and right dataframes (if necessary) left = _maybe_shuffle_frame( left, ir.left_on, partition_info, - shuffle_method, output_count, ) right = _maybe_shuffle_frame( right, ir.right_on, partition_info, - shuffle_method, output_count, ) # Always reconstruct in case children contain Cache nodes @@ -146,45 +139,7 @@ def _make_bcast_join( partition_info: MutableMapping[IR, PartitionInfo], left: IR, right: IR, - shuffle_method: ShuffleMethod, - *, - streaming_runtime: str, ) -> tuple[IR, MutableMapping[IR, PartitionInfo]]: - if ir.options[0] != "Inner": - left_count = partition_info[left].count - right_count = partition_info[right].count - - # Shuffle the smaller table (if necessary) - Notes: - # - We need to shuffle the smaller table if - # (1) we are not doing an "inner" join, - # and (2) the small table contains multiple - # partitions. - # - We cannot simply join a large-table partition - # to each small-table partition, and then - # concatenate the partial-join results, because - # a non-"inner" join does NOT commute with - # concatenation. - # - In some cases, we can perform the partial joins - # sequentially. However, we are starting with a - # catch-all algorithm that works for all cases. 
- if streaming_runtime == "tasks": - if left_count >= right_count: - right = _maybe_shuffle_frame( - right, - ir.right_on, - partition_info, - shuffle_method, - right_count, - ) - else: - left = _maybe_shuffle_frame( - left, - ir.left_on, - partition_info, - shuffle_method, - left_count, - ) - new_node = ir.reconstruct([left, right]) partition_info[new_node] = PartitionInfo(count=output_count) return new_node, partition_info @@ -301,8 +256,6 @@ def _( partition_info, left, right, - config_options.executor.shuffle_method, - streaming_runtime=config_options.executor.runtime, ) else: # Create a hash join @@ -312,109 +265,4 @@ def _( partition_info, left, right, - config_options.executor.shuffle_method, ) - - -@generate_ir_tasks.register(Join) -def _( - ir: Join, - partition_info: MutableMapping[IR, PartitionInfo], - context: IRExecutionContext, -) -> MutableMapping[Any, Any]: - left, right = ir.children - output_count = partition_info[ir].count - - left_partitioned = ( - partition_info[left].partitioned_on == ir.left_on - and partition_info[left].count == output_count - ) - right_partitioned = ( - partition_info[right].partitioned_on == ir.right_on - and partition_info[right].count == output_count - ) - - if output_count == 1 or (left_partitioned and right_partitioned): - # Partition-wise join - left_name = get_key_name(left) - right_name = get_key_name(right) - return { - key: ( - partial(ir.do_evaluate, context=context), - *ir._non_child_args, - (left_name, i), - (right_name, i), - ) - for i, key in enumerate(partition_info[ir].keys(ir)) - } - else: - # Broadcast join - left_parts = partition_info[left] - right_parts = partition_info[right] - if left_parts.count >= right_parts.count: - small_side = "Right" - small_name = get_key_name(right) - small_size = partition_info[right].count - large_name = get_key_name(left) - large_on = ir.left_on - else: - small_side = "Left" - small_name = get_key_name(left) - small_size = partition_info[left].count - large_name = 
get_key_name(right) - large_on = ir.right_on - - graph: MutableMapping[Any, Any] = {} - - out_name = get_key_name(ir) - out_size = partition_info[ir].count - split_name = f"split-{out_name}" - getit_name = f"getit-{out_name}" - inter_name = f"inter-{out_name}" - - # Split each large partition if we have - # multiple small partitions (unless this - # is an inner join) - split_large = ir.options[0] != "Inner" and small_size > 1 - - for part_out in range(out_size): - if split_large: - graph[(split_name, part_out)] = ( - _hash_partition_dataframe, - (large_name, part_out), - part_out, - small_size, - None, - large_on, - ) - - _concat_list = [] - for j in range(small_size): - left_key: tuple[str, int] | tuple[str, int, int] - if split_large: - left_key = (getit_name, part_out, j) - graph[left_key] = (operator.getitem, (split_name, part_out), j) - else: - left_key = (large_name, part_out) - join_children = [left_key, (small_name, j)] - if small_side == "Left": - join_children.reverse() - - inter_key = (inter_name, part_out, j) - graph[(inter_name, part_out, j)] = ( - partial(ir.do_evaluate, context=context), - ir.left_on, - ir.right_on, - ir.options, - *join_children, - ) - _concat_list.append(inter_key) - if len(_concat_list) == 1: - graph[(out_name, part_out)] = graph.pop(_concat_list[0]) - else: - graph[(out_name, part_out)] = ( - partial(_concat, context=context), - *_concat_list, - ) - - return graph diff --git a/python/cudf_polars/cudf_polars/experimental/parallel.py b/python/cudf_polars/cudf_polars/experimental/parallel.py index f77e923bce0..ab5d3b5bd90 100644 --- a/python/cudf_polars/cudf_polars/experimental/parallel.py +++ b/python/cudf_polars/cudf_polars/experimental/parallel.py @@ -4,10 +4,9 @@ from __future__ import annotations -import itertools import operator from functools import partial, reduce -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING import polars as pl @@ -26,7 +25,6 @@ Filter, HConcat, HStack, - IRExecutionContext, 
MapFunction, Projection, Select, @@ -35,16 +33,11 @@ ) from cudf_polars.dsl.traversal import CachingVisitor, traversal from cudf_polars.dsl.utils.naming import unique_names -from cudf_polars.experimental.base import PartitionInfo, get_key_name -from cudf_polars.experimental.dispatch import ( - generate_ir_tasks, - lower_ir_node, -) +from cudf_polars.experimental.base import PartitionInfo +from cudf_polars.experimental.dispatch import lower_ir_node from cudf_polars.experimental.io import _clear_source_info_cache from cudf_polars.experimental.repartition import Repartition -from cudf_polars.experimental.statistics import collect_statistics from cudf_polars.experimental.utils import ( - _concat, _contains_over, _dynamic_planning_on, _lower_ir_fallback, @@ -52,7 +45,6 @@ if TYPE_CHECKING: from collections.abc import MutableMapping - from typing import Any from cudf_polars.experimental.base import StatsCollector from cudf_polars.experimental.dispatch import LowerIRTransformer, State @@ -109,63 +101,6 @@ def lower_ir_graph( return mapper(ir) -def task_graph( - ir: IR, - partition_info: MutableMapping[IR, PartitionInfo], -) -> tuple[MutableMapping[Any, Any], str | tuple[str, int]]: - """ - Construct a task graph for evaluation of an IR graph. - - Parameters - ---------- - ir - Root of the graph to rewrite. - partition_info - A mapping from all unique IR nodes to the - associated partitioning information. - - Returns - ------- - graph - A task graph for the entire IR graph with root `ir`, - in dict-of-tuples form consumed by - :func:`~cudf_polars.experimental.scheduler.synchronous_scheduler`. - - Notes - ----- - This function traverses the unique nodes of the - graph with root `ir`, and extracts the tasks for - each node with :func:`generate_ir_tasks`. 
- - See Also - -------- - generate_ir_tasks - """ - context = IRExecutionContext() - graph = reduce( - operator.or_, - ( - generate_ir_tasks(node, partition_info, context=context) - for node in traversal([ir]) - ), - ) - - key_name = get_key_name(ir) - partition_count = partition_info[ir].count - - key: str | tuple[str, int] - if partition_count > 1: - graph[key_name] = ( - partial(_concat, context=context), - *partition_info[ir].keys(ir), - ) - key = key_name - else: - key = (key_name, 0) - - return graph, key - - def evaluate_rapidsmpf( ir: IR, config_options: ConfigOptions[StreamingExecutor], @@ -211,44 +146,7 @@ def evaluate_streaming( # Clear source info cache in case data was overwritten _clear_source_info_cache() - if ( - config_options.executor.runtime == "rapidsmpf" - ): # pragma: no cover; rapidsmpf runtime not tested in CI yet - # Using the RapidsMPF streaming runtime. - return evaluate_rapidsmpf(ir, config_options) - else: - # Using the default task engine. - from cudf_polars.experimental.scheduler import synchronous_scheduler - - stats = collect_statistics(ir, config_options) - ir, partition_info = lower_ir_graph(ir, config_options, stats) - - graph, key = task_graph(ir, partition_info) - - return synchronous_scheduler(graph, key).to_polars() - - -@generate_ir_tasks.register(IR) -def _( - ir: IR, - partition_info: MutableMapping[IR, PartitionInfo], - context: IRExecutionContext, -) -> MutableMapping[Any, Any]: - # Generate pointwise (embarrassingly-parallel) tasks by default - child_names = [get_key_name(c) for c in ir.children] - bcast_child = [partition_info[c].count == 1 for c in ir.children] - - return { - key: ( - partial(ir.do_evaluate, context=context), - *ir._non_child_args, - *[ - (child_name, 0 if bcast_child[j] else i) - for j, child_name in enumerate(child_names) - ], - ) - for i, key in enumerate(partition_info[ir].keys(ir)) - } + return evaluate_rapidsmpf(ir, config_options) @lower_ir_node.register(Union) @@ -278,21 +176,6 @@ def _( 
return new_node, partition_info -@generate_ir_tasks.register(Union) -def _( - ir: Union, - partition_info: MutableMapping[IR, PartitionInfo], - context: IRExecutionContext, -) -> MutableMapping[Any, Any]: - key_name = get_key_name(ir) - partition = itertools.count() - return { - (key_name, next(partition)): child_key - for child in ir.children - for child_key in partition_info[child].keys(child) - } - - @lower_ir_node.register(MapFunction) def _( ir: MapFunction, rec: LowerIRTransformer diff --git a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/core.py b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/core.py index 478c0a33beb..97168f0b02d 100644 --- a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/core.py +++ b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/core.py @@ -99,8 +99,6 @@ def evaluate_logical_plan( ------- The output DataFrame and metadata collector. """ - assert config_options.executor.runtime == "rapidsmpf", "Runtime must be rapidsmpf" - query_id = uuid.uuid4() with cudf_polars.dsl.tracing.bound_contextvars( @@ -202,8 +200,6 @@ def evaluate_pipeline( ------- The output DataFrame and metadata collector. 
""" - assert config_options.executor.runtime == "rapidsmpf", "Runtime must be rapidsmpf" - _original_mr: Any = None use_stream_pool = False if rmpf_context is not None: diff --git a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/core.py b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/core.py index 7bc8dabddec..26ad95198f6 100644 --- a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/core.py +++ b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/core.py @@ -436,7 +436,7 @@ def execute_ir_on_rank( _RESERVED_EXECUTOR_KEYS: frozenset[str] = frozenset( - {"runtime", "cluster", "spmd_context", "ray_context", "dask_context"} + {"cluster", "spmd_context", "ray_context", "dask_context"} ) _RESERVED_ENGINE_KEYS: frozenset[str] = frozenset({"memory_resource", "executor"}) diff --git a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/dask.py b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/dask.py index 49810e998fd..b4300346132 100644 --- a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/dask.py +++ b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/dask.py @@ -688,7 +688,6 @@ def __init__( nranks=nranks, executor_options={ **executor_options, - "runtime": "rapidsmpf", "cluster": "dask", "dask_context": dask_ctx, }, @@ -736,7 +735,6 @@ def _reset( nranks=self._nranks, executor_options={ **executor_options, - "runtime": "rapidsmpf", "cluster": "dask", "dask_context": ctx, }, diff --git a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/options.py b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/options.py index d8464aa7426..c7650bff513 100644 --- a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/options.py +++ b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/options.py @@ -234,11 +234,6 @@ class StreamingOptions: Env: ``CUDF_POLARS__EXECUTOR__DYNAMIC_PLANNING``. Default: enabled. Category: executor. 
- unique_fraction - Per-column uniqueness estimate (0-1). Defaults to ``1.0``. - Env: ``CUDF_POLARS__EXECUTOR__UNIQUE_FRACTION``. - Default: ``{}``. - Category: executor. sink_to_directory Whether multi-partition sink operations should write to a directory rather than a single file. The ``spmd``/``ray``/``dask`` engines @@ -332,9 +327,6 @@ class StreamingOptions: dynamic_planning: dict[str, Any] | DynamicPlanningOptions | None | Unspecified = ( _opt("executor") ) - unique_fraction: dict[str, float] | Unspecified = _opt( - "executor", "CUDF_POLARS__EXECUTOR__UNIQUE_FRACTION", json.loads - ) sink_to_directory: bool | Unspecified = _opt( "executor", "CUDF_POLARS__EXECUTOR__SINK_TO_DIRECTORY", parse_boolean ) @@ -515,7 +507,6 @@ def _get(attr: str) -> Any: broadcast_join_limit=_get("broadcast_join_limit"), target_partition_size=target_partition_size, dynamic_planning=dynamic_planning, - unique_fraction=_get("unique_fraction"), raise_on_fail=_get("raise_on_fail"), parquet_options=_get("parquet_options"), memory_resource_config=_get("memory_resource_config"), @@ -711,15 +702,6 @@ def _add_cli_args(parser: argparse.ArgumentParser) -> None: Enable dynamic planning. Use --no-dynamic-planning to disable. Env: CUDF_POLARS__EXECUTOR__DYNAMIC_PLANNING. Built-in default: enabled."""), ) - g.add_argument( - "--unique-fraction", - dest="unique_fraction", - default=None, - type=json.loads, - help=textwrap.dedent("""\ - Per-column uniqueness estimate as a JSON object (e.g. '{"col": 0.5}'). - Env: CUDF_POLARS__EXECUTOR__UNIQUE_FRACTION. 
Built-in default: {}."""), - ) g.add_argument( "--stream-policy", dest="stream_policy", diff --git a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/ray.py b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/ray.py index 1ba92de3e49..efbb1db9ad4 100644 --- a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/ray.py +++ b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/ray.py @@ -90,13 +90,9 @@ def evaluate_pipeline_ray_mode( Raises ------ - RuntimeError - If the configured executor runtime is not ``"rapidsmpf"``. RuntimeError If ``config_options.executor.ray_context`` is not set. """ - if config_options.executor.runtime != "rapidsmpf": - raise RuntimeError("Runtime must be rapidsmpf") if config_options.executor.ray_context is None: raise RuntimeError("ray_context must be set when cluster='ray'") rank_actors = config_options.executor.ray_context.rank_actors @@ -586,7 +582,6 @@ def __init__( nranks=nranks, executor_options={ **executor_options, - "runtime": "rapidsmpf", "cluster": "ray", "ray_context": RayContext(rank_actors), }, @@ -641,7 +636,6 @@ def _reset( nranks=len(self._rank_actors), executor_options={ **executor_options, - "runtime": "rapidsmpf", "cluster": "ray", "ray_context": RayContext(self._rank_actors), }, diff --git a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/spmd.py b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/spmd.py index 65e3eb8b1e7..7e1bde808cd 100644 --- a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/spmd.py +++ b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/frontend/spmd.py @@ -23,12 +23,11 @@ from rapidsmpf.statistics import Statistics from rapidsmpf.streaming.core.context import Context -import polars as pl - import pylibcudf as plc import rmm.mr from pylibcudf.contiguous_split import pack +from cudf_polars.containers import DataFrame, DataType from cudf_polars.experimental.rapidsmpf.collectives.common import 
reserve_op_id from cudf_polars.experimental.rapidsmpf.frontend.core import ( ClusterInfo, @@ -53,6 +52,8 @@ from rapidsmpf.config import Options from rapidsmpf.streaming.cudf.channel_metadata import ChannelMetadata + import polars as pl + from cudf_polars.dsl.ir import IR from cudf_polars.experimental.parallel import ConfigOptions from cudf_polars.experimental.rapidsmpf.frontend.core import T @@ -98,8 +99,6 @@ def evaluate_pipeline_spmd_mode( The concatenated output DataFrame and, if ``collect_metadata`` is True, the list of channel metadata objects; otherwise ``None``. """ - if config_options.executor.runtime != "rapidsmpf": - raise RuntimeError("Runtime must be rapidsmpf") if config_options.executor.spmd_context is None: raise RuntimeError("spmd_context must be set for SPMD mode") comm = config_options.executor.spmd_context.comm @@ -155,8 +154,9 @@ def allgather_polars_dataframe( ctx = engine.context stream = ctx.get_stream_from_pool() col_names = local_df.columns + dtypes = [DataType(dtype) for dtype in local_df.dtypes] - plc_table = plc.Table.from_arrow(local_df.to_arrow()) + plc_table = plc.Table.from_arrow(local_df, stream=stream) packed_data = PackedData.from_cudf_packed_columns( pack(plc_table, stream), @@ -176,9 +176,12 @@ def allgather_polars_dataframe( plc_result = unpack_and_concat(results, stream, ctx.br()) # pylibcudf Table -> pl.DataFrame (restore column names) - ret = pl.from_arrow(plc_result.to_arrow(col_names)) - assert isinstance(ret, pl.DataFrame) - return ret + return DataFrame.from_table( + plc_result, + col_names, + dtypes, + stream, + ).to_polars() class SPMDEngine(StreamingEngine): @@ -389,7 +392,6 @@ def __init__( nranks=comm.nranks, executor_options={ **executor_options, - "runtime": "rapidsmpf", "cluster": "spmd", "spmd_context": SPMDContext( comm=comm, context=ctx, py_executor=self._py_executor @@ -494,7 +496,6 @@ def _reset( nranks=self._comm.nranks, executor_options={ **executor_options, - "runtime": "rapidsmpf", "cluster": "spmd", 
"spmd_context": SPMDContext( comm=self._comm, diff --git a/python/cudf_polars/cudf_polars/experimental/repartition.py b/python/cudf_polars/cudf_polars/experimental/repartition.py index 92d89a5f44c..84c39d930ca 100644 --- a/python/cudf_polars/cudf_polars/experimental/repartition.py +++ b/python/cudf_polars/cudf_polars/experimental/repartition.py @@ -4,20 +4,11 @@ from __future__ import annotations -import itertools -from functools import partial -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING from cudf_polars.dsl.ir import IR -from cudf_polars.experimental.base import get_key_name -from cudf_polars.experimental.dispatch import generate_ir_tasks -from cudf_polars.experimental.utils import _concat if TYPE_CHECKING: - from collections.abc import MutableMapping - - from cudf_polars.dsl.ir import IRExecutionContext - from cudf_polars.experimental.parallel import PartitionInfo from cudf_polars.typing import Schema @@ -43,35 +34,3 @@ def __init__(self, schema: Schema, df: IR): self.schema = schema self._non_child_args = () self.children = (df,) - - -@generate_ir_tasks.register(Repartition) -def _( - ir: Repartition, - partition_info: MutableMapping[IR, PartitionInfo], - context: IRExecutionContext, -) -> MutableMapping[Any, Any]: - # Repartition an IR node. - # Only supports rapartitioning to fewer (for now). - - (child,) = ir.children - count_in = partition_info[child].count - count_out = partition_info[ir].count - - if count_out > count_in: # pragma: no cover - raise NotImplementedError( - f"Repartition {count_in} -> {count_out} not supported." - ) - - key_name = get_key_name(ir) - n, remainder = divmod(count_in, count_out) - # Spread remainder evenly over the partitions. 
- offsets = [0, *itertools.accumulate(n + (i < remainder) for i in range(count_out))] - child_keys = tuple(partition_info[child].keys(child)) - return { - (key_name, i): ( - partial(_concat, context=context), - *child_keys[offsets[i] : offsets[i + 1]], - ) - for i in range(count_out) - } diff --git a/python/cudf_polars/cudf_polars/experimental/scheduler.py b/python/cudf_polars/cudf_polars/experimental/scheduler.py deleted file mode 100644 index 97eae6ab378..00000000000 --- a/python/cudf_polars/cudf_polars/experimental/scheduler.py +++ /dev/null @@ -1,153 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. -# SPDX-License-Identifier: Apache-2.0 -"""Synchronous task scheduler.""" - -from __future__ import annotations - -from collections import Counter -from collections.abc import MutableMapping -from itertools import chain -from typing import TYPE_CHECKING, Any, TypeVar, Unpack - -if TYPE_CHECKING: - from collections.abc import Mapping - from typing import TypeAlias - - -Key: TypeAlias = str | tuple[str, Unpack[tuple[int, ...]]] -Graph: TypeAlias = MutableMapping[Key, Any] -T_ = TypeVar("T_") - - -# NOTE: This is a slimmed-down version of the single-threaded -# (synchronous) scheduler in `dask.core`. -# -# Key Differences: -# * We do not allow a task to contain a list of key names. -# Keys must be distinct elements of the task. -# * We do not support nested tasks. 
- - -def istask(x: Any) -> bool: - """Check if x is a callable task.""" - return isinstance(x, tuple) and bool(x) and callable(x[0]) - - -def is_hashable(x: Any) -> bool: - """Check if x is hashable.""" - try: - hash(x) - except BaseException: - return False - else: - return True - - -def _execute_task(arg: Any, cache: Mapping) -> Any: - """Execute a compute task.""" - if istask(arg): - return arg[0](*(_execute_task(a, cache) for a in arg[1:])) - elif is_hashable(arg): - return cache.get(arg, arg) - else: - return arg - - -def required_keys(key: Key, graph: Graph) -> list[Key]: - """ - Return the dependencies to extract a key from the graph. - - Parameters - ---------- - key - Root key we want to extract. - graph - The full task graph. - - Returns - ------- - List of other keys needed to extract ``key``. - """ - maybe_task = graph[key] - return [ - k - for k in ( - maybe_task[1:] - if istask(maybe_task) - else [maybe_task] # maybe_task might be a key - ) - if is_hashable(k) and k in graph - ] - - -def toposort(graph: Graph, dependencies: Mapping[Key, list[Key]]) -> list[Key]: - """Return a list of task keys sorted in topological order.""" - # Stack-based depth-first search traversal. 
This is based on Tarjan's - # algorithm for strongly-connected components - # (https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm) - ordered: list[Key] = [] - completed: set[Key] = set() - - for key in graph: - if key in completed: - continue - nodes = [key] - while nodes: - # Keep current node on the stack until all descendants are visited - current = nodes[-1] - if current in completed: # pragma: no cover - # Already fully traversed descendants of current - nodes.pop() - continue - - # Add direct descendants of current to nodes stack - next_nodes = set(dependencies[current]) - completed - if next_nodes: - nodes.extend(next_nodes) - else: - # Current has no more descendants to explore - ordered.append(current) - completed.add(current) - nodes.pop() - - return ordered - - -def synchronous_scheduler( - graph: Graph, - key: Key, - *, - cache: MutableMapping | None = None, -) -> Any: - """ - Execute the task graph for a given key. - - Parameters - ---------- - graph - The task graph to execute. - key - The final output key to extract from the graph. - cache - Intermediate-data cache. - - Returns - ------- - Executed task-graph result for ``key``. 
- """ - if key not in graph: # pragma: no cover - raise KeyError(f"{key} is not a key in the graph") - if cache is None: - cache = {} - - dependencies = {k: required_keys(k, graph) for k in graph} - refcount = Counter(chain.from_iterable(dependencies.values())) - - for k in toposort(graph, dependencies): - cache[k] = _execute_task(graph[k], cache) - for dep in dependencies[k]: - refcount[dep] -= 1 - if refcount[dep] == 0 and dep != key: - del cache[dep] - - return cache[key] diff --git a/python/cudf_polars/cudf_polars/experimental/shuffle.py b/python/cudf_polars/cudf_polars/experimental/shuffle.py index 8e24dd83fe6..9381126775f 100644 --- a/python/cudf_polars/cudf_polars/experimental/shuffle.py +++ b/python/cudf_polars/cudf_polars/experimental/shuffle.py @@ -4,112 +4,22 @@ from __future__ import annotations -import operator -from functools import partial -from typing import TYPE_CHECKING, Any, Concatenate, TypeVar, TypedDict +from typing import TYPE_CHECKING -import pylibcudf as plc -from rmm.pylibrmm.stream import DEFAULT_STREAM - -from cudf_polars.containers import DataFrame -from cudf_polars.dsl.expr import Col from cudf_polars.dsl.ir import IR -from cudf_polars.dsl.tracing import log_do_evaluate, nvtx_annotate_cudf_polars -from cudf_polars.experimental.base import get_key_name -from cudf_polars.experimental.dispatch import generate_ir_tasks, lower_ir_node -from cudf_polars.experimental.utils import _concat, _dynamic_planning_on -from cudf_polars.utils.cuda_stream import get_dask_cuda_stream +from cudf_polars.dsl.tracing import log_do_evaluate +from cudf_polars.experimental.dispatch import lower_ir_node +from cudf_polars.experimental.utils import _dynamic_planning_on if TYPE_CHECKING: - from collections.abc import Callable, MutableMapping, Sequence + from collections.abc import MutableMapping - from cudf_polars.containers import DataType + from cudf_polars.containers import DataFrame from cudf_polars.dsl.expr import NamedExpr from cudf_polars.dsl.ir import 
IRExecutionContext from cudf_polars.experimental.dispatch import LowerIRTransformer from cudf_polars.experimental.parallel import PartitionInfo from cudf_polars.typing import Schema - from cudf_polars.utils.config import ShuffleMethod - - -# Supported shuffle methods -_SHUFFLE_METHODS = ("rapidsmpf", "tasks") - - -class ShuffleOptions(TypedDict): - """RapidsMPF shuffling options.""" - - on: Sequence[str] - column_names: Sequence[str] - dtypes: Sequence[DataType] - - -# Experimental rapidsmpf shuffler integration -class RMPFIntegration: # pragma: no cover - """cuDF-Polars protocol for rapidsmpf shuffler.""" - - @staticmethod - @nvtx_annotate_cudf_polars(message="RMPFIntegration.insert_partition") - def insert_partition( - df: DataFrame, - partition_id: int, # Not currently used - partition_count: int, - shuffler: Any, - options: ShuffleOptions, - *other: Any, - ) -> None: - """Add cudf-polars DataFrame chunks to an RMP shuffler.""" - from rapidsmpf.integrations.cudf.partition import partition_and_pack - from rapidsmpf.integrations.single import get_worker_context - - context = get_worker_context() - - on = options["on"] - assert not other, f"Unexpected arguments: {other}" - columns_to_hash = tuple(df.column_names.index(val) for val in on) - packed_inputs = partition_and_pack( - df.table, - columns_to_hash=columns_to_hash, - num_partitions=partition_count, - br=context.br, - stream=DEFAULT_STREAM, - ) - - shuffler.insert_chunks(packed_inputs) - - @staticmethod - @nvtx_annotate_cudf_polars(message="RMPFIntegration.extract_partition") - def extract_partition( - partition_id: int, - shuffler: Any, - options: ShuffleOptions, - ) -> DataFrame: - """Extract a finished partition from the RMP shuffler.""" - from rapidsmpf.integrations.cudf.partition import ( - unpack_and_concat, - unspill_partitions, - ) - from rapidsmpf.integrations.single import get_worker_context - - context = get_worker_context() - - shuffler.wait() - column_names = options["column_names"] - dtypes = 
options["dtypes"] - return DataFrame.from_table( - unpack_and_concat( - unspill_partitions( - shuffler.extract(partition_id), - br=context.br, - allow_overbooking=True, - ), - br=context.br, - stream=DEFAULT_STREAM, - ), - column_names, - dtypes, - get_dask_cuda_stream(), - ) class Shuffle(IR): @@ -118,29 +28,27 @@ class Shuffle(IR): Notes ----- - Only hash-based partitioning is supported (for now). See - `ShuffleSorted` for sorting-based shuffling. + Only hash-based partitioning is supported (for now). """ - __slots__ = ("keys", "shuffle_method") - _non_child = ("schema", "keys", "shuffle_method") - _n_non_child_args = 3 + __slots__ = ("keys",) + _non_child = ( + "schema", + "keys", + ) + _n_non_child_args = 2 keys: tuple[NamedExpr, ...] """Keys to shuffle on.""" - shuffle_method: ShuffleMethod - """Shuffle method to use.""" def __init__( self, schema: Schema, keys: tuple[NamedExpr, ...], - shuffle_method: ShuffleMethod, df: IR, ): self.schema = schema self.keys = keys - self.shuffle_method = shuffle_method - self._non_child_args = (schema, keys, shuffle_method) + self._non_child_args = (schema, keys) self.children = (df,) # the type-ignore is for @@ -153,7 +61,6 @@ def do_evaluate( cls, schema: Schema, keys: tuple[NamedExpr, ...], - shuffle_method: ShuffleMethod, df: DataFrame, *, context: IRExecutionContext, @@ -163,120 +70,6 @@ def do_evaluate( return df -@nvtx_annotate_cudf_polars(message="Shuffle") -def _hash_partition_dataframe( - df: DataFrame, - partition_id: int, # Used only by sorted shuffling - partition_count: int, - options: MutableMapping[str, Any] | None, # No options required - on: tuple[NamedExpr, ...], -) -> dict[int, DataFrame]: - """ - Partition an input DataFrame for hash-based shuffling. - - Parameters - ---------- - df - DataFrame to partition. - partition_id - Partition index (unused for hash partitioning). - partition_count - Total number of output partitions. - options - Options (unused for hash partitioning). 
- on - Expressions used for the hash partitioning. - - Returns - ------- - A dictionary mapping between int partition indices and - DataFrame fragments. - """ - assert not options, f"Expected no options, got: {options}" - - if df.num_rows == 0: - # Fast path for empty DataFrame - return dict.fromkeys(range(partition_count), df) - - # Hash the specified keys to calculate the output - # partition for each row - partition_map = plc.binaryop.binary_operation( - plc.hashing.murmurhash3_x86_32( - DataFrame([expr.evaluate(df) for expr in on], stream=df.stream).table, - stream=df.stream, - ), - plc.Scalar.from_py( - partition_count, plc.DataType(plc.TypeId.UINT32), stream=df.stream - ), - plc.binaryop.BinaryOperator.PYMOD, - plc.types.DataType(plc.types.TypeId.UINT32), - stream=df.stream, - ) - - # Apply partitioning - t, offsets = plc.partitioning.partition( - df.table, - partition_map, - partition_count, - stream=df.stream, - ) - splits = offsets[1:-1] - - # Split and return the partitioned result - return { - i: DataFrame.from_table( - split, - df.column_names, - df.dtypes, - df.stream, - ) - for i, split in enumerate(plc.copying.split(t, splits, stream=df.stream)) - } - - -# When dropping Python 3.10, can use _simple_shuffle_graph[OPT_T](...) 
-OPT_T = TypeVar("OPT_T") - - -def _simple_shuffle_graph( - name_in: str, - name_out: str, - count_in: int, - count_out: int, - _partition_dataframe_func: Callable[ - Concatenate[DataFrame, int, int, OPT_T, ...], - MutableMapping[int, DataFrame], - ], - options: OPT_T, - *other: Any, - context: IRExecutionContext, -) -> MutableMapping[Any, Any]: - """Make a simple all-to-all shuffle graph.""" - split_name = f"split-{name_out}" - inter_name = f"inter-{name_out}" - - graph: MutableMapping[Any, Any] = {} - for part_out in range(count_out): - _concat_list = [] - for part_in in range(count_in): - graph[(split_name, part_in)] = ( - _partition_dataframe_func, - (name_in, part_in), - part_in, - count_out, - options, - *other, - ) - _concat_list.append((inter_name, part_out, part_in)) - graph[_concat_list[-1]] = ( - operator.getitem, - (split_name, part_in), - part_out, - ) - graph[(name_out, part_out)] = (partial(_concat, context=context), *_concat_list) - return graph - - @lower_ir_node.register(Shuffle) def _( ir: Shuffle, rec: LowerIRTransformer @@ -306,47 +99,3 @@ def _( partitioned_on=ir.keys, ) return new_node, pi - - -@generate_ir_tasks.register(Shuffle) -def _( - ir: Shuffle, - partition_info: MutableMapping[IR, PartitionInfo], - context: IRExecutionContext, -) -> MutableMapping[Any, Any]: - # Extract "shuffle_method" configuration - shuffle_method = ir.shuffle_method - - # Try using rapidsmpf shuffler if we have "simple" shuffle - # keys, and the "shuffle_method" config is set to "rapidsmpf-single". 
- _keys: list[Col] - if shuffle_method == "rapidsmpf-single" and len( - _keys := [ne.value for ne in ir.keys if isinstance(ne.value, Col)] - ) == len(ir.keys): # pragma: no cover - from rapidsmpf.integrations.single import rapidsmpf_shuffle_graph - - shuffle_on = [k.name for k in _keys] - - return rapidsmpf_shuffle_graph( - get_key_name(ir.children[0]), - get_key_name(ir), - partition_info[ir.children[0]].count, - partition_info[ir].count, - RMPFIntegration, - { - "on": shuffle_on, - "column_names": list(ir.schema.keys()), - "dtypes": list(ir.schema.values()), - }, - ) - - # Simple task-based fall-back - return partial(_simple_shuffle_graph, context=context)( - get_key_name(ir.children[0]), - get_key_name(ir), - partition_info[ir.children[0]].count, - partition_info[ir].count, - _hash_partition_dataframe, - None, - ir.keys, - ) diff --git a/python/cudf_polars/cudf_polars/experimental/sort.py b/python/cudf_polars/cudf_polars/experimental/sort.py index 6800fb4ab74..fa610324c2d 100644 --- a/python/cudf_polars/cudf_polars/experimental/sort.py +++ b/python/cudf_polars/cudf_polars/experimental/sort.py @@ -4,47 +4,30 @@ from __future__ import annotations -from functools import partial -from typing import TYPE_CHECKING, Any, TypedDict +from typing import TYPE_CHECKING import polars as pl import pylibcudf as plc -from rmm.pylibrmm.stream import DEFAULT_STREAM from cudf_polars.containers import Column, DataFrame, DataType from cudf_polars.dsl.expr import Col -from cudf_polars.dsl.ir import IR, Slice, Sort +from cudf_polars.dsl.ir import Slice, Sort from cudf_polars.dsl.traversal import traversal from cudf_polars.dsl.utils.naming import unique_names -from cudf_polars.experimental.base import PartitionInfo, get_key_name -from cudf_polars.experimental.dispatch import ( - generate_ir_tasks, - lower_ir_node, -) -from cudf_polars.experimental.repartition import Repartition -from cudf_polars.experimental.shuffle import _simple_shuffle_graph +from cudf_polars.experimental.dispatch 
import lower_ir_node from cudf_polars.experimental.utils import ( - _concat, - _fallback_inform, _lower_ir_fallback, ) -from cudf_polars.utils.config import ShuffleMethod -from cudf_polars.utils.cuda_stream import ( - get_dask_cuda_stream, - get_joined_cuda_stream, - join_cuda_streams, -) if TYPE_CHECKING: from collections.abc import MutableMapping, Sequence from rmm.pylibrmm.stream import Stream - from cudf_polars.dsl.expr import NamedExpr - from cudf_polars.dsl.ir import IRExecutionContext + from cudf_polars.dsl.ir import IR + from cudf_polars.experimental.base import PartitionInfo from cudf_polars.experimental.dispatch import LowerIRTransformer - from cudf_polars.typing import Schema def find_sort_splits( @@ -251,248 +234,6 @@ def _get_final_sort_boundaries( ) -def _sort_boundaries_graph( - name_in: str, - by: Sequence[str], - column_order: Sequence[plc.types.Order], - null_order: Sequence[plc.types.NullOrder], - count: int, - context: IRExecutionContext, -) -> tuple[str, MutableMapping[Any, Any]]: - """Graph to get the boundaries from all partitions.""" - local_boundaries_name = f"sort-boundaries_local-{name_in}" - concat_boundaries_name = f"sort-boundaries-concat-{name_in}" - global_boundaries_name = f"sort-boundaries-{name_in}" - graph: MutableMapping[Any, Any] = {} - - _concat_list = [] - for part_id in range(count): - graph[(local_boundaries_name, part_id)] = ( - _select_local_split_candidates, - (name_in, part_id), - by, - count, - part_id, - ) - _concat_list.append((local_boundaries_name, part_id)) - - graph[concat_boundaries_name] = (partial(_concat, context=context), *_concat_list) - graph[global_boundaries_name] = ( - _get_final_sort_boundaries, - concat_boundaries_name, - column_order, - null_order, - count, - ) - return global_boundaries_name, graph - - -class SortedShuffleOptions(TypedDict): - """RapidsMPF shuffling options.""" - - by: Sequence[str] - order: Sequence[plc.types.Order] - null_order: Sequence[plc.types.NullOrder] - column_names: 
Sequence[str] - column_dtypes: Sequence[DataType] - - -# Experimental rapidsmpf shuffler integration -class RMPFIntegrationSortedShuffle: # pragma: no cover - """cuDF-Polars protocol for rapidsmpf shuffler.""" - - @staticmethod - def insert_partition( - df: DataFrame, - partition_id: int, - partition_count: int, - shuffler: Any, - options: SortedShuffleOptions, - sort_boundaries: DataFrame, - ) -> None: - """Add cudf-polars DataFrame chunks to an RMP shuffler.""" - from rapidsmpf.integrations.cudf.partition import split_and_pack - from rapidsmpf.integrations.single import get_worker_context - - context = get_worker_context() - - by = options["by"] - data_streams = [ - df.stream, - sort_boundaries.stream, - ] - stream = get_joined_cuda_stream(get_dask_cuda_stream, upstreams=data_streams) - - splits = find_sort_splits( - df.select(by).table, - sort_boundaries.table, - partition_id, - options["order"], - options["null_order"], - stream=stream, - ) - packed_inputs = split_and_pack( - df.table, - splits=splits, - br=context.br, - stream=stream, - ) - # TODO: figure out handoff with rapidsmpf - # https://github.com/rapidsai/cudf/issues/20337 - shuffler.insert_chunks(packed_inputs) - - join_cuda_streams(downstreams=data_streams, upstreams=[stream]) - - @staticmethod - def extract_partition( - partition_id: int, - shuffler: Any, - options: SortedShuffleOptions, - ) -> DataFrame: - """Extract a finished partition from the RMP shuffler.""" - from rapidsmpf.integrations.cudf.partition import ( - unpack_and_concat, - unspill_partitions, - ) - from rapidsmpf.integrations.single import get_worker_context - - context = get_worker_context() - - shuffler.wait() - column_names = options["column_names"] - column_dtypes = options["column_dtypes"] - - stream = DEFAULT_STREAM - - # TODO: When sorting, this step should finalize with a merge (unless we - # require stability, as cudf merge is not stable). 
- # TODO: figure out handoff with rapidsmpf - # https://github.com/rapidsai/cudf/issues/20337 - return DataFrame.from_table( - unpack_and_concat( - unspill_partitions( - shuffler.extract(partition_id), - br=context.br, - allow_overbooking=True, - ), - br=context.br, - stream=stream, - ), - column_names, - column_dtypes, - stream=stream, - ) - - -def _sort_partition_dataframe( - df: DataFrame, - partition_id: int, # Not currently used - partition_count: int, - options: MutableMapping[str, Any], - sort_boundaries: DataFrame, -) -> MutableMapping[int, DataFrame]: - """ - Partition a sorted DataFrame for shuffling. - - Parameters - ---------- - df - The DataFrame to partition. - partition_id - The partition id of the current partition. - partition_count - The total number of partitions. - options - The sort options ``(by, order, null_order)``. - sort_boundaries - The global sort boundary candidates used to decide where to split. - """ - if df.num_rows == 0: # pragma: no cover - # Fast path for empty DataFrame - return dict.fromkeys(range(partition_count), df) - - stream = get_joined_cuda_stream( - get_dask_cuda_stream, upstreams=(df.stream, sort_boundaries.stream) - ) - - splits = find_sort_splits( - df.select(options["by"]).table, - sort_boundaries.table, - partition_id, - options["order"], - options["null_order"], - stream=stream, - ) - - # Split and return the partitioned result - return { - i: DataFrame.from_table( - split, - df.column_names, - df.dtypes, - stream=df.stream, - ) - for i, split in enumerate(plc.copying.split(df.table, splits, stream=stream)) - } - - -class ShuffleSorted(IR): - """ - Shuffle already locally sorted multi-partition data. - - Shuffling is performed by extracting sort boundary candidates from all partitions, - sharing them all-to-all and then exchanging data accordingly. 
- The sorting information is required to be passed in identically to the already - performed local sort and as of now the final result needs to be sorted again to - merge the partitions. - """ - - __slots__ = ("by", "null_order", "order", "shuffle_method") - _non_child = ("schema", "by", "order", "null_order", "shuffle_method") - _n_non_child_args = 5 - by: tuple[NamedExpr, ...] - """Keys by which the data was sorted.""" - order: tuple[plc.types.Order, ...] - """Sort order if sorted.""" - null_order: tuple[plc.types.NullOrder, ...] - """Null precedence if sorted.""" - shuffle_method: ShuffleMethod - """Shuffle method to use.""" - - def __init__( - self, - schema: Schema, - by: tuple[NamedExpr, ...], - order: tuple[plc.types.Order, ...], - null_order: tuple[plc.types.NullOrder, ...], - shuffle_method: ShuffleMethod, - df: IR, - ): - self.schema = schema - self.by = by - self.order = order - self.null_order = null_order - self.shuffle_method = shuffle_method - self._non_child_args = (schema, by, order, null_order, shuffle_method) - self.children = (df,) - - @classmethod - def do_evaluate( - cls, - schema: Schema, - by: tuple[NamedExpr, ...], - order: tuple[plc.types.Order, ...], - null_order: tuple[plc.types.NullOrder, ...], - shuffle_method: ShuffleMethod, - df: DataFrame, - *, - context: IRExecutionContext, - ) -> DataFrame: # pragma: no cover - """Evaluate and return a dataframe.""" - # Single-partition ShuffleSorted evaluation is a no-op - return df - - def _has_simple_zlice(zlice: tuple[int, int | None] | None) -> bool: """Check if a zlice is a simple top-k/bottom-k operation.""" if zlice is None: @@ -517,26 +258,7 @@ def _( msg="sort currently only supports column names as `by` keys.", ) - config_options = rec.state["config_options"] - executor = config_options.executor - runtime = executor.runtime - - # Special handling for slicing - # (May be a top- or bottom-k operation) - simple_zlice = _has_simple_zlice(ir.zlice) - if simple_zlice and runtime == "tasks": - 
from cudf_polars.experimental.parallel import _lower_ir_pwise - - new_node, partition_info = _lower_ir_pwise(ir, rec) - if partition_info[new_node].count > 1: - # Collapse down to single partition - inter = Repartition(new_node.schema, new_node) - partition_info[inter] = PartitionInfo(count=1) - # Sort reduced partition - new_node = ir.reconstruct([inter]) - partition_info[new_node] = PartitionInfo(count=1) - return new_node, partition_info - elif ir.zlice is not None and not simple_zlice: + if ir.zlice is not None and not _has_simple_zlice(ir.zlice): # Pull "complex" slices out of the Sort node altogether. return rec( Slice( @@ -557,112 +279,6 @@ def _( # Extract child partitioning child, partition_info = rec(ir.children[0]) - # The "rapidsmpf" runtime uses the sort_actor to handle everything else - if runtime == "rapidsmpf": - sort_node = ir.reconstruct([child]) - partition_info[sort_node] = partition_info[child] - return sort_node, partition_info - - # TODO: Remove everything below here when "tasks" is removed. - - # Avoid rapidsmpf shuffle with maintain_order=True (for now) - shuffle_method = ( - ShuffleMethod("tasks") if ir.stable else config_options.executor.shuffle_method - ) - if ( - shuffle_method != config_options.executor.shuffle_method - ): # pragma: no cover; Requires rapidsmpf - _fallback_inform( - f"shuffle_method={config_options.executor.shuffle_method} does not support maintain_order=True. " - f"Falling back to shuffle_method={shuffle_method}.", - config_options, - ) - - if partition_info[child].count == 1: - single_part_node = ir.reconstruct([child]) - partition_info[single_part_node] = partition_info[child] - return single_part_node, partition_info - - local_sort_node = ir.reconstruct([child]) - partition_info[local_sort_node] = partition_info[child] - - shuffle = ShuffleSorted( - ir.schema, - ir.by, - ir.order, - ir.null_order, - shuffle_method, - local_sort_node, - ) - partition_info[shuffle] = partition_info[child] - - # We sort again locally. 
- assert ir.zlice is None # zlice handling would be incorrect without adjustment - final_sort_node = ir.reconstruct([shuffle]) - partition_info[final_sort_node] = partition_info[shuffle] - - return final_sort_node, partition_info - - -@generate_ir_tasks.register(ShuffleSorted) -def _( - ir: ShuffleSorted, - partition_info: MutableMapping[IR, PartitionInfo], - context: IRExecutionContext, -) -> MutableMapping[Any, Any]: - by = [ne.value.name for ne in ir.by if isinstance(ne.value, Col)] - if len(by) != len(ir.by): # pragma: no cover - # We should not reach here as this is checked in the lower_ir_node - raise NotImplementedError("Sorting columns must be column names.") - - (child,) = ir.children - - sort_boundaries_name, graph = _sort_boundaries_graph( - get_key_name(child), - by, - ir.order, - ir.null_order, - partition_info[child].count, - context, - ) - - options = { - "by": by, - "order": ir.order, - "null_order": ir.null_order, - "column_names": list(ir.schema.keys()), - "column_dtypes": list(ir.schema.values()), - } - - # Try using rapidsmpf shuffler if we have "simple" shuffle - # keys, and the "shuffle_method" config is set to "rapidsmpf-single". 
- shuffle_method = ir.shuffle_method - if shuffle_method == "rapidsmpf-single": # pragma: no cover - from rapidsmpf.integrations.single import rapidsmpf_shuffle_graph - - graph.update( - rapidsmpf_shuffle_graph( - get_key_name(child), - get_key_name(ir), - partition_info[child].count, - partition_info[ir].count, - RMPFIntegrationSortedShuffle, - options, - sort_boundaries_name, - ) - ) - return graph - - # Simple task-based fall-back - graph.update( - partial(_simple_shuffle_graph, context=context)( - get_key_name(child), - get_key_name(ir), - partition_info[child].count, - partition_info[ir].count, - _sort_partition_dataframe, - options, - sort_boundaries_name, - ) - ) - return graph + sort_node = ir.reconstruct([child]) + partition_info[sort_node] = partition_info[child] + return sort_node, partition_info diff --git a/python/cudf_polars/cudf_polars/experimental/utils.py b/python/cudf_polars/cudf_polars/experimental/utils.py index 24ce606d41b..848a4d44759 100644 --- a/python/cudf_polars/cudf_polars/experimental/utils.py +++ b/python/cudf_polars/cudf_polars/experimental/utils.py @@ -10,7 +10,7 @@ from itertools import chain from typing import TYPE_CHECKING -from cudf_polars.dsl.expr import Col, Expr, GroupedWindow, UnaryFunction +from cudf_polars.dsl.expr import Col, GroupedWindow, UnaryFunction from cudf_polars.dsl.ir import Union from cudf_polars.dsl.traversal import traversal from cudf_polars.experimental.base import PartitionInfo @@ -49,11 +49,8 @@ def _fallback_inform( def _dynamic_planning_on(config_options: ConfigOptions[StreamingExecutor]) -> bool: - """Check if dynamic planning is enabled for rapidsmpf runtime.""" - return ( - config_options.executor.runtime == "rapidsmpf" - and config_options.executor.dynamic_planning is not None - ) + """Check if dynamic planning is enabled.""" + return config_options.executor.dynamic_planning is not None def _lower_ir_fallback( @@ -68,9 +65,6 @@ def _lower_ir_fallback( from cudf_polars.experimental.repartition import 
Repartition from cudf_polars.experimental.select import _inline_hstack_false - config_options = rec.state["config_options"] - rapidsmpf_engine = config_options.executor.runtime == "rapidsmpf" - # Make sure we avoid mixed-length columns in intermediate TableChunks. ir = _inline_hstack_false(ir) @@ -82,13 +76,10 @@ def _lower_ir_fallback( children = [] inform = False for c in lowered_children: - child = c - if multi_partitioned := partition_info[c].count > 1: + if partition_info[c].count > 1: inform = True - if multi_partitioned or rapidsmpf_engine: - # Fall-back logic - child = Repartition(child.schema, child) - partition_info[child] = PartitionInfo(count=1) + child = Repartition(c.schema, c) + partition_info[child] = PartitionInfo(count=1) children.append(child) if inform and msg: @@ -114,32 +105,6 @@ def _leaf_column_names(expr: Expr) -> tuple[str, ...]: return () -def _get_unique_fractions( - column_names: Sequence[str], - user_unique_fractions: dict[str, float], -) -> dict[str, float]: - """ - Return unique-fraction statistics subset. - - Parameters - ---------- - column_names - The column names to get unique-fractions for. - user_unique_fractions - The user-provided unique-fraction dictionary. - - Returns - ------- - unique_fractions - The final unique-fraction dictionary filtered to column_names. 
- """ - return { - c: max(min(f, 1.0), 0.00001) - for c, f in user_unique_fractions.items() - if c in column_names - } - - def _contains_over(exprs: Sequence[Expr]) -> bool: """Return True if any expression contains a window expression.""" return any(isinstance(e, GroupedWindow) for e in traversal(exprs)) diff --git a/python/cudf_polars/cudf_polars/testing/asserts.py b/python/cudf_polars/cudf_polars/testing/asserts.py index 9f0953cd4df..5611f8c3e70 100644 --- a/python/cudf_polars/cudf_polars/testing/asserts.py +++ b/python/cudf_polars/cudf_polars/testing/asserts.py @@ -30,7 +30,6 @@ # Will be overriden by `conftest.py` with the value from the `--executor` # and `--cluster` command-line arguments DEFAULT_EXECUTOR = "in-memory" -DEFAULT_RUNTIME = "tasks" DEFAULT_CLUSTER = "single" @@ -200,7 +199,6 @@ def get_default_engine( executor = executor or DEFAULT_EXECUTOR if executor == "streaming": executor_options["cluster"] = DEFAULT_CLUSTER - executor_options["runtime"] = DEFAULT_RUNTIME return GPUEngine( raise_on_fail=True, @@ -290,7 +288,8 @@ def assert_collect_raises( if polars_except != (): raise AssertionError(f"CPU execution DID NOT RAISE {polars_except}") - engine = GPUEngine(raise_on_fail=True) + # TODO: https://github.com/rapidsai/cudf/issues/22346 + engine = GPUEngine(executor="in-memory", raise_on_fail=True) try: lazydf.collect(**final_cudf_collect_kwargs, engine=engine) # type: ignore[misc, call-overload] except cudf_except: diff --git a/python/cudf_polars/cudf_polars/testing/inject_gpu_engine.py b/python/cudf_polars/cudf_polars/testing/inject_gpu_engine.py index 6fe2de4d154..7cfb62c414e 100644 --- a/python/cudf_polars/cudf_polars/testing/inject_gpu_engine.py +++ b/python/cudf_polars/cudf_polars/testing/inject_gpu_engine.py @@ -30,6 +30,7 @@ def pytest_addoption(parser: pytest.Parser) -> None: choices=("in-memory", "spmd"), help="Which GPU engine variant to inject globally.", ) + # TODO: We never run with --inject-gpu-engine-blocksize in 
ci/run_cudf_polars_polars_tests.sh. Remove? group.addoption( "--inject-gpu-engine-blocksize", action="store", @@ -134,6 +135,7 @@ def pytest_report_header(config: pytest.Config) -> str: return f"injected GPU engine: {cls.__module__}.{cls.__name__}" +# TODO: This is just Mapping[str, str]? EXPECTED_FAILURES: Mapping[str, str | tuple[str, bool]] = { "tests/unit/io/test_csv.py::test_read_csv_only_loads_selected_columns": "Memory usage won't be correct due to GPU", "tests/unit/io/test_delta.py::test_scan_delta_version": "Need to expose hive partitioning", @@ -305,7 +307,6 @@ def pytest_report_header(config: pytest.Config) -> str: # Generally skip for: # 1) Tests that are too slow with --inject-gpu-engine-blocksize=small due to many small partitions for large data -# 2) Tests that fail during cudf_polars execution and segfaults later due to https://github.com/rapidsai/cudf/issues/22138 STREAMING_ENGINE_TESTS_TO_SKIP: Mapping[str, str] = { "tests/unit/operations/aggregation/test_aggregations.py::test_boolean_aggs": "float difference in std/var in the unit of least precision", "tests/benchmark/test_group_by.py::test_groupby_h2oai_q1": "Too slow with --inject-gpu-engine-blocksize=small", diff --git a/python/cudf_polars/cudf_polars/utils/config.py b/python/cudf_polars/cudf_polars/utils/config.py index a6bbd73929b..7b5fb5c940c 100644 --- a/python/cudf_polars/cudf_polars/utils/config.py +++ b/python/cudf_polars/cudf_polars/utils/config.py @@ -55,9 +55,7 @@ "InMemoryExecutor", "ParquetOptions", "RayContext", - "Runtime", "SPMDContext", - "ShuffleMethod", "StreamingExecutor", "StreamingFallbackMode", ] @@ -112,15 +110,6 @@ def get_total_device_memory() -> int | None: return None -@functools.cache -def rapidsmpf_single_available() -> bool: # pragma: no cover - """Query whether rapidsmpf is available as a single-process shuffle method.""" - try: - return importlib.util.find_spec("rapidsmpf.integrations.single") is not None - except (ImportError, ValueError): - return False - - 
class StreamingFallbackMode(enum.StrEnum): """ How the streaming executor handles operations that don't support multiple partitions. @@ -138,20 +127,6 @@ class StreamingFallbackMode(enum.StrEnum): SILENT = "silent" -class Runtime(enum.StrEnum): - """ - The runtime to use for the streaming executor. - - * ``Runtime.TASKS`` : Use the task-based runtime. - This is the default runtime. - * ``Runtime.RAPIDSMPF`` : Use the coroutine-based streaming runtime (rapidsmpf). - This runtime is experimental. - """ - - TASKS = "tasks" - RAPIDSMPF = "rapidsmpf" - - class Cluster(enum.StrEnum): """ The cluster configuration for the streaming executor. @@ -172,27 +147,6 @@ class Cluster(enum.StrEnum): DASK = "dask" -class ShuffleMethod(enum.StrEnum): - """ - The method to use for shuffling data between workers with the streaming executor. - - * ``ShuffleMethod.TASKS`` : Use the task-based shuffler. - * ``ShuffleMethod.RAPIDSMPF`` : Use the rapidsmpf shuffler. - * ``ShuffleMethod._RAPIDSMPF_SINGLE`` : Use the single-process rapidsmpf shuffler. - - With :class:`cudf_polars.utils.config.StreamingExecutor`, the default of ``None`` - resolves to ``ShuffleMethod.TASKS``. - - The user should **not** specify ``ShuffleMethod._RAPIDSMPF_SINGLE`` directly. - A setting of ``ShuffleMethod.RAPIDSMPF`` will be converted to the single-process - shuffler automatically when using single-GPU execution. - """ - - TASKS = "tasks" - RAPIDSMPF = "rapidsmpf" - _RAPIDSMPF_SINGLE = "rapidsmpf-single" - - T = TypeVar("T") @@ -254,7 +208,7 @@ class ParquetOptions: will also be skipped if ``max_footer_samples`` is 0. use_rapidsmpf_native Whether to use the native rapidsmpf node for parquet reading. - This option is only used when the rapidsmpf runtime is enabled. + This option is only used by the streaming executor. Default is False. 
""" @@ -315,49 +269,32 @@ def __post_init__(self) -> None: # noqa: D105 raise TypeError("use_rapidsmpf_native must be a bool") -def default_target_partition_size(cluster: str, runtime: str) -> int: +@functools.cache +def default_target_partition_size() -> int: """Return the default blocksize.""" if (device_size := get_total_device_memory()) is None: # pragma: no cover # System doesn't have proper "GPU memory". # Fall back to a conservative 1GB default. return 1_000_000_000 - if ( - cluster == "single" - and runtime == "tasks" - and _env_get_int("POLARS_GPU_ENABLE_CUDA_MANAGED_MEMORY", default=1) == 1 - ): - # We can use a larger blocksize when UVM is enabled - blocksize = int(device_size * 0.0625) - else: - # Otherwise, use a conservative default - blocksize = int(device_size * 0.025) + blocksize = int(device_size * 0.025) # Use lower and upper bounds of 1GB and 10GB return min(max(blocksize, 1_000_000_000), 10_000_000_000) -def default_broadcast_join_limit(cluster: str, runtime: str) -> int: +@functools.cache +def default_broadcast_join_limit() -> int: """Return the default broadcast join limit.""" if (device_size := get_total_device_memory()) is None: # pragma: no cover # System doesn't have proper "GPU memory". # We probably want to broadcast in most cases. return 32 - if runtime == "rapidsmpf": - # Target about 12.5% of the device memory when - # default_target_partition_size is used to set the - # target partition size (i.e. 5x the 2.5% default). - return min(5, int(max(1, (device_size * 0.125) // 1e9))) - elif _env_get_int("POLARS_GPU_ENABLE_CUDA_MANAGED_MEMORY", default=1) == 1: - # The "tasks" runtime always runs single-GPU; we can lean on UVM - # to support most broadcast joins. - return 32 - else: - # Extra-conservative default for the "tasks" runtime without UVM. - # We cannot spill outside a rapidsmpf shuffle within this runtime, - # so shuffling is usually preferred. 
- return 2 + # Target about 12.5% of the device memory when + # default_target_partition_size is used to set the + # target partition size (i.e. 5x the 2.5% default). + return min(5, int(max(1, (device_size * 0.125) // 1e9))) @dataclasses.dataclass(frozen=True) @@ -599,17 +536,14 @@ class StreamingExecutor: Parameters ---------- - runtime - The runtime to use for the streaming executor. - ``Runtime.TASKS`` by default. cluster The cluster configuration for the streaming executor. ``Cluster.SINGLE`` by default. * ``Cluster.SINGLE``: Single-GPU execution - * ``Cluster.SPMD``: Multi-GPU SPMD execution (rapidsmpf runtime) - * ``Cluster.RAY``: Multi-GPU Ray execution (rapidsmpf runtime) - * ``Cluster.DASK``: Multi-GPU Dask execution (rapidsmpf runtime) + * ``Cluster.SPMD``: Multi-GPU SPMD execution + * ``Cluster.RAY``: Multi-GPU Ray execution + * ``Cluster.DASK``: Multi-GPU Dask execution fallback_mode How to handle errors when the GPU engine fails to execute a query. @@ -621,13 +555,6 @@ class StreamingExecutor: The maximum number of rows to process per partition. 1_000_000 by default. When the number of rows exceeds this value, the query will be split into multiple partitions and executed in parallel. - unique_fraction - A dictionary mapping column names to floats between 0 and 1 (inclusive - on the right). - - Each factor estimates the fractional number of unique values in the - column. By default, ``1.0`` is used for any column not included in - ``unique_fraction``. target_partition_size Target partition size, in bytes, for IO tasks. This configuration currently controls how large parquet files are split into multiple partitions. 
@@ -639,11 +566,8 @@ class StreamingExecutor: - keyword argument to ``polars.GPUEngine`` - the ``CUDF_POLARS__EXECUTOR__TARGET_PARTITION_SIZE`` environment variable - By default, cudf-polars uses a target partition size that's a fraction - of the device memory, where the fraction depends on the cluster and runtime: - - - rapidsmpf runtime: 1/40th of the device memory - - single cluster and tasks runtime: 1/16th of the device memory + By default, cudf-polars uses a target partition size of 1/40th of the + device memory. The pynvml library is used to query the total device memory on the first visible GPU. If the device size is not available, the default target @@ -651,26 +575,14 @@ class StreamingExecutor: NOTE: If this configuration is changed manually, it is recommended to set `broadcast_join_limit` manually as well. - groupby_n_ary - The factor by which the number of partitions is decreased when performing - a groupby on a partitioned column. For example, if a column has 64 partitions, - it will first be reduced to ``ceil(64 / 32) = 2`` partitions. - - This is useful when the absolute number of partitions is large. broadcast_join_limit The maximum number of partitions to allow for the smaller table in a broadcast join. For example, if the target partition size is 1GB and the broadcast join limit is 5, then the smaller table will be broadcasted - if it is smaller than 5GB (within the "rapidsmpf" runtime) or contains - fewer than 5 partitions (within the "tasks" runtime). The default depends - on the cluster and runtime. - shuffle_method - The method to use for shuffling data between workers. Defaults to - 'tasks' for the single-GPU cluster. + if it is smaller than 5GB. client_device_threshold - Threshold for spilling data from device memory in rapidsmpf. + Threshold for spilling data from device memory. Default is 50% of device memory on the client process. - This argument is only used by the "rapidsmpf" runtime. 
sink_to_directory Whether multi-partition sink operations write to a directory rather than a single file. For the spmd, ray, and dask clusters this is @@ -680,7 +592,7 @@ class StreamingExecutor: Options controlling dynamic shuffle planning. See :class:`~cudf_polars.utils.config.DynamicPlanningOptions` for more. max_io_threads - Maximum number of IO threads for the rapidsmpf runtime. Default is 4. + Maximum number of IO threads. Default is 4. This controls the parallelism of IO operations when reading data. spill_to_pinned_memory Whether RapidsMPF should spill to pinned host memory when available, @@ -688,8 +600,8 @@ class StreamingExecutor: bandwidth and lower latency for device to host transfers compared to regular pageable host memory. num_py_executors - Maximum number of workers for the Python ThreadPoolExecutor used by - the rapidsmpf runtime. Default is 8. + Maximum number of workers for the Python ThreadPoolExecutor. + Default is 8. Notes ----- @@ -700,13 +612,6 @@ class StreamingExecutor: _env_prefix = "CUDF_POLARS__EXECUTOR" name: Literal["streaming"] = dataclasses.field(default="streaming", init=False) - runtime: Runtime = dataclasses.field( - default_factory=_make_default_factory( - f"{_env_prefix}__RUNTIME", - Runtime.__call__, - default=Runtime.TASKS, - ) - ) cluster: Cluster | None = dataclasses.field( default_factory=_make_default_factory( f"{_env_prefix}__CLUSTER", @@ -726,33 +631,16 @@ class StreamingExecutor: f"{_env_prefix}__MAX_ROWS_PER_PARTITION", int, default=1_000_000 ) ) - unique_fraction: dict[str, float] = dataclasses.field( - default_factory=_make_default_factory( - f"{_env_prefix}__UNIQUE_FRACTION", json.loads, default={} - ) - ) target_partition_size: int = dataclasses.field( default_factory=_make_default_factory( f"{_env_prefix}__TARGET_PARTITION_SIZE", int, default=0 ) ) - groupby_n_ary: int = dataclasses.field( - default_factory=_make_default_factory( - f"{_env_prefix}__GROUPBY_N_ARY", int, default=32 - ) - ) broadcast_join_limit: 
int = dataclasses.field( default_factory=_make_default_factory( f"{_env_prefix}__BROADCAST_JOIN_LIMIT", int, default=0 ) ) - shuffle_method: ShuffleMethod = dataclasses.field( - default_factory=_make_default_factory( - f"{_env_prefix}__SHUFFLE_METHOD", - ShuffleMethod.__call__, - default=ShuffleMethod.TASKS, - ) - ) client_device_threshold: float = dataclasses.field( default_factory=_make_default_factory( f"{_env_prefix}__CLIENT_DEVICE_THRESHOLD", float, default=0.5 @@ -786,33 +674,10 @@ class StreamingExecutor: dask_context: DaskContext | None = None def __post_init__(self) -> None: # noqa: D105 - # Check for rapidsmpf runtime - if self.runtime == "rapidsmpf": # pragma: no cover; requires rapidsmpf runtime - if not rapidsmpf_single_available(): - raise ValueError("The rapidsmpf streaming engine requires rapidsmpf.") - object.__setattr__(self, "shuffle_method", "rapidsmpf") - if self.cluster is None: object.__setattr__(self, "cluster", Cluster.SINGLE) assert self.cluster is not None, "Expected cluster to be set." - # Handle shuffle_method defaults for streaming executor - if self.shuffle_method is None: - # Use task-based shuffle by default. - # TODO: Evaluate single-process shuffle by default. - object.__setattr__(self, "shuffle_method", "tasks") - elif self.shuffle_method == "rapidsmpf-single": - # The user should NOT specify "rapidsmpf-single" directly. - raise ValueError("rapidsmpf-single is not a supported shuffle method.") - elif self.shuffle_method == "rapidsmpf": - if self.cluster == "single" and not rapidsmpf_single_available(): - raise ValueError( - "rapidsmpf shuffle method requested, but rapidsmpf is not installed." 
- ) - # Select "rapidsmpf-single" for single-GPU - if self.cluster == "single": - object.__setattr__(self, "shuffle_method", "rapidsmpf-single") - # frozen dataclass, so use object.__setattr__ object.__setattr__( self, "fallback_mode", StreamingFallbackMode(self.fallback_mode) @@ -821,16 +686,15 @@ def __post_init__(self) -> None: # noqa: D105 object.__setattr__( self, "target_partition_size", - default_target_partition_size(self.cluster, self.runtime), + default_target_partition_size(), ) if self.broadcast_join_limit == 0: object.__setattr__( self, "broadcast_join_limit", - default_broadcast_join_limit(self.cluster, self.runtime), + default_broadcast_join_limit(), ) object.__setattr__(self, "cluster", Cluster(self.cluster)) - object.__setattr__(self, "shuffle_method", ShuffleMethod(self.shuffle_method)) # Handle dynamic_planning. # Can be None, dict, or DynamicPlanningOptions @@ -853,12 +717,8 @@ def __post_init__(self) -> None: # noqa: D105 # Type / value check everything else if not isinstance(self.max_rows_per_partition, int): raise TypeError("max_rows_per_partition must be an int") - if not isinstance(self.unique_fraction, dict): - raise TypeError("unique_fraction must be a dict of column name to float") if not isinstance(self.target_partition_size, int): raise TypeError("target_partition_size must be an int") - if not isinstance(self.groupby_n_ary, int): - raise TypeError("groupby_n_ary must be an int") if not isinstance(self.broadcast_join_limit, int): raise TypeError("broadcast_join_limit must be an int") if not isinstance(self.sink_to_directory, bool): @@ -873,10 +733,9 @@ def __post_init__(self) -> None: # noqa: D105 raise TypeError("num_py_executors must be an int") def __hash__(self) -> int: # noqa: D105 - # cardinality factory, a dict, isn't natively hashable. We'll dump it + # dynamic_planning factory, a dataclass, isn't natively hashable. We'll dump it # to json and hash that. 
d = dataclasses.asdict(self) - d["unique_fraction"] = json.dumps(d["unique_fraction"]) d["dynamic_planning"] = json.dumps(d["dynamic_planning"]) return hash(tuple(sorted(d.items()))) @@ -1059,19 +918,6 @@ def from_polars_engine( executor = InMemoryExecutor(**user_executor_options) case "streaming": user_executor_options = user_executor_options.copy() - # Handle the interaction between the default shuffle method, the - # cluster, and whether rapidsmpf is available. - env_shuffle_method = os.environ.get( - "CUDF_POLARS__EXECUTOR__SHUFFLE_METHOD", None - ) - if env_shuffle_method is not None: - shuffle_method_default = ShuffleMethod(env_shuffle_method) - else: - shuffle_method_default = None - - user_executor_options.setdefault( - "shuffle_method", shuffle_method_default - ) # Handle dynamic_planning: check user config, then env var user_dynamic_planning = user_executor_options.get( @@ -1097,7 +943,7 @@ def from_polars_engine( } # Handle "cuda-stream-policy". - # The default will depend on the runtime and executor. + # The default will depend on the executor. user_cuda_stream_policy = engine.config.get( "cuda_stream_policy", None ) or os.environ.get("CUDF_POLARS__CUDA_STREAM_POLICY", None) @@ -1105,24 +951,18 @@ def from_polars_engine( cuda_stream_policy: CUDAStreamPoolConfig | None if user_cuda_stream_policy is None: - if ( - executor.name == "streaming" and executor.runtime == Runtime.RAPIDSMPF - ): # pragma: no cover; requires rapidsmpf runtime - # the rapidsmpf runtime defaults to using a stream pool + if executor.name == "streaming": cuda_stream_policy = CUDAStreamPoolConfig() else: - # everything else defaults to the default stream cuda_stream_policy = None else: cuda_stream_policy = _convert_cuda_stream_policy(user_cuda_stream_policy) - # Pool policy is only supported by the rapidsmpf runtime. 
if isinstance(cuda_stream_policy, CUDAStreamPoolConfig) and ( - (executor.name != "streaming") - or (executor.name == "streaming" and executor.runtime != Runtime.RAPIDSMPF) + executor.name != "streaming" ): raise ValueError( - "A stream pool is only supported by the rapidsmpf runtime." + "A stream pool is only supported by the streaming executor." ) kwargs["cuda_stream_policy"] = cuda_stream_policy diff --git a/python/cudf_polars/cudf_polars/utils/cuda_stream.py b/python/cudf_polars/cudf_polars/utils/cuda_stream.py index c0708d3bea8..22022ee3401 100644 --- a/python/cudf_polars/cudf_polars/utils/cuda_stream.py +++ b/python/cudf_polars/cudf_polars/utils/cuda_stream.py @@ -17,11 +17,6 @@ from rmm.pylibrmm.stream import Stream -def get_dask_cuda_stream() -> Stream: - """Get the default CUDA stream for Dask.""" - return DEFAULT_STREAM - - def get_cuda_stream() -> Stream: """Get the default CUDA stream for the current thread.""" return DEFAULT_STREAM diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml index d48793f0541..47633e42364 100644 --- a/python/cudf_polars/pyproject.toml +++ b/python/cudf_polars/pyproject.toml @@ -26,6 +26,7 @@ dependencies = [ "packaging", "polars>=1.30,<1.39", "pylibcudf==26.6.*,>=0.0.0a0", + "rapidsmpf==26.6.*,>=0.0.0a0", "typing_extensions>=4.0.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
classifiers = [ @@ -47,7 +48,6 @@ test = [ "pytest-cov", "pytest-httpserver", "pytest-xdist", - "rapidsmpf==26.6.*,>=0.0.0a0", "rich", "structlog", "zstandard", diff --git a/python/cudf_polars/tests/conftest.py b/python/cudf_polars/tests/conftest.py index 7f00684638f..b3d83b36d36 100644 --- a/python/cudf_polars/tests/conftest.py +++ b/python/cudf_polars/tests/conftest.py @@ -2,7 +2,6 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations -import importlib.util from typing import TYPE_CHECKING import pytest @@ -54,13 +53,6 @@ def clear_memory_resource_cache(): @pytest.fixture(autouse=True) def _skip_unless_spmd(request: pytest.FixtureRequest) -> None: """Skip tests in SPMD multi-rank mode unless marked with ``pytest.mark.spmd``.""" - # Do not use `pytest.importorskip` here: this fixture is autouse, so an - # import-based skip would skip every test in the suite on environments - # without rapidsmpf (e.g. the coverage CI job), masking real coverage. - # We only want to gate the nranks>1 check on rapidsmpf being available. - if importlib.util.find_spec("rapidsmpf") is None: - return - from rapidsmpf.bootstrap import get_nranks, is_running_with_rrun if ( @@ -79,7 +71,6 @@ def streaming_engines() -> Generator[StreamingEngines, None, None]: name to a single shared engine instance, which is reused across the entire test session. """ - pytest.importorskip("rapidsmpf") from rapidsmpf import bootstrap from rapidsmpf.communicator.single import new_communicator as single_communicator from rapidsmpf.config import Options, get_environment_variables @@ -228,7 +219,8 @@ def engine_raise_on_fail() -> pl.GPUEngine: from ``.collect()``. Uses the in-memory executor so errors are not wrapped by a streaming task group. 
     """
-    return pl.GPUEngine(raise_on_fail=True)
+    # TODO: We should be testing with all supported engine variants
+    return pl.GPUEngine(executor="in-memory", raise_on_fail=True)


 def pytest_addoption(parser):
@@ -240,14 +232,6 @@ def pytest_addoption(parser):
         help="Executor to use for GPUEngine.",
     )

-    parser.addoption(
-        "--runtime",
-        action="store",
-        default="tasks",
-        choices=("tasks", "rapidsmpf"),
-        help="Runtime to use for the 'streaming' executor.",
-    )
-
     parser.addoption(
         "--cluster",
         action="store",
@@ -278,17 +262,7 @@ def pytest_configure(config):
     # apply globally rather than per-module.
     config.addinivalue_line("filterwarnings", "ignore::ResourceWarning")

-    if config.getoption("--runtime") == "rapidsmpf":
-        if config.getoption("--executor") == "in-memory":
-            raise pytest.UsageError("Rapidsmpf runtime requires --executor='streaming'")
-
-        if importlib.util.find_spec("rapidsmpf") is None:
-            raise pytest.UsageError(
-                "Rapidsmpf runtime requires the 'rapidsmpf' package"
-            )
-
     cudf_polars.testing.asserts.DEFAULT_EXECUTOR = config.getoption("--executor")
-    cudf_polars.testing.asserts.DEFAULT_RUNTIME = config.getoption("--runtime")
     cudf_polars.testing.asserts.DEFAULT_CLUSTER = config.getoption("--cluster")
diff --git a/python/cudf_polars/tests/experimental/test_dask.py b/python/cudf_polars/tests/experimental/test_dask.py
index 5ccdde864ef..93ef4318490 100644
--- a/python/cudf_polars/tests/experimental/test_dask.py
+++ b/python/cudf_polars/tests/experimental/test_dask.py
@@ -64,7 +64,6 @@ def test_yields_engine(engine: DaskEngine) -> None:
 def test_executor_options_forwarded(engine: DaskEngine) -> None:
     """Reserved executor_options keys are injected into the engine config."""
     opts = engine.config["executor_options"]
-    assert opts["runtime"] == "rapidsmpf"
     assert opts["cluster"] == "dask"
     assert isinstance(opts["dask_context"], DaskContext)

@@ -196,7 +195,6 @@ def test_reset_updates_executor_options(reset_engine: DaskEngine) -> None:
     opts =
reset_engine.config["executor_options"] assert opts["max_rows_per_partition"] == 42 # Reserved keys are still injected by ``_reset``. - assert opts["runtime"] == "rapidsmpf" assert opts["cluster"] == "dask" assert isinstance(opts["dask_context"], DaskContext) diff --git a/python/cudf_polars/tests/experimental/test_explain.py b/python/cudf_polars/tests/experimental/test_explain.py index fecd4ba4d03..7f19e318778 100644 --- a/python/cudf_polars/tests/experimental/test_explain.py +++ b/python/cudf_polars/tests/experimental/test_explain.py @@ -540,8 +540,7 @@ def test_scan_properties(tmp_path: Path, predicate: pl.Expr | None): engine = pl.GPUEngine(executor="streaming", raise_on_fail=True) dag = serialize_query(q, engine) - # walk Union -> Scan - node = dag.nodes[dag.nodes[dag.roots[0]].children[0]] + node = dag.nodes[dag.roots[0]] assert node.type == "Scan" assert node.properties == expected_properties @@ -673,7 +672,6 @@ def test_dynamic_planning_adds_repartition(df, op): executor="streaming", raise_on_fail=True, executor_options={ - "runtime": "rapidsmpf", "dynamic_planning": {}, "max_rows_per_partition": 1_000_000, }, diff --git a/python/cudf_polars/tests/experimental/test_groupby.py b/python/cudf_polars/tests/experimental/test_groupby.py index 8d6ac5927e9..03d87fe23e9 100644 --- a/python/cudf_polars/tests/experimental/test_groupby.py +++ b/python/cudf_polars/tests/experimental/test_groupby.py @@ -270,10 +270,7 @@ def test_groupby_literal_key(df, streaming_engine): @pytest.mark.parametrize("keys", [("y",), ("y", "z")]) def test_groupby_agg_config_options(df, op, keys, streaming_engine_factory): streaming_engine = streaming_engine_factory( - StreamingOptions( - max_rows_per_partition=4, - unique_fraction={"z": 0.5}, - ), + StreamingOptions(max_rows_per_partition=4), ) agg = getattr(pl.col("x"), op)() if op in ("sum", "mean"): diff --git a/python/cudf_polars/tests/experimental/test_hstack.py b/python/cudf_polars/tests/experimental/test_hstack.py index 
9bbb4b7aa33..0c21678f7e2 100644 --- a/python/cudf_polars/tests/experimental/test_hstack.py +++ b/python/cudf_polars/tests/experimental/test_hstack.py @@ -20,7 +20,6 @@ from cudf_polars.experimental.statistics import collect_statistics from cudf_polars.testing.asserts import ( DEFAULT_CLUSTER, - DEFAULT_RUNTIME, assert_gpu_result_equal, ) from cudf_polars.utils.config import ConfigOptions @@ -34,7 +33,6 @@ def engine(): executor_options={ "max_rows_per_partition": 3, "cluster": DEFAULT_CLUSTER, - "runtime": DEFAULT_RUNTIME, }, ) diff --git a/python/cudf_polars/tests/experimental/test_options.py b/python/cudf_polars/tests/experimental/test_options.py index 291cbda7589..eb64cb97ed8 100644 --- a/python/cudf_polars/tests/experimental/test_options.py +++ b/python/cudf_polars/tests/experimental/test_options.py @@ -66,11 +66,6 @@ def test_executor_options_includes_set_fields() -> None: assert "log" not in result -def test_executor_options_unique_fraction() -> None: - result = StreamingOptions(unique_fraction={"col_a": 0.5}).to_executor_options() - assert result["unique_fraction"] == {"col_a": 0.5} - - def test_executor_options_num_py_executors() -> None: result = StreamingOptions(num_py_executors=4).to_executor_options() assert result["num_py_executors"] == 4 diff --git a/python/cudf_polars/tests/experimental/test_parallel.py b/python/cudf_polars/tests/experimental/test_parallel.py index 42365a113e2..67fc372e2e4 100644 --- a/python/cudf_polars/tests/experimental/test_parallel.py +++ b/python/cudf_polars/tests/experimental/test_parallel.py @@ -12,13 +12,9 @@ from polars.testing import assert_frame_equal from cudf_polars import Translator -from cudf_polars.dsl.expressions.base import Col, NamedExpr from cudf_polars.dsl.traversal import traversal -from cudf_polars.experimental.parallel import lower_ir_graph from cudf_polars.experimental.rapidsmpf.frontend.options import StreamingOptions -from cudf_polars.experimental.statistics import collect_statistics from 
cudf_polars.testing.asserts import assert_gpu_result_equal -from cudf_polars.utils.config import ConfigOptions @pytest.mark.parametrize("column", ["a", "b"]) @@ -134,40 +130,3 @@ def test_pickle_conditional_join_args(): ir = Translator(q._ldf.visit(), GPUEngine()).translate_ir() for node in traversal([ir]): pickle.loads(pickle.dumps(node._non_child_args)) - - -def test_preserve_partitioning(streaming_engine_factory): - streaming_engine = streaming_engine_factory( - StreamingOptions( - max_rows_per_partition=2, - broadcast_join_limit=2, - unique_fraction={"a": 1.0}, - ), - ) - left = pl.LazyFrame({"a": [1, 2, 3, 4] * 5, "b": range(20)}) - right = pl.LazyFrame({"a": [3, 4, 5, 6, 7] * 4, "c": range(20)}) - q = ( - left.join(right, on="a") - .filter(pl.col("a") == 2) - .group_by(pl.col("a")) - .mean() - .select(pl.col("a"), pl.col("c")) - ) - _engine = pl.GPUEngine( - raise_on_fail=True, - executor="streaming", - executor_options={ - "max_rows_per_partition": 2, - "broadcast_join_limit": 2, - "unique_fraction": {"a": 1.0}, - }, - ) - config_options = ConfigOptions.from_polars_engine(_engine) - ir = Translator(q._ldf.visit(), _engine).translate_ir() - ir, partition_info = lower_ir_graph( - ir, config_options, collect_statistics(ir, config_options) - ) - expect_dtype = ir.schema["a"] - expect_expr = (NamedExpr("a", Col(expect_dtype, "a")),) - assert partition_info[ir].partitioned_on == expect_expr - assert_gpu_result_equal(q, engine=streaming_engine) diff --git a/python/cudf_polars/tests/experimental/test_ray.py b/python/cudf_polars/tests/experimental/test_ray.py index ded4903c594..f62c3e3b831 100644 --- a/python/cudf_polars/tests/experimental/test_ray.py +++ b/python/cudf_polars/tests/experimental/test_ray.py @@ -53,7 +53,7 @@ def engine() -> Iterator[RayEngine]: def test_reserved_executor_keys() -> None: """executor_options rejects reserved keys.""" - for key in ("runtime", "cluster", "spmd_context", "ray_context"): + for key in ("cluster", "spmd_context", 
"ray_context"): with pytest.raises(TypeError, match="reserved"): RayEngine(executor_options={key: "anything"}) @@ -109,7 +109,6 @@ def test_executor_options_forwarded( ) -> None: """Reserved executor_options keys are injected into the engine config.""" opts = engine.config["executor_options"] - assert opts["runtime"] == "rapidsmpf" assert opts["cluster"] == "ray" assert isinstance(opts["ray_context"], RayContext) assert engine.rank_actors == opts["ray_context"].rank_actors @@ -258,7 +257,6 @@ def test_reset_updates_executor_options(reset_engine: RayEngine) -> None: opts = reset_engine.config["executor_options"] assert opts["max_rows_per_partition"] == 42 # Reserved keys are still injected by ``_reset``. - assert opts["runtime"] == "rapidsmpf" assert opts["cluster"] == "ray" assert isinstance(opts["ray_context"], RayContext) assert opts["ray_context"].rank_actors == reset_engine.rank_actors diff --git a/python/cudf_polars/tests/experimental/test_sort.py b/python/cudf_polars/tests/experimental/test_sort.py index 194686acf6b..f0abf5caade 100644 --- a/python/cudf_polars/tests/experimental/test_sort.py +++ b/python/cudf_polars/tests/experimental/test_sort.py @@ -9,7 +9,6 @@ from cudf_polars.testing.asserts import ( DEFAULT_CLUSTER, - DEFAULT_RUNTIME, assert_gpu_result_equal, ) @@ -22,7 +21,6 @@ def engine(): executor_options={ "max_rows_per_partition": 3, "cluster": DEFAULT_CLUSTER, - "runtime": DEFAULT_RUNTIME, "fallback_mode": "raise", }, ) @@ -36,7 +34,6 @@ def engine_large(): executor_options={ "max_rows_per_partition": 2_100, "cluster": DEFAULT_CLUSTER, - "runtime": DEFAULT_RUNTIME, "fallback_mode": "raise", }, ) @@ -139,7 +136,6 @@ def test_sort_after_sparse_join(): executor="streaming", executor_options={ "cluster": DEFAULT_CLUSTER, - "runtime": DEFAULT_RUNTIME, "max_rows_per_partition": 4, }, ) diff --git a/python/cudf_polars/tests/experimental/test_spmd.py b/python/cudf_polars/tests/experimental/test_spmd.py index 9fef0e00350..96ec5eab932 100644 --- 
a/python/cudf_polars/tests/experimental/test_spmd.py +++ b/python/cudf_polars/tests/experimental/test_spmd.py @@ -66,7 +66,7 @@ def test_single_communicator_outside_rrun() -> None: def test_reserved_keys() -> None: """executor_options rejects reserved keys.""" - for key in ("runtime", "cluster", "spmd_context"): + for key in ("cluster", "spmd_context"): with ( pytest.raises(TypeError, match="reserved"), SPMDEngine(executor_options={key: "anything"}), @@ -320,7 +320,6 @@ def test_reset_updates_executor_options(comm: Communicator) -> None: opts = engine.config["executor_options"] assert opts["max_rows_per_partition"] == 42 # Reserved keys are still injected by ``_reset``. - assert opts["runtime"] == "rapidsmpf" assert opts["cluster"] == "spmd" assert isinstance(opts["spmd_context"], SPMDContext) diff --git a/python/cudf_polars/tests/experimental/test_unique.py b/python/cudf_polars/tests/experimental/test_unique.py index 49d2b580300..6bb30624cb6 100644 --- a/python/cudf_polars/tests/experimental/test_unique.py +++ b/python/cudf_polars/tests/experimental/test_unique.py @@ -34,12 +34,9 @@ def df(): @pytest.mark.parametrize("subset", [None, ("y",), ("y", "z")]) @pytest.mark.parametrize("keep", ["first", "last", "any", "none"]) @pytest.mark.parametrize("maintain_order", [True, False]) -@pytest.mark.parametrize("cardinality", [{}, {"y": 0.7}]) -def test_unique( - df, streaming_engine_factory, keep, subset, maintain_order, cardinality -): +def test_unique(df, streaming_engine_factory, keep, subset, maintain_order): engine = streaming_engine_factory( - StreamingOptions(unique_fraction=cardinality, fallback_mode="warn"), + StreamingOptions(fallback_mode="warn"), ) q = df.unique(subset=subset, keep=keep, maintain_order=maintain_order) check_row_order = maintain_order @@ -50,40 +47,16 @@ def test_unique( assert_gpu_result_equal(q, engine=engine, check_row_order=check_row_order) -def test_unique_fallback(df, streaming_engine_factory): - engine = streaming_engine_factory( - 
StreamingOptions( - unique_fraction={"y": 1.0}, - fallback_mode="raise", - dynamic_planning=None, - ), - ) - q = df.unique(keep="first", maintain_order=True) - with pytest.raises( - NotImplementedError, - match="Unsupported unique options", - ): - assert_gpu_result_equal(q, engine=engine) - - @pytest.mark.parametrize("maintain_order", [True, False]) -@pytest.mark.parametrize("cardinality", [{}, {"y": 0.5}]) -def test_unique_select(df, streaming_engine_factory, maintain_order, cardinality): +def test_unique_select(df, streaming_engine_factory, maintain_order): engine = streaming_engine_factory( StreamingOptions( max_rows_per_partition=4, - unique_fraction=cardinality, fallback_mode="warn", ), ) q = df.select(pl.col("y").unique(maintain_order=maintain_order)) - if cardinality == {"y": 0.5} and maintain_order: - with pytest.warns( - UserWarning, match="Unsupported unique options for multiple partitions." - ): - assert_gpu_result_equal(q, engine=engine, check_row_order=False) - else: - assert_gpu_result_equal(q, engine=engine, check_row_order=False) + assert_gpu_result_equal(q, engine=engine, check_row_order=False) @pytest.mark.parametrize("keep", ["first", "last", "any"]) diff --git a/python/cudf_polars/tests/test_config.py b/python/cudf_polars/tests/test_config.py index 3cd66bc527d..6004c5eef40 100644 --- a/python/cudf_polars/tests/test_config.py +++ b/python/cudf_polars/tests/test_config.py @@ -4,7 +4,7 @@ from __future__ import annotations import sys -from typing import Any, cast +from typing import cast import pytest @@ -35,20 +35,7 @@ StreamingExecutor, _default_cuda_stream_policy, ) -from cudf_polars.utils.cuda_stream import ( - get_cuda_stream, - get_dask_cuda_stream, -) - - -@pytest.fixture(params=[False, True], ids=["norapidsmpf.single", "rapidsmpf.single"]) -def rapidsmpf_single_available(request, monkeypatch): - monkeypatch.setattr( - cudf_polars.utils.config, - "rapidsmpf_single_available", - lambda: request.param, - ) - return request.param +from 
cudf_polars.utils.cuda_stream import get_cuda_stream def test_polars_verbose_warns(monkeypatch): @@ -232,47 +219,6 @@ def test_parquet_options_from_none() -> None: assert config.parquet_options.chunked is True -def test_validate_streaming_executor_shuffle_method( - *, rapidsmpf_single_available: bool -) -> None: - config = ConfigOptions.from_polars_engine( - pl.GPUEngine( - executor="streaming", - executor_options={"shuffle_method": "tasks"}, - ) - ) - assert config.executor.name == "streaming" - assert config.executor.shuffle_method == "tasks" - - # rapidsmpf with single cluster - engine = pl.GPUEngine( - executor="streaming", - executor_options={"shuffle_method": "rapidsmpf", "cluster": "single"}, - ) - - if rapidsmpf_single_available: - config = ConfigOptions.from_polars_engine(engine) - assert config.executor.name == "streaming" - assert config.executor.shuffle_method == "rapidsmpf-single" - else: - with pytest.raises(ValueError, match="rapidsmpf is not installed"): - ConfigOptions.from_polars_engine(engine) - - -def test_join_rapidsmpf_single_private_config() -> None: - # The user may not specify "rapidsmpf-single" directly - engine = pl.GPUEngine( - raise_on_fail=True, - executor="streaming", - executor_options={ - "shuffle_method": "rapidsmpf-single", - "runtime": "tasks", - }, - ) - with pytest.raises(ValueError, match="not a supported shuffle method"): - ConfigOptions.from_polars_engine(engine) - - @pytest.mark.parametrize("executor", ["in-memory", "streaming"]) def test_hashable(executor: str) -> None: config = ConfigOptions.from_polars_engine( @@ -319,31 +265,11 @@ def test_validate_cluster() -> None: ) -def test_validate_shuffle_method_defaults() -> None: - config = ConfigOptions.from_polars_engine( - pl.GPUEngine( - executor="streaming", - ) - ) - assert config.executor.name == "streaming" - assert config.executor.shuffle_method == "tasks" # Default for single cluster - - with pytest.raises(ValueError, match="'foo' is not a valid ShuffleMethod"): - 
ConfigOptions.from_polars_engine( - pl.GPUEngine( - executor="streaming", - executor_options={"shuffle_method": "foo"}, - ) - ) - - @pytest.mark.parametrize( "option", [ "max_rows_per_partition", - "unique_fraction", "target_partition_size", - "groupby_n_ary", "broadcast_join_limit", "sink_to_directory", "client_device_threshold", @@ -409,11 +335,8 @@ def test_config_option_from_env(monkeypatch: pytest.MonkeyPatch) -> None: m.setenv("CUDF_POLARS__EXECUTOR__CLUSTER", "single") m.setenv("CUDF_POLARS__EXECUTOR__FALLBACK_MODE", "silent") m.setenv("CUDF_POLARS__EXECUTOR__MAX_ROWS_PER_PARTITION", "42") - m.setenv("CUDF_POLARS__EXECUTOR__UNIQUE_FRACTION", '{"a": 0.5}') m.setenv("CUDF_POLARS__EXECUTOR__TARGET_PARTITION_SIZE", "100") - m.setenv("CUDF_POLARS__EXECUTOR__GROUPBY_N_ARY", "43") m.setenv("CUDF_POLARS__EXECUTOR__BROADCAST_JOIN_LIMIT", "44") - m.setenv("CUDF_POLARS__EXECUTOR__SHUFFLE_METHOD", "tasks") m.setenv("CUDF_POLARS__CUDA_STREAM_POLICY", "default") engine = pl.GPUEngine() @@ -422,11 +345,8 @@ def test_config_option_from_env(monkeypatch: pytest.MonkeyPatch) -> None: assert config.executor.cluster == "single" assert config.executor.fallback_mode == "silent" assert config.executor.max_rows_per_partition == 42 - assert config.executor.unique_fraction == {"a": 0.5} assert config.executor.target_partition_size == 100 - assert config.executor.groupby_n_ary == 43 assert config.executor.broadcast_join_limit == 44 - assert config.executor.shuffle_method == "tasks" assert config.cuda_stream_policy is None @@ -498,12 +418,6 @@ def test_default_executor() -> None: assert config.executor.name == "streaming" -def test_default_runtime() -> None: - config = ConfigOptions.from_polars_engine(pl.GPUEngine()) - assert config.executor.name == "streaming" - assert config.executor.runtime == "tasks" - - @pytest.mark.parametrize( "memory_resource, memory_resource_config", [ @@ -537,10 +451,7 @@ def test_memory_resource(memory_resource, memory_resource_config) -> None: if 
memory_resource is None and memory_resource_config is None: # The default case: We make a new RMM MR, whose type depends on the GPU's features. - if _is_concurrent_managed_access_supported(): - assert isinstance(result, rmm.mr.PrefetchResourceAdaptor) - else: - assert isinstance(result, rmm.mr.CudaAsyncMemoryResource) + assert isinstance(result, rmm.mr.CudaAsyncMemoryResource) elif memory_resource is None: # Configured through memory_resource_config @@ -608,21 +519,17 @@ def test_cuda_stream_pool(): def test_cuda_stream_policy_default(monkeypatch: pytest.MonkeyPatch) -> None: # Default from engine config = ConfigOptions.from_polars_engine(pl.GPUEngine()) - assert config.cuda_stream_policy is None + assert isinstance(config.cuda_stream_policy, CUDAStreamPoolConfig) - config = ConfigOptions.from_polars_engine( - pl.GPUEngine(executor_options={"runtime": "tasks"}) - ) - assert config.cuda_stream_policy is None + config = ConfigOptions.from_polars_engine(pl.GPUEngine(executor="streaming")) + assert isinstance(config.cuda_stream_policy, CUDAStreamPoolConfig) # Default from env monkeypatch.setenv("CUDF_POLARS__CUDA_STREAM_POLICY", "default") config = ConfigOptions.from_polars_engine(pl.GPUEngine()) assert config.cuda_stream_policy is None - config = ConfigOptions.from_polars_engine( - pl.GPUEngine(executor_options={"runtime": "tasks"}) - ) + config = ConfigOptions.from_polars_engine(pl.GPUEngine(executor="streaming")) assert config.cuda_stream_policy is None @@ -635,26 +542,19 @@ def test_default_cuda_stream_policy(monkeypatch: pytest.MonkeyPatch) -> None: assert isinstance(result, CUDAStreamPoolConfig) -def test_cuda_stream_policy_from_config(*, rapidsmpf_single_available: bool) -> None: +def test_cuda_stream_policy_from_config() -> None: engine = pl.GPUEngine( executor="streaming", - executor_options={"runtime": "rapidsmpf"}, cuda_stream_policy={ "pool_size": 32, "flags": rmm.pylibrmm.CudaStreamFlags.NON_BLOCKING, }, ) - if rapidsmpf_single_available: - config = 
ConfigOptions.from_polars_engine(engine) - assert isinstance(config.cuda_stream_policy, CUDAStreamPoolConfig) - assert config.cuda_stream_policy.pool_size == 32 - assert ( - config.cuda_stream_policy.flags == rmm.pylibrmm.CudaStreamFlags.NON_BLOCKING - ) - config.cuda_stream_policy.build().get_stream() # no exception - else: - with pytest.raises(ValueError, match="The rapidsmpf streaming engine"): - ConfigOptions.from_polars_engine(engine) + config = ConfigOptions.from_polars_engine(engine) + assert isinstance(config.cuda_stream_policy, CUDAStreamPoolConfig) + assert config.cuda_stream_policy.pool_size == 32 + assert config.cuda_stream_policy.flags == rmm.pylibrmm.CudaStreamFlags.NON_BLOCKING + config.cuda_stream_policy.build().get_stream() # no exception @pytest.mark.parametrize( @@ -667,26 +567,19 @@ def test_cuda_stream_policy_from_config(*, rapidsmpf_single_available: bool) -> '{"pool_size": 32}', ], ) -def test_cuda_stream_policy_from_env( - monkeypatch: pytest.MonkeyPatch, env: str, *, rapidsmpf_single_available: bool -) -> None: +def test_cuda_stream_policy_from_env(monkeypatch: pytest.MonkeyPatch, env: str) -> None: monkeypatch.setenv("CUDF_POLARS__CUDA_STREAM_POLICY", env) - runtime = "tasks" if env == "default" else "rapidsmpf" - engine = pl.GPUEngine(executor="streaming", executor_options={"runtime": runtime}) - if runtime == "rapidsmpf" and rapidsmpf_single_available: - config = ConfigOptions.from_polars_engine(engine) + engine = pl.GPUEngine(executor="streaming") + config = ConfigOptions.from_polars_engine(engine) + if env == "default": + assert config.cuda_stream_policy is None + else: assert isinstance(config.cuda_stream_policy, CUDAStreamPoolConfig) if env == "pool": assert config.cuda_stream_policy.pool_size == 16 assert config.cuda_stream_policy.flags == CudaStreamFlags.NON_BLOCKING else: assert config.cuda_stream_policy.pool_size == 32 - elif runtime == "rapidsmpf": - with pytest.raises(ValueError, match="The rapidsmpf streaming engine"): - 
ConfigOptions.from_polars_engine(engine) - else: - config = ConfigOptions.from_polars_engine(engine) - assert config.cuda_stream_policy is None def test_cuda_stream_policy_from_env_invalid(monkeypatch: pytest.MonkeyPatch): @@ -696,41 +589,26 @@ def test_cuda_stream_policy_from_env_invalid(monkeypatch: pytest.MonkeyPatch): def test_cuda_stream_policy_default_rapidsmpf(monkeypatch: pytest.MonkeyPatch) -> None: - pytest.importorskip("rapidsmpf") - # Default from engine - config = ConfigOptions.from_polars_engine( - pl.GPUEngine(executor_options={"runtime": "rapidsmpf"}) - ) + config = ConfigOptions.from_polars_engine(pl.GPUEngine(executor="streaming")) assert isinstance(config.cuda_stream_policy, CUDAStreamPoolConfig) assert config.cuda_stream_policy.pool_size == 16 assert config.cuda_stream_policy.flags == rmm.pylibrmm.CudaStreamFlags.NON_BLOCKING # "default" user argument overrides pool default monkeypatch.setenv("CUDF_POLARS__CUDA_STREAM_POLICY", "default") - config = ConfigOptions.from_polars_engine( - pl.GPUEngine(executor_options={"runtime": "rapidsmpf"}) - ) + config = ConfigOptions.from_polars_engine(pl.GPUEngine(executor="streaming")) assert config.cuda_stream_policy is None -@pytest.mark.parametrize( - "polars_kwargs", - [ - {"executor": "in-memory"}, - {"executor": "streaming", "executor_options": {"runtime": "tasks"}}, - ], -) -def test_cuda_stream_policy_pool_only_supported_by_rapidsmpf( - polars_kwargs: dict[str, Any], -) -> None: +def test_cuda_stream_policy_pool_in_memory_unsupported() -> None: with pytest.raises( ValueError, - match="A stream pool is only supported by the rapidsmpf runtime.", + match="A stream pool is only supported by the streaming executor.", ): ConfigOptions.from_polars_engine( pl.GPUEngine( - **polars_kwargs, + executor="in-memory", cuda_stream_policy={"pool_size": 32, "flags": "NON_BLOCKING"}, ) ) @@ -903,8 +781,3 @@ def test_dask_sink_to_directory_false_raises() -> None: ValueError, match="The dask cluster requires 
sink_to_directory=True" ): StreamingExecutor(cluster=Cluster.DASK, sink_to_directory=False) - - -def test_get_dask_cuda_stream() -> None: - stream = get_dask_cuda_stream() - assert stream is not None diff --git a/python/cudf_polars/tests/test_scan.py b/python/cudf_polars/tests/test_scan.py index e3e788f2866..a655efbe422 100644 --- a/python/cudf_polars/tests/test_scan.py +++ b/python/cudf_polars/tests/test_scan.py @@ -718,7 +718,7 @@ def test_scan_parquet_zero_width_with_limit( ): request.applymarker( pytest.mark.xfail( - is_streaming_engine(engine) and custom_engine is None, + is_streaming_engine(engine) or custom_engine is not None, reason="https://github.com/rapidsai/cudf/issues/21644", ) ) diff --git a/python/cudf_polars/tests/test_sink.py b/python/cudf_polars/tests/test_sink.py index 7b69f6904b4..d23559d2134 100644 --- a/python/cudf_polars/tests/test_sink.py +++ b/python/cudf_polars/tests/test_sink.py @@ -157,6 +157,7 @@ def test_chunked_sink_empty_table_to_parquet(tmp_path): pl.LazyFrame(), tmp_path / "out.parquet", engine=pl.GPUEngine( + executor="in-memory", raise_on_fail=True, parquet_options={"chunked": True, "n_output_chunks": 2}, ), diff --git a/python/cudf_polars/tests/test_tracing.py b/python/cudf_polars/tests/test_tracing.py index 184c0a77d38..283ca361682 100644 --- a/python/cudf_polars/tests/test_tracing.py +++ b/python/cudf_polars/tests/test_tracing.py @@ -55,9 +55,10 @@ def test_trace_basic( assert b"frames_input" in result assert b"total_bytes_output" in result assert b"total_bytes_input" in result - assert b"rmm_total_bytes_output" in result - assert b"rmm_total_bytes_input" in result - assert b"rmm_current_bytes_output" in result + # TODO: With rapidsmpf are the rmm fields not supposed to be logged? 
+ assert b"rmm_total_bytes_output" not in result + assert b"rmm_total_bytes_input" not in result + assert b"rmm_current_bytes_output" not in result assert b"overhead_duration" in result @@ -79,10 +80,6 @@ def test_import_without_structlog() -> None: subprocess.check_call([sys.executable, "-c", code]) -@pytest.mark.skipif( - cudf_polars.testing.asserts.DEFAULT_RUNTIME != "rapidsmpf", - reason="Requires 'rapidsmpf' runtime.", -) def test_log_query_plan() -> None: """Test that log_query_plan emits a Query Plan event.""" import os @@ -98,7 +95,6 @@ def test_log_query_plan() -> None: executor="streaming", executor_options={ "cluster": "single", - "runtime": "rapidsmpf", "max_rows_per_partition": 5, }, memory_resource=rmm.mr.ManagedMemoryResource(), @@ -126,7 +122,6 @@ def test_log_query_plan() -> None: reason="Requires CUDF_POLARS_LOG_TRACES=1.", ) def test_sets_cudf_polars_query_id(): - pytest.importorskip("rapidsmpf") left = pl.LazyFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) right = pl.LazyFrame({"a": [1, 2, 3], "c": [7, 8, 9]}) @@ -136,7 +131,6 @@ def test_sets_cudf_polars_query_id(): engine = pl.GPUEngine( executor="streaming", raise_on_fail=True, - executor_options={"runtime": "rapidsmpf"}, ) with structlog.testing.capture_logs( diff --git a/python/cudf_polars/tests/testing/test_engine_utils.py b/python/cudf_polars/tests/testing/test_engine_utils.py index faf113502d6..346a11acf2e 100644 --- a/python/cudf_polars/tests/testing/test_engine_utils.py +++ b/python/cudf_polars/tests/testing/test_engine_utils.py @@ -3,8 +3,6 @@ from __future__ import annotations -import pytest - from cudf_polars.testing.engine_utils import ( EngineFixtureParam, create_streaming_options, @@ -30,7 +28,6 @@ def test_engine_fixture_param_small_blocksize(): def test_create_streaming_options_medium(): - pytest.importorskip("rapidsmpf") opts = create_streaming_options("medium") assert opts.max_rows_per_partition == 50 assert opts.target_partition_size == 1_000_000 @@ -38,7 +35,6 @@ def 
test_create_streaming_options_medium(): def test_create_streaming_options_small(): - pytest.importorskip("rapidsmpf") opts = create_streaming_options("small") assert opts.max_rows_per_partition == 4 assert opts.target_partition_size == 10 @@ -46,7 +42,6 @@ def test_create_streaming_options_small(): def test_create_streaming_options_overrides_merge(): """Overrides take precedence over the blocksize baseline.""" - pytest.importorskip("rapidsmpf") from cudf_polars.experimental.rapidsmpf.frontend.options import StreamingOptions overrides = StreamingOptions(max_rows_per_partition=999) From f49d5e8d4bd06480db547dfa70b367095bf99b92 Mon Sep 17 00:00:00 2001 From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> Date: Wed, 6 May 2026 20:00:13 -0700 Subject: [PATCH 32/36] Use thread pool to submit hybrid scan host IO tasks (#21992) This PR uses the host worker pool to submit hybrid scan's host-read IO tasks so that the mutex can be safely released after submission. Authors: - Muhammad Haseeb (https://github.com/mhaseeb123) Approvers: - Tianyu Liu (https://github.com/kingcrimsontianyu) - Shruti Shivakumar (https://github.com/shrshi) URL: https://github.com/rapidsai/cudf/pull/21992 --- cpp/src/io/parquet/io_utils/parquet_io_utils.cpp | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/cpp/src/io/parquet/io_utils/parquet_io_utils.cpp b/cpp/src/io/parquet/io_utils/parquet_io_utils.cpp index 3e67b49d03e..9b6953b4bd1 100644 --- a/cpp/src/io/parquet/io_utils/parquet_io_utils.cpp +++ b/cpp/src/io/parquet/io_utils/parquet_io_utils.cpp @@ -6,6 +6,7 @@ #include "io/comp/common.hpp" #include "io/parquet/parquet_common.hpp" +#include #include #include #include @@ -16,8 +17,8 @@ #include #include +#include #include -#include #include @@ -118,8 +119,7 @@ fetch_byte_ranges_to_device_async( stream.synchronize(); { - auto iter = - thrust::make_zip_iterator(io_offsets.begin(), io_sizes.begin(), destinations.begin()); + auto iter = 
cuda::make_zip_iterator(io_offsets.begin(), io_sizes.begin(), destinations.begin()); std::lock_guard lock(mutex); @@ -128,16 +128,14 @@ fetch_byte_ranges_to_device_async( auto const io_size = cuda::std::get<1>(tuple); auto const dest = cuda::std::get<2>(tuple); - // Directly read the column chunk data to the device - // buffer if supported + // Directly read the column chunk data to the device buffer if supported if (datasource.supports_device_read() and datasource.is_device_read_preferred(io_size)) { device_read_tasks.emplace_back( datasource.device_read_async(io_offset, io_size, dest, stream)); } else { - // Read the column chunk data to the host buffer and - // copy it to the device buffer - host_read_tasks.emplace_back( - std::async(std::launch::deferred, [&datasource, io_offset, io_size, dest, stream]() { + // Read the column chunk data to the host buffer copy it to the device buffer + host_read_tasks.emplace_back(cudf::detail::host_worker_pool().submit_task( + [&datasource, io_offset, io_size, dest, stream]() { auto host_buffer = datasource.host_read(io_offset, io_size); cudf::detail::cuda_memcpy_async( cudf::device_span{dest, io_size}, From 50cee5ba65909e7913f88cfa4cc95dfb3755bc3c Mon Sep 17 00:00:00 2001 From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> Date: Wed, 6 May 2026 23:03:11 -0700 Subject: [PATCH 33/36] Python bindings and pytests for `cudf::apply_deletion_mask` (#22145) Follow up #22144 Adds Python bindings for the `cudf::apply_deletion_mask` API and adds pytests for stream compaction. 
Authors: - Muhammad Haseeb (https://github.com/mhaseeb123) - Matthew Murray (https://github.com/Matt711) Approvers: - Matthew Roeschke (https://github.com/mroeschke) - Bradley Dice (https://github.com/bdice) - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/22145 --- .../libcudf/lists/stream_compaction.pxd | 7 ++ .../pylibcudf/libcudf/stream_compaction.pxd | 7 ++ python/pylibcudf/pylibcudf/lists.pxd | 7 ++ python/pylibcudf/pylibcudf/lists.pyi | 6 ++ python/pylibcudf/pylibcudf/lists.pyx | 42 +++++++++++ .../pylibcudf/pylibcudf/stream_compaction.pxd | 7 ++ .../pylibcudf/pylibcudf/stream_compaction.pyi | 6 ++ .../pylibcudf/pylibcudf/stream_compaction.pyx | 36 ++++++++++ .../pylibcudf/tests/test_stream_compaction.py | 69 +++++++++++++++++++ 9 files changed, 187 insertions(+) create mode 100644 python/pylibcudf/tests/test_stream_compaction.py diff --git a/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd b/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd index 0187642e0c7..7514f9d159a 100644 --- a/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/lists/stream_compaction.pxd @@ -19,6 +19,13 @@ cdef extern from "cudf/lists/stream_compaction.hpp" \ device_async_resource_ref mr ) except +libcudf_exception_handler + cdef unique_ptr[column] apply_deletion_mask( + const lists_column_view& lists_column, + const lists_column_view& deletion_mask, + cudaStream_t stream, + device_async_resource_ref mr + ) except +libcudf_exception_handler + cdef unique_ptr[column] distinct( const lists_column_view& lists_column, null_equality nulls_equal, diff --git a/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd b/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd index 9f8686da472..9b5f6d287f3 100644 --- a/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/stream_compaction.pxd @@ -48,6 +48,13 @@ cdef extern from 
"cudf/stream_compaction.hpp" namespace "cudf" nogil: device_async_resource_ref mr ) except +libcudf_exception_handler + cdef unique_ptr[table] apply_deletion_mask( + table_view source_table, + column_view deletion_mask, + cudaStream_t stream, + device_async_resource_ref mr + ) except +libcudf_exception_handler + cdef unique_ptr[table] unique( table_view input, vector[size_type] keys, diff --git a/python/pylibcudf/pylibcudf/lists.pxd b/python/pylibcudf/pylibcudf/lists.pxd index 88b09c01531..75db812de14 100644 --- a/python/pylibcudf/pylibcudf/lists.pxd +++ b/python/pylibcudf/pylibcudf/lists.pxd @@ -150,6 +150,13 @@ cpdef Column apply_boolean_mask( DeviceMemoryResource mr=*, ) +cpdef Column apply_deletion_mask( + Column, + Column, + object stream=*, + DeviceMemoryResource mr=*, +) + cpdef Column distinct( Column, null_equality, diff --git a/python/pylibcudf/pylibcudf/lists.pyi b/python/pylibcudf/pylibcudf/lists.pyi index 1e418b59726..6ff27345854 100644 --- a/python/pylibcudf/pylibcudf/lists.pyi +++ b/python/pylibcudf/pylibcudf/lists.pyi @@ -131,6 +131,12 @@ def apply_boolean_mask( stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Column: ... +def apply_deletion_mask( + input: Column, + deletion_mask: Column, + stream: CudaStreamLike | None = None, + mr: DeviceMemoryResource | None = None, +) -> Column: ... 
def distinct( input: Column, nulls_equal: NullEquality, diff --git a/python/pylibcudf/pylibcudf/lists.pyx b/python/pylibcudf/pylibcudf/lists.pyx index fd05242e44f..fbc07eebb8a 100644 --- a/python/pylibcudf/pylibcudf/lists.pyx +++ b/python/pylibcudf/pylibcudf/lists.pyx @@ -32,6 +32,7 @@ from pylibcudf.libcudf.lists.sorting cimport ( ) from pylibcudf.libcudf.lists.stream_compaction cimport ( apply_boolean_mask as cpp_apply_boolean_mask, + apply_deletion_mask as cpp_apply_deletion_mask, distinct as cpp_distinct, ) from pylibcudf.libcudf.stream_compaction cimport duplicate_keep_option @@ -61,6 +62,7 @@ __all__ = [ "ConcatenateNullPolicy", "DuplicateFindOption", "apply_boolean_mask", + "apply_deletion_mask", "concatenate_list_elements", "concatenate_rows", "contains", @@ -833,6 +835,46 @@ cpdef Column apply_boolean_mask( return Column.from_libcudf(move(c_result), _stream, mr) +cpdef Column apply_deletion_mask( + Column input, + Column deletion_mask, + object stream=None, + DeviceMemoryResource mr=None, +): + """Filters elements in each row of the input lists column using a deletion mask. + + For details, see :cpp:func:`apply_deletion_mask`. + + Parameters + ---------- + input : Column + The input lists column. + deletion_mask : Column + A lists-of-bools column used as a deletion mask. + + Returns + ------- + Column + Lists column with elements removed where deletion_mask is true. 
+ """ + cdef unique_ptr[column] c_result + cdef ListsColumnView list_view = input.list_view() + cdef ListsColumnView mask_view = deletion_mask.list_view() + + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() + mr = _get_memory_resource(mr) + + with nogil: + c_result = cpp_apply_deletion_mask( + list_view.view(), + mask_view.view(), + _cs, + mr.get_mr(), + ) + return Column.from_libcudf(move(c_result), _stream, mr) + + cpdef Column distinct( Column input, null_equality nulls_equal, diff --git a/python/pylibcudf/pylibcudf/stream_compaction.pxd b/python/pylibcudf/pylibcudf/stream_compaction.pxd index 6e904e11ce1..ffe36cebfbd 100644 --- a/python/pylibcudf/pylibcudf/stream_compaction.pxd +++ b/python/pylibcudf/pylibcudf/stream_compaction.pxd @@ -37,6 +37,13 @@ cpdef Table apply_boolean_mask( DeviceMemoryResource mr = *, ) +cpdef Table apply_deletion_mask( + Table source_table, + Column deletion_mask, + object stream = *, + DeviceMemoryResource mr = *, +) + cpdef Table unique( Table input, list keys, diff --git a/python/pylibcudf/pylibcudf/stream_compaction.pyi b/python/pylibcudf/pylibcudf/stream_compaction.pyi index afdd692dde2..76e669f8995 100644 --- a/python/pylibcudf/pylibcudf/stream_compaction.pyi +++ b/python/pylibcudf/pylibcudf/stream_compaction.pyi @@ -37,6 +37,12 @@ def apply_boolean_mask( stream: CudaStreamLike | None = None, mr: DeviceMemoryResource | None = None, ) -> Table: ... +def apply_deletion_mask( + source_table: Table, + deletion_mask: Column, + stream: CudaStreamLike | None = None, + mr: DeviceMemoryResource | None = None, +) -> Table: ... 
def unique( input: Table, keys: list[int], diff --git a/python/pylibcudf/pylibcudf/stream_compaction.pyx b/python/pylibcudf/pylibcudf/stream_compaction.pyx index b4751078acb..2fe8705ea52 100644 --- a/python/pylibcudf/pylibcudf/stream_compaction.pyx +++ b/python/pylibcudf/pylibcudf/stream_compaction.pyx @@ -29,6 +29,7 @@ from cuda.bindings.cyruntime cimport cudaStream_t __all__ = [ "DuplicateKeepOption", "apply_boolean_mask", + "apply_deletion_mask", "distinct", "distinct_indices", "drop_nans", @@ -151,6 +152,41 @@ cpdef Table apply_boolean_mask( return Table.from_libcudf(move(c_result), _stream, mr) +cpdef Table apply_deletion_mask( + Table source_table, + Column deletion_mask, + object stream=None, + DeviceMemoryResource mr=None, +): + """Filters out rows from the input table using a deletion mask. + + For details, see :cpp:func:`apply_deletion_mask`. + + Parameters + ---------- + source_table : Table + The input table to filter. + deletion_mask : Column + A boolean column used as a deletion mask. + + Returns + ------- + Table + Table with rows removed where deletion_mask is true. + """ + cdef unique_ptr[table] c_result + + cdef Stream _stream = _get_stream(stream) + cdef cudaStream_t _cs = _stream.view().value() + mr = _get_memory_resource(mr) + + with nogil: + c_result = cpp_stream_compaction.apply_deletion_mask( + source_table.view(), deletion_mask.view(), _cs, mr.get_mr() + ) + return Table.from_libcudf(move(c_result), _stream, mr) + + cpdef Table unique( Table input, list keys, diff --git a/python/pylibcudf/tests/test_stream_compaction.py b/python/pylibcudf/tests/test_stream_compaction.py new file mode 100644 index 00000000000..ccf21c2a6b3 --- /dev/null +++ b/python/pylibcudf/tests/test_stream_compaction.py @@ -0,0 +1,69 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. 
+# SPDX-License-Identifier: Apache-2.0 + +import pyarrow as pa +import pytest +from utils import assert_column_eq, assert_table_eq + +import pylibcudf as plc + + +@pytest.fixture +def lists_column_and_mask(): + pa_input = pa.array( + [[0, 1, 2, 3], [4, 5], [6, 7, 8, 9]], type=pa.list_(pa.int32()) + ) + pa_mask = pa.array( + [ + [True, False, True, False], + [True, False], + [True, False, True, False], + ], + type=pa.list_(pa.bool_()), + ) + return pa_input, pa_mask + + +def test_lists_apply_boolean_mask(lists_column_and_mask): + pa_input, pa_mask = lists_column_and_mask + result = plc.lists.apply_boolean_mask( + plc.Column.from_arrow(pa_input), plc.Column.from_arrow(pa_mask) + ) + expected = pa.array([[0, 2], [4], [6, 8]], type=pa.list_(pa.int32())) + assert_column_eq(expected, result) + + +def test_lists_apply_deletion_mask(lists_column_and_mask): + pa_input, pa_mask = lists_column_and_mask + result = plc.lists.apply_deletion_mask( + plc.Column.from_arrow(pa_input), plc.Column.from_arrow(pa_mask) + ) + expected = pa.array([[1, 3], [5], [7, 9]], type=pa.list_(pa.int32())) + assert_column_eq(expected, result) + + +def test_apply_boolean_mask(): + pa_table = pa.table( + { + "a": pa.array([10, 40, 70, 5, 2, 10], type=pa.int32()), + "b": pa.array([10, 40, 70, 5, 2, 10], type=pa.float64()), + } + ) + pa_mask = pa.array( + [True, False, True, False, True, False], type=pa.bool_() + ) + result = plc.stream_compaction.apply_boolean_mask( + plc.Table.from_arrow(pa_table), plc.Column.from_arrow(pa_mask) + ) + expected = pa_table.filter(pa_mask) + assert_table_eq(expected, result) + + +def test_apply_deletion_mask(): + pa_table = pa.table({"a": pa.array([1, 2, 3, 4, 5], type=pa.int32())}) + pa_mask = pa.array([True, False, True, False, True], type=pa.bool_()) + result = plc.stream_compaction.apply_deletion_mask( + plc.Table.from_arrow(pa_table), plc.Column.from_arrow(pa_mask) + ) + expected = pa.table({"a": pa.array([2, 4], type=pa.int32())}) + assert_table_eq(expected, 
result) From 47b699df1f49a19de48455d0986bad67c3e46c73 Mon Sep 17 00:00:00 2001 From: "Richard (Rick) Zamora" Date: Thu, 7 May 2026 08:52:41 -0500 Subject: [PATCH 34/36] Refactor ``sort_actor`` to prepare for ``OrderScheme`` changes (#22350) - Follow up to https://github.com/rapidsai/cudf/pull/22315 - Further revises `sort_actor` in preparation for https://github.com/rapidsai/rapidsmpf/pull/853 - Part of https://github.com/rapidsai/cudf/issues/22128 - Breaks apart `sort_actor` logic into modular steps, so we can avoid collecting boundaries when we already know the boundaries (future work). Authors: - Richard (Rick) Zamora (https://github.com/rjzamora) Approvers: - Matthew Murray (https://github.com/Matt711) - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/22350 --- .../rapidsmpf/collectives/sort.py | 129 +++++++++++++----- 1 file changed, 94 insertions(+), 35 deletions(-) diff --git a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/collectives/sort.py b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/collectives/sort.py index ffc10ea44c2..a950df3ce34 100644 --- a/python/cudf_polars/cudf_polars/experimental/rapidsmpf/collectives/sort.py +++ b/python/cudf_polars/cudf_polars/experimental/rapidsmpf/collectives/sort.py @@ -337,16 +337,25 @@ async def _receive_and_buffer_chunks( return local_candidates_list +async def _forward_from_chunk_store( + context: Context, ch_out: Channel[TableChunk], chunk_store: ChunkStore +) -> None: + """Forward buffered messages from a ChunkStore into a channel.""" + for msg in chunk_store: + await ch_out.send(context, msg) + await ch_out.drain(context) + + async def _insert_chunks_into_shuffle( context: Context, comm: Communicator, + ir: Sort, + ir_context: IRExecutionContext, + ch_in: Channel[TableChunk], num_partitions: int, collective_ids: list[int], metadata_in: ChannelMetadata, - chunk_store: ChunkStore, sort_boundaries_df: DataFrame, - ir: Sort, - ir_context: IRExecutionContext, 
by: list[str], ) -> tuple[ShuffleManager, Sort]: """Create shuffle manager and insert each buffered chunk with sort-based splits.""" @@ -364,7 +373,7 @@ async def _insert_chunks_into_shuffle( partition_assignment=PartitionAssignment.CONTIGUOUS, ) async with shuffle.inserting() as inserter: - for msg in chunk_store: + while (msg := await ch_in.recv(context)) is not None: if skip_insert: continue seq_num = msg.sequence_number @@ -379,6 +388,8 @@ async def _insert_chunks_into_shuffle( upstreams=(available_chunk.stream, sort_boundaries_df.stream), ) + # TODO: Pre-sort chunks if they do not originate from the ChunkStore. + # (Not possible until we use _global_sort outside of sort_actor.) splits = find_sort_splits( sort_cols_tbl, sort_boundaries_df.table, @@ -453,6 +464,52 @@ async def _extract_partitions_and_send( await ch_out.drain(context) +async def _global_sort( + context: Context, + comm: Communicator, + ir: Sort, + ir_context: IRExecutionContext, + ch_out: Channel[TableChunk], + ch_in: Channel[TableChunk], + metadata_in: ChannelMetadata, + by: list[str], + num_partitions: int, + sort_boundaries_df: DataFrame, + collective_ids: list[int], + *, + tracer: ActorTracer | None, +) -> None: + """Global sort.""" + # TODO: Attach OrderScheme metadata here. 
+ output_metadata = ChannelMetadata( + local_count=max(1, num_partitions // comm.nranks), + partitioning=Partitioning(inter_rank=None, local="inherit"), + ) + await send_metadata(ch_out, context, output_metadata) + + shuffle, post_sort_ir = await _insert_chunks_into_shuffle( + context, + comm, + ir, + ir_context, + ch_in, + num_partitions, + collective_ids, + metadata_in, + sort_boundaries_df, + by, + ) + await _extract_partitions_and_send( + context, + ch_out, + shuffle, + post_sort_ir, + ir_context, + ir.schema, + tracer=tracer, + ) + + @define_actor() async def sort_actor( context: Context, @@ -467,10 +524,18 @@ async def sort_actor( collective_ids: list[int], ) -> None: """Streaming sort actor.""" - ch_replay = context.create_channel() + ch_sample_replay = context.create_channel() + ch_chunk_store = context.create_channel() async with shutdown_on_error( - context, ch_in, ch_out, ch_replay, trace_ir=ir, ir_context=ir_context + context, + ch_in, + ch_out, + ch_sample_replay, + ch_chunk_store, + trace_ir=ir, + ir_context=ir_context, ) as tracer: + # TODO: Skip sort if OrderScheme metadata is present and compatible. 
metadata_in = await recv_metadata(ch_in, context) if ir.zlice is not None: @@ -494,20 +559,19 @@ async def sort_actor( context, comm, ch_in, num_partitions, metadata_in, executor, collective_ids ) - output_metadata = ChannelMetadata( - local_count=max(1, num_partitions // comm.nranks), - partitioning=Partitioning(inter_rank=None, local="inherit"), - ) - await send_metadata(ch_out, context, output_metadata) - chunk_store = ChunkStore(context) _, local_candidates_list = await gather_in_task_group( replay_buffered_channel( - context, ch_replay, ch_in, sampled_chunks, metadata_in, trace_ir=ir + context, + ch_sample_replay, + ch_in, + sampled_chunks, + metadata_in, + trace_ir=ir, ), _receive_and_buffer_chunks( context, - ch_replay, + ch_sample_replay, chunk_store, ir, by, @@ -529,27 +593,22 @@ async def sort_actor( collective_ids.pop() if need_allgather else None, ) - shuffle, post_sort_ir = await _insert_chunks_into_shuffle( - context, - comm, - num_partitions, - collective_ids, - metadata_in, - chunk_store, - sort_boundaries_df, - ir, - ir_context, - by, - ) - - await _extract_partitions_and_send( - context, - ch_out, - shuffle, - post_sort_ir, - ir_context, - ir.schema, - tracer=tracer, + await gather_in_task_group( + _forward_from_chunk_store(context, ch_chunk_store, chunk_store), + _global_sort( + context, + comm, + ir, + ir_context, + ch_out, + ch_chunk_store, + metadata_in, + by, + num_partitions, + sort_boundaries_df, + collective_ids, + tracer=tracer, + ), ) From 996eb35f8a6775d1577c979f2c4b1385b958ad5b Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Thu, 7 May 2026 21:07:24 +0200 Subject: [PATCH 35/36] Run the cudf-polars test suite against `DaskEngine` and `RayEngine` (#22381) Builds on the cached `streaming_engines` fixture from #22364, which amortizes SPMD bootstrap via `_reset()`, and extends the same pattern to Dask and Ray. 
With this change, the test matrix runs against: `["in-memory", "spmd", "spmd-small", "dask", "ray"]` subject to package availability and `rrun` gating. We might change the different setups later, but for now CI runs: | Engine | Block Size(s) | GPU Configuration | |----------------|-----------------------|-------------------| | `SPMDEngine` | `"medium"`, `"small"` | Single GPU | | `DaskEngine` | `"medium"` | Single GPU | | `RayEngine` | `"medium"` | Two GPUs | Authors: - Mads R. B. Kristensen (https://github.com/madsbk) - Peter Andreas Entschev (https://github.com/pentschev) Approvers: - Matthew Murray (https://github.com/Matt711) - Bradley Dice (https://github.com/bdice) - Peter Andreas Entschev (https://github.com/pentschev) - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/22381 --- .github/workflows/pr.yaml | 89 ++++++++++--------- .github/workflows/test.yaml | 1 + ci/run_cudf_polars_experimental_pytests.sh | 2 +- ci/test_cudf_polars_experimental.sh | 2 +- dependencies.yaml | 13 +++ .../cudf_polars/experimental/join.py | 20 +++-- .../cudf_polars/testing/engine_utils.py | 43 +++++++++ python/cudf_polars/pyproject.toml | 3 + python/cudf_polars/tests/conftest.py | 72 +++++++++++++-- .../experimental/test_all_gather_host_data.py | 2 - .../tests/experimental/test_dataframescan.py | 21 ++--- .../tests/experimental/test_filter.py | 9 +- .../tests/experimental/test_groupby.py | 8 +- .../tests/experimental/test_io_multirank.py | 48 ++-------- .../tests/experimental/test_join.py | 52 ++++++----- .../tests/experimental/test_metadata.py | 22 +++-- .../tests/experimental/test_parallel.py | 4 +- .../tests/experimental/test_rolling.py | 13 ++- .../tests/experimental/test_select.py | 25 ++++-- .../tests/experimental/test_spilling.py | 8 +- .../tests/experimental/test_statistics.py | 53 ++--------- .../tests/experimental/test_unique.py | 13 +-- 22 files changed, 310 insertions(+), 213 deletions(-) diff --git a/.github/workflows/pr.yaml 
b/.github/workflows/pr.yaml index a7ef36049b0..f373953d5f1 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -470,50 +470,51 @@ jobs: with: build_type: pull-request script: ci/test_wheel_cudf.sh - # wheel-build-cudf-polars: - # needs: wheel-build-pylibcudf - # secrets: inherit - # uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main - # with: - # # This selects "ARCH=amd64 + the latest supported Python + CUDA". - # matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) - # build_type: pull-request - # node_type: cpu8 - # script: "ci/build_wheel_cudf_polars.sh" - # package-name: cudf_polars - # package-type: python - # pure-wheel: true - # wheel-tests-cudf-polars: - # needs: [wheel-build-cudf-polars, changed-files] - # secrets: inherit - # uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main - # if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_wheels && fromJSON(needs.changed-files.outputs.changed_file_groups).neither_cudf_nor_dask_cudf - # with: - # # This selects "ARCH=amd64 + the latest supported Python + CUDA". - # matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) - # build_type: pull-request - # script: "ci/test_wheel_cudf_polars.sh" - # wheel-tests-cudf-polars-with-rapidsmpf: - # needs: [wheel-build-cudf-polars, changed-files] - # secrets: inherit - # uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main - # if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_wheels && fromJSON(needs.changed-files.outputs.changed_file_groups).neither_cudf_nor_dask_cudf - # with: - # # This selects "ARCH=amd64 + the latest supported Python + CUDA" to minimize CI usage. 
- # # (rapidsmpf compatibility already validated in rapidsmpf CI) - # matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) - # build_type: pull-request - # script: "ci/test_cudf_polars_experimental.sh" - # cudf-polars-polars-tests: - # needs: [wheel-build-cudf-polars, changed-files] - # secrets: inherit - # uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main - # if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_wheels && fromJSON(needs.changed-files.outputs.changed_file_groups).neither_cudf_nor_dask_cudf - # with: - # # This selects "ARCH=amd64 + the latest supported Python + CUDA". - # matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) - # build_type: pull-request - # script: "ci/test_cudf_polars_polars_tests.sh" + wheel-build-cudf-polars: + needs: wheel-build-pylibcudf + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main + with: + # This selects "ARCH=amd64 + the latest supported Python + CUDA". 
+ matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) + build_type: pull-request + node_type: cpu8 + script: "ci/build_wheel_cudf_polars.sh" + package-name: cudf_polars + package-type: python + pure-wheel: true + wheel-tests-cudf-polars: + needs: [wheel-build-cudf-polars, changed-files] + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main + if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_wheels && fromJSON(needs.changed-files.outputs.changed_file_groups).neither_cudf_nor_dask_cudf + with: + # This selects "ARCH=amd64 + the latest supported Python + CUDA". + matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) + build_type: pull-request + script: "ci/test_wheel_cudf_polars.sh" + wheel-tests-cudf-polars-with-rapidsmpf: + needs: [wheel-build-cudf-polars, changed-files] + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main + if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_wheels && fromJSON(needs.changed-files.outputs.changed_file_groups).neither_cudf_nor_dask_cudf + with: + # This selects "ARCH=amd64 + the latest supported Python + CUDA" to minimize CI usage. 
+ # (rapidsmpf compatibility already validated in rapidsmpf CI) + matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) + build_type: pull-request + container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" + script: "ci/test_cudf_polars_experimental.sh" + cudf-polars-polars-tests: + needs: [wheel-build-cudf-polars, changed-files] + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main + if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_wheels && fromJSON(needs.changed-files.outputs.changed_file_groups).neither_cudf_nor_dask_cudf + with: + # This selects "ARCH=amd64 + the latest supported Python + CUDA". + matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) + build_type: pull-request + script: "ci/test_cudf_polars_polars_tests.sh" wheel-build-dask-cudf: needs: wheel-build-cudf secrets: inherit diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index b4977f60def..a6b0b6f3326 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -174,6 +174,7 @@ jobs: matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) build_type: ${{ inputs.build_type }} branch: ${{ inputs.branch }} + container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" date: ${{ inputs.date }} sha: ${{ inputs.sha }} script: "ci/test_cudf_polars_experimental.sh" diff --git a/ci/run_cudf_polars_experimental_pytests.sh b/ci/run_cudf_polars_experimental_pytests.sh index d0a4767bd99..da659c7b386 100755 --- a/ci/run_cudf_polars_experimental_pytests.sh 
+++ b/ci/run_cudf_polars_experimental_pytests.sh @@ -10,5 +10,5 @@ set -euo pipefail # Support invoking outside the script directory cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/cudf_polars/ -echo "Running the full cudf-polars test suite with both the in-memory and spmd engine" +echo "Running the full cudf-polars test suite" python -m pytest --cache-clear "$@" tests diff --git a/ci/test_cudf_polars_experimental.sh b/ci/test_cudf_polars_experimental.sh index aa3abd66254..4b796ff4b94 100755 --- a/ci/test_cudf_polars_experimental.sh +++ b/ci/test_cudf_polars_experimental.sh @@ -28,7 +28,7 @@ rapids-pip-retry install \ -v \ --prefer-binary \ --constraint "${PIP_CONSTRAINT}" \ - "$(echo "${CUDF_POLARS_WHEELHOUSE}"/cudf_polars_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test,experimental]" \ + "$(echo "${CUDF_POLARS_WHEELHOUSE}"/cudf_polars_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test,experimental,ray]" \ "$(echo "${LIBCUDF_WHEELHOUSE}"/libcudf_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)" \ "$(echo "${PYLIBCUDF_WHEELHOUSE}"/pylibcudf_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)" diff --git a/dependencies.yaml b/dependencies.yaml index af7dfea460f..9701fb3733b 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -384,6 +384,14 @@ files: key: experimental includes: - run_cudf_polars_experimental + py_run_cudf_polars_ray: + output: pyproject + pyproject_dir: python/cudf_polars + extras: + table: project.optional-dependencies + key: ray + includes: + - depends_on_ray py_test_cudf_polars: output: pyproject pyproject_dir: python/cudf_polars @@ -1290,6 +1298,11 @@ dependencies: - matrix: packages: - *rapidsmpf_unsuffixed + depends_on_ray: + common: + - output_types: [conda, requirements, pyproject] + packages: + - ray>=2.55.1 depends_on_rapids_logger: common: - output_types: [conda, requirements, pyproject] diff --git a/python/cudf_polars/cudf_polars/experimental/join.py b/python/cudf_polars/cudf_polars/experimental/join.py index cd5c514b45a..1682762c9e8 100644 --- 
a/python/cudf_polars/cudf_polars/experimental/join.py +++ b/python/cudf_polars/cudf_polars/experimental/join.py @@ -164,20 +164,22 @@ def _( left, pi_left = rec(left) right, pi_right = rec(right) - # Fallback to single partition on the smaller table + # Fallback to single partition on the smaller table whenever either + # side has more than one partition. left_count = pi_left[left].count right_count = pi_right[right].count output_count = max(left_count, right_count) - fallback_msg = "ConditionalJoin not supported for multiple partitions." - if left_count < right_count: - if left_count > 1 or dynamic_planning: + if output_count > 1 or dynamic_planning: + if left_count < right_count: left = Repartition(left.schema, left) pi_left[left] = PartitionInfo(count=1) - _fallback_inform(fallback_msg, config_options) - elif right_count > 1 or dynamic_planning: - right = Repartition(right.schema, right) - pi_right[right] = PartitionInfo(count=1) - _fallback_inform(fallback_msg, config_options) + else: + right = Repartition(right.schema, right) + pi_right[right] = PartitionInfo(count=1) + _fallback_inform( + "ConditionalJoin not supported for multiple partitions.", + config_options, + ) # Reconstruct and return new_node = ir.reconstruct([left, right]) diff --git a/python/cudf_polars/cudf_polars/testing/engine_utils.py b/python/cudf_polars/cudf_polars/testing/engine_utils.py index c36bcf2ed27..b0b640615f7 100644 --- a/python/cudf_polars/cudf_polars/testing/engine_utils.py +++ b/python/cudf_polars/cudf_polars/testing/engine_utils.py @@ -11,6 +11,7 @@ if TYPE_CHECKING: from collections.abc import Mapping + from contextlib import AbstractContextManager import polars as pl @@ -21,6 +22,15 @@ STREAMING_ENGINE_FIXTURE_PARAMS: list[str] = [] if importlib.util.find_spec("rapidsmpf") is not None: STREAMING_ENGINE_FIXTURE_PARAMS.extend(["spmd", "spmd-small"]) + # ``DaskEngine`` and ``RayEngine`` both reject construction inside an + # ``rrun`` cluster. 
+ from rapidsmpf.bootstrap import is_running_with_rrun as _is_running_with_rrun + + if not _is_running_with_rrun(): # pragma: no cover + if importlib.util.find_spec("distributed") is not None: + STREAMING_ENGINE_FIXTURE_PARAMS.append("dask") + if importlib.util.find_spec("ray") is not None: + STREAMING_ENGINE_FIXTURE_PARAMS.append("ray") ALL_ENGINE_FIXTURE_PARAMS = ["in-memory", *STREAMING_ENGINE_FIXTURE_PARAMS] @@ -63,6 +73,34 @@ def is_streaming_engine(obj: Any) -> bool: return isinstance(obj, StreamingEngine) +def warns_on_spmd( # pragma: no cover; rapidsmpf-only path + engine: Any, + *args: Any, + when: bool = True, + **kwargs: Any, +) -> AbstractContextManager[Any]: + """ + ``pytest.warns(*args, **kwargs)`` on SPMD; ``nullcontext`` otherwise. + + ``pytest.warns`` only captures warnings emitted in the test process. On + multi-process backends (``DaskEngine``, ``RayEngine``) the fallback + warning fires on workers/actors and only appears in worker logs/stdout, + so the assertion is replaced with a passthrough on those backends. + + The optional ``when`` kwarg lets callers compose an additional gate (e.g. + a parametrize value) without an outer ``if``. + """ + import contextlib + + import pytest + + from cudf_polars.experimental.rapidsmpf.frontend.spmd import SPMDEngine + + if when and isinstance(engine, SPMDEngine): + return pytest.warns(*args, **kwargs) + return contextlib.nullcontext() + + def create_streaming_options( blocksize_mode: Literal["medium", "small"], overrides: StreamingOptions | None = None, @@ -87,6 +125,9 @@ def create_streaming_options( from cudf_polars.experimental.rapidsmpf.frontend.options import StreamingOptions from cudf_polars.utils.config import StreamingFallbackMode + # ``allow_gpu_sharing=True`` is always set so the cached multi-rank + # engines (Dask workers, Ray actors with ``num_ranks > 1``) don't trip + # the UUID-collision guard on every ``_reset(...)``. 
match blocksize_mode: case "medium": baseline = StreamingOptions( @@ -94,6 +135,7 @@ def create_streaming_options( dynamic_planning={}, target_partition_size=1_000_000, raise_on_fail=True, + allow_gpu_sharing=True, ) case "small": baseline = StreamingOptions( @@ -102,6 +144,7 @@ def create_streaming_options( target_partition_size=10, raise_on_fail=True, fallback_mode=StreamingFallbackMode.SILENT, + allow_gpu_sharing=True, ) case _: # pragma: no cover raise ValueError(f"Unknown blocksize_mode: {blocksize_mode!r}") diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml index 47633e42364..7703cad7dad 100644 --- a/python/cudf_polars/pyproject.toml +++ b/python/cudf_polars/pyproject.toml @@ -63,6 +63,9 @@ rapidsmpf = [ "pyarrow>=19.0.0,<24", "rapidsmpf==26.6.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. +ray = [ + "ray>=2.55.1", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [project.urls] Homepage = "https://github.com/rapidsai/cudf" diff --git a/python/cudf_polars/tests/conftest.py b/python/cudf_polars/tests/conftest.py index b3d83b36d36..65445b683ae 100644 --- a/python/cudf_polars/tests/conftest.py +++ b/python/cudf_polars/tests/conftest.py @@ -31,6 +31,12 @@ StreamingEngines: TypeAlias = Mapping[str, StreamingEngine] +# Number of ranks for multi-rank streaming engines that share one GPU +# (currently ``RayEngine``). Single-GPU dev hosts and CI runners require +# ``allow_gpu_sharing=True`` to oversubscribe one device across actors. 
+NUM_RANKS = 2 + + @pytest.fixture(params=[False, True], ids=["no_nulls", "nulls"], scope="session") def with_nulls(request): return request.param @@ -89,6 +95,27 @@ def streaming_engines() -> Generator[StreamingEngines, None, None]: ) engines: dict[str, StreamingEngine] = {"spmd": SPMDEngine(comm=comm)} + + if "dask" in STREAMING_ENGINE_FIXTURE_PARAMS: # pragma: no cover + from cudf_polars.experimental.rapidsmpf.frontend.dask import DaskEngine + + engines["dask"] = DaskEngine(engine_options={"allow_gpu_sharing": True}) + + if "ray" in STREAMING_ENGINE_FIXTURE_PARAMS: # pragma: no cover + from cudf_polars.experimental.rapidsmpf.frontend.ray import RayEngine + + # Always pin ``num_ranks`` so the cached engine has a deterministic + # actor count regardless of how many GPUs the host happens to have; + # otherwise ``RayEngine`` defaults to ``get_num_gpus_in_ray_cluster()`` + # and tests that depend on rank-count behavior (e.g. fast-count + # parquet, concat) become non-portable. Pinning ``num_ranks`` requires + # ``allow_gpu_sharing=True`` (production guard). + engines["ray"] = RayEngine( + num_ranks=NUM_RANKS, + engine_options={"allow_gpu_sharing": True}, + ray_init_options={"include_dashboard": False}, + ) + try: yield engines finally: @@ -108,6 +135,28 @@ def spmd_engine(streaming_engines: StreamingEngines) -> SPMDEngine: return engine +@pytest.fixture +def spmd_engine_factory( + streaming_engines: StreamingEngines, +) -> Callable[..., SPMDEngine]: + """ + Return a factory that yields the shared :class:`SPMDEngine`. + + Use this in place of :func:`streaming_engine_factory` for tests that + must run on SPMD only. 
+ """ + from cudf_polars.experimental.rapidsmpf.frontend.spmd import SPMDEngine + + param = EngineFixtureParam(full_name="spmd") + + def factory(options: StreamingOptions | None = None) -> SPMDEngine: + engine = build_streaming_engine(param, streaming_engines, options) + assert isinstance(engine, SPMDEngine) + return engine + + return factory + + @pytest.fixture(params=STREAMING_ENGINE_FIXTURE_PARAMS) def _streaming_engine_param(request: pytest.FixtureRequest) -> EngineFixtureParam: """Parametrization helper to run tests for each streaming engine variant.""" @@ -246,10 +295,9 @@ def pytest_configure(config): config.addinivalue_line( "markers", - "skip_on_streaming_engine(reason): skip the test for streaming " - '``engine`` variants (e.g. ``"spmd"``, ``"spmd-small"``) while ' - "still letting the in-memory variant run. Use this to track features " - "that have no multi-partition implementation", + "skip_on_streaming_engine(reason, *, engine=None): skip the test for " + 'streaming ``engine`` variants (e.g. ``"spmd"``, ``"spmd-small"``, ' + '``"dask"``, ``"ray"``) while still allowing the in-memory variant to run.', ) # Ray's internal subprocess management leaks `/dev/null` file handles, and @@ -275,9 +323,23 @@ def pytest_collection_modifyitems(items): callspec = getattr(item, "callspec", None) if callspec is None: continue - engine_param = callspec.params.get("_all_engine_param") + # Tests bind to either ``engine`` (parametrized via ``_all_engine_param``) + # or ``streaming_engine`` / ``streaming_engine_factory`` (parametrized via + # ``_streaming_engine_param``). Check both. 
+ engine_param = callspec.params.get("_all_engine_param") or callspec.params.get( + "_streaming_engine_param" + ) if engine_param is None or engine_param == "in-memory": continue + engine_filter = marker.kwargs.get("engine") + if engine_filter is not None: + if isinstance(engine_filter, str): + engine_filter = (engine_filter,) + # Strip the ``-small`` suffix so ``"spmd-small"`` matches + # ``engine=("spmd",)``. + engine_name = engine_param.removesuffix("-small") + if engine_name not in engine_filter: + continue reason = ( marker.args[0] if marker.args diff --git a/python/cudf_polars/tests/experimental/test_all_gather_host_data.py b/python/cudf_polars/tests/experimental/test_all_gather_host_data.py index 8f09a82c4bd..c85598a8c64 100644 --- a/python/cudf_polars/tests/experimental/test_all_gather_host_data.py +++ b/python/cudf_polars/tests/experimental/test_all_gather_host_data.py @@ -59,8 +59,6 @@ def test_gather_cluster_info(streaming_engine) -> None: assert isinstance(info.gpu_uuid, str) # Each rank runs in its own process. assert len({info.pid for info in infos}) == streaming_engine.nranks - # Without allow_gpu_sharing, all UUIDs must be unique (enforced at init). - assert len({info.gpu_uuid for info in infos}) == streaming_engine.nranks def test_cluster_info_cuda_visible_devices(monkeypatch) -> None: diff --git a/python/cudf_polars/tests/experimental/test_dataframescan.py b/python/cudf_polars/tests/experimental/test_dataframescan.py index dbf22848824..fb263e20b94 100644 --- a/python/cudf_polars/tests/experimental/test_dataframescan.py +++ b/python/cudf_polars/tests/experimental/test_dataframescan.py @@ -60,19 +60,20 @@ def test_parallel_dataframescan(df, streaming_engine_factory, max_rows_per_parti assert count == 1 -@pytest.mark.xfail( - reason=( - "Multi-rank Union interleaves child outputs across ranks: client " - "receives [rank0_A, rank0_B, rank1_A, rank1_B] instead of the " - "polars-CPU [A, B]. 
Tracked in " - "https://github.com/rapidsai/cudf/issues/22376." - ), - strict=False, -) -def test_dataframescan_concat(df, streaming_engine_factory): +def test_dataframescan_concat(request, df, streaming_engine_factory): streaming_engine = streaming_engine_factory( StreamingOptions(max_rows_per_partition=1_000), ) + if streaming_engine.nranks > 1: + # Multi-rank Union interleaves child outputs across ranks: client + # receives [rank0_A, rank0_B, rank1_A, rank1_B] instead of the + # polars-CPU [A, B]. + request.applymarker( + pytest.mark.xfail( + reason="https://github.com/rapidsai/cudf/issues/22376", + strict=False, + ) + ) df2 = pl.concat([df, df]) assert_gpu_result_equal(df2, engine=streaming_engine) diff --git a/python/cudf_polars/tests/experimental/test_filter.py b/python/cudf_polars/tests/experimental/test_filter.py index 4fb11df691c..b8b4fb2749c 100644 --- a/python/cudf_polars/tests/experimental/test_filter.py +++ b/python/cudf_polars/tests/experimental/test_filter.py @@ -9,12 +9,11 @@ from cudf_polars.experimental.rapidsmpf.frontend.options import StreamingOptions from cudf_polars.testing.asserts import assert_gpu_result_equal +from cudf_polars.testing.engine_utils import warns_on_spmd @pytest.fixture def engine(streaming_engine_factory): - # ``fallback_mode="warn"`` overrides the small-blocksize baseline (which - # sets SILENT) so ``test_filter_non_pointwise`` can assert on the warning. return streaming_engine_factory( StreamingOptions(max_rows_per_partition=3, fallback_mode="warn"), ) @@ -38,7 +37,9 @@ def test_filter_pointwise(df, engine): def test_filter_non_pointwise(df, engine): query = df.filter(pl.col("a") > pl.col("a").max()) - with pytest.warns( - UserWarning, match="This filter is not supported for multiple partitions." 
+ with warns_on_spmd( + engine, + UserWarning, + match="This filter is not supported for multiple partitions.", ): assert_gpu_result_equal(query, engine=engine) diff --git a/python/cudf_polars/tests/experimental/test_groupby.py b/python/cudf_polars/tests/experimental/test_groupby.py index 03d87fe23e9..6ca11387da0 100644 --- a/python/cudf_polars/tests/experimental/test_groupby.py +++ b/python/cudf_polars/tests/experimental/test_groupby.py @@ -131,8 +131,8 @@ def test_groupby_std_var_ddof(df, engine, agg, ddof): @pytest.mark.parametrize("fallback_mode", ["silent", "raise", "warn", "foo"]) -def test_groupby_fallback(df, fallback_mode, streaming_engine_factory): - streaming_engine = streaming_engine_factory( +def test_groupby_fallback(df, fallback_mode, spmd_engine_factory): + streaming_engine = spmd_engine_factory( StreamingOptions(fallback_mode=fallback_mode), ) match = "Failed to decompose groupby aggs" @@ -287,6 +287,10 @@ def test_groupby_count_type_mismatch(df, streaming_engine_factory): assert_gpu_result_equal(q, engine=streaming_engine, check_row_order=False) +@pytest.mark.skip_on_streaming_engine( + "patch.object on ShuffleManager.Inserter doesn't reach worker processes", + engine=("dask", "ray"), +) def test_shuffle_reduce_insert_finished_called_on_oom(streaming_engine_factory): streaming_engine = streaming_engine_factory( StreamingOptions(target_partition_size=10, max_rows_per_partition=5), diff --git a/python/cudf_polars/tests/experimental/test_io_multirank.py b/python/cudf_polars/tests/experimental/test_io_multirank.py index 2208cc67316..bf9e8e70343 100644 --- a/python/cudf_polars/tests/experimental/test_io_multirank.py +++ b/python/cudf_polars/tests/experimental/test_io_multirank.py @@ -7,16 +7,15 @@ from typing import TYPE_CHECKING import pytest -from rapidsmpf.bootstrap import is_running_with_rrun import polars as pl -from cudf_polars.experimental.rapidsmpf.frontend.spmd import SPMDEngine +from cudf_polars.experimental.rapidsmpf.frontend.options import 
StreamingOptions from cudf_polars.testing.asserts import assert_sink_result_equal from cudf_polars.utils.config import Cluster, StreamingExecutor if TYPE_CHECKING: - from collections.abc import Iterator + from collections.abc import Callable from pathlib import Path from cudf_polars.experimental.rapidsmpf.frontend.core import StreamingEngine @@ -39,43 +38,14 @@ def df() -> pl.LazyFrame: ) -@pytest.fixture(params=["spmd", "ray", "dask"]) +@pytest.fixture def engine( - request: pytest.FixtureRequest, - spmd_engine: SPMDEngine, -) -> Iterator[StreamingEngine]: - """Yield each supported streaming engine.""" - backend = request.param - executor_options = {"max_rows_per_partition": 1_000} - - if backend == "spmd": - with SPMDEngine( - comm=spmd_engine.comm, - executor_options=executor_options, - ) as eng: - yield eng - return - - if is_running_with_rrun(): - pytest.skip(f"{backend}Engine must not be created from within an rrun cluster") - - if backend == "ray": - pytest.importorskip("ray", reason="ray is not installed") - from cudf_polars.experimental.rapidsmpf.frontend.ray import RayEngine - - with RayEngine( - executor_options=executor_options, - ray_init_options={"include_dashboard": False}, - ) as eng: - yield eng - return - - assert backend == "dask" - pytest.importorskip("distributed", reason="distributed is not installed") - from cudf_polars.experimental.rapidsmpf.frontend.dask import DaskEngine - - with DaskEngine(executor_options=executor_options) as eng: - yield eng + streaming_engine_factory: Callable[..., StreamingEngine], +) -> StreamingEngine: + """Yield each supported streaming engine pinned to small partitions.""" + return streaming_engine_factory( + StreamingOptions(max_rows_per_partition=1_000), + ) def test_sink_parquet_directory( diff --git a/python/cudf_polars/tests/experimental/test_join.py b/python/cudf_polars/tests/experimental/test_join.py index 6a09ff95ef5..1b4635dd924 100644 --- a/python/cudf_polars/tests/experimental/test_join.py +++ 
b/python/cudf_polars/tests/experimental/test_join.py @@ -19,6 +19,7 @@ from cudf_polars.experimental.shuffle import Shuffle from cudf_polars.experimental.statistics import collect_statistics from cudf_polars.testing.asserts import assert_gpu_result_equal +from cudf_polars.testing.engine_utils import warns_on_spmd from cudf_polars.utils.config import ConfigOptions, StreamingExecutor @@ -103,12 +104,11 @@ def test_join_conditional(reverse, max_rows_per_partition, streaming_engine_fact if reverse: left, right = right, left q = left.join_where(right, pl.col("y") < pl.col("yy")) - if max_rows_per_partition == 3: - with pytest.warns( - UserWarning, match="ConditionalJoin not supported for multiple partitions." - ): - assert_gpu_result_equal(q, engine=streaming_engine, check_row_order=False) - else: + with warns_on_spmd( + streaming_engine, + UserWarning, + match="ConditionalJoin not supported for multiple partitions.", + ): assert_gpu_result_equal(q, engine=streaming_engine, check_row_order=False) @@ -156,7 +156,7 @@ def test_join(left, right, how, reverse, streaming_engine_factory, options): @pytest.mark.parametrize("zlice", [(0, 2), (2, 2), (-2, None)]) -def test_join_and_slice(zlice, streaming_engine_factory): +def test_join_and_slice(request, zlice, streaming_engine_factory): streaming_engine = streaming_engine_factory( StreamingOptions( max_rows_per_partition=3, @@ -164,6 +164,16 @@ def test_join_and_slice(zlice, streaming_engine_factory): fallback_mode="warn", ), ) + if streaming_engine.nranks > 1: + # The multi-rank fallback for slice doesn't preserve row order + # within equal-key groups, so the slice can pick different rows + # than the CPU baseline. 
+ request.applymarker( + pytest.mark.xfail( + reason="https://github.com/rapidsai/cudf/issues/22405", + strict=False, + ) + ) left = pl.LazyFrame( { "a": [1, 2, 3, 1, None], @@ -181,23 +191,22 @@ def test_join_and_slice(zlice, streaming_engine_factory): q = left.join(right, on="a", how="inner").slice(*zlice) # Check that we get the correct row count # See: https://github.com/rapidsai/cudf/issues/19153 - if zlice in {(2, 2), (-2, None)}: - with pytest.warns( - UserWarning, match="This slice not supported for multiple partitions." - ): - assert q.collect(engine=streaming_engine).height == q.collect().height - else: + with warns_on_spmd( + streaming_engine, + UserWarning, + match="This slice not supported for multiple partitions.", + when=zlice in {(2, 2), (-2, None)}, + ): assert q.collect(engine=streaming_engine).height == q.collect().height # Need sort to match order after a join q = left.join(right, on="a", how="inner").sort(pl.col("a")).slice(*zlice) - if zlice == (2, 2): - with pytest.warns( - UserWarning, - match="This slice not supported for multiple partitions.", - ): - assert_gpu_result_equal(q, engine=streaming_engine) - else: + with warns_on_spmd( + streaming_engine, + UserWarning, + match="This slice not supported for multiple partitions.", + when=zlice == (2, 2), + ): assert_gpu_result_equal(q, engine=streaming_engine) @@ -232,7 +241,8 @@ def test_join_maintain_order_fallback_streaming( ) q = left.join(right, on="y", how="inner", maintain_order=maintain_order) - with pytest.warns( + with warns_on_spmd( + streaming_engine, UserWarning, match=r"Join\(maintain_order=.*\) not supported for multiple partitions\.", ): diff --git a/python/cudf_polars/tests/experimental/test_metadata.py b/python/cudf_polars/tests/experimental/test_metadata.py index 618087a27c5..791e33744cd 100644 --- a/python/cudf_polars/tests/experimental/test_metadata.py +++ b/python/cudf_polars/tests/experimental/test_metadata.py @@ -66,20 +66,30 @@ def right() -> pl.LazyFrame: def 
test_rapidsmpf_join_metadata( left: pl.LazyFrame, right: pl.LazyFrame, - streaming_engine_factory, + spmd_engine_factory, options, ) -> None: - streaming_engine = streaming_engine_factory(options) - config_options = ConfigOptions.from_polars_engine(streaming_engine) + # Pinned to SPMD: ``ChannelMetadata.__reduce_cython__`` can't pickle + # ``self._handle`` across worker/actor processes, so the + # ``metadata_collector`` round-trip fails on Dask and Ray. + # + # When https://github.com/rapidsai/cudf/pull/22394 lands, dedup of + # replicated outputs moves to the Dask/Ray frontends and the + # ``duplicated`` flag's semantics change to "every rank holds the + # data". Revisit the ``len(metadata_collector) == 1`` and + # ``metadata.duplicated is False`` assertions below, and reconsider + # whether this test can widen to ``streaming_engine_factory``. + engine = spmd_engine_factory(options) + config_options = ConfigOptions.from_polars_engine(engine) broadcast_join_limit = config_options.executor.broadcast_join_limit q = left.join( right, on="y", how="left", ).filter(pl.col("x") > pl.col("zz")) - ir = Translator(q._ldf.visit(), streaming_engine).translate_ir() - left_count = left.collect(engine=streaming_engine).height - right_count = right.collect(engine=streaming_engine).height + ir = Translator(q._ldf.visit(), engine).translate_ir() + left_count = left.collect(engine=engine).height + right_count = right.collect(engine=engine).height metadata_collector = evaluate_logical_plan( ir, config_options, collect_metadata=True diff --git a/python/cudf_polars/tests/experimental/test_parallel.py b/python/cudf_polars/tests/experimental/test_parallel.py index 67fc372e2e4..a9a0ff63786 100644 --- a/python/cudf_polars/tests/experimental/test_parallel.py +++ b/python/cudf_polars/tests/experimental/test_parallel.py @@ -50,10 +50,10 @@ def test_rename_concat(streaming_engine) -> None: assert_gpu_result_equal(q, engine=streaming_engine) -def 
test_fallback_on_concat_zlice(streaming_engine_factory) -> None: +def test_fallback_on_concat_zlice(spmd_engine_factory) -> None: # Pin ``fallback_mode="warn"`` so the spmd-small baseline (which sets # ``SILENT``) doesn't suppress the warning this test asserts on. - streaming_engine = streaming_engine_factory(StreamingOptions(fallback_mode="warn")) + streaming_engine = spmd_engine_factory(StreamingOptions(fallback_mode="warn")) q = pl.concat( [ pl.LazyFrame({"a": [1, 2]}), diff --git a/python/cudf_polars/tests/experimental/test_rolling.py b/python/cudf_polars/tests/experimental/test_rolling.py index 37de6f7f8a1..ee3ae137e27 100644 --- a/python/cudf_polars/tests/experimental/test_rolling.py +++ b/python/cudf_polars/tests/experimental/test_rolling.py @@ -8,6 +8,7 @@ import polars as pl from cudf_polars.experimental.rapidsmpf.frontend.options import StreamingOptions +from cudf_polars.experimental.rapidsmpf.frontend.spmd import SPMDEngine from cudf_polars.testing.asserts import assert_gpu_result_equal from cudf_polars.utils.versions import POLARS_VERSION_LT_136 @@ -46,10 +47,20 @@ def test_rolling_datetime(request, engine): assert_gpu_result_equal(q, engine=engine) -def test_over_in_filter_unsupported(streaming_engine_factory) -> None: +def test_over_in_filter_unsupported(request, streaming_engine_factory) -> None: engine = streaming_engine_factory( StreamingOptions(max_rows_per_partition=1, fallback_mode="warn"), ) + if not isinstance(engine, SPMDEngine): + # On Dask/Ray the fallback warning fires on worker processes and is + # invisible to ``pytest.warns``; the multi-rank fallback also + # doesn't preserve row order. 
+ request.applymarker( + pytest.mark.xfail( + reason="https://github.com/rapidsai/cudf/issues/22405", + strict=False, + ) + ) q = pl.concat( [ pl.LazyFrame({"k": ["x", "y"], "v": [3, 2]}), diff --git a/python/cudf_polars/tests/experimental/test_select.py b/python/cudf_polars/tests/experimental/test_select.py index 264f8b5aab1..cef9f0f66cf 100644 --- a/python/cudf_polars/tests/experimental/test_select.py +++ b/python/cudf_polars/tests/experimental/test_select.py @@ -22,6 +22,7 @@ assert_gpu_result_equal, assert_ir_translation_raises, ) +from cudf_polars.testing.engine_utils import warns_on_spmd from cudf_polars.utils.versions import ( POLARS_VERSION_LT_132, POLARS_VERSION_LT_134, @@ -54,8 +55,8 @@ def test_select(df, engine): @pytest.mark.parametrize("fallback_mode", ["silent", "raise", "warn", "foo"]) -def test_select_reduce_fallback(df, streaming_engine_factory, fallback_mode): - engine = streaming_engine_factory( +def test_select_reduce_fallback(df, spmd_engine_factory, fallback_mode): + engine = spmd_engine_factory( StreamingOptions(max_rows_per_partition=3, fallback_mode=fallback_mode), ) match = "This selection is not supported for multiple partitions." 
@@ -84,13 +85,17 @@ def test_select_reduce_fallback(df, streaming_engine_factory, fallback_mode): assert_gpu_result_equal(query, engine=engine) -def test_select_fill_null_with_strategy(df, engine): +def test_select_fill_null_with_strategy(df, streaming_engine_factory): + engine = streaming_engine_factory( + StreamingOptions(max_rows_per_partition=3, fallback_mode="warn"), + ) q = df.select(pl.col("a").forward_fill()) if POLARS_VERSION_LT_132: assert_ir_translation_raises(q, NotImplementedError) else: - with pytest.warns( + with warns_on_spmd( + engine, UserWarning, match="fill_null with strategy other than 'zero' or 'one' is not supported for multiple partitions", ): @@ -183,15 +188,19 @@ def test_select_mean_with_decimals(engine): assert_gpu_result_equal(q, engine=engine, check_dtypes=not POLARS_VERSION_LT_134) -def test_select_with_len(engine): - # https://github.com/pola-rs/polars/issues/25592 +def test_select_with_len(streaming_engine_factory): + engine = streaming_engine_factory( + StreamingOptions(max_rows_per_partition=3, fallback_mode="warn"), + ) df1 = pl.LazyFrame({"c0": [1] * 4}) df2 = pl.LazyFrame({"c0": [2] * 4}) q = pl.concat([df1.join(df2, how="cross"), df1.with_columns(pl.lit(None))]).select( pl.len() ) - with pytest.warns( - UserWarning, match="Cross join not support for multiple partitions" + with warns_on_spmd( + engine, + UserWarning, + match="Cross join not support for multiple partitions", ): assert_gpu_result_equal(q, engine=engine) diff --git a/python/cudf_polars/tests/experimental/test_spilling.py b/python/cudf_polars/tests/experimental/test_spilling.py index 6aa11801132..7f79b911038 100644 --- a/python/cudf_polars/tests/experimental/test_spilling.py +++ b/python/cudf_polars/tests/experimental/test_spilling.py @@ -50,20 +50,20 @@ def create_test_table(nbytes: int, stream: Stream) -> plc.Table: ], ) def test_make_spill_function( - streaming_engine_factory, + spmd_engine_factory, *, pinned_memory: bool, spilled_host_mem_type: MemoryType, ) -> 
None: """Test that spilling prioritizes longest queues and newest messages.""" - engine = streaming_engine_factory(StreamingOptions(pinned_memory=pinned_memory)) + engine = spmd_engine_factory(StreamingOptions(pinned_memory=pinned_memory)) context = engine.context if spilled_host_mem_type == MemoryType.PINNED_HOST: - assert engine.context.br().pinned_mr is not None + assert context.br().pinned_mr is not None other_host_mem_type = MemoryType.HOST else: - assert engine.context.br().pinned_mr is None + assert context.br().pinned_mr is None other_host_mem_type = MemoryType.PINNED_HOST # Create 3 spillable message containers simulating fanout buffers diff --git a/python/cudf_polars/tests/experimental/test_statistics.py b/python/cudf_polars/tests/experimental/test_statistics.py index 82c121d5830..42014a02106 100644 --- a/python/cudf_polars/tests/experimental/test_statistics.py +++ b/python/cudf_polars/tests/experimental/test_statistics.py @@ -7,14 +7,12 @@ from typing import TYPE_CHECKING import pytest -from rapidsmpf.bootstrap import is_running_with_rrun -from rapidsmpf.config import Options from rapidsmpf.statistics import Statistics -from cudf_polars.experimental.rapidsmpf.frontend.spmd import SPMDEngine +from cudf_polars.experimental.rapidsmpf.frontend.options import StreamingOptions if TYPE_CHECKING: - from collections.abc import Iterator + from collections.abc import Callable from cudf_polars.experimental.rapidsmpf.frontend.core import StreamingEngine @@ -25,49 +23,14 @@ ] -@pytest.fixture(params=["spmd", "ray", "dask"]) +@pytest.fixture def engine( - request: pytest.FixtureRequest, - spmd_engine: SPMDEngine, -) -> Iterator[StreamingEngine]: + streaming_engine_factory: Callable[..., StreamingEngine], +) -> StreamingEngine: """Yield each supported streaming engine with statistics enabled.""" - backend = request.param - rapidsmpf_options = Options({"statistics": "True"}) - executor_options = {"max_rows_per_partition": 10} - - if backend == "spmd": - with SPMDEngine( 
- comm=spmd_engine.comm, - rapidsmpf_options=rapidsmpf_options, - executor_options=executor_options, - ) as engine: - yield engine - return - - if is_running_with_rrun(): - pytest.skip(f"{backend}Engine must not be created from within an rrun cluster") - - if backend == "ray": - pytest.importorskip("ray", reason="ray is not installed") - from cudf_polars.experimental.rapidsmpf.frontend.ray import RayEngine - - with RayEngine( - rapidsmpf_options=rapidsmpf_options, - executor_options=executor_options, - ray_init_options={"include_dashboard": False}, - ) as engine: - yield engine - return - - assert backend == "dask" - pytest.importorskip("distributed", reason="distributed is not installed") - from cudf_polars.experimental.rapidsmpf.frontend.dask import DaskEngine - - with DaskEngine( - rapidsmpf_options=rapidsmpf_options, - executor_options=executor_options, - ) as engine: - yield engine + return streaming_engine_factory( + StreamingOptions(statistics=True, max_rows_per_partition=10), + ) def test_statistics(engine: StreamingEngine) -> None: diff --git a/python/cudf_polars/tests/experimental/test_unique.py b/python/cudf_polars/tests/experimental/test_unique.py index 6bb30624cb6..1a157c3fe21 100644 --- a/python/cudf_polars/tests/experimental/test_unique.py +++ b/python/cudf_polars/tests/experimental/test_unique.py @@ -10,13 +10,7 @@ from cudf_polars.experimental.rapidsmpf.frontend.options import StreamingOptions from cudf_polars.testing.asserts import assert_gpu_result_equal - - -@pytest.fixture -def engine(streaming_engine_factory): - return streaming_engine_factory( - StreamingOptions(fallback_mode="warn"), - ) +from cudf_polars.testing.engine_utils import warns_on_spmd @pytest.fixture(scope="module") @@ -77,11 +71,12 @@ def test_unique_head_tail(keep, zlice, streaming_engine_factory): ) -def test_unique_complex_slice_fallback(df, engine): +def test_unique_complex_slice_fallback(df, streaming_engine_factory): """Test that unique with complex slice (offset >= 1) 
falls back correctly.""" + engine = streaming_engine_factory(StreamingOptions(fallback_mode="warn")) # unique().slice(offset=5, length=10) has zlice[0] >= 1, triggering fallback q = df.unique(subset=("y",), keep="any").slice(5, 10) - with pytest.warns(UserWarning, match="Complex slice not supported"): + with warns_on_spmd(engine, UserWarning, match="Complex slice not supported"): result = q.collect(engine=engine) # Just verify the fallback produces valid output with expected shape assert result.shape == (10, 3) From 7a120b735f0279221347802d9c02023901a62e78 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Fri, 8 May 2026 02:55:05 +0000 Subject: [PATCH 36/36] Address reviews --- python/cudf/cudf/core/groupby/groupby.py | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 153b459f0ba..88fe1f3aca9 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -3070,20 +3070,7 @@ def _to_bool_col(col): if min_count and min_count > 0: counts = self.agg("count") - if isinstance(result, Series): - count_series = ( - counts if isinstance(counts, Series) else counts.iloc[:, 0] - ) - result = result.where(count_series >= min_count, None) - else: - for col_name in result._column_names: - if col_name not in counts._column_names: - continue - count_col = counts._data[col_name] - mask = count_col < min_count - result[col_name] = result[col_name].where( - ~Series._from_column(mask), None - ) + result = result.where(counts >= min_count, None) return result