Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions ci/validate_wheel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@ PYDISTCHECK_ARGS=(
if [[ "${package_dir}" == "python/libcudf" ]]; then
if [[ "${RAPIDS_CUDA_MAJOR}" == "12" ]]; then
PYDISTCHECK_ARGS+=(
--max-allowed-size-compressed '675M'
--max-allowed-size-compressed '700M'
)
else
PYDISTCHECK_ARGS+=(
--max-allowed-size-compressed '325M'
--max-allowed-size-compressed '350M'
)
fi
elif [[ "${package_dir}" != "python/cudf" ]] && \
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-129_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ dependencies:
- packaging
- pandas>=2.0,<2.4.0
- pandoc
- polars>=1.35,<1.39
- polars>=1.35,<1.40
- pre-commit
- pyarrow>=19.0.0,<24
- pydata-sphinx-theme>=0.15.4
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-129_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ dependencies:
- packaging
- pandas>=2.0,<2.4.0
- pandoc
- polars>=1.35,<1.39
- polars>=1.35,<1.40
- pre-commit
- pyarrow>=19.0.0,<24
- pydata-sphinx-theme>=0.15.4
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-132_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ dependencies:
- packaging
- pandas>=2.0,<2.4.0
- pandoc
- polars>=1.35,<1.39
- polars>=1.35,<1.40
- pre-commit
- pyarrow>=19.0.0,<24
- pydata-sphinx-theme>=0.15.4
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-132_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ dependencies:
- packaging
- pandas>=2.0,<2.4.0
- pandoc
- polars>=1.35,<1.39
- polars>=1.35,<1.40
- pre-commit
- pyarrow>=19.0.0,<24
- pydata-sphinx-theme>=0.15.4
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/cudf-polars/recipe.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ requirements:
- python
- pylibcudf =${{ version }}
- rapidsmpf =${{ minor_version }}
- polars>=1.35,<1.39
- polars>=1.35,<1.40
- packaging
- ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }}
- if: cuda_major == "12"
Expand Down
11 changes: 11 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,7 @@ add_library(
src/filling/repeat.cu
src/filling/sequence.cu
src/groupby/common/m2_var_std.cu
src/groupby/common/utils.cpp
src/groupby/groupby.cu
src/groupby/hash/compute_global_memory_aggs.cu
src/groupby/hash/compute_global_memory_aggs_null.cu
Expand Down Expand Up @@ -539,6 +540,16 @@ add_library(
src/groupby/sort/host_udf_aggregation.cpp
src/groupby/sort/scan.cpp
src/groupby/sort/sort_helper.cu
src/groupby/streaming_groupby.cpp
src/groupby/streaming_groupby/aggregate.cu
src/groupby/streaming_groupby/impl.cu
src/groupby/streaming_groupby/insert.cu
src/groupby/streaming_groupby/insert_first.cu
src/groupby/streaming_groupby/insert_first_nested.cu
src/groupby/streaming_groupby/insert_nested.cu
src/groupby/streaming_groupby/insert_subsequent.cu
src/groupby/streaming_groupby/insert_subsequent_nested.cu
src/groupby/streaming_groupby/merge.cu
src/hash/md5_hash.cu
src/hash/murmurhash3_x86_32.cu
src/hash/murmurhash3_x64_128.cu
Expand Down
86 changes: 81 additions & 5 deletions cpp/benchmarks/groupby/group_max.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,85 @@ void bench_groupby_max(nvbench::state& state, nvbench::type_list<Type>)
// Benchmarks a groupby MAX aggregation keyed on three copies of one int32 key column.
// The row count, key cardinality, number of value columns (one MAX request per column)
// and the API under test are all read from nvbench axes on `state`:
//   "num_rows", "cardinality", "num_aggregations" (int64) and "api" (string).
// api == "streaming" exercises cudf::groupby::streaming_groupby; any other value
// exercises cudf::groupby::groupby.
template <typename Type>
void bench_groupby_max_cardinality(nvbench::state& state, nvbench::type_list<Type>)
{
// NOTE(review): this listing looks like a diff rendering whose +/- markers were lost.
// The next three declarations appear to be the pre-change lines (num_rows and
// cardinality are declared again right below) -- confirm against the real file.
auto constexpr num_rows = 20'000'000;
auto constexpr null_probability = 0.;
auto const cardinality = static_cast<cudf::size_type>(state.get_int64("cardinality"));
// Axis values are int64 in nvbench; narrow the sizes to cudf::size_type.
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const cardinality = static_cast<cudf::size_type>(state.get_int64("cardinality"));
auto const num_aggregations = state.get_int64("num_aggregations");
auto const is_streaming = state.get_string("api") == "streaming";

// TODO: streaming groupby reuses the cudf hash element_aggregator, which has
// no decimal128 MIN/MAX/SUM specialization (no native 128-bit atomics). The
// stateless `normal` path falls back to sort-based aggregation, but streaming
// has no fallback and rejects the request. Re-enable once streaming has a
// non-atomic aggregator path or 128-bit atomics gain hardware support.
// The skip is reported through state.skip and we return before any data is generated.
if (is_streaming && std::is_same_v<Type, numeric::decimal128>) {
state.skip("streaming groupby does not support decimal128 MAX/MIN/SUM");
return;
}

// NOTE(review): this call looks like another pre-change line from the diff (the
// body below builds keys/values and runs the groupby itself) -- confirm.
groupby_max_helper<Type>(state, num_rows, cardinality, null_probability);
// Key column: num_rows int32 values, UNIFORM distribution with bounds 0 and num_rows,
// the requested cardinality, built with no_validity().
auto const keys = [&] {
data_profile const profile =
data_profile_builder()
.cardinality(cardinality)
.no_validity()
.distribution(cudf::type_to_id<int32_t>(), distribution_id::UNIFORM, 0, num_rows);
return create_random_column(cudf::type_to_id<int32_t>(), row_count{num_rows}, profile);
}();

// Factory for one value column of `Type` with the same UNIFORM bounds.
// cardinality(0) presumably disables the cardinality limit -- TODO confirm
// against the data_profile documentation.
auto const make_values = [&]() {
auto builder = data_profile_builder().cardinality(0).no_validity().distribution(
cudf::type_to_id<Type>(), distribution_id::UNIFORM, 0, num_rows);
return create_random_column(
cudf::type_to_id<Type>(), row_count{num_rows}, data_profile{builder});
};

// One independently generated value column per requested aggregation.
std::vector<std::unique_ptr<cudf::column>> val_cols;
for (int64_t i = 0; i < num_aggregations; i++) {
val_cols.emplace_back(make_values());
}

auto keys_view = keys->view();

// Created before state.exec so its peak_memory_usage() can be reported at the end.
auto const mem_stats_logger = cudf::memory_stats_logger();
// Hand cudf's default stream to nvbench.
state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));

if (is_streaming) {
// The streaming API addresses columns by index into a single table:
// columns 0..2 are the (identical) keys, column 3 + i is value column i.
std::vector<cudf::column_view> all_columns = {keys_view, keys_view, keys_view};
for (auto const& vc : val_cols) {
all_columns.push_back(vc->view());
}
auto const full_table = cudf::table_view(all_columns);

std::vector<cudf::size_type> key_indices = {0, 1, 2};
std::vector<cudf::groupby::streaming_aggregation_request> requests;
for (int64_t i = 0; i < num_aggregations; i++) {
cudf::groupby::streaming_aggregation_request req;
req.column_index = static_cast<cudf::size_type>(3 + i);
req.aggregation = cudf::make_max_aggregation<cudf::groupby_aggregation>();
requests.push_back(std::move(req));
}
// The streaming_groupby object is constructed inside the lambda passed to
// state.exec, so construction, the single aggregate() call and finalize() all
// run per invocation. num_rows is the third constructor argument; its exact
// meaning is not visible here (presumably a size/capacity hint -- TODO confirm).
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
auto sgb = cudf::groupby::streaming_groupby(key_indices, requests, num_rows);
sgb.aggregate(full_table);
auto const result = sgb.finalize();
});
} else {
// Non-streaming path: one aggregation_request per value column, each holding
// a single MAX aggregation.
std::vector<cudf::groupby::aggregation_request> requests;
for (int64_t i = 0; i < num_aggregations; i++) {
requests.emplace_back();
requests[i].values = val_cols[i]->view();
requests[i].aggregations.push_back(cudf::make_max_aggregation<cudf::groupby_aggregation>());
}
// The groupby object (same three key views) is built inside the lambda as well.
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
auto gb_obj = cudf::groupby::groupby(cudf::table_view({keys_view, keys_view, keys_view}));
auto const result = gb_obj.aggregate(requests);
});
}

// Report (num_rows * num_aggregations) / elapsed / 1e6 under the "Mrows/s" label,
// using the "nv/cold/time/gpu/mean" summary as the elapsed time, plus the peak
// memory observed by mem_stats_logger.
auto const elapsed_time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
state.add_element_count(
static_cast<double>(num_rows * num_aggregations) / elapsed_time / 1'000'000., "Mrows/s");
state.add_buffer_size(
mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage");
}

NVBENCH_BENCH_TYPES(bench_groupby_max,
Expand All @@ -102,5 +176,7 @@ NVBENCH_BENCH_TYPES(bench_groupby_max,
// Registers bench_groupby_max_cardinality as "groupby_max_cardinality" for the value
// types int32_t and numeric::decimal128, and declares its axes: a single num_rows
// value, 1..8 aggregations, seven key cardinalities, and the API to exercise.
NVBENCH_BENCH_TYPES(bench_groupby_max_cardinality,
NVBENCH_TYPE_AXES(nvbench::type_list<int32_t, numeric::decimal128>))
.set_name("groupby_max_cardinality")
.add_int64_axis("num_rows", {20'000'000})
.add_int64_axis("num_aggregations", {1, 2, 3, 4, 5, 6, 7, 8})
// NOTE(review): the next line (terminated with `;`) looks like the pre-change
// version of the cardinality axis left over from a diff rendering; the line after
// it is the same axis continuing the chain -- confirm against the real file.
.add_int64_axis("cardinality", {20, 50, 100, 1'000, 10'000, 100'000, 1'000'000});
.add_int64_axis("cardinality", {20, 50, 100, 1'000, 10'000, 100'000, 1'000'000})
// "normal" and "streaming" are the two values of the string axis "api".
.add_string_axis("api", {"normal", "streaming"});
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
diff --git a/cpp/cmake_modules/FindRapidJSONAlt.cmake b/cpp/cmake_modules/FindRapidJSONAlt.cmake
index babb450e20..148dd93a78 100644
--- a/cpp/cmake_modules/FindRapidJSONAlt.cmake
+++ b/cpp/cmake_modules/FindRapidJSONAlt.cmake
@@ -26,7 +26,10 @@ endif()
if(RapidJSONAlt_FIND_QUIETLY)
list(APPEND find_package_args QUIET)
endif()
+set(_CMAKE_POLICY_VERSION_MINIMUM_OLD ${CMAKE_POLICY_VERSION_MINIMUM})
+set(CMAKE_POLICY_VERSION_MINIMUM 3.5)
find_package(RapidJSON ${find_package_args})
+set(CMAKE_POLICY_VERSION_MINIMUM ${_CMAKE_POLICY_VERSION_MINIMUM_OLD})
if(RapidJSON_FOUND)
set(RapidJSONAlt_FOUND TRUE)
if(NOT TARGET RapidJSON)
17 changes: 17 additions & 0 deletions cpp/cmake/thirdparty/patches/override.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"packages": {
"arrow": {
"version": "21.0.0",
"git_url": "https://github.com/apache/arrow.git",
"git_tag": "apache-arrow-${version}",
"git_shallow": false,
"patches": [
{
"file": "${current_json_dir}/arrow_rapidjson_cmake_policy_version_minimum.diff",
"issue": "https://github.com/apache/arrow/pull/49993"
}
],
"source_subdir": "cpp"
}
}
}
6 changes: 4 additions & 2 deletions cpp/include/cudf/detail/aggregation/aggregation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
#include <cudf/utilities/span.hpp>
#include <cudf/utilities/traits.hpp>

#include <cuda/std/type_traits>

#include <functional>
#include <numeric>
#include <utility>
Expand Down Expand Up @@ -1341,9 +1343,9 @@ data_type target_type(data_type source_type, aggregation::Kind k);
* @tparam k The aggregation to perform
*/
template <typename Source, aggregation::Kind k>
// NOTE(review): this listing looks like a diff rendering without +/- markers; the
// first signature below appears to be the pre-change line and the second, which adds
// CUDF_HOST_DEVICE, its replacement -- confirm against the real header.
constexpr inline bool is_valid_aggregation()
CUDF_HOST_DEVICE constexpr inline bool is_valid_aggregation()
{
// A (Source, k) pair is valid exactly when target_type_t<Source, k> is not void.
// Same caveat as above: the std:: form looks like the old line, and the
// cuda::std:: form (from <cuda/std/type_traits>) its replacement.
return (not std::is_void_v<target_type_t<Source, k>>);
return (not cuda::std::is_void_v<target_type_t<Source, k>>);
}

/**
Expand Down
Loading
Loading