diff --git a/cpp/benchmarks/bench_pack.cpp b/cpp/benchmarks/bench_pack.cpp index 216794948..0494044d0 100644 --- a/cpp/benchmarks/bench_pack.cpp +++ b/cpp/benchmarks/bench_pack.cpp @@ -20,6 +20,7 @@ #include #include +#include #include "utils/random_data.hpp" @@ -44,7 +45,7 @@ void run_pack( // Calculate number of rows for a single-column table of the desired size auto const nrows = - static_cast(table_size_bytes / sizeof(random_data_t)); + rapidsmpf::safe_cast(table_size_bytes / sizeof(random_data_t)); auto table = random_table(1, nrows, 0, 1000, stream, table_mr); // Warm up @@ -120,7 +121,8 @@ void run_chunked_pack( rmm::cuda_stream_view stream ) { // Calculate number of rows for a single-column table of the desired size - auto const nrows = static_cast(table_size / sizeof(random_data_t)); + auto const nrows = + rapidsmpf::safe_cast(table_size / sizeof(random_data_t)); auto table = random_table(1, nrows, 0, 1000, stream, table_mr); // Create the chunked_pack instance to get total output size diff --git a/cpp/benchmarks/bench_shuffle.cpp b/cpp/benchmarks/bench_shuffle.cpp index 1744f488a..c95df17b9 100644 --- a/cpp/benchmarks/bench_shuffle.cpp +++ b/cpp/benchmarks/bench_shuffle.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #ifdef RAPIDSMPF_HAVE_CUPTI @@ -373,28 +374,23 @@ std::vector generate_input_partitions( rapidsmpf::BufferResource* br, TransformFn&& transform_fn ) { + auto const num_columns = rapidsmpf::safe_cast(args.num_columns); + auto const num_local_rows = + rapidsmpf::safe_cast(args.num_local_rows); std::int32_t const min_val = 0; - std::int32_t const max_val = args.num_local_rows; + std::int32_t const max_val = num_local_rows; std::vector input_partitions; input_partitions.reserve(args.num_local_partitions); for (rapidsmpf::shuffler::PartID i = 0; i < args.num_local_partitions; ++i) { - std::size_t size_lb = random_table_size_lower_bound( - static_cast(args.num_columns), - static_cast(args.num_local_rows) - ); + std::size_t size_lb = random_table_size_lower_bound(num_columns, num_local_rows); // reserve at least size_lb and spill if necessary. auto res = br->reserve_device_memory_and_spill( size_lb, args.input_data_allow_overbooking ); cudf::table table = random_table( - static_cast(args.num_columns), - static_cast(args.num_local_rows), - min_val, - max_val, - stream, - br->device_mr() + num_columns, num_local_rows, min_val, max_val, stream, br->device_mr() ); input_partitions.emplace_back(transform_fn(std::move(table))); } diff --git a/cpp/benchmarks/streaming/bench_streaming_shuffle.cpp b/cpp/benchmarks/streaming/bench_streaming_shuffle.cpp index 63a6cd164..9cc5ea538 100644 --- a/cpp/benchmarks/streaming/bench_streaming_shuffle.cpp +++ b/cpp/benchmarks/streaming/bench_streaming_shuffle.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "../utils/misc.hpp" @@ -263,14 +264,17 @@ rapidsmpf::Duration run( std::vector actors; { auto ch1 = ctx->create_channel(); + auto const num_columns = rapidsmpf::safe_cast(args.num_columns); + auto const num_local_rows = + rapidsmpf::safe_cast(args.num_local_rows); actors.push_back( rapidsmpf::streaming::actor::random_table_generator( ctx, stream, ch1, args.num_local_partitions, - static_cast(args.num_columns), - static_cast(args.num_local_rows), + num_columns, + num_local_rows, min_val, max_val ) diff --git a/cpp/benchmarks/streaming/data_generator.hpp b/cpp/benchmarks/streaming/data_generator.hpp index 66959fa55..9bb53795c 100644 --- a/cpp/benchmarks/streaming/data_generator.hpp +++ b/cpp/benchmarks/streaming/data_generator.hpp @@ -12,6 +12,7 @@ #include #include #include +#include #include "../utils/random_data.hpp" @@ -52,7 +53,8 @@ inline Actor random_table_generator( ) { ShutdownAtExit c{ch_out}; co_await ctx->executor()->schedule(); - auto nbytes = static_cast(ncolumns * nrows) * sizeof(std::int32_t); + auto nbytes = rapidsmpf::safe_cast(ncolumns) + * rapidsmpf::safe_cast(nrows) * sizeof(std::int32_t); for (std::uint64_t seq = 0; seq < num_blocks; ++seq) { auto res = ctx->br()->reserve_device_memory_and_spill(nbytes, AllowOverbooking::NO); diff --git a/cpp/benchmarks/utils/random_data.cu b/cpp/benchmarks/utils/random_data.cu index b93f04585..021c384ad 100644 --- a/cpp/benchmarks/utils/random_data.cu +++ b/cpp/benchmarks/utils/random_data.cu @@ -3,6 +3,10 @@ * SPDX-License-Identifier: Apache-2.0 */ +#include +#include +#include + #include #include @@ -13,25 +17,32 @@ #include #include +#include #include "random_data.hpp" rmm::device_uvector random_device_vector( - cudf::size_type nelem, + std::size_t nelem, std::int32_t min_val, std::int32_t max_val, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr ) { // Fill vector with random data. - rmm::device_uvector vec(static_cast(nelem), stream, mr); + using index_t = std::int64_t; + auto const end_index = rapidsmpf::safe_cast(nelem); + rmm::device_uvector vec(nelem, stream, mr); + thrust::counting_iterator const begin(0); + thrust::counting_iterator const end(end_index); thrust::transform( rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(nelem), + begin, + end, vec.begin(), - [min_val, max_val] __device__(cudf::size_type index) { - thrust::default_random_engine engine(index); // HACK: use the seed as index + [min_val, max_val] __device__(index_t index) { + thrust::default_random_engine engine( + static_cast(index) + ); thrust::uniform_int_distribution dist(min_val, max_val); return dist(engine); } @@ -46,7 +57,9 @@ std::unique_ptr random_column( rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr ) { - auto vec = random_device_vector(nrows, min_val, max_val, stream, mr); + auto vec = random_device_vector( + rapidsmpf::safe_cast(nrows), min_val, max_val, stream, mr + ); return std::make_unique( std::move(vec), rmm::device_buffer{0, stream, mr}, 0 ); @@ -71,8 +84,13 @@ void random_fill(rapidsmpf::Buffer& buffer, rmm::device_async_resource_ref mr) { switch (buffer.mem_type()) { case rapidsmpf::MemoryType::DEVICE: { + auto const num_elements = std::max( + std::size_t{1}, + buffer.size / sizeof(random_data_t) + + (buffer.size % sizeof(random_data_t) != 0) + ); auto vec = random_device_vector( - buffer.size / sizeof(std::int32_t) + sizeof(std::int32_t), + num_elements, std::numeric_limits::min(), std::numeric_limits::max(), buffer.stream(), diff --git a/cpp/benchmarks/utils/random_data.hpp b/cpp/benchmarks/utils/random_data.hpp index 5a6f66fec..983f086a1 100644 --- a/cpp/benchmarks/utils/random_data.hpp +++ b/cpp/benchmarks/utils/random_data.hpp @@ -4,11 +4,15 @@ */ #pragma once +#include +#include + #include #include #include #include +#include /** @@ -25,7 +29,8 @@ using random_data_t = std::int32_t; std::size_t constexpr random_table_size_lower_bound( cudf::size_type ncolumns, cudf::size_type nrows ) { - return static_cast(ncolumns * nrows) * sizeof(random_data_t); + return rapidsmpf::safe_cast(ncolumns) + * rapidsmpf::safe_cast(nrows) * sizeof(random_data_t); } /** @@ -44,7 +49,7 @@ std::size_t constexpr random_table_size_lower_bound( * @note The function uses the specified CUDA stream for asynchronous operations. */ rmm::device_uvector random_device_vector( - cudf::size_type nelem, + std::size_t nelem, std::int32_t min_val, std::int32_t max_val, rmm::cuda_stream_view stream,