diff --git a/cpp/benchmarks/bench_pack.cpp b/cpp/benchmarks/bench_pack.cpp
index 216794948..0494044d0 100644
--- a/cpp/benchmarks/bench_pack.cpp
+++ b/cpp/benchmarks/bench_pack.cpp
@@ -20,6 +20,7 @@
 
 #include <rapidsmpf/memory/cuda_memcpy_async.hpp>
 #include <rapidsmpf/memory/pinned_memory_resource.hpp>
+#include <rapidsmpf/utils/misc.hpp>
 
 #include "utils/random_data.hpp"
 
@@ -44,7 +45,7 @@ void run_pack(
 
     // Calculate number of rows for a single-column table of the desired size
     auto const nrows =
-        static_cast<cudf::size_type>(table_size_bytes / sizeof(random_data_t));
+        rapidsmpf::safe_cast<cudf::size_type>(table_size_bytes / sizeof(random_data_t));
     auto table = random_table(1, nrows, 0, 1000, stream, table_mr);
 
     // Warm up
@@ -120,7 +121,8 @@ void run_chunked_pack(
     rmm::cuda_stream_view stream
 ) {
     // Calculate number of rows for a single-column table of the desired size
-    auto const nrows = static_cast<cudf::size_type>(table_size / sizeof(random_data_t));
+    auto const nrows =
+        rapidsmpf::safe_cast<cudf::size_type>(table_size / sizeof(random_data_t));
     auto table = random_table(1, nrows, 0, 1000, stream, table_mr);
 
     // Create the chunked_pack instance to get total output size
diff --git a/cpp/benchmarks/bench_shuffle.cpp b/cpp/benchmarks/bench_shuffle.cpp
index 1744f488a..c95df17b9 100644
--- a/cpp/benchmarks/bench_shuffle.cpp
+++ b/cpp/benchmarks/bench_shuffle.cpp
@@ -23,6 +23,7 @@
 #include <rapidsmpf/progress_thread.hpp>
 #include <rapidsmpf/shuffler/shuffler.hpp>
 #include <rapidsmpf/statistics.hpp>
+#include <rapidsmpf/utils/misc.hpp>
 #include <rapidsmpf/utils/string.hpp>
 
 #ifdef RAPIDSMPF_HAVE_CUPTI
@@ -373,28 +374,23 @@ std::vector<InputPartitionsT> generate_input_partitions(
     rapidsmpf::BufferResource* br,
     TransformFn&& transform_fn
 ) {
+    auto const num_columns = rapidsmpf::safe_cast<cudf::size_type>(args.num_columns);
+    auto const num_local_rows =
+        rapidsmpf::safe_cast<cudf::size_type>(args.num_local_rows);
     std::int32_t const min_val = 0;
-    std::int32_t const max_val = args.num_local_rows;
+    std::int32_t const max_val = num_local_rows;
 
     std::vector<InputPartitionsT> input_partitions;
     input_partitions.reserve(args.num_local_partitions);
     for (rapidsmpf::shuffler::PartID i = 0; i < args.num_local_partitions; ++i) {
-        std::size_t size_lb = random_table_size_lower_bound(
-            static_cast<cudf::size_type>(args.num_columns),
-            static_cast<cudf::size_type>(args.num_local_rows)
-        );
+        std::size_t size_lb = random_table_size_lower_bound(num_columns, num_local_rows);
 
         // reserve at least size_lb and spill if necessary.
         auto res = br->reserve_device_memory_and_spill(
             size_lb, args.input_data_allow_overbooking
         );
         cudf::table table = random_table(
-            static_cast<cudf::size_type>(args.num_columns),
-            static_cast<cudf::size_type>(args.num_local_rows),
-            min_val,
-            max_val,
-            stream,
-            br->device_mr()
+            num_columns, num_local_rows, min_val, max_val, stream, br->device_mr()
         );
         input_partitions.emplace_back(transform_fn(std::move(table)));
     }
diff --git a/cpp/benchmarks/streaming/bench_streaming_shuffle.cpp b/cpp/benchmarks/streaming/bench_streaming_shuffle.cpp
index 63a6cd164..9cc5ea538 100644
--- a/cpp/benchmarks/streaming/bench_streaming_shuffle.cpp
+++ b/cpp/benchmarks/streaming/bench_streaming_shuffle.cpp
@@ -28,6 +28,7 @@
 #include <rapidsmpf/streaming/core/context.hpp>
 #include <rapidsmpf/streaming/cudf/partition.hpp>
 #include <rapidsmpf/streaming/cudf/table_chunk.hpp>
+#include <rapidsmpf/utils/misc.hpp>
 #include <rapidsmpf/utils/string.hpp>
 
 #include "../utils/misc.hpp"
@@ -263,14 +264,17 @@ rapidsmpf::Duration run(
     std::vector<rapidsmpf::streaming::Actor> actors;
     {
         auto ch1 = ctx->create_channel();
+        auto const num_columns = rapidsmpf::safe_cast<cudf::size_type>(args.num_columns);
+        auto const num_local_rows =
+            rapidsmpf::safe_cast<cudf::size_type>(args.num_local_rows);
         actors.push_back(
             rapidsmpf::streaming::actor::random_table_generator(
                 ctx,
                 stream,
                 ch1,
                 args.num_local_partitions,
-                static_cast<cudf::size_type>(args.num_columns),
-                static_cast<cudf::size_type>(args.num_local_rows),
+                num_columns,
+                num_local_rows,
                 min_val,
                 max_val
             )
diff --git a/cpp/benchmarks/streaming/data_generator.hpp b/cpp/benchmarks/streaming/data_generator.hpp
index 66959fa55..9bb53795c 100644
--- a/cpp/benchmarks/streaming/data_generator.hpp
+++ b/cpp/benchmarks/streaming/data_generator.hpp
@@ -12,6 +12,7 @@
 #include <rapidsmpf/streaming/core/channel.hpp>
 #include <rapidsmpf/streaming/core/context.hpp>
 #include <rapidsmpf/streaming/cudf/table_chunk.hpp>
+#include <rapidsmpf/utils/misc.hpp>
 
 #include "../utils/random_data.hpp"
 
@@ -52,7 +53,8 @@ inline Actor random_table_generator(
 ) {
     ShutdownAtExit c{ch_out};
     co_await ctx->executor()->schedule();
-    auto nbytes = static_cast<std::size_t>(ncolumns * nrows) * sizeof(std::int32_t);
+    auto nbytes = rapidsmpf::safe_cast<std::size_t>(ncolumns)  // multiply in size_t
+                  * rapidsmpf::safe_cast<std::size_t>(nrows) * sizeof(std::int32_t);
     for (std::uint64_t seq = 0; seq < num_blocks; ++seq) {
         auto res =
             ctx->br()->reserve_device_memory_and_spill(nbytes, AllowOverbooking::NO);
diff --git a/cpp/benchmarks/utils/random_data.cu b/cpp/benchmarks/utils/random_data.cu
index b93f04585..021c384ad 100644
--- a/cpp/benchmarks/utils/random_data.cu
+++ b/cpp/benchmarks/utils/random_data.cu
@@ -3,6 +3,10 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
+#include <algorithm>
+#include <cstdint>
+#include <limits>
+
 #include <thrust/random.h>
 #include <thrust/transform.h>
 
@@ -13,25 +17,32 @@
 #include <rmm/exec_policy.hpp>
 
 #include <rapidsmpf/memory/cuda_memcpy_async.hpp>
+#include <rapidsmpf/utils/misc.hpp>
 
 #include "random_data.hpp"
 
 rmm::device_uvector<std::int32_t> random_device_vector(
-    cudf::size_type nelem,
+    std::size_t nelem,
     std::int32_t min_val,
     std::int32_t max_val,
     rmm::cuda_stream_view stream,
     rmm::device_async_resource_ref mr
 ) {
     // Fill vector with random data.
-    rmm::device_uvector<std::int32_t> vec(static_cast<std::size_t>(nelem), stream, mr);
+    using index_t = std::int64_t;  // 64-bit, since nelem is a std::size_t
+    auto const end_index = rapidsmpf::safe_cast<index_t>(nelem);
+    rmm::device_uvector<std::int32_t> vec(nelem, stream, mr);
+    thrust::counting_iterator<index_t> const begin(0);
+    thrust::counting_iterator<index_t> const end(end_index);
     thrust::transform(
         rmm::exec_policy(stream),
-        thrust::make_counting_iterator(0),
-        thrust::make_counting_iterator(nelem),
+        begin,
+        end,
         vec.begin(),
-        [min_val, max_val] __device__(cudf::size_type index) {
-            thrust::default_random_engine engine(index);  // HACK: use the seed as index
+        [min_val, max_val] __device__(index_t index) {
+            thrust::default_random_engine engine(
+                static_cast<thrust::default_random_engine::result_type>(index)
+            );  // HACK: the element index is used as the RNG seed
             thrust::uniform_int_distribution<std::int32_t> dist(min_val, max_val);
             return dist(engine);
         }
@@ -46,7 +57,9 @@ std::unique_ptr<cudf::column> random_column(
     rmm::cuda_stream_view stream,
     rmm::device_async_resource_ref mr
 ) {
-    auto vec = random_device_vector(nrows, min_val, max_val, stream, mr);
+    auto vec = random_device_vector(
+        rapidsmpf::safe_cast<std::size_t>(nrows), min_val, max_val, stream, mr
+    );
     return std::make_unique<cudf::column>(
         std::move(vec), rmm::device_buffer{0, stream, mr}, 0
     );
@@ -71,8 +84,13 @@ void random_fill(rapidsmpf::Buffer& buffer, rmm::device_async_resource_ref mr) {
     switch (buffer.mem_type()) {
     case rapidsmpf::MemoryType::DEVICE:
         {
+            auto const num_elements = std::max<std::size_t>(
+                std::size_t{1},
+                buffer.size / sizeof(random_data_t)
+                    + (buffer.size % sizeof(random_data_t) != 0)
+            );  // ceil(buffer.size / sizeof(random_data_t)), but never 0
             auto vec = random_device_vector(
-                buffer.size / sizeof(std::int32_t) + sizeof(std::int32_t),
+                num_elements,
                 std::numeric_limits<std::int32_t>::min(),
                 std::numeric_limits<std::int32_t>::max(),
                 buffer.stream(),
diff --git a/cpp/benchmarks/utils/random_data.hpp b/cpp/benchmarks/utils/random_data.hpp
index 5a6f66fec..983f086a1 100644
--- a/cpp/benchmarks/utils/random_data.hpp
+++ b/cpp/benchmarks/utils/random_data.hpp
@@ -4,11 +4,15 @@
  */
 #pragma once
 
+#include <cstddef>
+#include <cstdint>
+
 #include <cudf/column/column.hpp>
 #include <cudf/table/table.hpp>
 #include <cudf/types.hpp>
 
 #include <rapidsmpf/memory/buffer.hpp>
+#include <rapidsmpf/utils/misc.hpp>
 
 
 /**
@@ -25,7 +29,8 @@ using random_data_t = std::int32_t;
 std::size_t constexpr random_table_size_lower_bound(
     cudf::size_type ncolumns, cudf::size_type nrows
 ) {
-    return static_cast<std::size_t>(ncolumns * nrows) * sizeof(random_data_t);
+    return rapidsmpf::safe_cast<std::size_t>(ncolumns)  // multiply in size_t
+           * rapidsmpf::safe_cast<std::size_t>(nrows) * sizeof(random_data_t);
 }
 
 /**
@@ -44,7 +49,7 @@ std::size_t constexpr random_table_size_lower_bound(
  * @note The function uses the specified CUDA stream for asynchronous operations.
  */
 rmm::device_uvector<std::int32_t> random_device_vector(
-    cudf::size_type nelem,
+    std::size_t nelem,
     std::int32_t min_val,
     std::int32_t max_val,
     rmm::cuda_stream_view stream,