Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions cpp/include/rapidsmpf/memory/pinned_memory_resource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,25 @@ class PinnedMemoryResource final
/// @brief Sentinel value indicating that pinned host memory is disabled.
static constexpr std::nullopt_t Disabled = std::nullopt;

/**
* @brief Fraction of total host memory per GPU used as the initial pinned pool size
* when no explicit `pinned_initial_pool_size` option is provided.
*
* Applied as: `initial_pool_size = get_host_memory_per_gpu() *
* DEFAULT_INIT_POOL_SIZE_FACTOR`.
*/
static constexpr double DEFAULT_INIT_POOL_SIZE_FACTOR = 0.1;

/**
* @brief Fraction of total host memory per GPU used as the maximum pinned pool size
* when no explicit `pinned_max_pool_size` option is provided.
*
* Applied as: `max_pool_size = get_host_memory_per_gpu() *
* DEFAULT_MAX_POOL_SIZE_FACTOR`. `get_host_memory_per_gpu()` is computed as total
* host memory divided by the number of GPUs visible to the system.
*/
static constexpr double DEFAULT_MAX_POOL_SIZE_FACTOR = 0.8;

/**
* @brief Create a pinned memory resource if the system supports pinned memory.
*
Expand All @@ -118,6 +137,13 @@ class PinnedMemoryResource final
/**
* @brief Construct from configuration options.
*
* Recognized options:
* - `pinned_memory` (bool): enables pinned memory; defaults to `true`.
* - `pinned_initial_pool_size` (nbytes string): initial pool size; defaults to
* `get_host_memory_per_gpu() * DEFAULT_INIT_POOL_SIZE_FACTOR`.
* - `pinned_max_pool_size` (nbytes string or empty): maximum pool size; defaults to
* `get_host_memory_per_gpu() * DEFAULT_MAX_POOL_SIZE_FACTOR`.
*
* @param options Configuration options.
*
* @return A `PinnedMemoryResource` if pinned memory is enabled and supported,
Expand Down
6 changes: 6 additions & 0 deletions cpp/include/rapidsmpf/system_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,5 +79,11 @@ std::vector<int> get_current_numa_nodes() noexcept;
*/
std::uint64_t get_numa_node_host_memory(int numa_id = get_current_numa_node()) noexcept;

/**
* @brief Get the amount of host memory per GPU.
*
 * Computed as total host memory divided by the number of GPUs visible to the
 * system.
 *
* @return Amount of host memory per GPU in bytes.
*/
std::uint64_t get_host_memory_per_gpu();

} // namespace rapidsmpf
18 changes: 14 additions & 4 deletions cpp/src/memory/pinned_memory_resource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,21 +74,31 @@ std::optional<PinnedMemoryResource> PinnedMemoryResource::from_options(
config::Options options
) {
bool const pinned_memory = options.get<bool>("pinned_memory", [](auto const& s) {
return parse_string<bool>(s.empty() ? "True" : s);
return s.empty() ? true : parse_string<bool>(s);
});

if (pinned_memory && is_pinned_memory_resources_supported()) {
PinnedPoolProperties pool_properties{
.initial_pool_size = options.get<size_t>(
"pinned_initial_pool_size",
[](auto const& s) { return s.empty() ? 0 : parse_nbytes_unsigned(s); }
[](auto const& s) {
return s.empty() ? safe_cast<size_t>(
get_host_memory_per_gpu()
* DEFAULT_INIT_POOL_SIZE_FACTOR
)
: parse_nbytes_unsigned(s);
}
),
.max_pool_size = options.get<std::optional<size_t>>(
"pinned_max_pool_size", [](auto const& s) -> std::optional<size_t> {
auto parsed = parse_optional(s);
const auto parsed = parse_optional(s);
if (parsed.has_value() && !parsed->empty()) {
return parse_nbytes_unsigned(*parsed);
} else {
return safe_cast<size_t>(
get_host_memory_per_gpu() * DEFAULT_MAX_POOL_SIZE_FACTOR
);
}
return std::nullopt;
}
)
};
Expand Down
22 changes: 22 additions & 0 deletions cpp/src/system_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
#include <sched.h>
#include <unistd.h>

#include <cucascade/memory/topology_discovery.hpp>

#include <rapidsmpf/error.hpp>
#include <rapidsmpf/system_info.hpp>

Expand Down Expand Up @@ -79,4 +81,24 @@ std::uint64_t get_numa_node_host_memory([[maybe_unused]] int numa_id) noexcept {
return safe_cast<std::uint64_t>(ret);
}

namespace {
const auto& get_topology() {
static const auto topo = [] {
cucascade::memory::topology_discovery discovery;
RAPIDSMPF_EXPECTS(
discovery.discover(),
"get_host_memory_per_gpu(): failed to discover system topology",
std::runtime_error
);
return discovery;
}();
return topo.get_topology();
}
} // namespace

std::uint64_t get_host_memory_per_gpu() {
    // NOTE(review): on coherent systems (e.g. Grace-Hopper/Grace-Blackwell)
    // with the NVIDIA driver in NUMA mode and HMM enabled, GPU memory can be
    // exposed as host memory, so get_total_host_memory() may over-count —
    // confirm behavior on such platforms.
    std::uint64_t const gpu_count = get_topology().num_gpus;
    // Treat "no GPUs visible" as a single GPU so we never divide by zero.
    return gpu_count == 0 ? get_total_host_memory()
                          : get_total_host_memory() / gpu_count;
}

} // namespace rapidsmpf
1 change: 0 additions & 1 deletion cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,6 @@ add_executable(
"${PROJECT_SOURCE_DIR}/src/bootstrap/bootstrap.cpp"
"${PROJECT_SOURCE_DIR}/src/bootstrap/file_backend.cpp"
"${PROJECT_SOURCE_DIR}/src/bootstrap/utils.cpp"
"${PROJECT_SOURCE_DIR}/src/system_info.cpp"
"$<$<BOOL:${RAPIDSMPF_HAVE_SLURM}>:${PROJECT_SOURCE_DIR}/src/bootstrap/slurm_backend.cpp>"
)
set_target_properties(
Expand Down
24 changes: 23 additions & 1 deletion cpp/tests/test_host_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>

#include <rapidsmpf/config.hpp>
#include <rapidsmpf/cuda_stream.hpp>
#include <rapidsmpf/memory/pinned_memory_resource.hpp>
#include <rapidsmpf/utils/misc.hpp>
Expand Down Expand Up @@ -265,7 +266,7 @@ std::size_t discover_pinned_pool_actual_size(

} // namespace

TEST(PinnedResourceMaxSize, max_pool_size_limit) {
TEST(PinnedResource, max_pool_size_limit) {
// Ensure CUDA device context is initialized (required for pinned memory pools).
RAPIDSMPF_CUDA_TRY(cudaFree(nullptr));
auto stream = cudf::get_default_stream();
Expand All @@ -292,3 +293,24 @@ TEST(PinnedResourceMaxSize, max_pool_size_limit) {
EXPECT_THROW(alloc_and_dealloc(actual_pool_size + 1), cuda::cuda_error);
stream.synchronize();
}

TEST(PinnedResource, from_default_options) {
    // Constructing from empty options must apply the documented defaults.
    auto mr = rapidsmpf::PinnedMemoryResource::from_options(rapidsmpf::config::Options{});
    if (mr == rapidsmpf::PinnedMemoryResource::Disabled) {
        GTEST_SKIP() << "PinnedMemoryResource is not supported";
    }
    auto const per_gpu = rapidsmpf::get_host_memory_per_gpu();
    auto const expected_initial = rapidsmpf::safe_cast<std::size_t>(
        per_gpu * rapidsmpf::PinnedMemoryResource::DEFAULT_INIT_POOL_SIZE_FACTOR
    );
    auto const expected_max = rapidsmpf::safe_cast<std::size_t>(
        per_gpu * rapidsmpf::PinnedMemoryResource::DEFAULT_MAX_POOL_SIZE_FACTOR
    );
    EXPECT_EQ(mr->properties().initial_pool_size, expected_initial);
    EXPECT_EQ(mr->properties().max_pool_size.value(), expected_max);
}
20 changes: 11 additions & 9 deletions docs/source/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ rapidsmpf::config::Options options{rapidsmpf::config::get_environment_variables(

- **`pinned_memory`**
- **Environment Variable**: `RAPIDSMPF_PINNED_MEMORY`
- **Default**: `false`
- **Default**: `true`
- **Description**: Enables pinned host memory if it is available on the system.
Pinned host memory provides higher bandwidth and lower latency for device-to-host
transfers compared to regular pageable host memory. When enabled, RapidsMPF
Expand All @@ -105,17 +105,19 @@ rapidsmpf::config::Options options{rapidsmpf::config::get_environment_variables(

- **`pinned_initial_pool_size`**
- **Environment Variable**: `RAPIDSMPF_PINNED_INITIAL_POOL_SIZE`
- **Default**: `0`
- **Description**: Initial size (in bytes) of the pinned host memory pool when
`pinned_memory` is enabled. A value of `0` means the pool starts empty and grows
on demand. Accepts byte counts (e.g. `"1GiB"`, `"512MiB"`).
- **Default**: 10% of per-GPU host memory (`get_host_memory_per_gpu() * 0.1`)
- **Description**: Initial size of the pinned host memory pool when `pinned_memory` is
enabled. When unset or empty, the pool is pre-allocated to 10% of total host memory
divided by the number of GPUs in the system. Accepts byte counts
(e.g. `"1GiB"`, `"512MiB"`).

- **`pinned_max_pool_size`**
- **Environment Variable**: `RAPIDSMPF_PINNED_MAX_POOL_SIZE`
- **Default**: `"disabled"`
- **Description**: Maximum size (in bytes) of the pinned host memory pool when
`pinned_memory` is enabled. When unset or empty, the pool is allowed to grow
without an upper bound. Accepts byte counts (e.g. `"4GiB"`, `"2048MiB"`).
- **Default**: 80% of per-GPU host memory (`get_host_memory_per_gpu() * 0.8`)
- **Description**: Maximum size of the pinned host memory pool when `pinned_memory` is
enabled. When unset or empty, the pool is capped at 80% of total host memory divided
by the number of GPUs in the system. Accepts byte counts
(e.g. `"4GiB"`, `"2048MiB"`).

- **`spill_device_limit`**
- **Environment Variable**: `RAPIDSMPF_SPILL_DEVICE_LIMIT`
Expand Down
1 change: 1 addition & 0 deletions python/rapidsmpf/rapidsmpf/utils/system_info.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
def get_total_host_memory() -> int: ...
def get_current_numa_node() -> int: ...
def get_numa_node_host_memory(numa_id: int | None = None) -> int: ...
def get_host_memory_per_gpu() -> int: ...
24 changes: 24 additions & 0 deletions python/rapidsmpf/rapidsmpf/utils/system_info.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ cdef extern from "<rapidsmpf/system_info.hpp>" nogil:
cdef uint64_t cpp_get_numa_node_host_memory \
"rapidsmpf::get_numa_node_host_memory"(int numa_id) noexcept

cdef uint64_t cpp_get_host_memory_per_gpu \
"rapidsmpf::get_host_memory_per_gpu"() except+


def get_total_host_memory():
"""
Expand Down Expand Up @@ -84,3 +87,24 @@ def get_numa_node_host_memory(numa_id = None):
else:
_numa_id = numa_id
return cpp_get_numa_node_host_memory(_numa_id)


def get_host_memory_per_gpu():
    """
    Get the total host memory divided by the number of GPUs in the system.

    Returns the amount of host memory attributed to each GPU, computed as
    total host memory divided by the number of GPUs visible to the system.
    Falls back to total host memory when only one GPU (or none) is present.

    Returns
    -------
    int
        Host memory per GPU in bytes.

    Raises
    ------
    RuntimeError
        If system topology discovery fails.
    """
    return cpp_get_host_memory_per_gpu()
Loading