26 changes: 26 additions & 0 deletions cpp/include/rapidsmpf/memory/pinned_memory_resource.hpp
@@ -100,6 +100,25 @@ class PinnedMemoryResource final
/// @brief Sentinel value indicating that pinned host memory is disabled.
static constexpr std::nullopt_t Disabled = std::nullopt;

/**
* @brief Fraction of total host memory per GPU used as the initial pinned pool size
* when no explicit `pinned_initial_pool_size` option is provided.
*
 * Applied as: `initial_pool_size = get_host_memory_per_gpu() *
 * DefaultInitialPoolSizeFactor`.
*/
static constexpr std::string_view DefaultInitialPoolSizeFactor = "10%";

/**
* @brief Fraction of total host memory per GPU used as the maximum pinned pool size
* when no explicit `pinned_max_pool_size` option is provided.
*
* Applied as: `max_pool_size = get_host_memory_per_gpu() *
 * DefaultMaxPoolSizeFactor`. `get_host_memory_per_gpu()` is computed as the
 * host memory available to the current NUMA node divided by the number of
 * GPUs bound to that node.
*/
static constexpr std::string_view DefaultMaxPoolSizeFactor = "80%";
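
A worked example of how these two factors resolve (a sketch with illustrative numbers; nothing below is queried from a real machine):

```cpp
#include <cstdint>

int main() {
    // Assumed machine: 256 GiB of host memory on the current NUMA node and
    // 4 GPUs bound to that node. Both numbers are assumptions.
    std::uint64_t const node_memory = 256ULL << 30;  // 256 GiB
    std::uint64_t const per_gpu = node_memory / 4;   // get_host_memory_per_gpu() -> 64 GiB
    auto const initial = static_cast<std::uint64_t>(per_gpu * 0.10);   // "10%" ~= 6.4 GiB
    auto const max_size = static_cast<std::uint64_t>(per_gpu * 0.80);  // "80%" ~= 51.2 GiB
    return initial <= max_size ? 0 : 1;
}
```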

/**
* @brief Create a pinned memory resource if the system supports pinned memory.
*
@@ -118,6 +137,13 @@ class PinnedMemoryResource final
/**
* @brief Construct from configuration options.
*
* Recognized options:
* - `pinned_memory` (bool): enables pinned memory; defaults to `true`.
* - `pinned_initial_pool_size` (nbytes string): initial pool size; defaults to
 *   `get_host_memory_per_gpu() * DefaultInitialPoolSizeFactor`.
* - `pinned_max_pool_size` (nbytes string or empty): maximum pool size; defaults to
* `get_host_memory_per_gpu() * DefaultMaxPoolSizeFactor`.
*
* @param options Configuration options.
*
* @return A `PinnedMemoryResource` if pinned memory is enabled and supported,
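A minimal usage sketch for `from_options`, assuming only what the new test added below exercises (a default-constructed `config::Options` and the `Disabled` sentinel):

```cpp
#include <rapidsmpf/config.hpp>
#include <rapidsmpf/memory/pinned_memory_resource.hpp>

void example() {
    // Every recognized key falls back to its documented default here.
    auto mr = rapidsmpf::PinnedMemoryResource::from_options(
        rapidsmpf::config::Options{}
    );
    if (mr == rapidsmpf::PinnedMemoryResource::Disabled) {
        return;  // pinned memory disabled or unsupported on this system
    }
    // ... use mr->properties(), etc.
}
```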
9 changes: 9 additions & 0 deletions cpp/include/rapidsmpf/system_info.hpp
@@ -79,5 +79,14 @@ std::vector<int> get_current_numa_nodes() noexcept;
*/
std::uint64_t get_numa_node_host_memory(int numa_id = get_current_numa_node()) noexcept;

/**
* @brief Get the amount of host memory per GPU, calculated as total host memory available
* to the current NUMA node divided by the number of GPUs bound to that NUMA node.
*
* @throws std::runtime_error if no GPUs are found on the current NUMA node.
*
* @return Amount of host memory per GPU in bytes.
*/
std::uint64_t get_host_memory_per_gpu();

} // namespace rapidsmpf
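For reference, a small call-site sketch for the new function (the printing and error handling are illustrative, not part of this change):

```cpp
#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <stdexcept>

#include <rapidsmpf/system_info.hpp>

int main() {
    try {
        std::uint64_t const bytes = rapidsmpf::get_host_memory_per_gpu();
        std::printf("host memory per GPU: %" PRIu64 " bytes\n", bytes);
    } catch (std::runtime_error const& e) {
        std::printf("topology discovery failed: %s\n", e.what());
        return 1;
    }
    return 0;
}
```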
22 changes: 14 additions & 8 deletions cpp/src/memory/pinned_memory_resource.cpp
@@ -74,21 +74,27 @@ std::optional<PinnedMemoryResource> PinnedMemoryResource::from_options(
config::Options options
) {
bool const pinned_memory = options.get<bool>("pinned_memory", [](auto const& s) {
return parse_string<bool>(s.empty() ? "True" : s);
return s.empty() ? true : parse_string<bool>(s);
});

if (pinned_memory && is_pinned_memory_resources_supported()) {
auto const host_memory_per_gpu = get_host_memory_per_gpu();
PinnedPoolProperties pool_properties{
.initial_pool_size = options.get<size_t>(
"pinned_initial_pool_size",
[](auto const& s) { return s.empty() ? 0 : parse_nbytes_unsigned(s); }
[&](auto const& s) {
return parse_nbytes_or_percent(
s.empty() ? DefaultInitialPoolSizeFactor : s,
safe_cast<double>(host_memory_per_gpu)
);
}
),
.max_pool_size = options.get<std::optional<size_t>>(
"pinned_max_pool_size", [](auto const& s) -> std::optional<size_t> {
auto parsed = parse_optional(s);
if (parsed.has_value() && !parsed->empty()) {
return parse_nbytes_unsigned(*parsed);
}
return std::nullopt;
"pinned_max_pool_size", [&](auto const& s) {
return parse_nbytes_or_percent(
s.empty() ? DefaultMaxPoolSizeFactor : s,
safe_cast<double>(host_memory_per_gpu)
);
}
)
};
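For readers unfamiliar with `parse_nbytes_or_percent`, a hypothetical stand-in sketching the contract the code above assumes (the real helper is part of rapidsmpf's string utilities and also handles unit suffixes such as `"MiB"`/`"GiB"`):

```cpp
#include <cstdint>
#include <string>

// Hypothetical sketch only: a trailing '%' scales `base`, anything else
// parses as an absolute byte count. Unit-suffix handling is omitted.
std::uint64_t parse_nbytes_or_percent_sketch(std::string const& s, double base) {
    if (!s.empty() && s.back() == '%') {
        double const pct = std::stod(s.substr(0, s.size() - 1));
        return static_cast<std::uint64_t>(base * pct / 100.0);
    }
    return std::stoull(s);  // absolute bytes
}
```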
37 changes: 37 additions & 0 deletions cpp/src/system_info.cpp
@@ -4,9 +4,14 @@
*/


#include <algorithm>
#include <optional>

#include <sched.h>
#include <unistd.h>

#include <cucascade/memory/topology_discovery.hpp>

#include <rapidsmpf/error.hpp>
#include <rapidsmpf/system_info.hpp>

@@ -79,4 +84,36 @@ std::uint64_t get_numa_node_host_memory([[maybe_unused]] int numa_id) noexcept {
return safe_cast<std::uint64_t>(ret);
}

namespace {
const auto& get_topology() {
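// Run topology discovery once; the immediately-invoked lambda caches
// either the discovered topology or std::nullopt for later lookups.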
static const auto topo = []() -> std::optional<cucascade::memory::topology_discovery> {
cucascade::memory::topology_discovery discovery;
if (!discovery.discover()) {
return std::nullopt;
}
return discovery;
}();

RAPIDSMPF_EXPECTS(
topo.has_value(), "Failed to discover system topology", std::runtime_error
);
return topo->get_topology();
}
} // namespace

std::uint64_t get_host_memory_per_gpu() {
auto const current_numa_node = get_current_numa_node();
auto const& gpus = get_topology().gpus;
auto const num_local_gpus = std::ranges::count_if(gpus, [&](auto const& gpu) {
return gpu.numa_node == current_numa_node;
});
RAPIDSMPF_EXPECTS(
num_local_gpus > 0,
"get_host_memory_per_gpu(): no GPUs found on current NUMA node",
std::runtime_error
);
return get_numa_node_host_memory(current_numa_node)
/ safe_cast<std::uint64_t>(num_local_gpus);
}

} // namespace rapidsmpf
1 change: 0 additions & 1 deletion cpp/tests/CMakeLists.txt
@@ -250,7 +250,6 @@ add_executable(
"${PROJECT_SOURCE_DIR}/src/bootstrap/bootstrap.cpp"
"${PROJECT_SOURCE_DIR}/src/bootstrap/file_backend.cpp"
"${PROJECT_SOURCE_DIR}/src/bootstrap/utils.cpp"
"${PROJECT_SOURCE_DIR}/src/system_info.cpp"
"$<$<BOOL:${RAPIDSMPF_HAVE_SLURM}>:${PROJECT_SOURCE_DIR}/src/bootstrap/slurm_backend.cpp>"
)
set_target_properties(
27 changes: 26 additions & 1 deletion cpp/tests/test_host_buffer.cpp
@@ -16,9 +16,12 @@
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>

#include <rapidsmpf/config.hpp>
#include <rapidsmpf/cuda_stream.hpp>
#include <rapidsmpf/memory/pinned_memory_resource.hpp>
#include <rapidsmpf/system_info.hpp>
#include <rapidsmpf/utils/misc.hpp>
#include <rapidsmpf/utils/string.hpp>

#include "utils.hpp"

@@ -265,7 +268,7 @@ std::size_t discover_pinned_pool_actual_size(

} // namespace

TEST(PinnedResourceMaxSize, max_pool_size_limit) {
TEST(PinnedResource, max_pool_size_limit) {
// Ensure CUDA device context is initialized (required for pinned memory pools).
RAPIDSMPF_CUDA_TRY(cudaFree(nullptr));
auto stream = cudf::get_default_stream();
@@ -292,3 +295,25 @@ TEST(PinnedResourceMaxSize, max_pool_size_limit) {
EXPECT_THROW(alloc_and_dealloc(actual_pool_size + 1), cuda::cuda_error);
stream.synchronize();
}

TEST(PinnedResource, from_default_options) {
auto mr = rapidsmpf::PinnedMemoryResource::from_options(rapidsmpf::config::Options{});
if (mr == rapidsmpf::PinnedMemoryResource::Disabled) {
GTEST_SKIP() << "PinnedMemoryResource is not supported";
}
EXPECT_EQ(
mr->properties().initial_pool_size,
rapidsmpf::parse_nbytes_or_percent(
rapidsmpf::PinnedMemoryResource::DefaultInitialPoolSizeFactor,
static_cast<double>(rapidsmpf::get_host_memory_per_gpu())
)
);
EXPECT_EQ(
mr->properties().max_pool_size.value(),
rapidsmpf::parse_nbytes_or_percent(
rapidsmpf::PinnedMemoryResource::DefaultMaxPoolSizeFactor,
static_cast<double>(rapidsmpf::get_host_memory_per_gpu())
)
);
}
20 changes: 11 additions & 9 deletions docs/source/configuration.md
@@ -96,7 +96,7 @@ rapidsmpf::config::Options options{rapidsmpf::config::get_environment_variables(

- **`pinned_memory`**
- **Environment Variable**: `RAPIDSMPF_PINNED_MEMORY`
- **Default**: `false`
- **Default**: `true`
- **Description**: Enables pinned host memory if it is available on the system.
Pinned host memory provides higher bandwidth and lower latency for device-to-host
transfers compared to regular pageable host memory. When enabled, RapidsMPF
@@ -105,17 +105,19 @@ rapidsmpf::config::Options options{rapidsmpf::config::get_environment_variables(

- **`pinned_initial_pool_size`**
- **Environment Variable**: `RAPIDSMPF_PINNED_INITIAL_POOL_SIZE`
- **Default**: `0`
- **Description**: Initial size (in bytes) of the pinned host memory pool when
`pinned_memory` is enabled. A value of `0` means the pool starts empty and grows
on demand. Accepts byte counts (e.g. `"1GiB"`, `"512MiB"`).
- **Default**: 10% of per-GPU host memory
- **Description**: Initial size of the pinned host memory pool when `pinned_memory` is
enabled. When unset or empty, the pool is pre-allocated to 10% of the total host memory
available to the current NUMA node divided by the number of GPUs in that NUMA node.
Accepts byte counts or percentages (e.g. `"1GiB"`, `"512MiB"`, `"10%"`).

- **`pinned_max_pool_size`**
- **Environment Variable**: `RAPIDSMPF_PINNED_MAX_POOL_SIZE`
- **Default**: `"disabled"`
- **Description**: Maximum size (in bytes) of the pinned host memory pool when
`pinned_memory` is enabled. When unset or empty, the pool is allowed to grow
without an upper bound. Accepts byte counts (e.g. `"4GiB"`, `"2048MiB"`).
- **Default**: 80% of per-GPU host memory
- **Description**: Maximum size of the pinned host memory pool when `pinned_memory` is
enabled. When unset or empty, the pool is capped at 80% of the total host memory
available to the current NUMA node divided by the number of GPUs in that NUMA node.
Accepts byte counts or percentages (e.g. `"4GiB"`, `"2048MiB"`, `"80%"`).
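
For example, a sketch of resolving these options from the environment, mirroring the `Options` construction shown earlier in this document (the exact `get_environment_variables` signature is elided there, so the default call below is an assumption):

```cpp
// Assumption: get_environment_variables() is callable with defaults.
// With RAPIDSMPF_PINNED_INITIAL_POOL_SIZE="1GiB" and
// RAPIDSMPF_PINNED_MAX_POOL_SIZE="25%" exported, the resource below uses
// those values instead of the 10% / 80% defaults.
rapidsmpf::config::Options options{rapidsmpf::config::get_environment_variables()};
auto mr = rapidsmpf::PinnedMemoryResource::from_options(options);
```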

- **`spill_device_limit`**
- **Environment Variable**: `RAPIDSMPF_SPILL_DEVICE_LIMIT`
1 change: 1 addition & 0 deletions python/rapidsmpf/rapidsmpf/utils/system_info.pyi
@@ -4,3 +4,4 @@
def get_total_host_memory() -> int: ...
def get_current_numa_node() -> int: ...
def get_numa_node_host_memory(numa_id: int | None = None) -> int: ...
def get_host_memory_per_gpu() -> int: ...
23 changes: 23 additions & 0 deletions python/rapidsmpf/rapidsmpf/utils/system_info.pyx
@@ -14,6 +14,9 @@ cdef extern from "<rapidsmpf/system_info.hpp>" nogil:
cdef uint64_t cpp_get_numa_node_host_memory \
"rapidsmpf::get_numa_node_host_memory"(int numa_id) noexcept

cdef uint64_t cpp_get_host_memory_per_gpu \
"rapidsmpf::get_host_memory_per_gpu"() except+


def get_total_host_memory():
"""
@@ -84,3 +87,23 @@ def get_numa_node_host_memory(numa_id = None):
else:
_numa_id = numa_id
return cpp_get_numa_node_host_memory(_numa_id)


def get_host_memory_per_gpu():
"""
Get the amount of host memory per GPU.

Returns the amount of host memory attributed to each GPU, computed as the
host memory available to the current NUMA node divided by the number of
GPUs bound to that node.

Returns
-------
Host memory per GPU in bytes.

Raises
------
RuntimeError
If system topology discovery fails or no GPUs are found on the
current NUMA node.
"""
return cpp_get_host_memory_per_gpu()