26 changes: 26 additions & 0 deletions cpp/include/rapidsmpf/memory/pinned_memory_resource.hpp
@@ -100,6 +100,25 @@ class PinnedMemoryResource final
/// @brief Sentinel value indicating that pinned host memory is disabled.
static constexpr std::nullopt_t Disabled = std::nullopt;

/**
* @brief Fraction of total host memory per GPU used as the initial pinned pool size
* when no explicit `pinned_initial_pool_size` option is provided.
*
 * Applied as: `initial_pool_size = get_host_memory_per_gpu() *
 * DefaultInitialPoolSizeFactor`.
*/
static constexpr std::string_view DefaultInitialPoolSizeFactor = "10%";

/**
* @brief Fraction of total host memory per GPU used as the maximum pinned pool size
* when no explicit `pinned_max_pool_size` option is provided.
*
* Applied as: `max_pool_size = get_host_memory_per_gpu() *
 * DefaultMaxPoolSizeFactor`. `get_host_memory_per_gpu()` is computed as the
 * host memory available to the current NUMA node divided by the number of
 * GPUs bound to that node.
*/
static constexpr std::string_view DefaultMaxPoolSizeFactor = "80%";
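
A worked example of how these two factors resolve (a sketch with illustrative numbers; nothing below is queried from a real machine):

```cpp
#include <cstdint>

int main() {
    // Assumed machine: 256 GiB of host memory on the current NUMA node and
    // 4 GPUs bound to that node. Both numbers are assumptions.
    std::uint64_t const node_memory = 256ULL << 30;  // 256 GiB
    std::uint64_t const per_gpu = node_memory / 4;   // get_host_memory_per_gpu() -> 64 GiB
    auto const initial = static_cast<std::uint64_t>(per_gpu * 0.10);   // "10%" ~= 6.4 GiB
    auto const max_size = static_cast<std::uint64_t>(per_gpu * 0.80);  // "80%" ~= 51.2 GiB
    return initial <= max_size ? 0 : 1;
}
```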

/**
* @brief Create a pinned memory resource if the system supports pinned memory.
*
@@ -118,6 +137,13 @@ class PinnedMemoryResource final
/**
* @brief Construct from configuration options.
*
* Recognized options:
* - `pinned_memory` (bool): enables pinned memory; defaults to `true`.
* - `pinned_initial_pool_size` (nbytes string): initial pool size; defaults to
 *   `get_host_memory_per_gpu() * DefaultInitialPoolSizeFactor`.
* - `pinned_max_pool_size` (nbytes string or empty): maximum pool size; defaults to
* `get_host_memory_per_gpu() * DefaultMaxPoolSizeFactor`.
*
* @param options Configuration options.
*
* @return A `PinnedMemoryResource` if pinned memory is enabled and supported,
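A minimal usage sketch for `from_options`, assuming only what the new test added below exercises (a default-constructed `config::Options` and the `Disabled` sentinel):

```cpp
#include <rapidsmpf/config.hpp>
#include <rapidsmpf/memory/pinned_memory_resource.hpp>

void example() {
    // Every recognized key falls back to its documented default here.
    auto mr = rapidsmpf::PinnedMemoryResource::from_options(
        rapidsmpf::config::Options{}
    );
    if (mr == rapidsmpf::PinnedMemoryResource::Disabled) {
        return;  // pinned memory disabled or unsupported on this system
    }
    // ... use mr->properties(), etc.
}
```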
9 changes: 9 additions & 0 deletions cpp/include/rapidsmpf/system_info.hpp
@@ -79,5 +79,14 @@ std::vector<int> get_current_numa_nodes() noexcept;
*/
std::uint64_t get_numa_node_host_memory(int numa_id = get_current_numa_node()) noexcept;

/**
* @brief Get the amount of host memory per GPU, calculated as total host memory available
* to the current NUMA node divided by the number of GPUs bound to that NUMA node.
*
* @throws std::runtime_error if no GPUs are found on the current NUMA node.
*
* @return Amount of host memory per GPU in bytes.
*/
std::uint64_t get_host_memory_per_gpu();

} // namespace rapidsmpf
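For reference, a small call-site sketch for the new function (the printing and error handling are illustrative, not part of this change):

```cpp
#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <stdexcept>

#include <rapidsmpf/system_info.hpp>

int main() {
    try {
        std::uint64_t const bytes = rapidsmpf::get_host_memory_per_gpu();
        std::printf("host memory per GPU: %" PRIu64 " bytes\n", bytes);
    } catch (std::runtime_error const& e) {
        std::printf("topology discovery failed: %s\n", e.what());
        return 1;
    }
    return 0;
}
```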
22 changes: 14 additions & 8 deletions cpp/src/memory/pinned_memory_resource.cpp
@@ -74,21 +74,27 @@ std::optional<PinnedMemoryResource> PinnedMemoryResource::from_options(
config::Options options
) {
bool const pinned_memory = options.get<bool>("pinned_memory", [](auto const& s) {
return parse_string<bool>(s.empty() ? "True" : s);
return s.empty() ? true : parse_string<bool>(s);
});

if (pinned_memory && is_pinned_memory_resources_supported()) {
auto const host_memory_per_gpu = get_host_memory_per_gpu();
PinnedPoolProperties pool_properties{
.initial_pool_size = options.get<size_t>(
"pinned_initial_pool_size",
[](auto const& s) { return s.empty() ? 0 : parse_nbytes_unsigned(s); }
[&](auto const& s) {
return parse_nbytes_or_percent(
s.empty() ? DefaultInitialPoolSizeFactor : s,
safe_cast<double>(host_memory_per_gpu)
);
}
),
.max_pool_size = options.get<std::optional<size_t>>(
"pinned_max_pool_size", [](auto const& s) -> std::optional<size_t> {
auto parsed = parse_optional(s);
if (parsed.has_value() && !parsed->empty()) {
return parse_nbytes_unsigned(*parsed);
}
return std::nullopt;
"pinned_max_pool_size", [&](auto const& s) {
return parse_nbytes_or_percent(
s.empty() ? DefaultMaxPoolSizeFactor : s,
safe_cast<double>(host_memory_per_gpu)
);
}
)
};
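For readers unfamiliar with `parse_nbytes_or_percent`, a hypothetical stand-in sketching the contract the code above assumes (the real helper is part of rapidsmpf's string utilities and also handles unit suffixes such as `"MiB"`/`"GiB"`):

```cpp
#include <cstdint>
#include <string>

// Hypothetical sketch only: a trailing '%' scales `base`, anything else
// parses as an absolute byte count. Unit-suffix handling is omitted.
std::uint64_t parse_nbytes_or_percent_sketch(std::string const& s, double base) {
    if (!s.empty() && s.back() == '%') {
        double const pct = std::stod(s.substr(0, s.size() - 1));
        return static_cast<std::uint64_t>(base * pct / 100.0);
    }
    return std::stoull(s);  // absolute bytes
}
```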
37 changes: 37 additions & 0 deletions cpp/src/system_info.cpp
@@ -4,9 +4,14 @@
*/


#include <algorithm>
#include <optional>

#include <sched.h>
#include <unistd.h>

#include <cucascade/memory/topology_discovery.hpp>

#include <rapidsmpf/error.hpp>
#include <rapidsmpf/system_info.hpp>

@@ -79,4 +84,36 @@ std::uint64_t get_numa_node_host_memory([[maybe_unused]] int numa_id) noexcept {
return safe_cast<std::uint64_t>(ret);
}

namespace {
const auto& get_topology() {
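// Run topology discovery once; the immediately-invoked lambda caches
// either the discovered topology or std::nullopt for later lookups.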
static const auto topo = []() -> std::optional<cucascade::memory::topology_discovery> {
cucascade::memory::topology_discovery discovery;
if (!discovery.discover()) {
return std::nullopt;
}
return discovery;
}();

RAPIDSMPF_EXPECTS(
topo.has_value(), "Failed to discover system topology", std::runtime_error
);
return topo->get_topology();
}
} // namespace

std::uint64_t get_host_memory_per_gpu() {
auto const current_numa_node = get_current_numa_node();
auto const& gpus = get_topology().gpus;
auto const num_local_gpus = std::ranges::count_if(gpus, [&](auto const& gpu) {
return gpu.numa_node == current_numa_node;
});
RAPIDSMPF_EXPECTS(
num_local_gpus > 0,
"get_host_memory_per_gpu(): no GPUs found on current NUMA node",
std::runtime_error
);
return get_numa_node_host_memory(current_numa_node)
/ safe_cast<std::uint64_t>(num_local_gpus);
}

} // namespace rapidsmpf
1 change: 0 additions & 1 deletion cpp/tests/CMakeLists.txt
@@ -250,7 +250,6 @@ add_executable(
"${PROJECT_SOURCE_DIR}/src/bootstrap/bootstrap.cpp"
"${PROJECT_SOURCE_DIR}/src/bootstrap/file_backend.cpp"
"${PROJECT_SOURCE_DIR}/src/bootstrap/utils.cpp"
"${PROJECT_SOURCE_DIR}/src/system_info.cpp"
"$<$<BOOL:${RAPIDSMPF_HAVE_SLURM}>:${PROJECT_SOURCE_DIR}/src/bootstrap/slurm_backend.cpp>"
)
set_target_properties(
27 changes: 26 additions & 1 deletion cpp/tests/test_host_buffer.cpp
@@ -16,9 +16,12 @@
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>

#include <rapidsmpf/config.hpp>
#include <rapidsmpf/cuda_stream.hpp>
#include <rapidsmpf/memory/pinned_memory_resource.hpp>
#include <rapidsmpf/system_info.hpp>
#include <rapidsmpf/utils/misc.hpp>
#include <rapidsmpf/utils/string.hpp>

#include "utils.hpp"

@@ -265,7 +268,7 @@ std::size_t discover_pinned_pool_actual_size(

} // namespace

TEST(PinnedResourceMaxSize, max_pool_size_limit) {
TEST(PinnedResource, max_pool_size_limit) {
// Ensure CUDA device context is initialized (required for pinned memory pools).
RAPIDSMPF_CUDA_TRY(cudaFree(nullptr));
auto stream = cudf::get_default_stream();
@@ -292,3 +295,25 @@ TEST(PinnedResourceMaxSize, max_pool_size_limit) {
EXPECT_THROW(alloc_and_dealloc(actual_pool_size + 1), cuda::cuda_error);
stream.synchronize();
}

TEST(PinnedResource, from_default_options) {
auto mr = rapidsmpf::PinnedMemoryResource::from_options(rapidsmpf::config::Options{});
if (mr == rapidsmpf::PinnedMemoryResource::Disabled) {
GTEST_SKIP() << "PinnedMemoryResource is not supported";
}
EXPECT_EQ(
mr->properties().initial_pool_size,
rapidsmpf::parse_nbytes_or_percent(
rapidsmpf::PinnedMemoryResource::DefaultInitialPoolSizeFactor,
static_cast<double>(rapidsmpf::get_host_memory_per_gpu())
)
);
EXPECT_EQ(
mr->properties().max_pool_size.value(),
rapidsmpf::parse_nbytes_or_percent(
rapidsmpf::PinnedMemoryResource::DefaultMaxPoolSizeFactor,
static_cast<double>(rapidsmpf::get_host_memory_per_gpu())
)
);
}
20 changes: 11 additions & 9 deletions docs/source/configuration.md
@@ -96,7 +96,7 @@ rapidsmpf::config::Options options{rapidsmpf::config::get_environment_variables(

- **`pinned_memory`**
- **Environment Variable**: `RAPIDSMPF_PINNED_MEMORY`
- **Default**: `false`
- **Default**: `true`
- **Description**: Enables pinned host memory if it is available on the system.
Pinned host memory provides higher bandwidth and lower latency for device-to-host
transfers compared to regular pageable host memory. When enabled, RapidsMPF
@@ -105,17 +105,19 @@ rapidsmpf::config::Options options{rapidsmpf::config::get_environment_variables(

- **`pinned_initial_pool_size`**
- **Environment Variable**: `RAPIDSMPF_PINNED_INITIAL_POOL_SIZE`
- **Default**: `0`
- **Description**: Initial size (in bytes) of the pinned host memory pool when
`pinned_memory` is enabled. A value of `0` means the pool starts empty and grows
on demand. Accepts byte counts (e.g. `"1GiB"`, `"512MiB"`).
- **Default**: 10% of per-GPU host memory
- **Description**: Initial size of the pinned host memory pool when `pinned_memory` is
enabled. When unset or empty, the pool is pre-allocated to 10% of the total host memory
available to the current NUMA node divided by the number of GPUs in that NUMA node.
Accepts byte counts or percentages (e.g. `"1GiB"`, `"512MiB"`, `"10%"`).

- **`pinned_max_pool_size`**
- **Environment Variable**: `RAPIDSMPF_PINNED_MAX_POOL_SIZE`
- **Default**: `"disabled"`
- **Description**: Maximum size (in bytes) of the pinned host memory pool when
`pinned_memory` is enabled. When unset or empty, the pool is allowed to grow
without an upper bound. Accepts byte counts (e.g. `"4GiB"`, `"2048MiB"`).
- **Default**: 80% of per-GPU host memory
- **Description**: Maximum size of the pinned host memory pool when `pinned_memory` is
enabled. When unset or empty, the pool is capped at 80% of the total host memory
available to the current NUMA node divided by the number of GPUs in that NUMA node.
Accepts byte counts or percentages (e.g. `"4GiB"`, `"2048MiB"`, `"80%"`).
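
For example, a sketch of resolving these options from the environment, mirroring the `Options` construction shown earlier in this document (the exact `get_environment_variables` signature is elided there, so the default call below is an assumption):

```cpp
// Assumption: get_environment_variables() is callable with defaults.
// With RAPIDSMPF_PINNED_INITIAL_POOL_SIZE="1GiB" and
// RAPIDSMPF_PINNED_MAX_POOL_SIZE="25%" exported, the resource below uses
// those values instead of the 10% / 80% defaults.
rapidsmpf::config::Options options{rapidsmpf::config::get_environment_variables()};
auto mr = rapidsmpf::PinnedMemoryResource::from_options(options);
```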

- **`spill_device_limit`**
- **Environment Variable**: `RAPIDSMPF_SPILL_DEVICE_LIMIT`
1 change: 1 addition & 0 deletions python/rapidsmpf/rapidsmpf/utils/system_info.pyi
@@ -4,3 +4,4 @@
def get_total_host_memory() -> int: ...
def get_current_numa_node() -> int: ...
def get_numa_node_host_memory(numa_id: int | None = None) -> int: ...
def get_host_memory_per_gpu() -> int: ...
23 changes: 23 additions & 0 deletions python/rapidsmpf/rapidsmpf/utils/system_info.pyx
@@ -14,6 +14,9 @@ cdef extern from "<rapidsmpf/system_info.hpp>" nogil:
cdef uint64_t cpp_get_numa_node_host_memory \
"rapidsmpf::get_numa_node_host_memory"(int numa_id) noexcept

cdef uint64_t cpp_get_host_memory_per_gpu \
"rapidsmpf::get_host_memory_per_gpu"() except+


def get_total_host_memory():
"""
@@ -84,3 +87,23 @@ def get_numa_node_host_memory(numa_id = None):
else:
_numa_id = numa_id
return cpp_get_numa_node_host_memory(_numa_id)


def get_host_memory_per_gpu():
"""
Get the amount of host memory per GPU.

Returns the amount of host memory attributed to each GPU, computed as the
host memory available to the current NUMA node divided by the number of
GPUs bound to that node.

Returns
-------
Host memory per GPU in bytes.

Raises
------
RuntimeError
If system topology discovery fails or no GPUs are found on the
current NUMA node.
"""
return cpp_get_host_memory_per_gpu()