Skip to content
Open
5 changes: 5 additions & 0 deletions gloo/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ list(APPEND GLOO_HDRS
"${CMAKE_CURRENT_SOURCE_DIR}/types.h"
)

# Add the SHM allreduce source and header to the build only when the compiler
# is not MSVC and CMAKE_SYSTEM_PROCESSOR does not start with "aarch64" or "arm".
# NOTE(review): gloo/allreduce.h gates GLOO_SHM_ALLREDUCE_APPLICABLE on _WIN32
# rather than MSVC -- confirm the two conditions agree for non-MSVC Windows
# toolchains.
if(NOT MSVC AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm)")
list(APPEND GLOO_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/allreduce_shm.cc")
list(APPEND GLOO_HDRS "${CMAKE_CURRENT_SOURCE_DIR}/allreduce_shm.h")
endif()

if(USE_CUDA)
file(GLOB GLOO_CUDA_SRCS
"${CMAKE_CURRENT_SOURCE_DIR}/cuda*.cc"
Expand Down
20 changes: 19 additions & 1 deletion gloo/allreduce.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,12 @@
#include <array>
#include <cstring>

#if GLOO_SHM_ALLREDUCE_APPLICABLE
#include "gloo/allreduce_shm.h"
#endif
#include "gloo/common/logging.h"
#include "gloo/math.h"
#include "gloo/transport/device.h"
#include "gloo/types.h"

namespace gloo {
Expand Down Expand Up @@ -131,14 +135,28 @@ void allreduce(const detail::AllreduceOptionsImpl& opts) {
return;
}

switch (opts.algorithm) {
auto algorithm = opts.algorithm;

#if GLOO_SHM_ALLREDUCE_APPLICABLE
if (algorithm == detail::AllreduceOptionsImpl::UNSPECIFIED &&
context->isIntraNode() && !context->getDevice()->hasGPUDirect()) {
algorithm = detail::AllreduceOptionsImpl::SHM;

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see how users can select an explicit algorithm - this will override anything the user explicitly specifies. Should we check for Algorithm::UNSPECIFIED before we override?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've modified it so that it overrides Algorithm::UNSPECIFIED only when shm allreduce is applicable. I also added a unit test for shm allreduce in gloo/test/allreduce_test.cc.

}
#endif

switch (algorithm) {
case detail::AllreduceOptionsImpl::UNSPECIFIED:
case detail::AllreduceOptionsImpl::RING:
ring(opts, reduceInputs, broadcastOutputs);
break;
case detail::AllreduceOptionsImpl::BCUBE:
bcube(opts, reduceInputs, broadcastOutputs);
break;
#if GLOO_SHM_ALLREDUCE_APPLICABLE
case detail::AllreduceOptionsImpl::SHM:
shm(opts);
break;
#endif
default:
GLOO_ENFORCE(false, "Algorithm not handled.");
}
Expand Down
7 changes: 7 additions & 0 deletions gloo/allreduce.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@ namespace gloo {

namespace detail {

// GLOO_SHM_ALLREDUCE_APPLICABLE is always defined: 1 when none of _WIN32,
// __aarch64__ or __arm__ is defined, 0 otherwise. Because it is defined in
// both cases, test it with `#if`, not `#ifdef`.
// NOTE(review): the build-side condition in gloo/CMakeLists.txt checks MSVC
// and CMAKE_SYSTEM_PROCESSOR instead of these macros -- confirm both agree on
// every supported toolchain.
#if !defined(_WIN32) && !defined(__aarch64__) && !defined(__arm__)
#define GLOO_SHM_ALLREDUCE_APPLICABLE 1
#else
#define GLOO_SHM_ALLREDUCE_APPLICABLE 0
#endif

struct AllreduceOptionsImpl {
// This type describes the function to use for element wise reduction.
//
Expand All @@ -39,6 +45,7 @@ struct AllreduceOptionsImpl {
UNSPECIFIED = 0,
RING = 1,
BCUBE = 2,
SHM = 3,
};

explicit AllreduceOptionsImpl(const std::shared_ptr<Context>& context)
Expand Down
Loading
Loading