diff --git a/gloo/test/base_test.h b/gloo/test/base_test.h index 307b4a8a6..7da5cfdea 100644 --- a/gloo/test/base_test.h +++ b/gloo/test/base_test.h @@ -10,6 +10,7 @@ #include +#include #include #include #include @@ -112,6 +113,13 @@ class BaseTest : public ::testing::Test { device_creator, std::function)> fn, int base = 2) { + // Track whether workers found the transport unavailable so we can call + // GTEST_SKIP() from the main thread after joining. GTEST_SKIP() is not + // thread-safe and must not be called from worker threads — doing so + // causes a data race on GTest internals that can manifest as + // "terminate called recursively" (SIGABRT / exit code 134). + std::atomic transportUnavailable{false}; + Barrier barrier(size); auto store = std::make_shared<::gloo::rendezvous::HashStore>(); @@ -119,11 +127,11 @@ class BaseTest : public ::testing::Test { auto context = std::make_shared<::gloo::rendezvous::Context>(rank, size, base); - // Create device per thread to avoid collisions then they are using the + // Create device per thread to avoid collisions when they are using the // socket address. auto device = device_creator(transport); if (!device) { - GTEST_SKIP() << "Skipping test: transport not available"; + transportUnavailable.store(true); return; } context->connectFullMesh(store, device); @@ -150,6 +158,10 @@ class BaseTest : public ::testing::Test { context->closeConnections(); } }); + + if (transportUnavailable.load()) { + GTEST_SKIP() << "Skipping test: transport not available"; + } } void spawn( diff --git a/gloo/transport/ibverbs/device.cc b/gloo/transport/ibverbs/device.cc index 48bd2782a..65cacc4d6 100644 --- a/gloo/transport/ibverbs/device.cc +++ b/gloo/transport/ibverbs/device.cc @@ -154,14 +154,22 @@ Device::~Device() { done_ = true; loop_->join(); + // Log errors instead of throwing — destructors are implicitly noexcept + // in C++11+, so a throw here calls std::terminate(). rv = ibv_destroy_comp_channel(comp_channel_); - GLOO_ENFORCE_EQ(rv, 0, strerror(errno)); + if (rv != 0) { + GLOO_ERROR("ibv_destroy_comp_channel: ", strerror(errno)); + } rv = ibv_dealloc_pd(pd_); - GLOO_ENFORCE_EQ(rv, 0, strerror(errno)); + if (rv != 0) { + GLOO_ERROR("ibv_dealloc_pd: ", strerror(errno)); + } rv = ibv_close_device(context_); - GLOO_ENFORCE_EQ(rv, 0, strerror(errno)); + if (rv != 0) { + GLOO_ERROR("ibv_close_device: ", strerror(errno)); + } } std::string Device::str() const {