Bump gloo

Summary:
The latest version of Gloo takes care of MPI_Init/MPI_Finalize for us, so
this commit removes that handling from caffe2/contrib/gloo. It also
imports the CMake NCCL module changes from Gloo to stay consistent and
to allow setting NCCL_INCLUDE_DIR and NCCL_LIB_DIR separately.
Closes https://github.com/caffe2/caffe2/pull/1295

Reviewed By: dzhulgakov

Differential Revision: D5979364

Pulled By: pietern

fbshipit-source-id: 794b00b0a445317c30a13cc8f0f4dc38e590cc77
This commit is contained in:
Pieter Noordhuis
2017-10-05 16:36:49 -07:00
committed by Facebook Github Bot
parent 225de6628c
commit db06e91097
5 changed files with 11 additions and 54 deletions

View File

@ -19,14 +19,6 @@
#include "caffe2/core/logging.h"
#include "caffe2/core/tensor.h"
#if defined(GLOO_USE_MPI) && GLOO_USE_MPI
#include <mutex>
#endif
#if defined(GLOO_USE_MPI) && GLOO_USE_MPI
#include <mpi.h>
#endif
#include <gloo/transport/tcp/device.h>
#if defined(GLOO_USE_IBVERBS) && GLOO_USE_IBVERBS
#include <gloo/transport/ibverbs/device.h>
@ -68,26 +60,5 @@ std::shared_ptr<::gloo::transport::Device> createDevice(
CAFFE_THROW("Invalid transport: ", attr.transport);
}
#if defined(GLOO_USE_MPI) && GLOO_USE_MPI
static std::mutex mpiMutex;
static int mpiRefs = 0;
// Takes one reference on the shared MPI runtime. The first reference (count
// going from zero to one) calls MPI_Init(); later references only bump the
// count. All access to mpiRefs is serialized on mpiMutex.
//
// A non-zero return from MPI_Init() trips CAFFE_ENFORCE_EQ with the message
// "MPI_Init() failed".
void mpiInitialize() {
  std::lock_guard<std::mutex> lock(mpiMutex);
  if (mpiRefs == 0) {
    auto rv = MPI_Init(nullptr, nullptr);
    CAFFE_ENFORCE_EQ(rv, 0, "MPI_Init() failed");
  }
  // Count the reference only after initialization is known to have succeeded.
  // If the enforce above fires (it is expected to throw), the count stays at
  // zero, so a later call attempts MPI_Init() again instead of assuming MPI
  // is already up.
  ++mpiRefs;
}
// Drops one reference on the shared MPI runtime. When the last reference is
// released (count reaches zero) MPI_Finalize() is called. All access to
// mpiRefs is serialized on mpiMutex.
//
// A non-zero return from MPI_Finalize() trips CAFFE_ENFORCE_EQ with the
// message "MPI_Finalize() failed".
void mpiFinalize() {
  std::lock_guard<std::mutex> lock(mpiMutex);
  // Unbalanced call: no reference is held, so there is nothing to release.
  // Letting the count go negative would make the next mpiInitialize() see a
  // non-zero count and skip MPI_Init().
  if (mpiRefs <= 0) {
    return;
  }
  if (--mpiRefs == 0) {
    auto rv = MPI_Finalize();
    CAFFE_ENFORCE_EQ(rv, 0, "MPI_Finalize() failed");
  }
}
#endif
} // namespace gloo
} // namespace caffe2

View File

@ -40,10 +40,5 @@ struct createDeviceAttr {
// Returns a shared gloo transport device built from the given attributes.
// The attribute struct is taken by value.
// NOTE(review): the definition appears to raise "Invalid transport: ..." via
// CAFFE_THROW when attr.transport is not recognized; confirm against the .cc.
std::shared_ptr<::gloo::transport::Device> createDevice(
const createDeviceAttr attr);
#if defined(GLOO_USE_MPI) && GLOO_USE_MPI
void mpiInitialize();
void mpiFinalize();
#endif
} // namespace gloo
} // namespace caffe2

View File

@ -63,25 +63,14 @@ class CreateCommonWorld final : public Operator<Context> {
ws_->CreateBlob(status_blob_);
}
initialize();
#if defined(GLOO_USE_MPI) && GLOO_USE_MPI
if (mpi_rendezvous_) {
mpiInitialize();
}
#endif
}
// When built with GLOO_USE_MPI and this operator used MPI rendezvous, release
// the MPI reference by calling mpiFinalize(). Otherwise there is nothing to
// tear down here.
virtual ~CreateCommonWorld() {
#if defined(GLOO_USE_MPI) && GLOO_USE_MPI
  if (!mpi_rendezvous_) {
    return;
  }
  mpiFinalize();
#endif
}
CommonWorld rendezvousWithMPI() {
#if defined(GLOO_USE_MPI) && GLOO_USE_MPI
auto context = std::make_shared<::gloo::mpi::Context>(MPI_COMM_WORLD);
auto context = ::gloo::mpi::Context::createManaged();
if (timeout_ms_ != -1) {
context->setTimeout(std::chrono::milliseconds(timeout_ms_));
}

View File

@ -1,7 +1,9 @@
# - Try to find NCCL
# Find the nccl libraries
#
# The following variables are optionally searched for defaults
# NCCL_ROOT_DIR: Base directory where all NCCL components are found
# NCCL_ROOT_DIR: Base directory where all NCCL components are found
# NCCL_INCLUDE_DIR: Directory where NCCL header is found
# NCCL_LIB_DIR: Directory where NCCL library is found
#
# The following are set after configuration is done:
# NCCL_FOUND
@ -10,26 +12,26 @@
set(NCCL_ROOT_DIR "" CACHE PATH "Folder contains NVIDIA NCCL")
find_path(NCCL_INCLUDE_DIR
find_path(NCCL_INCLUDE_DIRS
NAMES nccl.h
HINTS
${NCCL_INCLUDE_DIR}
${NCCL_ROOT_DIR}
${NCCL_ROOT_DIR}/include)
find_library(NCCL_LIBRARY
find_library(NCCL_LIBRARIES
NAMES nccl
HINTS
${NCCL_LIB_DIR}
${NCCL_ROOT_DIR}
${NCCL_ROOT_DIR}/lib
${NCCL_ROOT_DIR}/lib/x86_64-linux-gnu
${NCCL_ROOT_DIR}/lib64)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(NCCL DEFAULT_MSG NCCL_INCLUDE_DIR NCCL_LIBRARY)
find_package_handle_standard_args(NCCL DEFAULT_MSG NCCL_INCLUDE_DIRS NCCL_LIBRARIES)
if(NCCL_FOUND)
set(NCCL_INCLUDE_DIRS ${NCCL_INCLUDE_DIR})
set(NCCL_LIBRARIES ${NCCL_LIBRARY})
message(STATUS "Found NCCL (include: ${NCCL_INCLUDE_DIRS}, library: ${NCCL_LIBRARIES})")
mark_as_advanced(NCCL_ROOT_DIR NCCL_INCLUDE_DIRS NCCL_LIBRARIES)
endif()