mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Bump gloo
Summary: Latest version of Gloo takes care of MPI_Init/MPI_Finalize for us, so this commit removes that handling from caffe2/contrib/gloo. It also imports the CMake NCCL module changes from Gloo to stay consistent and to allow setting NCCL_INCLUDE_DIR and NCCL_LIB_DIR separately.
Closes https://github.com/caffe2/caffe2/pull/1295
Reviewed By: dzhulgakov
Differential Revision: D5979364
Pulled By: pietern
fbshipit-source-id: 794b00b0a445317c30a13cc8f0f4dc38e590cc77
This commit is contained in:
committed by
Facebook Github Bot
parent
225de6628c
commit
db06e91097
@ -19,14 +19,6 @@
|
||||
#include "caffe2/core/logging.h"
|
||||
#include "caffe2/core/tensor.h"
|
||||
|
||||
#if defined(GLOO_USE_MPI) && GLOO_USE_MPI
|
||||
#include <mutex>
|
||||
#endif
|
||||
|
||||
#if defined(GLOO_USE_MPI) && GLOO_USE_MPI
|
||||
#include <mpi.h>
|
||||
#endif
|
||||
|
||||
#include <gloo/transport/tcp/device.h>
|
||||
#if defined(GLOO_USE_IBVERBS) && GLOO_USE_IBVERBS
|
||||
#include <gloo/transport/ibverbs/device.h>
|
||||
@ -68,26 +60,5 @@ std::shared_ptr<::gloo::transport::Device> createDevice(
|
||||
CAFFE_THROW("Invalid transport: ", attr.transport);
|
||||
}
|
||||
|
||||
#if defined(GLOO_USE_MPI) && GLOO_USE_MPI
|
||||
static std::mutex mpiMutex;
|
||||
static int mpiRefs = 0;
|
||||
|
||||
void mpiInitialize() {
|
||||
std::lock_guard<std::mutex> lock(mpiMutex);
|
||||
if (mpiRefs++ == 0) {
|
||||
auto rv = MPI_Init(nullptr, nullptr);
|
||||
CAFFE_ENFORCE_EQ(rv, 0, "MPI_Init() failed");
|
||||
}
|
||||
}
|
||||
|
||||
void mpiFinalize() {
|
||||
std::lock_guard<std::mutex> lock(mpiMutex);
|
||||
if (--mpiRefs == 0) {
|
||||
auto rv = MPI_Finalize();
|
||||
CAFFE_ENFORCE_EQ(rv, 0, "MPI_Finalize() failed");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
} // namespace gloo
|
||||
} // namespace caffe2
|
||||
|
@ -40,10 +40,5 @@ struct createDeviceAttr {
|
||||
std::shared_ptr<::gloo::transport::Device> createDevice(
|
||||
const createDeviceAttr attr);
|
||||
|
||||
#if defined(GLOO_USE_MPI) && GLOO_USE_MPI
|
||||
void mpiInitialize();
|
||||
void mpiFinalize();
|
||||
#endif
|
||||
|
||||
} // namespace gloo
|
||||
} // namespace caffe2
|
||||
|
@ -63,25 +63,14 @@ class CreateCommonWorld final : public Operator<Context> {
|
||||
ws_->CreateBlob(status_blob_);
|
||||
}
|
||||
initialize();
|
||||
|
||||
#if defined(GLOO_USE_MPI) && GLOO_USE_MPI
|
||||
if (mpi_rendezvous_) {
|
||||
mpiInitialize();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
virtual ~CreateCommonWorld() {
|
||||
#if defined(GLOO_USE_MPI) && GLOO_USE_MPI
|
||||
if (mpi_rendezvous_) {
|
||||
mpiFinalize();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
CommonWorld rendezvousWithMPI() {
|
||||
#if defined(GLOO_USE_MPI) && GLOO_USE_MPI
|
||||
auto context = std::make_shared<::gloo::mpi::Context>(MPI_COMM_WORLD);
|
||||
auto context = ::gloo::mpi::Context::createManaged();
|
||||
if (timeout_ms_ != -1) {
|
||||
context->setTimeout(std::chrono::milliseconds(timeout_ms_));
|
||||
}
|
||||
|
@ -1,7 +1,9 @@
|
||||
# - Try to find NCCL
|
||||
# Find the nccl libraries
|
||||
#
|
||||
# The following variables are optionally searched for defaults
|
||||
# NCCL_ROOT_DIR: Base directory where all NCCL components are found
|
||||
# NCCL_ROOT_DIR: Base directory where all NCCL components are found
|
||||
# NCCL_INCLUDE_DIR: Directory where NCCL header is found
|
||||
# NCCL_LIB_DIR: Directory where NCCL library is found
|
||||
#
|
||||
# The following are set after configuration is done:
|
||||
# NCCL_FOUND
|
||||
@ -10,26 +12,26 @@
|
||||
|
||||
set(NCCL_ROOT_DIR "" CACHE PATH "Folder contains NVIDIA NCCL")
|
||||
|
||||
find_path(NCCL_INCLUDE_DIR
|
||||
find_path(NCCL_INCLUDE_DIRS
|
||||
NAMES nccl.h
|
||||
HINTS
|
||||
${NCCL_INCLUDE_DIR}
|
||||
${NCCL_ROOT_DIR}
|
||||
${NCCL_ROOT_DIR}/include)
|
||||
|
||||
find_library(NCCL_LIBRARY
|
||||
find_library(NCCL_LIBRARIES
|
||||
NAMES nccl
|
||||
HINTS
|
||||
${NCCL_LIB_DIR}
|
||||
${NCCL_ROOT_DIR}
|
||||
${NCCL_ROOT_DIR}/lib
|
||||
${NCCL_ROOT_DIR}/lib/x86_64-linux-gnu
|
||||
${NCCL_ROOT_DIR}/lib64)
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
find_package_handle_standard_args(NCCL DEFAULT_MSG NCCL_INCLUDE_DIR NCCL_LIBRARY)
|
||||
find_package_handle_standard_args(NCCL DEFAULT_MSG NCCL_INCLUDE_DIRS NCCL_LIBRARIES)
|
||||
|
||||
if(NCCL_FOUND)
|
||||
set(NCCL_INCLUDE_DIRS ${NCCL_INCLUDE_DIR})
|
||||
set(NCCL_LIBRARIES ${NCCL_LIBRARY})
|
||||
message(STATUS "Found NCCL (include: ${NCCL_INCLUDE_DIRS}, library: ${NCCL_LIBRARIES})")
|
||||
mark_as_advanced(NCCL_ROOT_DIR NCCL_INCLUDE_DIRS NCCL_LIBRARIES)
|
||||
endif()
|
||||
|
2
third_party/gloo
vendored
2
third_party/gloo
vendored
Submodule third_party/gloo updated: 3b0f3f9ce9...1f4124e71a
Reference in New Issue
Block a user