[ProcessGroupNCCL] Remove jumper to UCC (#114170)

The "jumper" to UCC lib in ProcessGroupNCCL was a temporary solution a while back. Cleaning it now that UCC has its own "PG" representation.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/114170
Approved by: https://github.com/wconstab, https://github.com/fduwjj, https://github.com/XilunWu, https://github.com/Aidyn-A
Author: Ke Wen
Date: 2023-11-22 15:35:03 +00:00
Committed by: PyTorch MergeBot
Parent: d7f698102e
Commit: f2ca07b680

7 changed files with 1 addition and 81 deletions
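With the jumper removed, UCC is no longer reached through ProcessGroupNCCL; it is selected as its own backend. A minimal usage sketch, assuming PyTorch was built with native UCC support (e.g. USE_C10D_UCC) and a working UCC installation; the single-process rendezvous values below are purely illustrative:

    import os
    import torch.distributed as dist

    # Illustrative single-process rendezvous; real jobs get these from the launcher.
    os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
    os.environ.setdefault("MASTER_PORT", "29500")

    # Create the UCC process group directly rather than via the old NCCL "jumper".
    dist.init_process_group(backend="ucc", rank=0, world_size=1)
    print(dist.get_backend())  # "ucc"
    dist.destroy_process_group()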

@@ -328,9 +328,6 @@ cmake_dependent_option(
USE_C10D_GLOO "USE C10D GLOO" ON "USE_DISTRIBUTED;USE_GLOO" OFF)
cmake_dependent_option(
USE_C10D_NCCL "USE C10D NCCL" ON "USE_DISTRIBUTED;USE_NCCL" OFF)
cmake_dependent_option(
USE_NCCL_WITH_UCC "Enable UCC support for ProcessGroupNCCL. Only available if USE_C10D_NCCL is on." OFF
"USE_C10D_NCCL" OFF)
cmake_dependent_option(
USE_C10D_MPI "USE C10D MPI" ON "USE_DISTRIBUTED;USE_MPI" OFF)
cmake_dependent_option(

@@ -1330,9 +1330,6 @@ if(USE_DISTRIBUTED)
target_compile_definitions(torch_hip PUBLIC USE_C10D_NCCL)
else()
target_compile_definitions(torch_cuda PUBLIC USE_C10D_NCCL)
if(USE_NCCL_WITH_UCC)
target_compile_definitions(torch_cuda PUBLIC USE_NCCL_WITH_UCC)
endif()
endif()
endif()
if(USE_MPI AND USE_C10D_MPI)

@@ -154,7 +154,6 @@ function(caffe2_print_configuration_summary)
message(STATUS " USE_NCCL : ${USE_NCCL}")
if(${USE_NCCL})
message(STATUS " USE_SYSTEM_NCCL : ${USE_SYSTEM_NCCL}")
message(STATUS " USE_NCCL_WITH_UCC : ${USE_NCCL_WITH_UCC}")
endif()
message(STATUS " USE_NNPACK : ${USE_NNPACK}")
message(STATUS " USE_NUMPY : ${USE_NUMPY}")

@@ -1,6 +1,5 @@
#include <torch/csrc/distributed/c10d/NCCLUtils.hpp>
#include <torch/csrc/distributed/c10d/ProcessGroupNCCL.hpp>
#include <torch/csrc/distributed/c10d/UCCForNCCL.hpp>
#include <fstream>
#include <mutex>
#include <sstream>
@@ -1064,28 +1063,6 @@ ProcessGroupNCCL::ProcessGroupNCCL(
&cacheAllocatorDeregisterHook);
allocatorHooksAttached = true;
}
#ifdef USE_NCCL_WITH_UCC
static c10::once_flag initialize_ucc_lib_flag;
c10::call_once(initialize_ucc_lib_flag, [&] {
uccLib_ = loadTorchUCC();
if (uccLib_ != nullptr) {
LOG(INFO) << "[Rank " << rank_ << "] torch_ucc.so loaded";
}
});
if (uccLib_ != nullptr) {
LOG(INFO) << "[Rank " << rank_ << "] torch_ucc.so loaded";
typedef c10::intrusive_ptr<Backend> fn(
const c10::intrusive_ptr<Store>& store, int rank, int size);
auto createProcessGroupUCC =
reinterpret_cast<fn*>(uccLib_->sym("createProcessGroupUCC"));
if (createProcessGroupUCC != nullptr) {
uccPG_ = createProcessGroupUCC(store, rank_, size_);
LOG(INFO) << "[Rank " << rank_ << "] ProcessGroupUCC created.";
}
}
#endif
}
void ProcessGroupNCCL::runHealthCheck() {
@@ -4134,18 +4111,6 @@ c10::intrusive_ptr<Work> ProcessGroupNCCL::_allgather_base(
avoidRecordStreams);
}
#ifdef USE_NCCL_WITH_UCC
std::shared_ptr<at::DynamicLibrary> ProcessGroupNCCL::uccLib_ = nullptr;
#endif
bool ProcessGroupNCCL::isUCCAvailable() const {
#ifdef USE_NCCL_WITH_UCC
return (uccPG_ != nullptr);
#else
return false;
#endif
}
} // namespace c10d
#endif // USE_C10D_NCCL

@@ -12,7 +12,6 @@
#include <torch/csrc/distributed/c10d/Backend.hpp>
#include <torch/csrc/distributed/c10d/NCCLUtils.hpp>
#include <torch/csrc/distributed/c10d/Store.hpp>
#include <torch/csrc/distributed/c10d/UCCForNCCL.hpp>
#include <ATen/DynamicLibrary.h>
#include <ATen/cuda/CUDAContext.h>
@@ -530,9 +529,6 @@ class TORCH_API ProcessGroupNCCL : public Backend {
// Provide an API for users to define their own ways to store NCCL debug info.
void registerDebugInfoWriter(std::unique_ptr<DebugInfoWriter> writer);
// Tests if the UCC fallback path is available
bool isUCCAvailable() const;
// Provides an API to abort the ProcessGroup (similar to ncclCommAbort)
// instead of relying on ProcessGroupNCCL destructor.
void abort(c10::optional<std::string> abortReason = c10::nullopt);
@@ -899,11 +895,6 @@ class TORCH_API ProcessGroupNCCL : public Backend {
// The callback function to store NCCL debug info.
std::unique_ptr<DebugInfoWriter> debugInfoWriter_ = nullptr;
#ifdef USE_NCCL_WITH_UCC
// ProcessGroupUCC shared library handle and ProcessGroup pointer
static std::shared_ptr<at::DynamicLibrary> uccLib_;
c10::intrusive_ptr<Backend> uccPG_;
#endif
size_t uid_;
};
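The isUCCAvailable() query removed from the header above (and its is_ucc_available Python binding, removed in the last hunk below) has no direct replacement on ProcessGroupNCCL. A hedged sketch of how a script can check for compiled-in UCC support instead, using the torch.distributed helpers that exist independently of this change:

    import torch.distributed as dist

    # True only when this PyTorch build includes native UCC / NCCL support.
    print("UCC available:", dist.is_ucc_available())
    print("NCCL available:", dist.is_nccl_available())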

@@ -1,27 +0,0 @@
#pragma once
#include <cassert>
#include <memory>
#include <string>
#include <vector>
#include <ATen/DynamicLibrary.h>
namespace c10d {
inline std::shared_ptr<at::DynamicLibrary> loadTorchUCC() {
const char* path = std::getenv("TORCH_UCC_LIBRARY_PATH");
if (path != nullptr) {
try {
return std::make_shared<at::DynamicLibrary>(path);
} catch (const c10::DynamicLibraryError& e) {
TORCH_WARN(
"TORCH_UCC_LIBRARY_PATH is set, "
"but the loading of torch_ucc.so failed with:",
e.msg());
}
}
return nullptr;
}
} // namespace c10d
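The deleted header above implemented the env-var loader: if TORCH_UCC_LIBRARY_PATH was set, torch_ucc.so was loaded via at::DynamicLibrary and createProcessGroupUCC was resolved by symbol. A hedged sketch of the general-purpose alternative for out-of-tree backends, torch.distributed's explicit registration API; the backend name and constructor below are hypothetical placeholders, not part of this change:

    import torch.distributed as dist

    def _create_my_backend(store, rank, world_size, timeout):
        # Hypothetical constructor: an out-of-tree extension would build and
        # return its c10d backend instance here.
        raise NotImplementedError("illustrative stub only")

    # Register the name so init_process_group(backend="my_backend") can find it.
    dist.Backend.register_backend("my_backend", _create_my_backend)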

@@ -2294,9 +2294,7 @@ options :class:`~torch.distributed.ProcessGroupNCCL.Options`).
"comm_split_count",
&::c10d::ProcessGroupNCCL::getCommSplitCounter)
.def_property_readonly(
"options", &::c10d::ProcessGroupNCCL::getOptions)
.def_property_readonly(
"is_ucc_available", &::c10d::ProcessGroupNCCL::isUCCAvailable);
"options", &::c10d::ProcessGroupNCCL::getOptions);
#ifdef NCCL_HAS_COMM_CTA_CGA
py::class_<ncclConfig_t>(