mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
[ProcessGroupNCCL] Remove jumper to UCC (#114170)
The "jumper" to the UCC library in ProcessGroupNCCL was added a while back as a temporary solution. This change removes it now that UCC has its own process group ("PG") representation. Pull Request resolved: https://github.com/pytorch/pytorch/pull/114170 Approved by: https://github.com/wconstab, https://github.com/fduwjj, https://github.com/XilunWu, https://github.com/Aidyn-A
This commit is contained in:
@ -328,9 +328,6 @@ cmake_dependent_option(
|
||||
USE_C10D_GLOO "USE C10D GLOO" ON "USE_DISTRIBUTED;USE_GLOO" OFF)
|
||||
cmake_dependent_option(
|
||||
USE_C10D_NCCL "USE C10D NCCL" ON "USE_DISTRIBUTED;USE_NCCL" OFF)
|
||||
cmake_dependent_option(
|
||||
USE_NCCL_WITH_UCC "Enable UCC support for ProcessGroupNCCL. Only available if USE_C10D_NCCL is on." OFF
|
||||
"USE_C10D_NCCL" OFF)
|
||||
cmake_dependent_option(
|
||||
USE_C10D_MPI "USE C10D MPI" ON "USE_DISTRIBUTED;USE_MPI" OFF)
|
||||
cmake_dependent_option(
|
||||
|
@ -1330,9 +1330,6 @@ if(USE_DISTRIBUTED)
|
||||
target_compile_definitions(torch_hip PUBLIC USE_C10D_NCCL)
|
||||
else()
|
||||
target_compile_definitions(torch_cuda PUBLIC USE_C10D_NCCL)
|
||||
if(USE_NCCL_WITH_UCC)
|
||||
target_compile_definitions(torch_cuda PUBLIC USE_NCCL_WITH_UCC)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
if(USE_MPI AND USE_C10D_MPI)
|
||||
|
@ -154,7 +154,6 @@ function(caffe2_print_configuration_summary)
|
||||
message(STATUS " USE_NCCL : ${USE_NCCL}")
|
||||
if(${USE_NCCL})
|
||||
message(STATUS " USE_SYSTEM_NCCL : ${USE_SYSTEM_NCCL}")
|
||||
message(STATUS " USE_NCCL_WITH_UCC : ${USE_NCCL_WITH_UCC}")
|
||||
endif()
|
||||
message(STATUS " USE_NNPACK : ${USE_NNPACK}")
|
||||
message(STATUS " USE_NUMPY : ${USE_NUMPY}")
|
||||
|
@ -1,6 +1,5 @@
|
||||
#include <torch/csrc/distributed/c10d/NCCLUtils.hpp>
|
||||
#include <torch/csrc/distributed/c10d/ProcessGroupNCCL.hpp>
|
||||
#include <torch/csrc/distributed/c10d/UCCForNCCL.hpp>
|
||||
#include <fstream>
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
@ -1064,28 +1063,6 @@ ProcessGroupNCCL::ProcessGroupNCCL(
|
||||
&cacheAllocatorDeregisterHook);
|
||||
allocatorHooksAttached = true;
|
||||
}
|
||||
|
||||
#ifdef USE_NCCL_WITH_UCC
|
||||
static c10::once_flag initialize_ucc_lib_flag;
|
||||
c10::call_once(initialize_ucc_lib_flag, [&] {
|
||||
uccLib_ = loadTorchUCC();
|
||||
if (uccLib_ != nullptr) {
|
||||
LOG(INFO) << "[Rank " << rank_ << "] torch_ucc.so loaded";
|
||||
}
|
||||
});
|
||||
|
||||
if (uccLib_ != nullptr) {
|
||||
LOG(INFO) << "[Rank " << rank_ << "] torch_ucc.so loaded";
|
||||
typedef c10::intrusive_ptr<Backend> fn(
|
||||
const c10::intrusive_ptr<Store>& store, int rank, int size);
|
||||
auto createProcessGroupUCC =
|
||||
reinterpret_cast<fn*>(uccLib_->sym("createProcessGroupUCC"));
|
||||
if (createProcessGroupUCC != nullptr) {
|
||||
uccPG_ = createProcessGroupUCC(store, rank_, size_);
|
||||
LOG(INFO) << "[Rank " << rank_ << "] ProcessGroupUCC created.";
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void ProcessGroupNCCL::runHealthCheck() {
|
||||
@ -4134,18 +4111,6 @@ c10::intrusive_ptr<Work> ProcessGroupNCCL::_allgather_base(
|
||||
avoidRecordStreams);
|
||||
}
|
||||
|
||||
#ifdef USE_NCCL_WITH_UCC
|
||||
std::shared_ptr<at::DynamicLibrary> ProcessGroupNCCL::uccLib_ = nullptr;
|
||||
#endif
|
||||
|
||||
bool ProcessGroupNCCL::isUCCAvailable() const {
|
||||
#ifdef USE_NCCL_WITH_UCC
|
||||
return (uccPG_ != nullptr);
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace c10d
|
||||
|
||||
#endif // USE_C10D_NCCL
|
||||
|
@ -12,7 +12,6 @@
|
||||
#include <torch/csrc/distributed/c10d/Backend.hpp>
|
||||
#include <torch/csrc/distributed/c10d/NCCLUtils.hpp>
|
||||
#include <torch/csrc/distributed/c10d/Store.hpp>
|
||||
#include <torch/csrc/distributed/c10d/UCCForNCCL.hpp>
|
||||
|
||||
#include <ATen/DynamicLibrary.h>
|
||||
#include <ATen/cuda/CUDAContext.h>
|
||||
@ -530,9 +529,6 @@ class TORCH_API ProcessGroupNCCL : public Backend {
|
||||
// Provide an API for users to define their own ways to store NCCL debug info.
|
||||
void registerDebugInfoWriter(std::unique_ptr<DebugInfoWriter> writer);
|
||||
|
||||
// Tests if the UCC fallback path is available
|
||||
bool isUCCAvailable() const;
|
||||
|
||||
// Provides an API to abort the ProcessGroup (similar to ncclCommAbort)
|
||||
// instead of relying on ProcessGroupNCCL destructor.
|
||||
void abort(c10::optional<std::string> abortReason = c10::nullopt);
|
||||
@ -899,11 +895,6 @@ class TORCH_API ProcessGroupNCCL : public Backend {
|
||||
// The callback function to store NCCL debug info.
|
||||
std::unique_ptr<DebugInfoWriter> debugInfoWriter_ = nullptr;
|
||||
|
||||
#ifdef USE_NCCL_WITH_UCC
|
||||
// ProcessGroupUCC shared library handle and ProcessGroup pointer
|
||||
static std::shared_ptr<at::DynamicLibrary> uccLib_;
|
||||
c10::intrusive_ptr<Backend> uccPG_;
|
||||
#endif
|
||||
size_t uid_;
|
||||
};
|
||||
|
||||
|
@ -1,27 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <ATen/DynamicLibrary.h>
|
||||
|
||||
namespace c10d {
|
||||
|
||||
inline std::shared_ptr<at::DynamicLibrary> loadTorchUCC() {
|
||||
const char* path = std::getenv("TORCH_UCC_LIBRARY_PATH");
|
||||
if (path != nullptr) {
|
||||
try {
|
||||
return std::make_shared<at::DynamicLibrary>(path);
|
||||
} catch (const c10::DynamicLibraryError& e) {
|
||||
TORCH_WARN(
|
||||
"TORCH_UCC_LIBRARY_PATH is set, "
|
||||
"but the loading of torch_ucc.so failed with:",
|
||||
e.msg());
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
} // namespace c10d
|
@ -2294,9 +2294,7 @@ options :class:`~torch.distributed.ProcessGroupNCCL.Options`).
|
||||
"comm_split_count",
|
||||
&::c10d::ProcessGroupNCCL::getCommSplitCounter)
|
||||
.def_property_readonly(
|
||||
"options", &::c10d::ProcessGroupNCCL::getOptions)
|
||||
.def_property_readonly(
|
||||
"is_ucc_available", &::c10d::ProcessGroupNCCL::isUCCAvailable);
|
||||
"options", &::c10d::ProcessGroupNCCL::getOptions);
|
||||
|
||||
#ifdef NCCL_HAS_COMM_CTA_CGA
|
||||
py::class_<ncclConfig_t>(
|
||||
|
Reference in New Issue
Block a user