mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Try to land https://github.com/pytorch/pytorch/pull/136789/files on our end and fix any remaining issues.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/144794
Approved by: https://github.com/kwen2501, https://github.com/eqy, https://github.com/atalman
This commit is contained in:
@ -26,6 +26,10 @@ constexpr int64_t kCommInitBusyWaitMillis = 2;
|
||||
#define NCCL_HAS_COMM_SPLIT
|
||||
#endif
|
||||
|
||||
// NCCL_HAS_INIT_RANK_SCALABLE is defined only for NCCL versions 2.23+
#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 23, 0)
|
||||
#define NCCL_HAS_INIT_RANK_SCALABLE
|
||||
#endif
|
||||
|
||||
// ncclGetLastError() is enabled only for NCCL versions 2.13+
|
||||
// ncclRemoteError only exists in NCCL versions 2.13+
|
||||
#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 13, 0)
|
||||
@ -212,6 +216,13 @@ class NCCLComm {
|
||||
ncclUniqueId commId,
|
||||
at::DeviceIndex deviceIndex,
|
||||
ncclConfig_t& config);
|
||||
#ifdef NCCL_HAS_INIT_RANK_SCALABLE
|
||||
// Factory declared only when NCCL_HAS_INIT_RANK_SCALABLE is defined.
// Takes the number of ranks, this process's rank, a vector of NCCL unique
// ids (passed by non-const reference), and an NCCL config (also by
// non-const reference); returns a shared_ptr to an NCCLComm.
// NOTE(review): the name suggests this is the multi-unique-id ("scalable")
// counterpart of the single-commId create() overload above; the definition
// is not visible here -- confirm against the implementation.
static std::shared_ptr<NCCLComm> create_scalable(
|
||||
int numRanks,
|
||||
int rank,
|
||||
std::vector<ncclUniqueId>& commIds,
|
||||
ncclConfig_t& config);
|
||||
#endif // NCCL_HAS_INIT_RANK_SCALABLE
|
||||
#endif // NCCL_HAS_CONFIG
|
||||
|
||||
#ifdef NCCL_HAS_COMM_SPLIT
|
||||
|
Reference in New Issue
Block a user