mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 12:54:11 +08:00
gloo: support ibverbs in cmake (#153425)
This updates the gloo submodule in PyTorch to a version that supports the new ibverbs backend that can be used with PyTorch.

Test plan:

```
sudo dnf install rdma-core-devel
USE_GLOO_IBVERBS=ON python setup.py develop
torchrun --nproc_per_node 2 ~/scripts/gloo_ibverbs_test.py
```

```py
"""
run with:

torchrun --nproc_per_node 2 ~/scripts/gloo_ibverbs_test.py
"""

import os

os.environ["GLOO_DEVICE_TRANSPORT"] = "IBVERBS"

import torch
import torch.distributed as dist

dist.init_process_group("gloo")

rank = dist.get_rank()

if rank == 0:
    device = "cpu"
else:
    device = "cuda"
print(device)

t = torch.full((10, 100), fill_value=(rank+1), device=device)
target = torch.full((10, 100), fill_value=3, device=device)

dist.all_reduce(t)

torch.testing.assert_close(t, target)

t = torch.full((10, 100), fill_value=(rank+1), device=device)

if rank == 0:
    dist.send(t, dst=1)
else:
    dist.recv(t, src=0)
torch.testing.assert_close(t, torch.full_like(t, 1))
```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/153425
Approved by: https://github.com/fduwjj
This commit is contained in:
committed by
PyTorch MergeBot
parent
dde705864a
commit
9c3cef437c
@ -331,6 +331,9 @@ cmake_dependent_option(
|
||||
cmake_dependent_option(
|
||||
USE_GLOO_WITH_OPENSSL "Use Gloo with OpenSSL. Only available if USE_GLOO is on." OFF
|
||||
"USE_GLOO AND LINUX AND NOT INTERN_BUILD_MOBILE" OFF)
|
||||
cmake_dependent_option(
|
||||
USE_GLOO_IBVERBS "Use Gloo with ibverbs backend. Only available if USE_GLOO is on." OFF
|
||||
"USE_GLOO AND LINUX AND NOT INTERN_BUILD_MOBILE" OFF)
|
||||
cmake_dependent_option(
|
||||
USE_C10D_GLOO "USE C10D GLOO" ON "USE_DISTRIBUTED;USE_GLOO" OFF)
|
||||
cmake_dependent_option(
|
||||
|
@ -1217,6 +1217,10 @@ if(USE_GLOO)
|
||||
set(GLOO_INSTALL OFF CACHE BOOL "" FORCE)
|
||||
set(GLOO_STATIC_OR_SHARED STATIC CACHE STRING "" FORCE)
|
||||
|
||||
if(USE_GLOO_IBVERBS)
|
||||
set(USE_IBVERBS ON)
|
||||
endif()
|
||||
|
||||
# Temporarily override variables to avoid building Gloo tests/benchmarks
|
||||
set(__BUILD_TEST ${BUILD_TEST})
|
||||
set(__BUILD_BENCHMARK ${BUILD_BENCHMARK})
|
||||
|
@ -188,6 +188,7 @@ function(caffe2_print_configuration_summary)
|
||||
message(STATUS " USE_MPI : ${USE_MPI}")
|
||||
message(STATUS " USE_GLOO : ${USE_GLOO}")
|
||||
message(STATUS " USE_GLOO_WITH_OPENSSL : ${USE_GLOO_WITH_OPENSSL}")
|
||||
message(STATUS " USE_GLOO_IBVERBS : ${USE_GLOO_IBVERBS}")
|
||||
message(STATUS " USE_TENSORPIPE : ${USE_TENSORPIPE}")
|
||||
endif()
|
||||
if(NOT "${SELECTED_OP_LIST}" STREQUAL "")
|
||||
|
2
third_party/gloo
vendored
2
third_party/gloo
vendored
Submodule third_party/gloo updated: c610704276...09e5711ef5
@ -151,7 +151,10 @@ static std::shared_ptr<::gloo::transport::Device> makeIBVerbsDevice(
|
||||
const std::string& interface,
|
||||
const std::string& hostname,
|
||||
bool lazyInit) {
|
||||
TORCH_CHECK(hostname.empty(), "ibverbs transport does not support hostname");
|
||||
if (!hostname.empty()) {
|
||||
TORCH_WARN(
|
||||
"ibverbs transport does not support hostname, defaulting to any");
|
||||
}
|
||||
|
||||
TORCH_CHECK(!lazyInit, "transport does not support lazy init");
|
||||
|
||||
|
Reference in New Issue
Block a user