Detect NVSHMEM location (#153010)
### Changes
- Detect the NVSHMEM install location via `sysconfig.get_path("purelib")`, which typically resolves to `<conda_env>/lib/python/site-packages`; the NVSHMEM include and lib directories live under `nvidia/nvshmem` there.
- Added the link directory via `target_link_directories`.
- Removed the direct dependency on mlx5.
- Added a preload rule (following the other NVIDIA libs).

### Plan of Record
1. End-user experience: link against NVSHMEM dynamically (the NVSHMEM lib is about 100M, similar to NCCL, so we'd rather have users `pip install nvshmem` than have torch carry the bits).
2. Developer experience: at compile time, prefer a wheel dependency over a Git submodule.
   General rule: use a submodule for a small lib that torch can statically link with. If users `pip install` a lib, our CI build process should do the same, rather than building from a Git submodule (just for its headers, for example).
3. Keep `USE_NVSHMEM` to gate non-Linux platforms, like Windows and Mac.
4. At configuration time, we should be able to detect whether NVSHMEM is available; if not, we don't build `NVSHMEMSymmetricMemory` at all.

For now, we have a symbol dependency on two particular libs from NVSHMEM:
- `libnvshmem_host.so`: contains the host-side APIs;
- `libnvshmem_device.a`: contains device-side global variables AND device function implementations.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/153010
Approved by: https://github.com/ngimel, https://github.com/fduwjj, https://github.com/Skylion007
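As a rough illustration of the detection step described above (not the code in this PR), the wheel layout can be probed from Python like this; `nvidia/nvshmem` under the `purelib` path is where the NVSHMEM wheel unpacks its headers and libraries:

```python
import os
import sysconfig

# Site-packages directory, e.g. <conda_env>/lib/python3.x/site-packages
py_lib_path = sysconfig.get_path("purelib")

# The NVSHMEM wheel places include/ and lib/ under nvidia/nvshmem
nvshmem_home = os.path.join(py_lib_path, "nvidia", "nvshmem")

if os.path.exists(nvshmem_home):
    print("NVSHMEM_HOME:", nvshmem_home)
    print("include dir:", os.path.join(nvshmem_home, "include"))
    print("lib dir:", os.path.join(nvshmem_home, "lib"))
else:
    print("NVSHMEM wheel not found; NVSHMEM support would be disabled")
```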
```
@@ -979,10 +979,21 @@ elseif(USE_CUDA)
  endif()

  # Use env var for these for now for prototyping purposes
  set(USE_NVSHMEM $ENV{USE_NVSHMEM} CACHE BOOL "Enable NVSHMEM support")
  set(NVSHMEM_HOME $ENV{NVSHMEM_HOME} CACHE PATH "Path to NVSHMEM build dir")
  set(USE_NVSHMEM $ENV{USE_NVSHMEM} CACHE BOOL "Whether to build with NVSHMEM support")
  # If user has specified NVSHMEM_HOME, we use it;
  # Otherwise, NVSHMEM_HOME is auto detected in tools/setup_helpers/cmake.py
  if($ENV{NVSHMEM_HOME})
    set(NVSHMEM_HOME $ENV{NVSHMEM_HOME} CACHE PATH "Path to NVSHMEM build dir")
  endif()

  if(USE_NVSHMEM AND NOT DEFINED NVSHMEM_HOME)
    message(WARNING "USE_NVSHMEM set to 1 but NVSHMEM_HOME not found. Please run `pip install nvidia-nvshmem-<version>`, or set NVSHMEM_HOME to the NVSHMEM build dir")
    # Disable nvshmem if NVSHMEM_HOME is not found
    set(USE_NVSHMEM FALSE CACHE BOOL "Whether to build with NVSHMEM support")
  endif()

  if(USE_NVSHMEM)
    message("Building with NVSHMEM support: '${NVSHMEM_HOME}'")
    set(NVSHMEM_INCLUDE_DIR "${NVSHMEM_HOME}/include")
    set(NVSHMEM_LIB_DIR "${NVSHMEM_HOME}/lib")
```
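For clarity, here is a small Python sketch of the precedence this CMake block implements (illustrative only, not part of the change): an explicit `NVSHMEM_HOME` environment variable wins; otherwise the location auto-detected in `tools/setup_helpers/cmake.py` is used, and if neither is available NVSHMEM support is disabled with a warning.

```python
import os
from typing import Optional

def resolve_nvshmem_home(autodetected: Optional[str]) -> Optional[str]:
    """Illustrative sketch of the NVSHMEM_HOME precedence, not the build code."""
    env_home = os.environ.get("NVSHMEM_HOME")
    if env_home:
        return env_home   # explicit user override, passed through as a CMake cache var
    return autodetected   # value detected from the installed wheel, or None

home = resolve_nvshmem_home(autodetected=None)
if home is None:
    print("WARNING: NVSHMEM_HOME not found; disabling NVSHMEM support")
```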
```
@@ -1000,18 +1011,17 @@ elseif(USE_CUDA)
    set_target_properties(nvshmem_extension PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
    target_compile_options(nvshmem_extension PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-rdc=true>)
    target_compile_options(nvshmem_extension PRIVATE "-U__CUDA_NO_HALF_OPERATORS__")
    target_link_directories(nvshmem_extension PRIVATE ${NVSHMEM_LIB_DIR})
    target_link_libraries(nvshmem_extension PRIVATE
      ${NVSHMEM_LIB_DIR}/libnvshmem.a
      ${NVSHMEM_LIB_DIR}/nvshmem_bootstrap_uid.so
      # Full path needed bc nvshmem wheel ships with .so.3 instead of .so;
      # otherwise, we could just write `nvshmem_host`
      ${NVSHMEM_LIB_DIR}/libnvshmem_host.so.3
      nvshmem_device
    )
    target_link_libraries(nvshmem_extension PRIVATE mlx5)
    target_link_libraries(torch_cuda PRIVATE nvshmem_extension)
    install(TARGETS nvshmem_extension EXPORT Caffe2Targets DESTINATION lib)
    install(
      FILES ${NVSHMEM_LIB_DIR}/nvshmem_bootstrap_uid.so
      DESTINATION lib
    )
  endif()

  if(USE_UCC)
    target_link_libraries(torch_cuda PRIVATE __caffe2_ucc)
    target_compile_definitions(torch_cuda PRIVATE USE_UCC)
```
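The full-path link above exists because the wheel ships only versioned sonames (`libnvshmem_host.so.3`, with no plain `.so` symlink). A quick way to confirm what a given NVSHMEM wheel actually ships (illustrative, not part of this change):

```python
import glob
import os
import sysconfig

# List the libraries shipped in the NVSHMEM wheel's lib directory.
nvshmem_lib_dir = os.path.join(sysconfig.get_path("purelib"), "nvidia", "nvshmem", "lib")
for path in sorted(glob.glob(os.path.join(nvshmem_lib_dir, "*"))):
    print(os.path.basename(path))
# Per this change, the files of interest are libnvshmem_host.so.3 (host-side APIs),
# libnvshmem_device.a (device-side globals and functions), and nvshmem_bootstrap_uid.so.
```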
```
@@ -288,6 +288,12 @@ class CMake:
            }
        )

        # Detect build dependencies from python lib path (in order to set *_HOME variables)
        # NVSHMEM
        nvshmem_home = py_lib_path + "/nvidia/nvshmem"
        if os.path.exists(nvshmem_home):
            build_options["NVSHMEM_HOME"] = nvshmem_home

        # Options starting with CMAKE_
        cmake__options = {
            "CMAKE_INSTALL_PREFIX": install_dir,
```
```
@@ -357,6 +357,7 @@ def _load_global_deps() -> None:
        "cusolver": "libcusolver.so.*[0-9]",
        "nccl": "libnccl.so.*[0-9]",
        "nvtx": "libnvToolsExt.so.*[0-9]",
        "nvshmem": "libnvshmem_host.so.*[0-9]",
    }
    # cufile is only available on cuda 12+
    # TODO: Remove once CUDA 11.8 binaries are deprecated
```
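The new `nvshmem` entry follows the same preload pattern as the other NVIDIA wheels: the glob is resolved against the wheel's lib directory and the matching soname is loaded with `RTLD_GLOBAL` so torch's libraries can find its symbols. A hedged sketch of that mechanism (the helper below is hypothetical, not torch's actual `_load_global_deps` code):

```python
import ctypes
import glob
import os
import sysconfig

def preload_nvidia_lib(pkg_subdir: str, soname_pattern: str) -> None:
    """Hypothetical helper: resolve a soname glob inside an NVIDIA wheel and preload it."""
    lib_dir = os.path.join(sysconfig.get_path("purelib"), "nvidia", pkg_subdir, "lib")
    matches = sorted(glob.glob(os.path.join(lib_dir, soname_pattern)))
    if matches:
        # RTLD_GLOBAL makes the symbols visible to libraries loaded afterwards.
        ctypes.CDLL(matches[0], mode=ctypes.RTLD_GLOBAL)

# e.g. for the entry added above:
preload_nvidia_lib("nvshmem", "libnvshmem_host.so.*[0-9]")
```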