mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
[Binary-builds] Use System NCCL by default in CI/CD. (#152835)
Use system NCCL by default. The correct NCCL version is already built into the Manylinux Docker image. A follow-up PR will detect whether the user has NCCL installed and enable USE_SYSTEM_NCCL by default in that case. Pull Request resolved: https://github.com/pytorch/pytorch/pull/152835 Approved by: https://github.com/malfet
This commit is contained in:
committed by
PyTorch MergeBot
parent
967937872f
commit
22641f42b6
@ -27,6 +27,7 @@ if [ "$DESIRED_CUDA" = "cpu" ]; then
|
||||
USE_PRIORITIZED_TEXT_FOR_LD=1 python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn
|
||||
else
|
||||
echo "BASE_CUDA_VERSION is set to: $DESIRED_CUDA"
|
||||
export USE_SYSTEM_NCCL=1
|
||||
# USE_PRIORITIZED_TEXT_FOR_LD enables the linker script optimization; see https://github.com/pytorch/pytorch/pull/121975/files
|
||||
USE_PRIORITIZED_TEXT_FOR_LD=1 python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda
|
||||
fi
|
||||
|
@ -15,6 +15,9 @@ export INSTALL_TEST=0 # dont install test binaries into site-packages
|
||||
export USE_CUPTI_SO=0
|
||||
export USE_CUSPARSELT=${USE_CUSPARSELT:-1} # Enable if not disabled by libtorch build
|
||||
export USE_CUFILE=${USE_CUFILE:-1}
|
||||
export USE_SYSTEM_NCCL=1
|
||||
export NCCL_INCLUDE_DIR="/usr/local/cuda/include/"
|
||||
export NCCL_LIB_DIR="/usr/local/cuda/lib64/"
|
||||
|
||||
# Keep an array of cmake variables to add to
|
||||
if [[ -z "$CMAKE_ARGS" ]]; then
|
||||
@ -172,12 +175,9 @@ if [[ $CUDA_VERSION == 12* ]]; then
|
||||
export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN'
|
||||
export FORCE_RPATH="--force-rpath"
|
||||
export USE_STATIC_NCCL=0
|
||||
export USE_SYSTEM_NCCL=1
|
||||
export ATEN_STATIC_CUDA=0
|
||||
export USE_CUDA_STATIC_LINK=0
|
||||
export USE_CUPTI_SO=1
|
||||
export NCCL_INCLUDE_DIR="/usr/local/cuda/include/"
|
||||
export NCCL_LIB_DIR="/usr/local/cuda/lib64/"
|
||||
fi
|
||||
elif [[ $CUDA_VERSION == "11.8" ]]; then
|
||||
export USE_STATIC_CUDNN=0
|
||||
@ -254,12 +254,9 @@ elif [[ $CUDA_VERSION == "11.8" ]]; then
|
||||
export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN'
|
||||
export FORCE_RPATH="--force-rpath"
|
||||
export USE_STATIC_NCCL=0
|
||||
export USE_SYSTEM_NCCL=1
|
||||
export ATEN_STATIC_CUDA=0
|
||||
export USE_CUDA_STATIC_LINK=0
|
||||
export USE_CUPTI_SO=1
|
||||
export NCCL_INCLUDE_DIR="/usr/local/cuda/include/"
|
||||
export NCCL_LIB_DIR="/usr/local/cuda/lib64/"
|
||||
fi
|
||||
else
|
||||
echo "Unknown cuda version $CUDA_VERSION"
|
||||
|
@ -57,7 +57,8 @@ if(NCCL_FOUND) # obtaining NCCL version and some sanity checks
|
||||
include(CheckCXXSymbolExists)
|
||||
check_cxx_symbol_exists(NCCL_VERSION_CODE nccl.h NCCL_VERSION_DEFINED)
|
||||
|
||||
if (NCCL_VERSION_DEFINED)
|
||||
# this condition check only works for non-static NCCL linking
|
||||
if (NCCL_VERSION_DEFINED AND NOT USE_STATIC_NCCL)
|
||||
set(file "${PROJECT_BINARY_DIR}/detect_nccl_version.cc")
|
||||
file(WRITE ${file} "
|
||||
#include <iostream>
|
||||
@ -65,7 +66,6 @@ if(NCCL_FOUND) # obtaining NCCL version and some sanity checks
|
||||
int main()
|
||||
{
|
||||
std::cout << NCCL_MAJOR << '.' << NCCL_MINOR << '.' << NCCL_PATCH << std::endl;
|
||||
|
||||
int x;
|
||||
ncclGetVersion(&x);
|
||||
return x == NCCL_VERSION_CODE;
|
||||
@ -80,11 +80,9 @@ if(NCCL_FOUND) # obtaining NCCL version and some sanity checks
|
||||
(include: ${NCCL_INCLUDE_DIRS}, library: ${NCCL_LIBRARIES}) Please set NCCL_INCLUDE_DIR and NCCL_LIB_DIR manually.")
|
||||
endif()
|
||||
message(STATUS "NCCL version: ${NCCL_VERSION_FROM_HEADER}")
|
||||
else()
|
||||
message(STATUS "NCCL version < 2.3.5-5")
|
||||
endif ()
|
||||
set (CMAKE_REQUIRED_INCLUDES ${OLD_CMAKE_REQUIRED_INCLUDES})
|
||||
|
||||
set (CMAKE_REQUIRED_INCLUDES ${OLD_CMAKE_REQUIRED_INCLUDES})
|
||||
message(STATUS "Found NCCL (include: ${NCCL_INCLUDE_DIRS}, library: ${NCCL_LIBRARIES})")
|
||||
mark_as_advanced(NCCL_ROOT_DIR NCCL_INCLUDE_DIRS NCCL_LIBRARIES)
|
||||
endif()
|
||||
|
Reference in New Issue
Block a user