[Binary-builds]Use System NCCL by default in CI/CD. (#152835)

Use system NCCL by default. The correct NCCL version is already built into the manylinux Docker image.

A follow-up PR will detect whether the user has NCCL installed and enable USE_SYSTEM_NCCL by default in that case.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/152835
Approved by: https://github.com/malfet
This commit is contained in:
atalman
2025-05-30 18:51:45 +00:00
committed by PyTorch MergeBot
parent 967937872f
commit 22641f42b6
3 changed files with 7 additions and 11 deletions

View File

@ -27,6 +27,7 @@ if [ "$DESIRED_CUDA" = "cpu" ]; then
USE_PRIORITIZED_TEXT_FOR_LD=1 python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn USE_PRIORITIZED_TEXT_FOR_LD=1 python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn
else else
echo "BASE_CUDA_VERSION is set to: $DESIRED_CUDA" echo "BASE_CUDA_VERSION is set to: $DESIRED_CUDA"
export USE_SYSTEM_NCCL=1
#USE_PRIORITIZED_TEXT_FOR_LD for enable linker script optimization https://github.com/pytorch/pytorch/pull/121975/files #USE_PRIORITIZED_TEXT_FOR_LD for enable linker script optimization https://github.com/pytorch/pytorch/pull/121975/files
USE_PRIORITIZED_TEXT_FOR_LD=1 python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda USE_PRIORITIZED_TEXT_FOR_LD=1 python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda
fi fi

View File

@ -15,6 +15,9 @@ export INSTALL_TEST=0 # dont install test binaries into site-packages
export USE_CUPTI_SO=0 export USE_CUPTI_SO=0
export USE_CUSPARSELT=${USE_CUSPARSELT:-1} # Enable if not disabled by libtorch build export USE_CUSPARSELT=${USE_CUSPARSELT:-1} # Enable if not disabled by libtorch build
export USE_CUFILE=${USE_CUFILE:-1} export USE_CUFILE=${USE_CUFILE:-1}
export USE_SYSTEM_NCCL=1
export NCCL_INCLUDE_DIR="/usr/local/cuda/include/"
export NCCL_LIB_DIR="/usr/local/cuda/lib64/"
# Keep an array of cmake variables to add to # Keep an array of cmake variables to add to
if [[ -z "$CMAKE_ARGS" ]]; then if [[ -z "$CMAKE_ARGS" ]]; then
@ -172,12 +175,9 @@ if [[ $CUDA_VERSION == 12* ]]; then
export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN' export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN'
export FORCE_RPATH="--force-rpath" export FORCE_RPATH="--force-rpath"
export USE_STATIC_NCCL=0 export USE_STATIC_NCCL=0
export USE_SYSTEM_NCCL=1
export ATEN_STATIC_CUDA=0 export ATEN_STATIC_CUDA=0
export USE_CUDA_STATIC_LINK=0 export USE_CUDA_STATIC_LINK=0
export USE_CUPTI_SO=1 export USE_CUPTI_SO=1
export NCCL_INCLUDE_DIR="/usr/local/cuda/include/"
export NCCL_LIB_DIR="/usr/local/cuda/lib64/"
fi fi
elif [[ $CUDA_VERSION == "11.8" ]]; then elif [[ $CUDA_VERSION == "11.8" ]]; then
export USE_STATIC_CUDNN=0 export USE_STATIC_CUDNN=0
@ -254,12 +254,9 @@ elif [[ $CUDA_VERSION == "11.8" ]]; then
export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN' export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN'
export FORCE_RPATH="--force-rpath" export FORCE_RPATH="--force-rpath"
export USE_STATIC_NCCL=0 export USE_STATIC_NCCL=0
export USE_SYSTEM_NCCL=1
export ATEN_STATIC_CUDA=0 export ATEN_STATIC_CUDA=0
export USE_CUDA_STATIC_LINK=0 export USE_CUDA_STATIC_LINK=0
export USE_CUPTI_SO=1 export USE_CUPTI_SO=1
export NCCL_INCLUDE_DIR="/usr/local/cuda/include/"
export NCCL_LIB_DIR="/usr/local/cuda/lib64/"
fi fi
else else
echo "Unknown cuda version $CUDA_VERSION" echo "Unknown cuda version $CUDA_VERSION"

View File

@ -57,7 +57,8 @@ if(NCCL_FOUND) # obtaining NCCL version and some sanity checks
include(CheckCXXSymbolExists) include(CheckCXXSymbolExists)
check_cxx_symbol_exists(NCCL_VERSION_CODE nccl.h NCCL_VERSION_DEFINED) check_cxx_symbol_exists(NCCL_VERSION_CODE nccl.h NCCL_VERSION_DEFINED)
if (NCCL_VERSION_DEFINED) # this condition check only works for non static NCCL linking
if (NCCL_VERSION_DEFINED AND NOT USE_STATIC_NCCL)
set(file "${PROJECT_BINARY_DIR}/detect_nccl_version.cc") set(file "${PROJECT_BINARY_DIR}/detect_nccl_version.cc")
file(WRITE ${file} " file(WRITE ${file} "
#include <iostream> #include <iostream>
@ -65,7 +66,6 @@ if(NCCL_FOUND) # obtaining NCCL version and some sanity checks
int main() int main()
{ {
std::cout << NCCL_MAJOR << '.' << NCCL_MINOR << '.' << NCCL_PATCH << std::endl; std::cout << NCCL_MAJOR << '.' << NCCL_MINOR << '.' << NCCL_PATCH << std::endl;
int x; int x;
ncclGetVersion(&x); ncclGetVersion(&x);
return x == NCCL_VERSION_CODE; return x == NCCL_VERSION_CODE;
@ -80,11 +80,9 @@ if(NCCL_FOUND) # obtaining NCCL version and some sanity checks
(include: ${NCCL_INCLUDE_DIRS}, library: ${NCCL_LIBRARIES}) Please set NCCL_INCLUDE_DIR and NCCL_LIB_DIR manually.") (include: ${NCCL_INCLUDE_DIRS}, library: ${NCCL_LIBRARIES}) Please set NCCL_INCLUDE_DIR and NCCL_LIB_DIR manually.")
endif() endif()
message(STATUS "NCCL version: ${NCCL_VERSION_FROM_HEADER}") message(STATUS "NCCL version: ${NCCL_VERSION_FROM_HEADER}")
else()
message(STATUS "NCCL version < 2.3.5-5")
endif () endif ()
set (CMAKE_REQUIRED_INCLUDES ${OLD_CMAKE_REQUIRED_INCLUDES})
set (CMAKE_REQUIRED_INCLUDES ${OLD_CMAKE_REQUIRED_INCLUDES})
message(STATUS "Found NCCL (include: ${NCCL_INCLUDE_DIRS}, library: ${NCCL_LIBRARIES})") message(STATUS "Found NCCL (include: ${NCCL_INCLUDE_DIRS}, library: ${NCCL_LIBRARIES})")
mark_as_advanced(NCCL_ROOT_DIR NCCL_INCLUDE_DIRS NCCL_LIBRARIES) mark_as_advanced(NCCL_ROOT_DIR NCCL_INCLUDE_DIRS NCCL_LIBRARIES)
endif() endif()