mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
NCCL: Re-enable parallel builds (#83696)
Since #83173 was merged, I have noticed some CI jobs being slowed down by the NCCL build step: for example, when there are no C++ changes, sccache compiles everything else very quickly and NCCL becomes the limiting factor. This re-enables parallel builds with some safeguards to protect against oversubscription. When `make` is the parent build system, we can use `$(MAKE)`, and the `make` jobserver will coordinate job allocation with the sub-process. For other build systems, this calls `make` with the `-l` flag, which should prevent it from launching new jobs while the system load average is already too high. Pull Request resolved: https://github.com/pytorch/pytorch/pull/83696 Approved by: https://github.com/malfet
This commit is contained in:
committed by
PyTorch MergeBot
parent
d5af2a70ba
commit
2000eba454
22
cmake/External/nccl.cmake
vendored
22
cmake/External/nccl.cmake
vendored
@ -15,21 +15,37 @@ if(NOT __NCCL_INCLUDED)
|
||||
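# This second replacement is needed when there are multiple archs: it turns each remaining ";-gencode" list separator into " -gencode" so the flags form one space-separated string.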
|
||||
string(REPLACE ";-gencode" " -gencode" NVCC_GENCODE "${NVCC_GENCODE}")
|
||||
|
||||
# Choose the command used to drive NCCL's Makefile.
#
# Result: MAKE_COMMAND, a list that is spliced into the BUILD_COMMAND of the
# nccl_external project.
if("${CMAKE_GENERATOR}" MATCHES "Make")
  # Recursive make with jobserver for parallelism: the parent make expands
  # $(MAKE) and shares its job slots with the sub-make.
  set(MAKE_COMMAND "$(MAKE)")
else()
  # Honour MAX_JOBS from the configure-time environment only when it is a
  # positive integer. An empty value would otherwise produce bare "-j -l",
  # i.e. unlimited jobs with no load limit, and "0" or non-numeric text would
  # make the build fail inside make.
  set(MAX_JOBS "")
  if(DEFINED ENV{MAX_JOBS})
    if("$ENV{MAX_JOBS}" MATCHES "^[1-9][0-9]*$")
      set(MAX_JOBS "$ENV{MAX_JOBS}")
    else()
      message(WARNING
        "Ignoring MAX_JOBS='$ENV{MAX_JOBS}' for the NCCL build: expected a "
        "positive integer. Falling back to the detected processor count.")
    endif()
  endif()

  if("${MAX_JOBS}" STREQUAL "")
    include(ProcessorCount)
    ProcessorCount(NUM_HARDWARE_THREADS)
    # Assume 2 hardware threads per cpu core
    math(EXPR MAX_JOBS "${NUM_HARDWARE_THREADS} / 2")
    # ProcessorCount reports 0 when it cannot determine the count, and 1 / 2
    # truncates to 0; make rejects "-j0", so never go below one job.
    if(MAX_JOBS LESS 1)
      set(MAX_JOBS 1)
    endif()
  endif()

  # Parallel build with CPU load limit to avoid oversubscription
  set(MAKE_COMMAND "make" "-j${MAX_JOBS}" "-l${MAX_JOBS}")
endif()
|
||||
|
||||
# Directory inside the CMake binary tree that is handed to NCCL's Makefile as
# BUILDDIR; the static library listed under BUILD_BYPRODUCTS lives beneath it.
set(__NCCL_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/nccl")

# Build the NCCL sources under third_party/nccl/nccl by invoking their own
# Makefile. There is no configure or install step. MAKE_COMMAND already holds
# the complete make invocation (either "$(MAKE)" or "make -jN -lN"), so it must
# be the first word of BUILD_COMMAND: prefixing it with another "env make"
# would pass the second make as a target name to the first.
ExternalProject_Add(nccl_external
  SOURCE_DIR ${PROJECT_SOURCE_DIR}/third_party/nccl/nccl
  BUILD_IN_SOURCE 1
  CONFIGURE_COMMAND ""
  BUILD_COMMAND
    ${MAKE_COMMAND}
    "CXX=${CMAKE_CXX_COMPILER}"
    "CUDA_HOME=${CUDA_TOOLKIT_ROOT_DIR}"
    "NVCC=${CUDA_NVCC_EXECUTABLE}"
    "NVCC_GENCODE=${NVCC_GENCODE}"
    "BUILDDIR=${__NCCL_BUILD_DIR}"
    "VERBOSE=0"
  # Declared once so generators such as Ninja know which step produces the
  # static library.
  BUILD_BYPRODUCTS "${__NCCL_BUILD_DIR}/lib/libnccl_static.a"
  INSTALL_COMMAND ""
)
|
||||
|
||||
|
Reference in New Issue
Block a user