[SymmMem] find_path does not search /usr/local/lib (#157695)
This PR uses `find_library` in place of `find_path`: unlike `find_path` with explicit `PATHS`, `find_library` also searches the default system library directories such as /usr/local/lib. It also searches for the NVSHMEM host library and device library separately. Tested against a system install location: /usr/local/lib and /usr/local/include.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/157695
Approved by: https://github.com/Skylion007
ghstack dependencies: #157513
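For context on the root cause: `find_path` looks for a plain file, and its default fallback search appends include-style suffixes (<prefix>/include), so it never looks under /usr/local/lib even when falling back to system prefixes. `find_library` instead searches library directories (<prefix>/lib, <prefix>/lib64, ...) by default and expands bare names with the platform conventions (foo -> libfoo.so / libfoo.a). A minimal sketch of the two behaviors, using an illustrative library name `foo` (nothing below is from the PR itself):

# find_path: searches the named file in the given PATHS plus
# include-oriented default locations (e.g. /usr/local/include),
# so a library installed to /usr/local/lib is never found this way.
find_path(FOO_LIB_DIR
  NAMES libfoo.so
  PATHS $ENV{FOO_HOME}/lib)

# find_library: expands bare NAMES with the platform prefix/suffix,
# tries each HINTS directory with the PATH_SUFFIXES, and falls back to
# system library dirs such as /usr/local/lib and /usr/local/lib64.
# On success the variable holds the full path to the matched file.
find_library(FOO_LIB
  NAMES foo foo.so.3   # the versioned name covers pip-wheel layouts
  HINTS $ENV{FOO_HOME}
  PATH_SUFFIXES lib lib64)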
@@ -999,20 +999,30 @@ elseif(USE_CUDA)
     # location, e.g.
     # `/path/to/conda/lib/python3.10/site-packages/nvidia/nvshmem`,
     # 3. Let CMake find it in the default system paths, e.g. /usr/local.
-    find_path(NVSHMEM_LIB_DIR
+    find_library(NVSHMEM_HOST_LIB
       # In pip install case, the lib suffix is `.so.3` instead of `.so`
-      NAMES libnvshmem_host.so libnvshmem_host.so.3
-      PATHS $ENV{NVSHMEM_HOME}/lib ${NVSHMEM_PY_DIR}/lib
-      DOC "The location of NVSHMEM library.")
+      NAMES nvshmem_host nvshmem_host.so.3
+      HINTS $ENV{NVSHMEM_HOME} ${NVSHMEM_PY_DIR}
+      PATH_SUFFIXES lib lib64 cuda/lib cuda/lib64 lib/x64
+      DOC "The location of NVSHMEM host library.")
+    find_library(NVSHMEM_DEVICE_LIB
+      # Device lib is a `.a` file
+      NAMES nvshmem_device
+      HINTS $ENV{NVSHMEM_HOME} ${NVSHMEM_PY_DIR}
+      PATH_SUFFIXES lib lib64 cuda/lib cuda/lib64 lib/x64
+      DOC "The location of NVSHMEM device library.")
     find_path(NVSHMEM_INCLUDE_DIR
       NAMES nvshmem.h
-      PATHS $ENV{NVSHMEM_HOME}/include ${NVSHMEM_PY_DIR}/include
+      HINTS $ENV{NVSHMEM_HOME}/include ${NVSHMEM_PY_DIR}/include
       DOC "The location of NVSHMEM headers.")
+    message(STATUS "NVSHMEM_HOST_LIB: '${NVSHMEM_HOST_LIB}'")
+    message(STATUS "NVSHMEM_DEVICE_LIB: '${NVSHMEM_DEVICE_LIB}'")
+    message(STATUS "NVSHMEM_INCLUDE_DIR: '${NVSHMEM_INCLUDE_DIR}'")
   endif()

   # If NVSHMEM_LIBRARY is found, we build torch_cuda with NVSHMEM support.
-  if(NVSHMEM_LIB_DIR AND NVSHMEM_INCLUDE_DIR)
-    message(STATUS "Building with NVSHMEM support: '${NVSHMEM_LIB_DIR}'")
+  if(NVSHMEM_HOST_LIB AND NVSHMEM_DEVICE_LIB AND NVSHMEM_INCLUDE_DIR)
+    message(STATUS "NVSHMEM found, building with NVSHMEM support")
     include_directories(${NVSHMEM_INCLUDE_DIR})

     # Linking with nvshmem requires the source binary to be built with -rdc
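Background for the next hunk: NVSHMEM's device library is a static archive of relocatable device code, so any target linking it must be compiled with -rdc (relocatable device code) and device-linked. In CMake this is a single target property; a minimal, hypothetical sketch (the target name is illustrative, not from the PR):

# Any target whose kernels call NVSHMEM device APIs needs this.
add_library(my_nvshmem_ext SHARED my_ext.cu)

# Compiles each .cu with relocatable device code (-rdc=true) and adds
# a device-link step before the final host link.
set_target_properties(my_nvshmem_ext PROPERTIES CUDA_SEPARABLE_COMPILATION ON)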
@@ -1027,12 +1037,9 @@ elseif(USE_CUDA)
     set_target_properties(nvshmem_extension PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
     target_compile_options(nvshmem_extension PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-rdc=true>)
     target_compile_options(nvshmem_extension PRIVATE "-U__CUDA_NO_HALF_OPERATORS__")
-    target_link_directories(nvshmem_extension PRIVATE ${NVSHMEM_LIB_DIR})
     target_link_libraries(nvshmem_extension PRIVATE
-      # Full path needed bc nvshmem wheel ships with .so.3 instead of .so;
-      # otherwise, we could just write `nvshmem_host`
-      ${NVSHMEM_LIB_DIR}/libnvshmem_host.so.3
-      nvshmem_device
+      ${NVSHMEM_HOST_LIB}
+      ${NVSHMEM_DEVICE_LIB}
     )
     target_compile_definitions(torch_cuda PUBLIC USE_NVSHMEM)
     target_compile_definitions(nvshmem_extension PUBLIC USE_NVSHMEM)
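A side effect visible in the hunk above: because `find_library` stores the absolute path of the matched file (including a versioned name like libnvshmem_host.so.3), its result can be handed directly to `target_link_libraries`, and the separate `target_link_directories` call plus the hand-assembled file path become unnecessary. An illustrative before/after sketch (FOO_* names are hypothetical):

# Before: directory variable + manually spelled versioned file name.
target_link_directories(my_ext PRIVATE ${FOO_LIB_DIR})
target_link_libraries(my_ext PRIVATE ${FOO_LIB_DIR}/libfoo.so.3)

# After: the find_library result is already a full path, versioned or not.
target_link_libraries(my_ext PRIVATE ${FOO_LIB})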
@@ -172,7 +172,7 @@ function(caffe2_print_configuration_summary)
   if(${USE_NCCL})
     message(STATUS "  USE_SYSTEM_NCCL     : ${USE_SYSTEM_NCCL}")
   endif()
-  message(STATUS "  NVSHMEM_LIB_DIR     : ${NVSHMEM_LIB_DIR}")
+  message(STATUS "  Found NVSHMEM       : ${NVSHMEM_INCLUDE_DIR}")
   message(STATUS "  USE_NNPACK          : ${USE_NNPACK}")
   message(STATUS "  USE_NUMPY           : ${USE_NUMPY}")
   message(STATUS "  USE_OBSERVERS       : ${USE_OBSERVERS}")
@@ -11,6 +11,7 @@ from torch.testing._internal.common_distributed import MultiProcContinousTest
 from torch.testing._internal.common_utils import (
+    instantiate_parametrized_tests,
     parametrize,
     requires_cuda_p2p_access,
     run_tests,
     skip_but_pass_in_sandcastle_if,
     skipIfRocm,
@@ -32,6 +33,7 @@ device_module = torch.get_device_module(device_type)

+@instantiate_parametrized_tests
 @requires_nvshmem()
 @requires_cuda_p2p_access()
 class NVSHMEMSymmetricMemoryTest(MultiProcContinousTest):
     def _init_device(self) -> None:
         # TODO: relieve this (seems to hang if without)
@@ -19,6 +19,7 @@
 #endif // Must be done before nvshmem.h is included

 #include <nvshmem.h>
+#include <nvshmemx.h>

 namespace c10d::nvshmem_extension {
