gloo: fix building system gloo with CUDA/HIP (#146637)

Fix incorrect linking of Gloo's libraries when building with system Gloo. Previously, either Gloo's native library or Gloo's CUDA library were linked. However, Gloo had changed such that all users of Gloo must link the native library, and can optionally link the CUDA or HIP library for Gloo + CUDA/HIP support.
This had been updated when building/linking with vendored Gloo, but not when using system Gloo.

Fixes: #146239

Reported-by: Adam J Stewart <ajstewart426@gmail.com>

Pull Request resolved: https://github.com/pytorch/pytorch/pull/146637
Approved by: https://github.com/malfet
This commit is contained in:
Nathan Brown
2025-08-06 22:56:31 +00:00
committed by PyTorch MergeBot
parent 3a2c3c8ed3
commit 93da9952a7
2 changed files with 25 additions and 25 deletions

View File

@ -1235,10 +1235,17 @@ if(USE_GLOO)
if(NOT Gloo_FOUND)
message(FATAL_ERROR "Cannot find gloo")
endif()
message("Found gloo: ${Gloo_LIBRARY}")
message("Found gloo: ${Gloo_NATIVE_LIBRARY}, cuda lib: ${Gloo_CUDA_LIBRARY}, hip lib: ${Gloo_HIP_LIBRARY}")
message("Found gloo include directories: ${Gloo_INCLUDE_DIRS}")
add_library(gloo SHARED IMPORTED)
set_target_properties(gloo PROPERTIES IMPORTED_LOCATION ${Gloo_LIBRARY})
set_target_properties(gloo PROPERTIES IMPORTED_LOCATION ${Gloo_NATIVE_LIBRARY})
if(USE_CUDA)
add_library(gloo_cuda SHARED IMPORTED)
set_target_properties(gloo_cuda PROPERTIES IMPORTED_LOCATION ${Gloo_CUDA_LIBRARY})
elseif(USE_ROCM)
add_library(gloo_hip SHARED IMPORTED)
set_target_properties(gloo_hip PROPERTIES IMPORTED_LOCATION ${Gloo_HIP_LIBRARY})
endif()
# need to use Gloo_INCLUDE_DIRS over third_party/gloo to find Gloo's auto-generated config.h
include_directories(BEFORE SYSTEM ${Gloo_INCLUDE_DIRS})
endif()

View File

@ -1,7 +1,8 @@
# Try to find the Gloo library and headers.
# Gloo_FOUND - system has Gloo lib
# Gloo_INCLUDE_DIRS - the Gloo include directory
# Gloo_LIBRARY/Gloo_NATIVE_LIBRARY - libraries needed to use Gloo
# Gloo_NATIVE_LIBRARY - base gloo library, needs to be linked
# Gloo_CUDA_LIBRARY/Gloo_HIP_LIBRARY - CUDA/HIP support library in Gloo
find_path(Gloo_INCLUDE_DIR
NAMES gloo/common/common.h
@ -10,40 +11,32 @@ find_path(Gloo_INCLUDE_DIR
find_library(Gloo_NATIVE_LIBRARY
NAMES gloo
DOC "The Gloo library (without CUDA)"
DOC "The Gloo library"
)
# Gloo has optional CUDA support
# if Gloo + CUDA is desired, Gloo_CUDA_LIBRARY
# needs to be linked into desired target
find_library(Gloo_CUDA_LIBRARY
NAMES gloo_cuda
DOC "The Gloo library (with CUDA)"
DOC "Gloo's CUDA support/code"
)
# Gloo has optional HIP support
# if Gloo + HIP is desired, Gloo_HIP_LIBRARY
# needs to be linked to desired target
find_library(Gloo_HIP_LIBRARY
NAMES gloo_hiop
DOC "Gloo's HIP support/code"
)
set(Gloo_INCLUDE_DIRS ${Gloo_INCLUDE_DIR})
# use the CUDA library depending on the Gloo_USE_CUDA variable
if (DEFINED Gloo_USE_CUDA)
if (${Gloo_USE_CUDA})
set(Gloo_LIBRARY ${Gloo_CUDA_LIBRARY})
set(Gloo_NATIVE_LIBRARY ${Gloo_NATIVE_LIBRARY})
else()
set(Gloo_LIBRARY ${Gloo_NATIVE_LIBRARY})
set(Gloo_NATIVE_LIBRARY ${Gloo_NATIVE_LIBRARY})
endif()
else()
# else try to use the CUDA library if found
if (${Gloo_CUDA_LIBRARY} STREQUAL "Gloo_CUDA_LIBRARY-NOTFOUND")
set(Gloo_LIBRARY ${Gloo_NATIVE_LIBRARY})
set(Gloo_NATIVE_LIBRARY ${Gloo_NATIVE_LIBRARY})
else()
set(Gloo_LIBRARY ${Gloo_CUDA_LIBRARY})
set(Gloo_NATIVE_LIBRARY ${Gloo_NATIVE_LIBRARY})
endif()
endif()
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(Gloo
FOUND_VAR Gloo_FOUND
REQUIRED_VARS Gloo_INCLUDE_DIR Gloo_LIBRARY
REQUIRED_VARS Gloo_INCLUDE_DIR Gloo_NATIVE_LIBRARY
)
mark_as_advanced(Gloo_FOUND)