Revert "Use official CUDAToolkit module in CMake (#154595)"

This reverts commit 08dae945ae380d80efbaf140a95abfc5d96e5100.

Reverted https://github.com/pytorch/pytorch/pull/154595 on behalf of https://github.com/malfet due to It breaks on some local setup with no clear diagnostic, but looks like it fails to find cuFile ([comment](https://github.com/pytorch/pytorch/pull/154595#issuecomment-2997959344))
This commit is contained in:
PyTorch MergeBot
2025-06-23 21:15:31 +00:00
parent 31e1274597
commit b1d62febd0
25 changed files with 4584 additions and 194 deletions

View File

@ -79,7 +79,6 @@ def package_cuda_wheel(wheel_path, desired_cuda) -> None:
os.system(f"unzip {wheel_path} -d {folder}/tmp")
libs_to_copy = [
"/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12",
"/usr/local/cuda/extras/CUPTI/lib64/libnvperf_host.so",
"/usr/local/cuda/lib64/libcudnn.so.9",
"/usr/local/cuda/lib64/libcublas.so.12",
"/usr/local/cuda/lib64/libcublasLt.so.12",

View File

@ -131,8 +131,6 @@ if [[ $CUDA_VERSION == 12* ]]; then
"/usr/local/cuda/lib64/libnvrtc-builtins.so"
"/usr/local/cuda/lib64/libcufile.so.0"
"/usr/local/cuda/lib64/libcufile_rdma.so.1"
"/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12"
"/usr/local/cuda/extras/CUPTI/lib64/libnvperf_host.so"
)
DEPS_SONAME+=(
"libcudnn_adv.so.9"
@ -151,8 +149,6 @@ if [[ $CUDA_VERSION == 12* ]]; then
"libnvrtc-builtins.so"
"libcufile.so.0"
"libcufile_rdma.so.1"
"libcupti.so.12"
"libnvperf_host.so"
)
else
echo "Using nvidia libs from pypi."

View File

@ -8,7 +8,6 @@ copy "%CUDA_PATH%\bin\cusolver*64_*.dll*" pytorch\torch\lib
copy "%CUDA_PATH%\bin\cudnn*64_*.dll*" pytorch\torch\lib
copy "%CUDA_PATH%\bin\nvrtc*64_*.dll*" pytorch\torch\lib
copy "%CUDA_PATH%\extras\CUPTI\lib64\cupti64_*.dll*" pytorch\torch\lib
copy "%CUDA_PATH%\extras\CUPTI\lib64\nvperf_host*.dll*" pytorch\torch\lib
copy "%PYTHON_LIB_PATH%\libiomp*5md.dll" pytorch\torch\lib

View File

@ -1302,7 +1302,11 @@ if(BUILD_SHARED_LIBS)
DESTINATION share/cmake/Caffe2/public
COMPONENT dev)
install(
FILES ${PROJECT_SOURCE_DIR}/cmake/Modules/FindCUDNN.cmake
DIRECTORY ${PROJECT_SOURCE_DIR}/cmake/Modules_CUDA_fix
DESTINATION share/cmake/Caffe2/
COMPONENT dev)
install(
FILES ${PROJECT_SOURCE_DIR}/cmake/Modules/FindCUDAToolkit.cmake
DESTINATION share/cmake/Caffe2/
COMPONENT dev)
install(

View File

@ -10,7 +10,8 @@ endif()
list(APPEND CMAKE_MODULE_PATH
${CMAKE_CURRENT_SOURCE_DIR}/../cmake/Modules
${CMAKE_CURRENT_SOURCE_DIR}/../cmake/public)
${CMAKE_CURRENT_SOURCE_DIR}/../cmake/public
${CMAKE_CURRENT_SOURCE_DIR}/../cmake/Modules_CUDA_fix)
cmake_policy(SET CMP0012 NEW)

View File

@ -561,7 +561,7 @@ if(USE_CUDA)
set(DELAY_LOAD_FLAGS "")
endif()
target_link_libraries(caffe2_nvrtc PRIVATE torch::nvrtc ${DELAY_LOAD_FLAGS})
target_link_libraries(caffe2_nvrtc PRIVATE caffe2::nvrtc ${DELAY_LOAD_FLAGS})
install(TARGETS caffe2_nvrtc DESTINATION "${TORCH_INSTALL_LIB_DIR}")
if(USE_NCCL)
list(APPEND Caffe2_GPU_SRCS
@ -1076,9 +1076,26 @@ elseif(USE_CUDA)
torch_cuda
)
if($ENV{ATEN_STATIC_CUDA})
target_link_libraries(torch_cuda_linalg PRIVATE CUDA::cusolver_static)
if(CUDA_VERSION_MAJOR LESS_EQUAL 11)
target_link_libraries(torch_cuda_linalg PRIVATE
CUDA::cusolver_static
${CUDAToolkit_LIBRARY_DIR}/liblapack_static.a # needed for libcusolver_static
)
elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 12)
target_link_libraries(torch_cuda_linalg PRIVATE
CUDA::cusolver_static
${CUDAToolkit_LIBRARY_DIR}/libcusolver_lapack_static.a # needed for libcusolver_static
)
endif()
else()
target_link_libraries(torch_cuda_linalg PRIVATE CUDA::cusolver)
target_link_libraries(torch_cuda_linalg PRIVATE
CUDA::cusolver
)
endif()
# NS: TODO, is this really necessary?
if(USE_MAGMA AND CAFFE2_STATIC_LINK_CUDA)
target_link_libraries(torch_cuda_linalg PRIVATE
CUDA::culibos ${CMAKE_DL_LIBS})
endif()
set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/native/cuda/LinearAlgebraStubs.cpp PROPERTIES COMPILE_FLAGS "-DBUILD_LAZY_CUDA_LINALG")
install(TARGETS torch_cuda_linalg DESTINATION "${TORCH_INSTALL_LIB_DIR}")

View File

@ -50,7 +50,7 @@ if(USE_CUDA)
if(NOT CAFFE2_USE_NVRTC)
caffe2_update_option(USE_NVRTC OFF)
endif()
list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS torch::curand torch::cufft torch::cublas)
list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS caffe2::curand caffe2::cufft caffe2::cublas)
if(CAFFE2_USE_CUDNN)
list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS torch::cudnn)
else()

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,11 @@
# This is a wrapper of the upstream `./upstream/FindCUDA.cmake` that
# automatically includes `./upstream/CMakeInitializeConfigs.cmake` before
# `./upstream/FindCUDA.cmake`. `CMakeInitializeConfigs.cmake`, which is absent
# in old CMake versions, provides definitions that the latter needs in order to
# run.
# See ./README.md for details.

# UPSTREAM_FIND_CUDA_DIR keeps its trailing slash, so the file names below are
# appended directly instead of producing a doubled `//` in the include paths.
set(UPSTREAM_FIND_CUDA_DIR "${CMAKE_CURRENT_LIST_DIR}/upstream/")
include("${UPSTREAM_FIND_CUDA_DIR}CMakeInitializeConfigs.cmake")
include("${UPSTREAM_FIND_CUDA_DIR}FindCUDA.cmake")

View File

@ -0,0 +1,27 @@
This `./upstream` subfolder contains fixes for `FindCUDA` that are introduced in
later versions of cmake but cause generator expression errors in earlier CMake
versions. Specifically:
1. a problem where a generator expression for include directories was
passed to NVCC, where the generator expression itself was prefixed by `-I`.
As the NNPACK include directory generator expression expands to multiple
directories, the second and later ones were not prefixed by `-I`, causing
NVCC to return an error. First fixed in CMake 3.7 (see
[Kitware/CMake@7ded655f](https://github.com/Kitware/CMake/commit/7ded655f)).
2. Windows VS2017 fixes that allow one to define the ccbin path
differently between earlier versions of Visual Studio and VS2017. First
introduced after 3.10.1 master version (see
[Kitware/CMake@bc88329e](https://github.com/Kitware/CMake/commit/bc88329e)).
The downside of using these fixes is that `./upstream/CMakeInitializeConfigs.cmake`,
defining some new CMake variables (added in
[Kitware/CMake@48f7e2d3](https://github.com/Kitware/CMake/commit/48f7e2d3)),
must be included before `./upstream/FindCUDA.cmake` to support older CMake
versions. A wrapper `./FindCUDA.cmake` is created to do this automatically, and
to allow submodules to use these fixes because we can't patch their
`CMakeLists.txt`.
If you need to update files under `./upstream` folder, we recommend you issue PRs
against [the CMake mainline branch](https://github.com/Kitware/CMake/blob/master/Modules/FindCUDA.cmake),
and then backport it here for earlier CMake compatibility.

View File

@ -0,0 +1,40 @@
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
# file Copyright.txt or https://cmake.org/licensing for details.
# Present in upstream, but not supported on versions of cmake we need to support
# include_guard(GLOBAL)
# Creates the cache variable `<_PREFIX>` and one `<_PREFIX>_<CONFIG>` cache
# variable for every configuration in use. Each one is initialised from the
# whitespace-stripped value of the matching `..._INIT` variable and marked as
# advanced.
#
#   _PREFIX    - base name of the cache variables to create
#   _DOCSTRING - leading part of the help text stored with each cache entry
function(cmake_initialize_per_config_variable _PREFIX _DOCSTRING)
  string(STRIP "${${_PREFIX}_INIT}" _stripped)
  set("${_PREFIX}" "${_stripped}"
    CACHE STRING "${_DOCSTRING} during all build types.")
  mark_as_advanced("${_PREFIX}")

  # Nothing else to do when per-configuration flags are switched off.
  if (CMAKE_NOT_USING_CONFIG_FLAGS)
    return()
  endif()

  # Work out which configurations to add on top of the four standard ones.
  get_property(_GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
  if (NOT _GENERATOR_IS_MULTI_CONFIG)
    if (NOT CMAKE_NO_BUILD_TYPE)
      set(CMAKE_BUILD_TYPE "${CMAKE_BUILD_TYPE_INIT}" CACHE STRING
        "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel ...")
    endif()
    set(_extra_configs ${CMAKE_BUILD_TYPE})
  else()
    set(_extra_configs ${CMAKE_CONFIGURATION_TYPES})
  endif()

  set(_CONFIGS Debug Release MinSizeRel RelWithDebInfo ${_extra_configs})
  list(REMOVE_DUPLICATES _CONFIGS)

  foreach(_config IN LISTS _CONFIGS)
    # Empty list elements carry no configuration name; ignore them.
    if (NOT "${_config}" STREQUAL "")
      string(TOUPPER "${_config}" _config_upper)
      string(STRIP "${${_PREFIX}_${_config_upper}_INIT}" _stripped)
      set("${_PREFIX}_${_config_upper}" "${_stripped}"
        CACHE STRING "${_DOCSTRING} during ${_config_upper} builds.")
      mark_as_advanced("${_PREFIX}_${_config_upper}")
    endif()
  endforeach()
endfunction()

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,106 @@
# James Bigler, NVIDIA Corp (nvidia.com - jbigler)
# Abe Stephens, SCI Institute -- http://www.sci.utah.edu/~abe/FindCuda.html
#
# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved.
#
# Copyright (c) 2007-2009
# Scientific Computing and Imaging Institute, University of Utah
#
# This code is licensed under the MIT License. See the FindCUDA.cmake script
# for the text of the license.
# The MIT License
#
# License for the specific language governing rights and limitations under
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
#
#######################################################################
# This converts a file written in makefile syntax into one that can be included
# by CMake.
# Input variables
#
# verbose:BOOL=<> OFF: Be as quiet as possible (default)
# ON : Extra output
#
# input_file:FILEPATH=<> Path to dependency file in makefile format
#
# output_file:FILEPATH=<> Path to file with dependencies in CMake readable variable
#
# Reads the makefile-style dependency file `input_file`, keeps the entries that
# exist and are not directories, and writes them to `output_file` as a
# `SET(CUDA_NVCC_DEPEND ...)` command.

# Start from empty values so that the de-duplication, sorting and output steps
# below always operate on defined variables, including when the dependency
# file is empty (some CMake versions reject list() operations on a variable
# that was never set).
set(dependency_list "")
set(cuda_nvcc_depend "")

file(READ ${input_file} depend_text)

if (NOT "${depend_text}" STREQUAL "")
  # Turn escaped spaces into plain spaces.
  string(REPLACE "\\ " " " depend_text ${depend_text})

  # Drop everything up to and including the last " : ".
  # This works for the nvcc -M generated dependency files.
  string(REGEX REPLACE "^.* : " "" depend_text ${depend_text})

  # One list element per line; trailing spaces and backslashes are removed.
  string(REGEX REPLACE "[ \\\\]*\n" ";" depend_text ${depend_text})

  foreach(file ${depend_text})
    string(REGEX REPLACE "^ +" "" file ${file})

    # OK, now if we had a UNC path, nvcc has a tendency to only output the first '/'
    # instead of '//'.  Here we will test to see if the file exists, if it doesn't then
    # try to prepend another '/' to the path and test again.  If it still fails remove the
    # path.
    if(NOT EXISTS "${file}")
      if (EXISTS "/${file}")
        set(file "/${file}")
      else()
        if(verbose)
          message(WARNING " Removing non-existent dependency file: ${file}")
        endif()
        set(file "")
      endif()
    endif()

    # Make sure we check to see if we have a file, before asking if it is not a directory.
    # if(NOT IS_DIRECTORY "") will return TRUE.
    if(file AND NOT IS_DIRECTORY "${file}")
      # If softlinks start to matter, we should change this to REALPATH.  For now we need
      # to flatten paths, because nvcc can generate stuff like /bin/../include instead of
      # just /include.
      get_filename_component(file_absolute "${file}" ABSOLUTE)
      list(APPEND dependency_list "${file_absolute}")
    endif()
  endforeach()
endif()

# Remove the duplicate entries and sort them.
list(REMOVE_DUPLICATES dependency_list)
list(SORT dependency_list)

# Emit one quoted path per line inside the generated SET() command.
foreach(file ${dependency_list})
  string(APPEND cuda_nvcc_depend " \"${file}\"\n")
endforeach()

file(WRITE ${output_file} "# Generated by: make2cmake.cmake\nSET(CUDA_NVCC_DEPEND\n ${cuda_nvcc_depend})\n\n")

View File

@ -0,0 +1,109 @@
# James Bigler, NVIDIA Corp (nvidia.com - jbigler)
# Abe Stephens, SCI Institute -- http://www.sci.utah.edu/~abe/FindCuda.html
#
# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved.
#
# Copyright (c) 2007-2009
# Scientific Computing and Imaging Institute, University of Utah
#
# This code is licensed under the MIT License. See the FindCUDA.cmake script
# for the text of the license.
# The MIT License
#
# License for the specific language governing rights and limitations under
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
#
#######################################################################
# Parses a .cubin file produced by nvcc and reports statistics about the file.
#
# Input variables
#
# input_file:FILEPATH=<>  Path to the cubin text file to read
#
# For every "code" block in the file this prints the kernel name and its
# register, local memory and shared memory figures with message().
#
# Conditions below use `if(entry MATCHES ...)`, the same form as the
# `if(line MATCHES ...)` check, so the text of an entry is matched as-is and is
# never re-interpreted as the name of another variable.

file(READ ${input_file} file_text)

if (NOT "${file_text}" STREQUAL "")
  string(REPLACE ";" "\\;" file_text ${file_text})
  # Split the text into list elements at every line that starts with "code".
  string(REPLACE "\ncode" ";code" file_text ${file_text})

  foreach(line ${file_text})
    # Only look at "code { }" blocks.
    if(line MATCHES "^code")
      # Break into individual lines.
      string(REGEX REPLACE "\n" ";" line ${line})

      foreach(entry ${line})
        # Extract kernel names.
        if (entry MATCHES "[^g]name = ([^ ]+)")
          set(entry "${CMAKE_MATCH_1}")

          # Every kernel is reported; `skip` is kept so the checks below can be
          # switched off per kernel if a filter is ever added here.
          set(skip FALSE)
          message("Kernel: ${entry}")
        endif()

        # Skip the rest of the block if necessary
        if(NOT skip)
          # Registers
          if (entry MATCHES "reg([ ]+)=([ ]+)([^ ]+)")
            set(entry "${CMAKE_MATCH_3}")
            message("Registers: ${entry}")
          endif()

          # Local memory
          if (entry MATCHES "lmem([ ]+)=([ ]+)([^ ]+)")
            set(entry "${CMAKE_MATCH_3}")
            message("Local: ${entry}")
          endif()

          # Shared memory
          if (entry MATCHES "smem([ ]+)=([ ]+)([^ ]+)")
            set(entry "${CMAKE_MATCH_3}")
            message("Shared: ${entry}")
          endif()

          # A closing brace ends the block; print a blank separator line.
          if (entry MATCHES "^}")
            message("")
          endif()
        endif()
      endforeach()
    endif()
  endforeach()
endif()

View File

@ -0,0 +1,303 @@
# James Bigler, NVIDIA Corp (nvidia.com - jbigler)
#
# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved.
#
# This code is licensed under the MIT License. See the FindCUDA.cmake script
# for the text of the license.
# The MIT License
#
# License for the specific language governing rights and limitations under
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
##########################################################################
# This file runs the nvcc commands to produce the desired output file along with
# the dependency file needed by CMake to compute dependencies. In addition the
# file checks the output of each command and if the command fails it deletes the
# output files.
# Input variables
#
# verbose:BOOL=<> OFF: Be as quiet as possible (default)
# ON : Describe each step
#
# build_configuration:STRING=<> Typically one of Debug, MinSizeRel, Release, or
# RelWithDebInfo, but it should match one of the
# entries in CUDA_HOST_FLAGS. This is the build
# configuration used when compiling the code. If
# blank or unspecified Debug is assumed as this is
# what CMake does.
#
# generated_file:STRING=<> File to generate. This argument must be passed in.
#
# generated_cubin_file:STRING=<> File to generate. This argument must be passed
# in if build_cubin is true.
# Setup section of the nvcc driver script: records the values baked into this
# file, validates the command-line inputs and assembles the include,
# definition, host-compiler and -ccbin arguments used by the nvcc invocations
# further down.
# NOTE(review): the @NAME@ tokens are presumably substituted when FindCUDA
# generates a per-source copy of this template - confirm against FindCUDA.cmake.
# The policy scope opened here is closed by the cmake_policy(POP) at the end of
# the file.
cmake_policy(PUSH)
# CMP0007 NEW: list() commands do not ignore empty elements.
cmake_policy(SET CMP0007 NEW)
# CMP0010 NEW: bad variable reference syntax is an error.
cmake_policy(SET CMP0010 NEW)
if(NOT generated_file)
message(FATAL_ERROR "You must specify generated_file on the command line")
endif()
# Set these up as variables to make reading the generated file easier
set(CMAKE_COMMAND "@CMAKE_COMMAND@") # path
set(source_file "@source_file@") # path
set(NVCC_generated_dependency_file "@NVCC_generated_dependency_file@") # path
set(cmake_dependency_file "@cmake_dependency_file@") # path
set(CUDA_make2cmake "@CUDA_make2cmake@") # path
set(CUDA_parse_cubin "@CUDA_parse_cubin@") # path
set(build_cubin @build_cubin@) # bool
set(CUDA_HOST_COMPILER "@CUDA_HOST_COMPILER@") # path
# We won't actually use these variables for now, but we need to set this, in
# order to force this file to be run again if it changes.
set(generated_file_path "@generated_file_path@") # path
set(generated_file_internal "@generated_file@") # path
set(generated_cubin_file_internal "@generated_cubin_file@") # path
set(CUDA_NVCC_EXECUTABLE "@CUDA_NVCC_EXECUTABLE@") # path
set(CUDA_NVCC_FLAGS @CUDA_NVCC_FLAGS@ ;; @CUDA_WRAP_OPTION_NVCC_FLAGS@) # list
# Placeholder line: NOTE(review) presumably expands to the per-configuration
# CUDA_NVCC_FLAGS_<CONFIG> definitions that are appended below - confirm.
@CUDA_NVCC_FLAGS_CONFIG@
set(nvcc_flags @nvcc_flags@) # list
set(CUDA_NVCC_INCLUDE_DIRS [==[@CUDA_NVCC_INCLUDE_DIRS@]==]) # list (needs to be in lua quotes to address backslashes)
# Normalise backslashes in the include directories to forward slashes.
string(REPLACE "\\" "/" CUDA_NVCC_INCLUDE_DIRS "${CUDA_NVCC_INCLUDE_DIRS}")
set(CUDA_NVCC_COMPILE_DEFINITIONS [==[@CUDA_NVCC_COMPILE_DEFINITIONS@]==]) # list (needs to be in lua quotes see #16510 ).
set(format_flag "@format_flag@") # string
set(cuda_language_flag @cuda_language_flag@) # list
# Clean up list of include directories and add -I flags
list(REMOVE_DUPLICATES CUDA_NVCC_INCLUDE_DIRS)
set(CUDA_NVCC_INCLUDE_ARGS)
foreach(dir ${CUDA_NVCC_INCLUDE_DIRS})
# Each directory becomes a single "-I<dir>" list element; no extra quote
# characters are added here.
list(APPEND CUDA_NVCC_INCLUDE_ARGS "-I${dir}")
endforeach()
# Clean up list of compile definitions, add -D flags, and append to nvcc_flags
list(REMOVE_DUPLICATES CUDA_NVCC_COMPILE_DEFINITIONS)
foreach(def ${CUDA_NVCC_COMPILE_DEFINITIONS})
list(APPEND nvcc_flags "-D${def}")
endforeach()
if(build_cubin AND NOT generated_cubin_file)
message(FATAL_ERROR "You must specify generated_cubin_file on the command line")
endif()
# This is the list of host compilation flags. Either C or CXX should already
# have been chosen by FindCUDA.cmake.
# NOTE(review): the loop below reads CMAKE_HOST_FLAGS and
# CMAKE_HOST_FLAGS_<CONFIG>, so this placeholder presumably defines them - confirm.
@CUDA_HOST_FLAGS@
# Take the compiler flags and package them up to be sent to the compiler via -Xcompiler
set(nvcc_host_compiler_flags "")
# If we weren't given a build_configuration, use Debug.
if(NOT build_configuration)
set(build_configuration Debug)
endif()
# Upper-cased so it can be used as the <CONFIG> suffix of the variables below.
string(TOUPPER "${build_configuration}" build_configuration)
#message("CUDA_NVCC_HOST_COMPILER_FLAGS = ${CUDA_NVCC_HOST_COMPILER_FLAGS}")
foreach(flag ${CMAKE_HOST_FLAGS} ${CMAKE_HOST_FLAGS_${build_configuration}})
# Extra quotes are added around each flag to help nvcc parse out flags with spaces.
# Every flag is prefixed with a comma, building one comma-separated string.
string(APPEND nvcc_host_compiler_flags ",\"${flag}\"")
endforeach()
# Only pass -Xcompiler when at least one host flag was collected.
if (nvcc_host_compiler_flags)
set(nvcc_host_compiler_flags "-Xcompiler" ${nvcc_host_compiler_flags})
endif()
#message("nvcc_host_compiler_flags = \"${nvcc_host_compiler_flags}\"")
# Add the build specific configuration flags
list(APPEND CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS_${build_configuration}})
# Any -ccbin existing in CUDA_NVCC_FLAGS gets highest priority
# (list(FIND) yields -1 when the flag is absent, hence the LESS 0 tests).
list( FIND CUDA_NVCC_FLAGS "-ccbin" ccbin_found0 )
list( FIND CUDA_NVCC_FLAGS "--compiler-bindir" ccbin_found1 )
if( ccbin_found0 LESS 0 AND ccbin_found1 LESS 0 AND CUDA_HOST_COMPILER )
# A CCBIN value supplied by the caller is used only when the host compiler is
# the MSVC placeholder value; otherwise CUDA_HOST_COMPILER is passed to -ccbin.
if (CUDA_HOST_COMPILER STREQUAL "@_CUDA_MSVC_HOST_COMPILER@" AND DEFINED CCBIN)
set(CCBIN -ccbin "${CCBIN}")
else()
set(CCBIN -ccbin "${CUDA_HOST_COMPILER}")
endif()
endif()
# cuda_execute_process(<status> COMMAND <program> [<arg>...])
#
# Runs <program> with its arguments through execute_process() and leaves the
# exit status in CUDA_result. When `verbose` is true, <status> and a printable
# form of the command line are echoed before the command runs.
#
# status  - Message echoed first when verbose is true
# command - Must be the literal keyword COMMAND
# ARGN    - The program to run followed by its arguments
#
# CUDA_result - return value from running the command
#
# This is a macro instead of a function, so that things like RESULT_VARIABLE
# and other return variables are present after executing the process.
macro(cuda_execute_process status command)
  # Reject calls that do not put the COMMAND keyword in second position.
  set(_command ${command})
  if(NOT "x${_command}" STREQUAL "xCOMMAND")
    message(FATAL_ERROR "Malformed call to cuda_execute_process. Missing COMMAND as second argument. (command = ${command})")
  endif()

  if(verbose)
    execute_process(COMMAND "${CMAKE_COMMAND}" -E echo -- ${status})

    # Assemble a copy of the command line for display. Only quotes and spaces
    # are accounted for; anything else is left to the user to fix up when
    # copying and pasting the echoed line.
    set(cuda_execute_process_string)
    foreach(arg ${ARGN})
      # Escape embedded quotes so they show up in the echoed text.
      string(REPLACE "\"" "\\\"" arg ${arg})
      if(NOT arg MATCHES " ")
        list(APPEND cuda_execute_process_string ${arg})
      else()
        # An argument containing spaces is wrapped in quotes so that it reads
        # as a single argument.
        list(APPEND cuda_execute_process_string "\"${arg}\"")
      endif()
    endforeach()

    # Echo the command
    execute_process(COMMAND ${CMAKE_COMMAND} -E echo ${cuda_execute_process_string})
  endif()

  # Run the command
  execute_process(COMMAND ${ARGN} RESULT_VARIABLE CUDA_result )
endmacro()
# Execution section: removes the old output, generates and converts the
# dependency file, compiles the source with nvcc and optionally produces a
# cubin resource report. Each step goes through cuda_execute_process, which
# leaves the exit status in CUDA_result.

# Delete the target file
cuda_execute_process(
"Removing ${generated_file}"
COMMAND "${CMAKE_COMMAND}" -E remove "${generated_file}"
)
# The flags used for dependency generation are a plain copy of CUDA_NVCC_FLAGS.
# NOTE(review): an earlier comment here said that -G is removed for CUDA 2.3
# and below because "-G -M" doesn't work; no such removal happens in this
# section, and CUDA_VERSION is set but not read below.
set(depends_CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}")
set(CUDA_VERSION @CUDA_VERSION@)
# nvcc doesn't define __CUDACC__ for some reason when generating dependency files. This
# can cause incorrect dependencies when #including files based on this macro which is
# defined in the generating passes of nvcc invocation. We will go ahead and manually
# define this for now until a future version fixes this bug.
set(CUDACC_DEFINE -D__CUDACC__)
# Generate the dependency file
cuda_execute_process(
"Generating dependency file: ${NVCC_generated_dependency_file}"
COMMAND "${CUDA_NVCC_EXECUTABLE}"
-M
${CUDACC_DEFINE}
"${source_file}"
-o "${NVCC_generated_dependency_file}"
${CCBIN}
${nvcc_flags}
${nvcc_host_compiler_flags}
${depends_CUDA_NVCC_FLAGS}
-DNVCC
${CUDA_NVCC_INCLUDE_ARGS}
)
# A non-zero result from the previous command aborts the script.
if(CUDA_result)
message(FATAL_ERROR "Error generating ${generated_file}")
endif()
# Generate the cmake readable dependency file to a temp file. Don't put the
# quotes just around the filenames for the input_file and output_file variables.
# CMake will pass the quotes through and not be able to find the file.
cuda_execute_process(
"Generating temporary cmake readable file: ${cmake_dependency_file}.tmp"
COMMAND "${CMAKE_COMMAND}"
-D "input_file:FILEPATH=${NVCC_generated_dependency_file}"
-D "output_file:FILEPATH=${cmake_dependency_file}.tmp"
-D "verbose=${verbose}"
-P "${CUDA_make2cmake}"
)
if(CUDA_result)
message(FATAL_ERROR "Error generating ${generated_file}")
endif()
# Copy the file if it is different
cuda_execute_process(
"Copy if different ${cmake_dependency_file}.tmp to ${cmake_dependency_file}"
COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${cmake_dependency_file}.tmp" "${cmake_dependency_file}"
)
if(CUDA_result)
message(FATAL_ERROR "Error generating ${generated_file}")
endif()
# Delete the temporary file
cuda_execute_process(
"Removing ${cmake_dependency_file}.tmp and ${NVCC_generated_dependency_file}"
COMMAND "${CMAKE_COMMAND}" -E remove "${cmake_dependency_file}.tmp" "${NVCC_generated_dependency_file}"
)
if(CUDA_result)
message(FATAL_ERROR "Error generating ${generated_file}")
endif()
# Generate the code
cuda_execute_process(
"Generating ${generated_file}"
COMMAND "${CUDA_NVCC_EXECUTABLE}"
"${source_file}"
${cuda_language_flag}
${format_flag} -o "${generated_file}"
${CCBIN}
${nvcc_flags}
${nvcc_host_compiler_flags}
${CUDA_NVCC_FLAGS}
-DNVCC
${CUDA_NVCC_INCLUDE_ARGS}
)
if(CUDA_result)
# Since nvcc can sometimes leave half done files make sure that we delete the output file.
cuda_execute_process(
"Removing ${generated_file}"
COMMAND "${CMAKE_COMMAND}" -E remove "${generated_file}"
)
message(FATAL_ERROR "Error generating file ${generated_file}")
else()
if(verbose)
message("Generated ${generated_file} successfully.")
endif()
endif()
# Cubin resource report commands.
# NOTE(review): unlike the steps above, CUDA_result is not checked after either
# of the two commands in this section.
if( build_cubin )
# Run with -cubin to produce resource usage report.
cuda_execute_process(
"Generating ${generated_cubin_file}"
COMMAND "${CUDA_NVCC_EXECUTABLE}"
"${source_file}"
${CUDA_NVCC_FLAGS}
${nvcc_flags}
${CCBIN}
${nvcc_host_compiler_flags}
-DNVCC
-cubin
-o "${generated_cubin_file}"
${CUDA_NVCC_INCLUDE_ARGS}
)
# Execute the parser script.
cuda_execute_process(
"Executing the parser script"
COMMAND "${CMAKE_COMMAND}"
-D "input_file:STRING=${generated_cubin_file}"
-P "${CUDA_parse_cubin}"
)
endif()
# Closes the policy scope opened by cmake_policy(PUSH) at the top of the script.
cmake_policy(POP)

View File

@ -0,0 +1,300 @@
# Synopsis:
# CUDA_SELECT_NVCC_ARCH_FLAGS(out_variable [target_CUDA_architectures])
# -- Selects GPU arch flags for nvcc based on target_CUDA_architectures
# target_CUDA_architectures : Auto | Common | All | LIST(ARCH_AND_PTX ...)
# - "Auto" detects local machine GPU compute arch at runtime.
# - "Common" and "All" cover common and entire subsets of architectures
# ARCH_AND_PTX : NAME | NUM.NUM | NUM.NUM(NUM.NUM) | NUM.NUM+PTX
# NAME: Kepler Maxwell Kepler+Tegra Kepler+Tesla Maxwell+Tegra Pascal Volta Turing Ampere
# NUM: Any number. Only those pairs are currently accepted by NVCC though:
# 3.5 3.7 5.0 5.2 5.3 6.0 6.2 7.0 7.2 7.5 8.0
# Returns LIST of flags to be added to CUDA_NVCC_FLAGS in ${out_variable}
# Additionally, sets ${out_variable}_readable to the resulting numeric list
# Example:
# CUDA_SELECT_NVCC_ARCH_FLAGS(ARCH_FLAGS 3.0 3.5+PTX 5.2(5.0) Maxwell)
# LIST(APPEND CUDA_NVCC_FLAGS ${ARCH_FLAGS})
#
# More info on CUDA architectures: https://en.wikipedia.org/wiki/CUDA
#
# When CUDA is enabled as a language and the compiler is NVIDIA's, take
# CUDA_VERSION (major.minor) from the compiler version.
if(CMAKE_CUDA_COMPILER_LOADED) # CUDA as a language
  if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA"
      AND CMAKE_CUDA_COMPILER_VERSION MATCHES "^([0-9]+\\.[0-9]+)")
    set(CUDA_VERSION "${CMAKE_MATCH_1}")
  endif()
endif()

# See: https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#gpu-feature-list

# The tables below are built up incrementally from CUDA_VERSION.
#
# CUDA_LIMIT_GPU_ARCHITECTURE is only set for toolkit ranges that need it:
# CUDA_DETECT_INSTALLED_GPUS replaces every detected architecture that is
# greater than or equal to this limit with the last entry of
# CUDA_COMMON_GPU_ARCHITECTURES.

# This list will be used for CUDA_ARCH_NAME = All option
set(CUDA_KNOWN_GPU_ARCHITECTURES "Kepler" "Maxwell")

# This list will be used for CUDA_ARCH_NAME = Common option (enabled by default)
set(CUDA_COMMON_GPU_ARCHITECTURES "3.5" "5.0")

# This list is used to filter CUDA archs when autodetecting
set(CUDA_ALL_GPU_ARCHITECTURES "3.5" "5.0")

if(CUDA_VERSION VERSION_GREATER "10.5")
  list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Ampere")
  list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "8.0")
  list(APPEND CUDA_ALL_GPU_ARCHITECTURES "8.0")

  if(CUDA_VERSION VERSION_LESS "11.1")
    set(CUDA_LIMIT_GPU_ARCHITECTURE "8.0")
    list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "8.0+PTX")
  endif()
endif()

if(NOT CUDA_VERSION VERSION_LESS "11.1")
  list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "8.6")
  list(APPEND CUDA_ALL_GPU_ARCHITECTURES "8.6")
  # A misspelled, never-read `CUDA_LIMIT_GPU_ARCHITECUTRE` used to be set to
  # "8.6" here. It has been dropped rather than respelled: a limit of "8.6"
  # would make GPU autodetection discard the 8.6 and newer architectures that
  # the blocks below add for newer toolkits.

  if(CUDA_VERSION VERSION_LESS "11.8")
    set(CUDA_LIMIT_GPU_ARCHITECTURE "8.9")
    list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "8.6+PTX")
  endif()
endif()

if(NOT CUDA_VERSION VERSION_LESS "11.8")
  list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Ada")
  list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Hopper")
  list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "8.9")
  list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "9.0")
  list(APPEND CUDA_ALL_GPU_ARCHITECTURES "8.9")
  list(APPEND CUDA_ALL_GPU_ARCHITECTURES "9.0")

  if(CUDA_VERSION VERSION_LESS "12.0")
    set(CUDA_LIMIT_GPU_ARCHITECTURE "9.0")
    list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "8.9+PTX")
    list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "9.0+PTX")
  endif()
endif()

if(NOT CUDA_VERSION VERSION_LESS "12.0")
  list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "9.0a")
  list(APPEND CUDA_ALL_GPU_ARCHITECTURES "9.0a")
  # 3.5 is taken back out of both lists from this toolkit version on.
  list(REMOVE_ITEM CUDA_COMMON_GPU_ARCHITECTURES "3.5")
  list(REMOVE_ITEM CUDA_ALL_GPU_ARCHITECTURES "3.5")
endif()

if(CUDA_VERSION VERSION_GREATER "12.6")
  list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Blackwell")
  list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "10.0")
  list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "10.0a")
  list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "10.1a")
  list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "12.0")
  list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "12.0a")
  list(APPEND CUDA_ALL_GPU_ARCHITECTURES "10.0")
  list(APPEND CUDA_ALL_GPU_ARCHITECTURES "10.0a")
  list(APPEND CUDA_ALL_GPU_ARCHITECTURES "10.1a")
  list(APPEND CUDA_ALL_GPU_ARCHITECTURES "12.0")
  list(APPEND CUDA_ALL_GPU_ARCHITECTURES "12.0a")
endif()
################################################################################################
# A function for automatic detection of GPUs installed (if autodetection is enabled)
# Usage:
#   CUDA_DETECT_INSTALLED_GPUS(OUT_VARIABLE)
#
# OUT_VARIABLE receives, in the caller's scope, either the detected compute
# capabilities (filtered against CUDA_LIMIT_GPU_ARCHITECTURE) or, when
# detection fails, CUDA_COMMON_GPU_ARCHITECTURES.
#
# A successful detection is stored in the CUDA_GPU_DETECT_OUTPUT cache entry;
# the probe program is only built and run while that entry is empty.
#
function(CUDA_DETECT_INSTALLED_GPUS OUT_VARIABLE)
  if(NOT CUDA_GPU_DETECT_OUTPUT)
    if(CMAKE_CUDA_COMPILER_LOADED) # CUDA as a language
      set(file "${PROJECT_BINARY_DIR}/detect_cuda_compute_capabilities.cu")
    else()
      set(file "${PROJECT_BINARY_DIR}/detect_cuda_compute_capabilities.cpp")
    endif()

    # Probe program: prints "<major>.<minor> " for every device it can query
    # and exits non-zero when no device is available.
    file(WRITE ${file} ""
      "#include <cuda_runtime.h>\n"
      "#include <cstdio>\n"
      "int main()\n"
      "{\n"
      " int count = 0;\n"
      " if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n"
      " if (count == 0) return -1;\n"
      " for (int device = 0; device < count; ++device)\n"
      " {\n"
      " cudaDeviceProp prop;\n"
      " if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n"
      " std::printf(\"%d.%d \", prop.major, prop.minor);\n"
      " }\n"
      " return 0;\n"
      "}\n")

    if(CMAKE_CUDA_COMPILER_LOADED) # CUDA as a language
      try_run(run_result compile_result ${PROJECT_BINARY_DIR} ${file}
        RUN_OUTPUT_VARIABLE compute_capabilities)
    else()
      try_run(run_result compile_result ${PROJECT_BINARY_DIR} ${file}
        CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${CUDA_INCLUDE_DIRS}"
        LINK_LIBRARIES ${CUDA_LIBRARIES}
        RUN_OUTPUT_VARIABLE compute_capabilities)
    endif()

    # Filter unrelated content out of the output.
    string(REGEX MATCHALL "[0-9]+\\.[0-9]+" compute_capabilities "${compute_capabilities}")

    if(run_result EQUAL 0)
      # Rewrite capability "2.1" as "2.1(2.0)". Only whole list items are
      # compared: a substring replacement would also corrupt every capability
      # that merely contains "2.1", such as "12.1".
      set(detected_capabilities "")
      foreach(capability IN LISTS compute_capabilities)
        if(capability STREQUAL "2.1")
          list(APPEND detected_capabilities "2.1(2.0)")
        else()
          list(APPEND detected_capabilities "${capability}")
        endif()
      endforeach()
      set(CUDA_GPU_DETECT_OUTPUT ${detected_capabilities}
        CACHE INTERNAL "Returned GPU architectures from detect_gpus tool" FORCE)
    endif()
  endif()

  if(NOT CUDA_GPU_DETECT_OUTPUT)
    message(STATUS "Automatic GPU detection failed. Building for common architectures.")
    set(${OUT_VARIABLE} ${CUDA_COMMON_GPU_ARCHITECTURES} PARENT_SCOPE)
  else()
    # Filter based on CUDA version supported archs
    set(CUDA_GPU_DETECT_OUTPUT_FILTERED "")
    separate_arguments(CUDA_GPU_DETECT_OUTPUT)
    foreach(ITEM IN ITEMS ${CUDA_GPU_DETECT_OUTPUT})
      # Anything at or above the limit is replaced by the last entry of the
      # common architecture list.
      if(CUDA_LIMIT_GPU_ARCHITECTURE AND (ITEM VERSION_GREATER CUDA_LIMIT_GPU_ARCHITECTURE OR
                                          ITEM VERSION_EQUAL CUDA_LIMIT_GPU_ARCHITECTURE))
        list(GET CUDA_COMMON_GPU_ARCHITECTURES -1 NEWITEM)
        string(APPEND CUDA_GPU_DETECT_OUTPUT_FILTERED " ${NEWITEM}")
      else()
        string(APPEND CUDA_GPU_DETECT_OUTPUT_FILTERED " ${ITEM}")
      endif()
    endforeach()

    set(${OUT_VARIABLE} ${CUDA_GPU_DETECT_OUTPUT_FILTERED} PARENT_SCOPE)
  endif()
endfunction()
################################################################################################
# Build the nvcc `-gencode` flags for a list of CUDA architectures.
#
# Usage:
#   CUDA_SELECT_NVCC_ARCH_FLAGS(out_variable [list of CUDA compute archs])
#
# Arguments:
#   out_variable - name of the variable that receives the flag list in the caller's
#                  scope. `<out_variable>_readable` additionally receives a
#                  space-separated summary such as "sm_86 compute_86".
#   ARGN         - architectures to build for. An empty list means "Auto".
#                  "All", "Common" and "Auto" are recognised when they are the
#                  whole list. Otherwise each entry is a numeric capability
#                  (8.6, 9.0a, 2.1(2.0)) or a generation name (Volta, Ampere,
#                  Ampere+Tegra, ...). A "+PTX" suffix on an entry also embeds
#                  PTX for it.
function(CUDA_SELECT_NVCC_ARCH_FLAGS out_variable)
  set(CUDA_ARCH_LIST "${ARGN}")

  if("X${CUDA_ARCH_LIST}" STREQUAL "X" )
    set(CUDA_ARCH_LIST "Auto")
  endif()

  set(cuda_arch_bin)
  set(cuda_arch_ptx)

  if("${CUDA_ARCH_LIST}" STREQUAL "All")
    set(CUDA_ARCH_LIST ${CUDA_KNOWN_GPU_ARCHITECTURES})
  elseif("${CUDA_ARCH_LIST}" STREQUAL "Common")
    set(CUDA_ARCH_LIST ${CUDA_COMMON_GPU_ARCHITECTURES})
  elseif("${CUDA_ARCH_LIST}" STREQUAL "Auto")
    CUDA_DETECT_INSTALLED_GPUS(CUDA_ARCH_LIST)
    message(STATUS "Autodetected CUDA architecture(s): ${CUDA_ARCH_LIST}")
  endif()

  # Now process the list and look for names
  string(REGEX REPLACE "[ \t]+" ";" CUDA_ARCH_LIST "${CUDA_ARCH_LIST}")
  list(REMOVE_DUPLICATES CUDA_ARCH_LIST)
  foreach(arch_name ${CUDA_ARCH_LIST})
    set(arch_bin)
    set(arch_ptx)
    set(add_ptx FALSE)
    # Check to see if we are compiling PTX
    if(arch_name MATCHES "(.*)\\+PTX$")
      set(add_ptx TRUE)
      # Quoted so that a bare "+PTX" entry leaves arch_name defined (and empty)
      # and is reported as an unknown architecture below.
      set(arch_name "${CMAKE_MATCH_1}")
    endif()
    if(arch_name MATCHES "^([0-9]+\\.[0-9]a?(\\([0-9]+\\.[0-9]\\))?)$")
      set(arch_bin ${CMAKE_MATCH_1})
      set(arch_ptx ${arch_bin})
    else()
      # Look for it in our list of known architectures.
      # The name is compared as a quoted string: an unquoted ${arch_name} makes
      # if() fail to parse when the name is empty and dereferences the name a
      # second time when it happens to match a variable.
      if("${arch_name}" STREQUAL "Kepler+Tesla")
        set(arch_bin 3.7)
      elseif("${arch_name}" STREQUAL "Kepler")
        set(arch_bin 3.5)
        set(arch_ptx 3.5)
      elseif("${arch_name}" STREQUAL "Maxwell+Tegra")
        set(arch_bin 5.3)
      elseif("${arch_name}" STREQUAL "Maxwell")
        set(arch_bin 5.0 5.2)
        set(arch_ptx 5.2)
      elseif("${arch_name}" STREQUAL "Pascal")
        set(arch_bin 6.0 6.1)
        set(arch_ptx 6.1)
      elseif("${arch_name}" STREQUAL "Volta+Tegra")
        set(arch_bin 7.2)
      elseif("${arch_name}" STREQUAL "Volta")
        set(arch_bin 7.0)
        set(arch_ptx 7.0)
      elseif("${arch_name}" STREQUAL "Turing")
        set(arch_bin 7.5)
        set(arch_ptx 7.5)
      elseif("${arch_name}" STREQUAL "Ampere+Tegra")
        set(arch_bin 8.7)
      elseif("${arch_name}" STREQUAL "Ampere")
        set(arch_bin 8.0 8.6)
        set(arch_ptx 8.0 8.6)
      elseif("${arch_name}" STREQUAL "Ada")
        set(arch_bin 8.9)
        set(arch_ptx 8.9)
      elseif("${arch_name}" STREQUAL "Hopper")
        set(arch_bin 9.0)
        set(arch_ptx 9.0)
      elseif("${arch_name}" STREQUAL "Blackwell+Tegra")
        set(arch_bin 10.1)
      elseif("${arch_name}" STREQUAL "Blackwell")
        set(arch_bin 10.0 12.0)
        set(arch_ptx 10.0 12.0)
      else()
        message(SEND_ERROR "Found Unknown CUDA Architecture Name in CUDA_SELECT_NVCC_ARCH_FLAGS: ${arch_name} ")
      endif()
    endif()
    if(NOT arch_bin)
      message(SEND_ERROR "arch_bin wasn't set for some reason")
    endif()
    list(APPEND cuda_arch_bin ${arch_bin})
    if(add_ptx)
      # Generation names without a dedicated PTX target fall back to their binary archs.
      if (NOT arch_ptx)
        set(arch_ptx ${arch_bin})
      endif()
      list(APPEND cuda_arch_ptx ${arch_ptx})
    endif()
  endforeach()

  # remove dots and convert to lists
  string(REGEX REPLACE "\\." "" cuda_arch_bin "${cuda_arch_bin}")
  string(REGEX REPLACE "\\." "" cuda_arch_ptx "${cuda_arch_ptx}")
  string(REGEX MATCHALL "[0-9()]+a?" cuda_arch_bin "${cuda_arch_bin}")
  string(REGEX MATCHALL "[0-9]+a?" cuda_arch_ptx "${cuda_arch_ptx}")

  if(cuda_arch_bin)
    list(REMOVE_DUPLICATES cuda_arch_bin)
  endif()
  if(cuda_arch_ptx)
    list(REMOVE_DUPLICATES cuda_arch_ptx)
  endif()

  set(nvcc_flags "")
  set(nvcc_archs_readable "")

  # Tell NVCC to add binaries for the specified GPUs
  foreach(arch ${cuda_arch_bin})
    if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)")
      # User explicitly specified ARCH for the concrete CODE
      list(APPEND nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1})
      list(APPEND nvcc_archs_readable sm_${CMAKE_MATCH_1})
    else()
      # User didn't explicitly specify ARCH for the concrete CODE, we assume ARCH=CODE
      list(APPEND nvcc_flags -gencode arch=compute_${arch},code=sm_${arch})
      list(APPEND nvcc_archs_readable sm_${arch})
    endif()
  endforeach()

  # Tell NVCC to add PTX intermediate code for the specified architectures
  foreach(arch ${cuda_arch_ptx})
    list(APPEND nvcc_flags -gencode arch=compute_${arch},code=compute_${arch})
    list(APPEND nvcc_archs_readable compute_${arch})
  endforeach()

  string(REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable}")
  set(${out_variable} ${nvcc_flags} PARENT_SCOPE)
  set(${out_variable}_readable ${nvcc_archs_readable} PARENT_SCOPE)
endfunction()

View File

@ -0,0 +1,386 @@
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
# file Copyright.txt or https://cmake.org/licensing for details.
#[=======================================================================[.rst:
FindPackageHandleStandardArgs
-----------------------------
This module provides a function intended to be used in :ref:`Find Modules`
implementing :command:`find_package(<PackageName>)` calls. It handles the
``REQUIRED``, ``QUIET`` and version-related arguments of ``find_package``.
It also sets the ``<PackageName>_FOUND`` variable. The package is
considered found if all variables listed contain valid results, e.g.
valid filepaths.
.. command:: find_package_handle_standard_args
There are two signatures::
find_package_handle_standard_args(<PackageName>
(DEFAULT_MSG|<custom-failure-message>)
<required-var>...
)
find_package_handle_standard_args(<PackageName>
[FOUND_VAR <result-var>]
[REQUIRED_VARS <required-var>...]
[VERSION_VAR <version-var>]
[HANDLE_COMPONENTS]
[CONFIG_MODE]
[FAIL_MESSAGE <custom-failure-message>]
)
The ``<PackageName>_FOUND`` variable will be set to ``TRUE`` if all
the variables ``<required-var>...`` are valid and any optional
constraints are satisfied, and ``FALSE`` otherwise. A success or
failure message may be displayed based on the results and on
whether the ``REQUIRED`` and/or ``QUIET`` option was given to
the :command:`find_package` call.
The options are:
``(DEFAULT_MSG|<custom-failure-message>)``
In the simple signature this specifies the failure message.
Use ``DEFAULT_MSG`` to ask for a default message to be computed
(recommended). Not valid in the full signature.
``FOUND_VAR <result-var>``
Obsolete. Specifies either ``<PackageName>_FOUND`` or
``<PACKAGENAME>_FOUND`` as the result variable. This exists only
for compatibility with older versions of CMake and is now ignored.
Result variables of both names are always set for compatibility.
``REQUIRED_VARS <required-var>...``
Specify the variables which are required for this package.
These may be named in the generated failure message asking the
user to set the missing variable values. Therefore these should
typically be cache entries such as ``FOO_LIBRARY`` and not output
variables like ``FOO_LIBRARIES``.
``VERSION_VAR <version-var>``
Specify the name of a variable that holds the version of the package
that has been found. This version will be checked against the
(potentially) specified required version given to the
:command:`find_package` call, including its ``EXACT`` option.
The default messages include information about the required
version and the version which has been actually found, both
if the version is ok or not.
``HANDLE_COMPONENTS``
Enable handling of package components. In this case, the command
will report which components have been found and which are missing,
and the ``<PackageName>_FOUND`` variable will be set to ``FALSE``
if any of the required components (i.e. not the ones listed after
the ``OPTIONAL_COMPONENTS`` option of :command:`find_package`) are
missing.
``CONFIG_MODE``
Specify that the calling find module is a wrapper around a
call to ``find_package(<PackageName> NO_MODULE)``. This implies
a ``VERSION_VAR`` value of ``<PackageName>_VERSION``. The command
will automatically check whether the package configuration file
was found.
``FAIL_MESSAGE <custom-failure-message>``
Specify a custom failure message instead of using the default
generated message. Not recommended.
Example for the simple signature:
.. code-block:: cmake
find_package_handle_standard_args(LibXml2 DEFAULT_MSG
LIBXML2_LIBRARY LIBXML2_INCLUDE_DIR)
The ``LibXml2`` package is considered to be found if both
``LIBXML2_LIBRARY`` and ``LIBXML2_INCLUDE_DIR`` are valid.
Then also ``LibXml2_FOUND`` is set to ``TRUE``. If it is not found
and ``REQUIRED`` was used, it fails with a
:command:`message(FATAL_ERROR)`, independent whether ``QUIET`` was
used or not. If it is found, success will be reported, including
the content of the first ``<required-var>``. On repeated CMake runs,
the same message will not be printed again.
Example for the full signature:
.. code-block:: cmake
find_package_handle_standard_args(LibArchive
REQUIRED_VARS LibArchive_LIBRARY LibArchive_INCLUDE_DIR
VERSION_VAR LibArchive_VERSION)
In this case, the ``LibArchive`` package is considered to be found if
both ``LibArchive_LIBRARY`` and ``LibArchive_INCLUDE_DIR`` are valid.
Also the version of ``LibArchive`` will be checked by using the version
contained in ``LibArchive_VERSION``. Since no ``FAIL_MESSAGE`` is given,
the default messages will be printed.
Another example for the full signature:
.. code-block:: cmake
find_package(Automoc4 QUIET NO_MODULE HINTS /opt/automoc4)
find_package_handle_standard_args(Automoc4 CONFIG_MODE)
In this case, a ``FindAutomoc4.cmake`` module wraps a call to
``find_package(Automoc4 NO_MODULE)`` and adds an additional search
directory for ``automoc4``. Then the call to
``find_package_handle_standard_args`` produces a proper success/failure
message.
#]=======================================================================]
include(${CMAKE_CURRENT_LIST_DIR}/FindPackageMessage.cmake)
# Internal helper macro: report a find failure for package ${_NAME}.
# A REQUIRED package aborts configuration with FATAL_ERROR; otherwise the text is
# printed as a STATUS line unless the package was requested QUIET.
macro(_FPHSA_FAILURE_MESSAGE _msg)
  if(${_NAME}_FIND_REQUIRED)
    message(FATAL_ERROR "${_msg}")
  elseif(NOT ${_NAME}_FIND_QUIETLY)
    message(STATUS "${_msg}")
  endif()
endmacro()
# Internal helper macro: compose and emit the failure message in CONFIG_MODE.
# Three situations are distinguished, in this order:
#   1. a config file was accepted, but another required variable is missing;
#   2. config files were considered, but none had a suitable version;
#   3. no config file was found at all.
macro(_FPHSA_HANDLE_FAILURE_CONFIG_MODE)
  if(${_NAME}_CONFIG)
    # <name>_CONFIG is set, but FOUND is false, this means that some other of the REQUIRED_VARS was not found:
    _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: missing:${MISSING_VARS} (found ${${_NAME}_CONFIG} ${VERSION_MSG})")
  elseif(${_NAME}_CONSIDERED_CONFIGS)
    # If _CONSIDERED_CONFIGS is set, the config-file has been found, but no suitable version.
    # List them all in the error message:
    set(configsText "")
    list(LENGTH ${_NAME}_CONSIDERED_CONFIGS configsCount)
    # foreach(RANGE n) is inclusive, so iterate up to the last valid index.
    math(EXPR configsCount "${configsCount} - 1")
    foreach(currentConfigIndex RANGE ${configsCount})
      list(GET ${_NAME}_CONSIDERED_CONFIGS ${currentConfigIndex} filename)
      list(GET ${_NAME}_CONSIDERED_VERSIONS ${currentConfigIndex} version)
      string(APPEND configsText " ${filename} (version ${version})\n")
    endforeach()
    if(${_NAME}_NOT_FOUND_MESSAGE)
      string(APPEND configsText " Reason given by package: ${${_NAME}_NOT_FOUND_MESSAGE}\n")
    endif()
    _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} ${VERSION_MSG}, checked the following files:\n${configsText}")
  else()
    # Simple case: No Config-file was found at all:
    _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: found neither ${_NAME}Config.cmake nor ${_NAME_LOWER}-config.cmake ${VERSION_MSG}")
  endif()
endmacro()
# Evaluate the outcome of a find module for package _NAME.
#
# _NAME      - package name; used to form <_NAME>_FOUND, <_NAME>_FIND_* etc.
# _FIRST_ARG - in the simple signature the failure message (or DEFAULT_MSG),
#              in the extended signature the first keyword.
# ARGN       - remaining arguments: required variables (simple signature) or
#              keyword arguments (extended signature).
#
# Sets <_NAME>_FOUND and <_NAME_UPPER>_FOUND in the caller's scope and prints a
# success or failure message via FIND_PACKAGE_MESSAGE / _FPHSA_FAILURE_MESSAGE.
function(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FIRST_ARG)
# Set up the arguments for `cmake_parse_arguments`.
set(options CONFIG_MODE HANDLE_COMPONENTS)
set(oneValueArgs FAIL_MESSAGE VERSION_VAR FOUND_VAR)
set(multiValueArgs REQUIRED_VARS)
# Check whether we are in 'simple' or 'extended' mode:
set(_KEYWORDS_FOR_EXTENDED_MODE ${options} ${oneValueArgs} ${multiValueArgs} )
list(FIND _KEYWORDS_FOR_EXTENDED_MODE "${_FIRST_ARG}" INDEX)
# Simple mode: the first argument is not a known keyword, so it is the failure
# message and everything after it is a required variable.
if(${INDEX} EQUAL -1)
set(FPHSA_FAIL_MESSAGE ${_FIRST_ARG})
set(FPHSA_REQUIRED_VARS ${ARGN})
set(FPHSA_VERSION_VAR)
else()
cmake_parse_arguments(FPHSA "${options}" "${oneValueArgs}" "${multiValueArgs}" ${_FIRST_ARG} ${ARGN})
if(FPHSA_UNPARSED_ARGUMENTS)
message(FATAL_ERROR "Unknown keywords given to FIND_PACKAGE_HANDLE_STANDARD_ARGS(): \"${FPHSA_UNPARSED_ARGUMENTS}\"")
endif()
if(NOT FPHSA_FAIL_MESSAGE)
set(FPHSA_FAIL_MESSAGE "DEFAULT_MSG")
endif()
# In config-mode, we rely on the variable <package>_CONFIG, which is set by find_package()
# when it successfully found the config-file, including version checking:
if(FPHSA_CONFIG_MODE)
list(INSERT FPHSA_REQUIRED_VARS 0 ${_NAME}_CONFIG)
list(REMOVE_DUPLICATES FPHSA_REQUIRED_VARS)
set(FPHSA_VERSION_VAR ${_NAME}_VERSION)
endif()
if(NOT FPHSA_REQUIRED_VARS)
message(FATAL_ERROR "No REQUIRED_VARS specified for FIND_PACKAGE_HANDLE_STANDARD_ARGS()")
endif()
endif()
# now that we collected all arguments, process them
if("x${FPHSA_FAIL_MESSAGE}" STREQUAL "xDEFAULT_MSG")
set(FPHSA_FAIL_MESSAGE "Could NOT find ${_NAME}")
endif()
# The value of the first required variable is what the success message displays.
list(GET FPHSA_REQUIRED_VARS 0 _FIRST_REQUIRED_VAR)
string(TOUPPER ${_NAME} _NAME_UPPER)
string(TOLOWER ${_NAME} _NAME_LOWER)
# FOUND_VAR is only validated here. _FOUND_VAR is not read again in this
# function; both <_NAME>_FOUND and <_NAME_UPPER>_FOUND are set below regardless.
if(FPHSA_FOUND_VAR)
if(FPHSA_FOUND_VAR MATCHES "^${_NAME}_FOUND$" OR FPHSA_FOUND_VAR MATCHES "^${_NAME_UPPER}_FOUND$")
set(_FOUND_VAR ${FPHSA_FOUND_VAR})
else()
message(FATAL_ERROR "The argument for FOUND_VAR is \"${FPHSA_FOUND_VAR}\", but only \"${_NAME}_FOUND\" and \"${_NAME_UPPER}_FOUND\" are valid names.")
endif()
else()
set(_FOUND_VAR ${_NAME_UPPER}_FOUND)
endif()
# collect all variables which were not found, so they can be printed, so the
# user knows better what went wrong (#6375)
set(MISSING_VARS "")
# DETAILS accumulates the find result; it is handed to FIND_PACKAGE_MESSAGE
# below as its "details" argument.
set(DETAILS "")
# check if all passed variables are valid
set(FPHSA_FOUND_${_NAME} TRUE)
foreach(_CURRENT_VAR ${FPHSA_REQUIRED_VARS})
if(NOT ${_CURRENT_VAR})
set(FPHSA_FOUND_${_NAME} FALSE)
string(APPEND MISSING_VARS " ${_CURRENT_VAR}")
else()
string(APPEND DETAILS "[${${_CURRENT_VAR}}]")
endif()
endforeach()
if(FPHSA_FOUND_${_NAME})
set(${_NAME}_FOUND TRUE)
set(${_NAME_UPPER}_FOUND TRUE)
else()
set(${_NAME}_FOUND FALSE)
set(${_NAME_UPPER}_FOUND FALSE)
endif()
# component handling
unset(FOUND_COMPONENTS_MSG)
unset(MISSING_COMPONENTS_MSG)
if(FPHSA_HANDLE_COMPONENTS)
foreach(comp ${${_NAME}_FIND_COMPONENTS})
if(${_NAME}_${comp}_FOUND)
if(NOT DEFINED FOUND_COMPONENTS_MSG)
set(FOUND_COMPONENTS_MSG "found components: ")
endif()
string(APPEND FOUND_COMPONENTS_MSG " ${comp}")
else()
if(NOT DEFINED MISSING_COMPONENTS_MSG)
set(MISSING_COMPONENTS_MSG "missing components: ")
endif()
string(APPEND MISSING_COMPONENTS_MSG " ${comp}")
# Only a missing component flagged <_NAME>_FIND_REQUIRED_<comp> fails the package.
if(${_NAME}_FIND_REQUIRED_${comp})
set(${_NAME}_FOUND FALSE)
string(APPEND MISSING_VARS " ${comp}")
endif()
endif()
endforeach()
set(COMPONENT_MSG "${FOUND_COMPONENTS_MSG} ${MISSING_COMPONENTS_MSG}")
string(APPEND DETAILS "[c${COMPONENT_MSG}]")
endif()
# version handling:
set(VERSION_MSG "")
set(VERSION_OK TRUE)
# check with DEFINED here as the requested or found version may be "0"
if (DEFINED ${_NAME}_FIND_VERSION)
if(DEFINED ${FPHSA_VERSION_VAR})
set(_FOUND_VERSION ${${FPHSA_VERSION_VAR}})
if(${_NAME}_FIND_VERSION_EXACT) # exact version required
# count the dots in the version string
string(REGEX REPLACE "[^.]" "" _VERSION_DOTS "${_FOUND_VERSION}")
# add one dot because there is one dot more than there are components
string(LENGTH "${_VERSION_DOTS}." _VERSION_DOTS)
# When the found version has more components than were requested, only its
# leading components (_VERSION_HEAD) are compared with the requested version.
if (_VERSION_DOTS GREATER ${_NAME}_FIND_VERSION_COUNT)
# Because of the C++ implementation of find_package() ${_NAME}_FIND_VERSION_COUNT
# is at most 4 here. Therefore a simple lookup table is used.
if (${_NAME}_FIND_VERSION_COUNT EQUAL 1)
set(_VERSION_REGEX "[^.]*")
elseif (${_NAME}_FIND_VERSION_COUNT EQUAL 2)
set(_VERSION_REGEX "[^.]*\\.[^.]*")
elseif (${_NAME}_FIND_VERSION_COUNT EQUAL 3)
set(_VERSION_REGEX "[^.]*\\.[^.]*\\.[^.]*")
else ()
set(_VERSION_REGEX "[^.]*\\.[^.]*\\.[^.]*\\.[^.]*")
endif ()
string(REGEX REPLACE "^(${_VERSION_REGEX})\\..*" "\\1" _VERSION_HEAD "${_FOUND_VERSION}")
unset(_VERSION_REGEX)
if (NOT ${_NAME}_FIND_VERSION VERSION_EQUAL _VERSION_HEAD)
set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is exact version \"${${_NAME}_FIND_VERSION}\"")
set(VERSION_OK FALSE)
else ()
set(VERSION_MSG "(found suitable exact version \"${_FOUND_VERSION}\")")
endif ()
unset(_VERSION_HEAD)
else ()
if (NOT ${_NAME}_FIND_VERSION VERSION_EQUAL _FOUND_VERSION)
set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is exact version \"${${_NAME}_FIND_VERSION}\"")
set(VERSION_OK FALSE)
else ()
set(VERSION_MSG "(found suitable exact version \"${_FOUND_VERSION}\")")
endif ()
endif ()
unset(_VERSION_DOTS)
else() # minimum version specified:
if (${_NAME}_FIND_VERSION VERSION_GREATER _FOUND_VERSION)
set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is at least \"${${_NAME}_FIND_VERSION}\"")
set(VERSION_OK FALSE)
else ()
set(VERSION_MSG "(found suitable version \"${_FOUND_VERSION}\", minimum required is \"${${_NAME}_FIND_VERSION}\")")
endif ()
endif()
else()
# if the package was not found, but a version was given, add that to the output:
if(${_NAME}_FIND_VERSION_EXACT)
set(VERSION_MSG "(Required is exact version \"${${_NAME}_FIND_VERSION}\")")
else()
set(VERSION_MSG "(Required is at least version \"${${_NAME}_FIND_VERSION}\")")
endif()
endif()
else ()
# Check with DEFINED as the found version may be 0.
if(DEFINED ${FPHSA_VERSION_VAR})
set(VERSION_MSG "(found version \"${${FPHSA_VERSION_VAR}}\")")
endif()
endif ()
# A version mismatch overrides an otherwise successful find.
if(VERSION_OK)
string(APPEND DETAILS "[v${${FPHSA_VERSION_VAR}}(${${_NAME}_FIND_VERSION})]")
else()
set(${_NAME}_FOUND FALSE)
endif()
# print the result:
if (${_NAME}_FOUND)
FIND_PACKAGE_MESSAGE(${_NAME} "Found ${_NAME}: ${${_FIRST_REQUIRED_VAR}} ${VERSION_MSG} ${COMPONENT_MSG}" "${DETAILS}")
else ()
if(FPHSA_CONFIG_MODE)
_FPHSA_HANDLE_FAILURE_CONFIG_MODE()
else()
if(NOT VERSION_OK)
_FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: ${VERSION_MSG} (found ${${_FIRST_REQUIRED_VAR}})")
else()
_FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} (missing:${MISSING_VARS}) ${VERSION_MSG}")
endif()
endif()
endif ()
# Export the final result under both spellings; both take the value of <_NAME>_FOUND.
set(${_NAME}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE)
set(${_NAME_UPPER}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE)
endfunction()

View File

@ -0,0 +1,47 @@
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
# file Copyright.txt or https://cmake.org/licensing for details.
#.rst:
# FindPackageMessage
# ------------------
#
#
#
# FIND_PACKAGE_MESSAGE(<name> "message for user" "find result details")
#
# This macro is intended to be used in FindXXX.cmake modules files. It
# will print a message once for each unique find result. This is useful
# for telling the user where a package was found. The first argument
# specifies the name (XXX) of the package. The second argument
# specifies the message to display. The third argument lists details
# about the find result so that if they change the message will be
# displayed again. The macro also obeys the QUIET argument to the
# find_package command.
#
# Example:
#
# ::
#
# if(X11_FOUND)
# FIND_PACKAGE_MESSAGE(X11 "Found X11: ${X11_X11_LIB}"
# "[${X11_X11_LIB}][${X11_INCLUDE_DIR}]")
# else()
# ...
# endif()
# Print `msg` once per distinct find result of package `pkg`.
#
# pkg     - package name; <pkg>_FIND_QUIETLY suppresses all output.
# msg     - text printed as a STATUS message.
# details - description of the find result; the message is printed again only
#           when this differs from the value cached by the previous call.
function(FIND_PACKAGE_MESSAGE pkg msg details)
  # Honour the QUIET argument of find_package().
  if(${pkg}_FIND_QUIETLY)
    return()
  endif()

  string(REPLACE "\n" "" details "${details}")
  set(cache_entry "FIND_PACKAGE_MESSAGE_DETAILS_${pkg}")

  # Avoid printing a message repeatedly for the same find result.
  if("${details}" STREQUAL "${${cache_entry}}")
    return()
  endif()

  # The message has not yet been printed.
  message(STATUS "${msg}")
  # Save the find details in the cache to avoid printing the same
  # message again.
  set("${cache_entry}" "${details}"
    CACHE INTERNAL "Details about finding ${pkg}")
endfunction()

View File

@ -0,0 +1,5 @@
If you need to update files under this folder, we recommend you issue PRs
against [the CMake mainline branch](https://github.com/Kitware/CMake/blob/master/Modules/FindCUDA.cmake),
and then backport it here for earlier CMake compatibility.
See [this](../README.md) for more details.

View File

@ -76,7 +76,7 @@ function(caffe2_print_configuration_summary)
message(STATUS " USE_CUSPARSELT : ${USE_CUSPARSELT}")
message(STATUS " USE_CUDSS : ${USE_CUDSS}")
message(STATUS " USE_CUFILE : ${USE_CUFILE}")
message(STATUS " CUDA version : ${CUDAToolkit_VERSION}")
message(STATUS " CUDA version : ${CUDA_VERSION}")
message(STATUS " USE_FLASH_ATTENTION : ${USE_FLASH_ATTENTION}")
message(STATUS " USE_MEM_EFF_ATTENTION : ${USE_MEM_EFF_ATTENTION}")
if(${USE_CUDNN})
@ -107,8 +107,9 @@ function(caffe2_print_configuration_summary)
get_target_property(__tmp torch::cudss INTERFACE_LINK_LIBRARIES)
message(STATUS " cuDSS library : ${__tmp}")
endif()
message(STATUS " CUDA include path : ${CUDAToolkit_INCLUDE_DIRS}")
message(STATUS " NVCC executable : ${CUDAToolkit_NVCC_EXECUTABLE}")
message(STATUS " nvrtc : ${CUDA_nvrtc_LIBRARY}")
message(STATUS " CUDA include path : ${CUDA_INCLUDE_DIRS}")
message(STATUS " NVCC executable : ${CUDA_NVCC_EXECUTABLE}")
message(STATUS " CUDA compiler : ${CMAKE_CUDA_COMPILER}")
message(STATUS " CUDA flags : ${CMAKE_CUDA_FLAGS}")
message(STATUS " CUDA host compiler : ${CMAKE_CUDA_HOST_COMPILER}")

View File

@ -130,7 +130,7 @@ if(@USE_CUDA@)
find_library(CAFFE2_NVRTC_LIBRARY caffe2_nvrtc PATHS "${TORCH_INSTALL_PREFIX}/lib")
list(APPEND TORCH_CUDA_LIBRARIES ${CAFFE2_NVRTC_LIBRARY})
else()
list(APPEND TORCH_CUDA_LIBRARIES torch::nvrtc)
set(TORCH_CUDA_LIBRARIES ${CUDA_NVRTC_LIB})
endif()
if(@BUILD_SHARED_LIBS@)

View File

@ -1,6 +1,13 @@
# ---[ cuda
include_guard(GLOBAL)
# Poor man's include guard
if(TARGET torch::cudart)
return()
endif()
# sccache is only supported in CMake master and not in the newest official
# release (3.11.3) yet. Hence we need our own Modules_CUDA_fix to enable sccache.
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/../Modules_CUDA_fix)
# We don't want to statically link cudart, because we rely on its dynamic linkage in
# python (follow along torch/cuda/__init__.py and usage of cudaGetErrorName).
@ -18,16 +25,9 @@ if(NOT MSVC)
set(CUDA_USE_STATIC_CUDA_RUNTIME OFF CACHE INTERNAL "")
endif()
# Enable CUDA language support
if(CUDA_TOOLKIT_ROOT_DIR AND NOT CUDAToolkit_ROOT)
set(CUDAToolkit_ROOT "${CUDA_TOOLKIT_ROOT_DIR}")
endif()
# CMP0074 - find_package will respect <PackageName>_ROOT variables
cmake_policy(SET CMP0074 NEW)
find_package(CUDAToolkit)
if(NOT CUDAToolkit_FOUND)
# Find CUDA.
find_package(CUDA)
if(NOT CUDA_FOUND)
message(WARNING
"PyTorch: CUDA cannot be found. Depending on whether you are building "
"PyTorch or a PyTorch dependent library, the next warning / error will "
@ -36,10 +36,8 @@ if(NOT CUDAToolkit_FOUND)
return()
endif()
if(CUDAToolkit_VERSION VERSION_LESS 11.0)
message(FATAL_ERROR "PyTorch requires CUDA 11.0 or above.")
endif()
# Enable CUDA language support
set(CUDAToolkit_ROOT "${CUDA_TOOLKIT_ROOT_DIR}")
# Pass clang as the host compiler, which according to the docs
# must be done before the CUDA language is enabled, see
# https://cmake.org/cmake/help/v3.15/variable/CMAKE_CUDA_HOST_COMPILER.html
@ -52,38 +50,140 @@ if("X${CMAKE_CUDA_STANDARD}" STREQUAL "X" )
endif()
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
message(STATUS "PyTorch: CUDA detected: " ${CUDAToolkit_VERSION})
message(STATUS "PyTorch: CUDA nvcc is: " ${CUDAToolkit_NVCC_EXECUTABLE})
message(STATUS "PyTorch: CUDA toolkit directory: " ${CUDAToolkit_ROOT})
# CMP0074 - find_package will respect <PackageName>_ROOT variables
cmake_policy(PUSH)
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.12.0)
cmake_policy(SET CMP0074 NEW)
endif()
# cuda_select_nvcc_arch_flags is required
cmake_policy(SET CMP0146 OLD)
find_package(CUDA)
find_package(CUDAToolkit REQUIRED)
cmake_policy(POP)
if(NOT CMAKE_CUDA_COMPILER_VERSION VERSION_EQUAL CUDAToolkit_VERSION)
message(FATAL_ERROR "Found two conflicting CUDA versions:\n"
"V${CMAKE_CUDA_COMPILER_VERSION} in '${CUDA_INCLUDE_DIRS}' and\n"
"V${CUDAToolkit_VERSION} in '${CUDAToolkit_INCLUDE_DIRS}'")
endif()
message(STATUS "PyTorch: CUDA detected: " ${CUDA_VERSION})
message(STATUS "PyTorch: CUDA nvcc is: " ${CUDA_NVCC_EXECUTABLE})
message(STATUS "PyTorch: CUDA toolkit directory: " ${CUDA_TOOLKIT_ROOT_DIR})
if(CUDA_VERSION VERSION_LESS 11.0)
message(FATAL_ERROR "PyTorch requires CUDA 11.0 or above.")
endif()
# Cross-check the CUDA version reported by FindCUDA against the version found in
# the CUDA headers, and stop the configuration when they disagree.
if(CUDA_FOUND)
  # Sometimes, we may mismatch nvcc with the CUDA headers we are
  # compiling with, e.g., if a ccache nvcc is fed to us by CUDA_NVCC_EXECUTABLE
  # but the PATH is not consistent with CUDA_HOME. It's better safe
  # than sorry: make sure everything is consistent.
  if(MSVC AND CMAKE_GENERATOR MATCHES "Visual Studio")
    # When using Visual Studio, it attempts to lock the whole binary dir when
    # `try_run` is called, which will cause the build to fail.
    string(RANDOM BUILD_SUFFIX)
    set(PROJECT_RANDOM_BINARY_DIR "${PROJECT_BINARY_DIR}/${BUILD_SUFFIX}")
  else()
    set(PROJECT_RANDOM_BINARY_DIR "${PROJECT_BINARY_DIR}")
  endif()
  # Probe program that prints CUDA_VERSION from <cuda.h> as "<major>.<minor>".
  set(file "${PROJECT_BINARY_DIR}/detect_cuda_version.cc")
  file(WRITE ${file} ""
    "#include <cuda.h>\n"
    "#include <cstdio>\n"
    "int main() {\n"
    " printf(\"%d.%d\", CUDA_VERSION / 1000, (CUDA_VERSION / 10) % 100);\n"
    " return 0;\n"
    "}\n"
    )
  # The probe has to be executed, so it is skipped when cross-compiling.
  if(NOT CMAKE_CROSSCOMPILING)
    try_run(run_result compile_result ${PROJECT_RANDOM_BINARY_DIR} ${file}
      CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${CUDA_INCLUDE_DIRS}"
      LINK_LIBRARIES ${CUDA_LIBRARIES}
      RUN_OUTPUT_VARIABLE cuda_version_from_header
      COMPILE_OUTPUT_VARIABLE output_var
      )
    if(NOT compile_result)
      message(FATAL_ERROR "PyTorch: Couldn't determine version from header: " ${output_var})
    endif()
    message(STATUS "PyTorch: Header version is: " ${cuda_version_from_header})
    # Both operands are quoted so the comparison stays well-formed even when
    # CUDA_VERSION_STRING is empty.
    if(NOT "${cuda_version_from_header}" STREQUAL "${CUDA_VERSION_STRING}")
      # Remember FindCUDA's answer for the error message below. The variable
      # name must be given literally; dereferencing it would assign to a
      # variable named after its (empty) value and leave the message blank.
      set(cuda_version_from_findcuda "${CUDA_VERSION_STRING}")
      # Force CUDA to be processed again next time.
      # TODO: I'm not sure if this counts as an implementation detail of
      # FindCUDA
      unset(CUDA_TOOLKIT_ROOT_DIR_INTERNAL CACHE)
      # Not strictly necessary, but for good luck.
      unset(CUDA_VERSION CACHE)
      # Error out
      message(FATAL_ERROR "FindCUDA says CUDA version is ${cuda_version_from_findcuda} (usually determined by nvcc), "
        "but the CUDA headers say the version is ${cuda_version_from_header}. This often occurs "
        "when you set both CUDA_HOME and CUDA_NVCC_EXECUTABLE to "
        "non-standard locations, without also setting PATH to point to the correct nvcc. "
        "Perhaps, try re-running this command again with PATH=${CUDA_TOOLKIT_ROOT_DIR}/bin:$PATH. "
        "See above log messages for more diagnostics, and see https://github.com/pytorch/pytorch/issues/8092 for more details.")
    endif()
  endif()
endif()
# ---[ CUDA libraries wrapper
# find libnvrtc.so
set(CUDA_NVRTC_LIB "${CUDA_nvrtc_LIBRARY}" CACHE FILEPATH "")
# Compute CUDA_NVRTC_SHORTHASH: the first 8 hex digits of the SHA-256 of the
# nvrtc library file, or "XXXXXXXX" when it cannot be computed.
if(CUDA_NVRTC_LIB AND NOT CUDA_NVRTC_SHORTHASH)
  find_package(Python COMPONENTS Interpreter)
  # Treated as a failure unless the interpreter is found and exits with 0.
  set(_retval 1)
  if(Python_Interpreter_FOUND)
    # execute_process() runs its COMMAND literally and does not resolve imported
    # targets such as Python::Interpreter, so the interpreter path is passed
    # explicitly.
    execute_process(
      COMMAND "${Python_EXECUTABLE}" -c
      "import hashlib;hash=hashlib.sha256();hash.update(open('${CUDA_NVRTC_LIB}','rb').read());print(hash.hexdigest()[:8])"
      RESULT_VARIABLE _retval
      OUTPUT_VARIABLE CUDA_NVRTC_SHORTHASH)
  endif()
  if(NOT _retval EQUAL 0)
    message(WARNING "Failed to compute shorthash for libnvrtc.so")
    set(CUDA_NVRTC_SHORTHASH "XXXXXXXX")
  else()
    string(STRIP "${CUDA_NVRTC_SHORTHASH}" CUDA_NVRTC_SHORTHASH)
    message(STATUS "${CUDA_NVRTC_LIB} shorthash is ${CUDA_NVRTC_SHORTHASH}")
  endif()
endif()
# Create new style imported libraries.
# Several of these libraries have a hardcoded path if CAFFE2_STATIC_LINK_CUDA
# is set. This path is where sane CUDA installations have their static
# libraries installed. This flag should only be used for binary builds, so
# end-users should never have this flag set.
# cuda
add_library(caffe2::cuda INTERFACE IMPORTED)
set_property(
TARGET caffe2::cuda PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::cuda_driver)
# cudart
add_library(torch::cudart INTERFACE IMPORTED)
if(CAFFE2_STATIC_LINK_CUDA)
target_link_libraries(torch::cudart INTERFACE CUDA::cudart_static)
set_property(
TARGET torch::cudart PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::cudart_static)
else()
target_link_libraries(torch::cudart INTERFACE CUDA::cudart)
set_property(
TARGET torch::cudart PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::cudart)
endif()
# cublas
add_library(torch::cublas INTERFACE IMPORTED)
# NOTE: cublas is always linked dynamically
add_library(caffe2::cublas INTERFACE IMPORTED)
if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
target_link_libraries(torch::cublas INTERFACE CUDA::cublas CUDA::cublasLt CUDA::cudart_static)
set_property(
TARGET caffe2::cublas PROPERTY INTERFACE_LINK_LIBRARIES
# NOTE: cublas is always linked dynamically
CUDA::cublas CUDA::cublasLt)
set_property(
TARGET caffe2::cublas APPEND PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::cudart_static rt)
else()
target_link_libraries(torch::cublas INTERFACE CUDA::cublas CUDA::cublasLt)
set_property(
TARGET caffe2::cublas PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::cublas CUDA::cublasLt)
endif()
# cudnn interface
@ -155,41 +255,47 @@ endif()
if(CAFFE2_USE_CUFILE)
add_library(torch::cufile INTERFACE IMPORTED)
if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
target_link_libraries(torch::cufile INTERFACE CUDA::cuFile_static CUDA::culibos)
set_property(
TARGET torch::cufile PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::cuFile_static)
else()
target_link_libraries(torch::cufile INTERFACE CUDA::cuFile CUDA::culibos)
set_property(
TARGET torch::cufile PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::cuFile)
endif()
else()
message(STATUS "USE_CUFILE is set to 0. Compiling without cuFile support")
endif()
# curand
add_library(torch::curand INTERFACE IMPORTED)
add_library(caffe2::curand INTERFACE IMPORTED)
if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
target_link_libraries(torch::curand INTERFACE CUDA::curand_static)
set_property(
TARGET caffe2::curand PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::curand_static)
else()
target_link_libraries(torch::curand INTERFACE CUDA::curand)
set_property(
TARGET caffe2::curand PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::curand)
endif()
# cufft
add_library(torch::cufft INTERFACE IMPORTED)
add_library(caffe2::cufft INTERFACE IMPORTED)
if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
target_link_libraries(torch::cufft INTERFACE CUDA::cufft_static_nocallback)
set_property(
TARGET caffe2::cufft PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::cufft_static_nocallback)
else()
target_link_libraries(torch::cufft INTERFACE CUDA::cufft)
set_property(
TARGET caffe2::cufft PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::cufft)
endif()
# nvrtc
add_library(torch::nvrtc INTERFACE IMPORTED)
if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
add_library(caffe2::nvrtc INTERFACE IMPORTED)
set_property(
TARGET torch::nvrtc PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::nvrtc_static CUDA::cuda_driver)
else()
set_property(
TARGET torch::nvrtc PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::nvrtc CUDA::cuda_driver)
endif()
TARGET caffe2::nvrtc PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::nvrtc caffe2::cuda)
# Add onnx namespace definition to nvcc
if(ONNX_NAMESPACE)

View File

@ -317,138 +317,6 @@ macro(torch_xpu_get_arch_list store_var)
endif()
endmacro()
################################################################################################
# Function for selecting GPU arch flags for nvcc based on CUDA architectures from parameter list
# Usage:
#   torch_cuda_select_nvcc_arch_flags(out_variable [list of CUDA compute archs])
#
# Arguments:
#   out_variable - name of the result variable in the caller's scope.
#   ARGN         - architectures to build for. The whole list may be one of the
#                  keywords "Auto" (also used when the list is empty), "All" or
#                  "Common". Otherwise each entry is either a numeric version
#                  ("8.0", "9.0a", "8.6(8.0)" meaning code 8.6 built from
#                  virtual arch 8.0) or one of the architecture names handled
#                  below ("Volta", "Ampere", ...). Entries may be separated by
#                  spaces, tabs or semicolons, and any entry may carry a "+PTX"
#                  suffix to additionally request PTX for it.
#
# Results (set with PARENT_SCOPE):
#   ${out_variable}          - list of "-gencode arch=...,code=..." arguments.
#   ${out_variable}_readable - space-separated string of the selected sm_XX /
#                              compute_XX names.
#
# Unknown architecture names are reported with message(SEND_ERROR), so
# configuration continues to the end of the run but generation is skipped.
function(torch_cuda_select_nvcc_arch_flags out_variable)
  set(CUDA_ARCH_LIST "${ARGN}")

  # No architectures given: fall back to auto-detection.
  if("X${CUDA_ARCH_LIST}" STREQUAL "X")
    set(CUDA_ARCH_LIST "Auto")
  endif()

  set(cuda_arch_bin)
  set(cuda_arch_ptx)

  # Expand the keyword forms. CUDA_KNOWN_GPU_ARCHITECTURES,
  # CUDA_COMMON_GPU_ARCHITECTURES and CUDA_DETECT_INSTALLED_GPUS are not defined
  # in this function - presumably provided by the CUDA find module; verify.
  if("${CUDA_ARCH_LIST}" STREQUAL "All")
    set(CUDA_ARCH_LIST ${CUDA_KNOWN_GPU_ARCHITECTURES})
  elseif("${CUDA_ARCH_LIST}" STREQUAL "Common")
    set(CUDA_ARCH_LIST ${CUDA_COMMON_GPU_ARCHITECTURES})
  elseif("${CUDA_ARCH_LIST}" STREQUAL "Auto")
    CUDA_DETECT_INSTALLED_GPUS(CUDA_ARCH_LIST)
    message(STATUS "Autodetected CUDA architecture(s): ${CUDA_ARCH_LIST}")
  endif()

  # Now process the list and look for names
  string(REGEX REPLACE "[ \t]+" ";" CUDA_ARCH_LIST "${CUDA_ARCH_LIST}")
  list(REMOVE_DUPLICATES CUDA_ARCH_LIST)
  foreach(arch_name ${CUDA_ARCH_LIST})
    set(arch_bin)
    set(arch_ptx)
    set(add_ptx FALSE)

    # Check to see if we are compiling PTX
    if("${arch_name}" MATCHES "(.*)\\+PTX$")
      set(add_ptx TRUE)
      # Quoted so that a bare "+PTX" leaves arch_name defined (and empty)
      # instead of unsetting it.
      set(arch_name "${CMAKE_MATCH_1}")
    endif()

    if("${arch_name}" MATCHES "^([0-9]+\\.[0-9]a?(\\([0-9]+\\.[0-9]\\))?)$")
      set(arch_bin ${CMAKE_MATCH_1})
      set(arch_ptx ${arch_bin})
    else()
      # Look for it in our list of known architectures. The name is quoted in
      # every comparison: unquoted, an empty name would expand to
      # `if( STREQUAL ...)`, which is a hard parse error rather than the
      # "unknown architecture" diagnostic below.
      if("${arch_name}" STREQUAL "Kepler+Tesla")
        set(arch_bin 3.7)
      elseif("${arch_name}" STREQUAL "Kepler")
        set(arch_bin 3.5)
        set(arch_ptx 3.5)
      elseif("${arch_name}" STREQUAL "Maxwell+Tegra")
        set(arch_bin 5.3)
      elseif("${arch_name}" STREQUAL "Maxwell")
        set(arch_bin 5.0 5.2)
        set(arch_ptx 5.2)
      elseif("${arch_name}" STREQUAL "Pascal")
        set(arch_bin 6.0 6.1)
        set(arch_ptx 6.1)
      elseif("${arch_name}" STREQUAL "Volta+Tegra")
        set(arch_bin 7.2)
      elseif("${arch_name}" STREQUAL "Volta")
        set(arch_bin 7.0)
        set(arch_ptx 7.0)
      elseif("${arch_name}" STREQUAL "Turing")
        set(arch_bin 7.5)
        set(arch_ptx 7.5)
      elseif("${arch_name}" STREQUAL "Ampere+Tegra")
        set(arch_bin 8.7)
      elseif("${arch_name}" STREQUAL "Ampere")
        set(arch_bin 8.0 8.6)
        set(arch_ptx 8.0 8.6)
      elseif("${arch_name}" STREQUAL "Ada")
        set(arch_bin 8.9)
        set(arch_ptx 8.9)
      elseif("${arch_name}" STREQUAL "Hopper")
        set(arch_bin 9.0)
        set(arch_ptx 9.0)
      elseif("${arch_name}" STREQUAL "Blackwell+Tegra")
        set(arch_bin 10.1)
      elseif("${arch_name}" STREQUAL "Blackwell")
        set(arch_bin 10.0 12.0)
        set(arch_ptx 10.0 12.0)
      else()
        message(SEND_ERROR "Found Unknown CUDA Architecture Name in torch_cuda_select_nvcc_arch_flags: ${arch_name} ")
      endif()
    endif()

    if(NOT arch_bin)
      message(SEND_ERROR "arch_bin wasn't set for some reason")
    endif()
    list(APPEND cuda_arch_bin ${arch_bin})

    if(add_ptx)
      # Names without a dedicated PTX entry reuse their binary versions.
      if(NOT arch_ptx)
        set(arch_ptx ${arch_bin})
      endif()
      list(APPEND cuda_arch_ptx ${arch_ptx})
    endif()
  endforeach()

  # remove dots and convert to lists
  string(REGEX REPLACE "\\." "" cuda_arch_bin "${cuda_arch_bin}")
  string(REGEX REPLACE "\\." "" cuda_arch_ptx "${cuda_arch_ptx}")
  string(REGEX MATCHALL "[0-9()]+a?" cuda_arch_bin "${cuda_arch_bin}")
  string(REGEX MATCHALL "[0-9]+a?" cuda_arch_ptx "${cuda_arch_ptx}")

  if(cuda_arch_bin)
    list(REMOVE_DUPLICATES cuda_arch_bin)
  endif()
  if(cuda_arch_ptx)
    list(REMOVE_DUPLICATES cuda_arch_ptx)
  endif()

  set(nvcc_flags "")
  set(nvcc_archs_readable "")

  # Tell NVCC to add binaries for the specified GPUs
  foreach(arch ${cuda_arch_bin})
    if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)")
      # User explicitly specified ARCH for the concrete CODE
      list(APPEND nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1})
      list(APPEND nvcc_archs_readable sm_${CMAKE_MATCH_1})
    else()
      # User didn't explicitly specify ARCH for the concrete CODE, we assume ARCH=CODE
      list(APPEND nvcc_flags -gencode arch=compute_${arch},code=sm_${arch})
      list(APPEND nvcc_archs_readable sm_${arch})
    endif()
  endforeach()

  # Tell NVCC to add PTX intermediate code for the specified architectures
  foreach(arch ${cuda_arch_ptx})
    list(APPEND nvcc_flags -gencode arch=compute_${arch},code=compute_${arch})
    list(APPEND nvcc_archs_readable compute_${arch})
  endforeach()

  string(REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable}")
  set(${out_variable} ${nvcc_flags} PARENT_SCOPE)
  set(${out_variable}_readable ${nvcc_archs_readable} PARENT_SCOPE)
endfunction()
##############################################################################
# Get the NVCC arch flags specified by TORCH_CUDA_ARCH_LIST and CUDA_ARCH_NAME.
# Usage:
@ -468,7 +336,8 @@ macro(torch_cuda_get_nvcc_gencode_flag store_var)
set(TORCH_CUDA_ARCH_LIST TORCH_CUDA_ARCH_LIST ${CUDA_ARCH_NAME})
endif()
torch_cuda_select_nvcc_arch_flags(${store_var} ${TORCH_CUDA_ARCH_LIST})
# Invoke cuda_select_nvcc_arch_flags from proper cmake FindCUDA.
cuda_select_nvcc_arch_flags(${store_var} ${TORCH_CUDA_ARCH_LIST})
endmacro()

View File

@ -2417,6 +2417,7 @@ def _get_cuda_arch_flags(cflags: Optional[list[str]] = None) -> list[str]:
# The default is sm_30 for CUDA 9.x and 10.x
# First check for an env var (same as used by the main setup.py)
# Can be one or more architectures, e.g. "6.1" or "3.5;5.2;6.0;6.1;7.0+PTX"
# See cmake/Modules_CUDA_fix/upstream/FindCUDA/select_compute_arch.cmake
_arch_list = os.environ.get('TORCH_CUDA_ARCH_LIST', None)
# If not given, determine what's best for the GPU / CUDA version that can be found