mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Revert "Use official CUDAToolkit module in CMake (#154595)"
This reverts commit 08dae945ae380d80efbaf140a95abfc5d96e5100. Reverted https://github.com/pytorch/pytorch/pull/154595 on behalf of https://github.com/malfet due to It breaks on some local setup with no clear diagnostic, but looks like it fails to find cuFile ([comment](https://github.com/pytorch/pytorch/pull/154595#issuecomment-2997959344))
This commit is contained in:
@ -79,7 +79,6 @@ def package_cuda_wheel(wheel_path, desired_cuda) -> None:
|
||||
os.system(f"unzip {wheel_path} -d {folder}/tmp")
|
||||
libs_to_copy = [
|
||||
"/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12",
|
||||
"/usr/local/cuda/extras/CUPTI/lib64/libnvperf_host.so",
|
||||
"/usr/local/cuda/lib64/libcudnn.so.9",
|
||||
"/usr/local/cuda/lib64/libcublas.so.12",
|
||||
"/usr/local/cuda/lib64/libcublasLt.so.12",
|
||||
|
@ -131,8 +131,6 @@ if [[ $CUDA_VERSION == 12* ]]; then
|
||||
"/usr/local/cuda/lib64/libnvrtc-builtins.so"
|
||||
"/usr/local/cuda/lib64/libcufile.so.0"
|
||||
"/usr/local/cuda/lib64/libcufile_rdma.so.1"
|
||||
"/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12"
|
||||
"/usr/local/cuda/extras/CUPTI/lib64/libnvperf_host.so"
|
||||
)
|
||||
DEPS_SONAME+=(
|
||||
"libcudnn_adv.so.9"
|
||||
@ -151,8 +149,6 @@ if [[ $CUDA_VERSION == 12* ]]; then
|
||||
"libnvrtc-builtins.so"
|
||||
"libcufile.so.0"
|
||||
"libcufile_rdma.so.1"
|
||||
"libcupti.so.12"
|
||||
"libnvperf_host.so"
|
||||
)
|
||||
else
|
||||
echo "Using nvidia libs from pypi."
|
||||
|
@ -8,7 +8,6 @@ copy "%CUDA_PATH%\bin\cusolver*64_*.dll*" pytorch\torch\lib
|
||||
copy "%CUDA_PATH%\bin\cudnn*64_*.dll*" pytorch\torch\lib
|
||||
copy "%CUDA_PATH%\bin\nvrtc*64_*.dll*" pytorch\torch\lib
|
||||
copy "%CUDA_PATH%\extras\CUPTI\lib64\cupti64_*.dll*" pytorch\torch\lib
|
||||
copy "%CUDA_PATH%\extras\CUPTI\lib64\nvperf_host*.dll*" pytorch\torch\lib
|
||||
|
||||
copy "%PYTHON_LIB_PATH%\libiomp*5md.dll" pytorch\torch\lib
|
||||
|
||||
|
@ -1302,7 +1302,11 @@ if(BUILD_SHARED_LIBS)
|
||||
DESTINATION share/cmake/Caffe2/public
|
||||
COMPONENT dev)
|
||||
install(
|
||||
FILES ${PROJECT_SOURCE_DIR}/cmake/Modules/FindCUDNN.cmake
|
||||
DIRECTORY ${PROJECT_SOURCE_DIR}/cmake/Modules_CUDA_fix
|
||||
DESTINATION share/cmake/Caffe2/
|
||||
COMPONENT dev)
|
||||
install(
|
||||
FILES ${PROJECT_SOURCE_DIR}/cmake/Modules/FindCUDAToolkit.cmake
|
||||
DESTINATION share/cmake/Caffe2/
|
||||
COMPONENT dev)
|
||||
install(
|
||||
|
@ -10,7 +10,8 @@ endif()
|
||||
|
||||
list(APPEND CMAKE_MODULE_PATH
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../cmake/Modules
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../cmake/public)
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../cmake/public
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../cmake/Modules_CUDA_fix)
|
||||
|
||||
cmake_policy(SET CMP0012 NEW)
|
||||
|
||||
|
@ -561,7 +561,7 @@ if(USE_CUDA)
|
||||
set(DELAY_LOAD_FLAGS "")
|
||||
endif()
|
||||
|
||||
target_link_libraries(caffe2_nvrtc PRIVATE torch::nvrtc ${DELAY_LOAD_FLAGS})
|
||||
target_link_libraries(caffe2_nvrtc PRIVATE caffe2::nvrtc ${DELAY_LOAD_FLAGS})
|
||||
install(TARGETS caffe2_nvrtc DESTINATION "${TORCH_INSTALL_LIB_DIR}")
|
||||
if(USE_NCCL)
|
||||
list(APPEND Caffe2_GPU_SRCS
|
||||
@ -1076,9 +1076,26 @@ elseif(USE_CUDA)
|
||||
torch_cuda
|
||||
)
|
||||
if($ENV{ATEN_STATIC_CUDA})
|
||||
target_link_libraries(torch_cuda_linalg PRIVATE CUDA::cusolver_static)
|
||||
if(CUDA_VERSION_MAJOR LESS_EQUAL 11)
|
||||
target_link_libraries(torch_cuda_linalg PRIVATE
|
||||
CUDA::cusolver_static
|
||||
${CUDAToolkit_LIBRARY_DIR}/liblapack_static.a # needed for libcusolver_static
|
||||
)
|
||||
elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 12)
|
||||
target_link_libraries(torch_cuda_linalg PRIVATE
|
||||
CUDA::cusolver_static
|
||||
${CUDAToolkit_LIBRARY_DIR}/libcusolver_lapack_static.a # needed for libcusolver_static
|
||||
)
|
||||
endif()
|
||||
else()
|
||||
target_link_libraries(torch_cuda_linalg PRIVATE CUDA::cusolver)
|
||||
target_link_libraries(torch_cuda_linalg PRIVATE
|
||||
CUDA::cusolver
|
||||
)
|
||||
endif()
|
||||
# NS: TODO, is this really necessary?
|
||||
if(USE_MAGMA AND CAFFE2_STATIC_LINK_CUDA)
|
||||
target_link_libraries(torch_cuda_linalg PRIVATE
|
||||
CUDA::culibos ${CMAKE_DL_LIBS})
|
||||
endif()
|
||||
set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/native/cuda/LinearAlgebraStubs.cpp PROPERTIES COMPILE_FLAGS "-DBUILD_LAZY_CUDA_LINALG")
|
||||
install(TARGETS torch_cuda_linalg DESTINATION "${TORCH_INSTALL_LIB_DIR}")
|
||||
|
@ -50,7 +50,7 @@ if(USE_CUDA)
|
||||
if(NOT CAFFE2_USE_NVRTC)
|
||||
caffe2_update_option(USE_NVRTC OFF)
|
||||
endif()
|
||||
list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS torch::curand torch::cufft torch::cublas)
|
||||
list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS caffe2::curand caffe2::cufft caffe2::cublas)
|
||||
if(CAFFE2_USE_CUDNN)
|
||||
list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS torch::cudnn)
|
||||
else()
|
||||
|
1081
cmake/Modules/FindCUDAToolkit.cmake
Normal file
1081
cmake/Modules/FindCUDAToolkit.cmake
Normal file
File diff suppressed because it is too large
Load Diff
11
cmake/Modules_CUDA_fix/FindCUDA.cmake
Normal file
11
cmake/Modules_CUDA_fix/FindCUDA.cmake
Normal file
@ -0,0 +1,11 @@
|
||||
# This is a wrapper of the upstream `./upstream/FindCUDA.cmake` that
|
||||
# automatically includes `./upstream/CMakeInitializeConfigs.cmake` before
|
||||
# `./upstream/FindCUDA.cmake`. The `CMakeInitializeConfigs.cmake`, which is
|
||||
# absent in old CMake versions, creates some necessary variables for the latter
|
||||
# to run.
|
||||
# See ./README.md for details.
|
||||
|
||||
set(UPSTREAM_FIND_CUDA_DIR "${CMAKE_CURRENT_LIST_DIR}/upstream/")
|
||||
|
||||
include("${UPSTREAM_FIND_CUDA_DIR}/CMakeInitializeConfigs.cmake")
|
||||
include("${UPSTREAM_FIND_CUDA_DIR}/FindCUDA.cmake")
|
27
cmake/Modules_CUDA_fix/README.md
Normal file
27
cmake/Modules_CUDA_fix/README.md
Normal file
@ -0,0 +1,27 @@
|
||||
This `./upstream` subfolder contains fixes for `FindCUDA` that are introduced in
|
||||
later versions of cmake but cause generator expression errors in earlier CMake
|
||||
versions. Specifically:
|
||||
|
||||
1. a problem where a generator expression for include directories was
|
||||
passed to NVCC, where the generator expression itself was prefixed by `-I`.
|
||||
As the NNPACK include directory generator expression expands to multiple
|
||||
directories, the second and later ones were not prefixed by `-I`, causing
|
||||
NVCC to return an error. First fixed in CMake 3.7 (see
|
||||
[Kitware/CMake@7ded655f](https://github.com/Kitware/CMake/commit/7ded655f)).
|
||||
|
||||
2. Windows VS2017 fixes that allow one to define the ccbin path
|
||||
differently between earlier versions of Visual Studio and VS2017. First
|
||||
introduced after 3.10.1 master version (see
|
||||
[Kitware/CMake@bc88329e](https://github.com/Kitware/CMake/commit/bc88329e)).
|
||||
|
||||
The downside of using these fixes is that `./upstream/CMakeInitializeConfigs.cmake`,
|
||||
defining some new CMake variables (added in
|
||||
[Kitware/CMake@48f7e2d3](https://github.com/Kitware/CMake/commit/48f7e2d3)),
|
||||
must be included before `./upstream/FindCUDA.cmake` to support older CMake
|
||||
versions. A wrapper `./FindCUDA.cmake` is created to do this automatically, and
|
||||
to allow submodules to use these fixes because we can't patch their
|
||||
`CMakeLists.txt`.
|
||||
|
||||
If you need to update files under `./upstream` folder, we recommend you issue PRs
|
||||
against [the CMake mainline branch](https://github.com/Kitware/CMake/blob/master/Modules/FindCUDA.cmake),
|
||||
and then backport it here for earlier CMake compatibility.
|
40
cmake/Modules_CUDA_fix/upstream/CMakeInitializeConfigs.cmake
Normal file
40
cmake/Modules_CUDA_fix/upstream/CMakeInitializeConfigs.cmake
Normal file
@ -0,0 +1,40 @@
|
||||
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
|
||||
# file Copyright.txt or https://cmake.org/licensing for details.
|
||||
|
||||
# Present in upstream, but not supported on versions of cmake we need to support
|
||||
# include_guard(GLOBAL)
|
||||
|
||||
# cmake_initialize_per_config_variable(<prefix> <docstring>)
#
# Creates the advanced cache entry `<prefix>` from `<prefix>_INIT` and, unless
# CMAKE_NOT_USING_CONFIG_FLAGS is set, one advanced cache entry
# `<prefix>_<CONFIG>` from `<prefix>_<CONFIG>_INIT` for each configuration in
# use. Every `_INIT` value has leading/trailing whitespace stripped first.
#
#   <prefix>    - base name of the cache variables to create.
#   <docstring> - start of the cache help text; " during ... builds." is
#                 appended to it.
function(cmake_initialize_per_config_variable _PREFIX _DOCSTRING)
  # Configuration-independent entry.
  string(STRIP "${${_PREFIX}_INIT}" _initial_value)
  set("${_PREFIX}" "${_initial_value}"
    CACHE STRING "${_DOCSTRING} during all build types.")
  mark_as_advanced("${_PREFIX}")

  # Nothing else to do when per-configuration flags are disabled.
  if (CMAKE_NOT_USING_CONFIG_FLAGS)
    return()
  endif()

  get_property(_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)

  # Single-config generators: create the CMAKE_BUILD_TYPE cache entry before
  # its value is read below.
  if (NOT _multi_config AND NOT CMAKE_NO_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE "${CMAKE_BUILD_TYPE_INIT}" CACHE STRING
      "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel ...")
  endif()

  # The four standard configurations plus whatever the generator adds.
  set(_config_names Debug Release MinSizeRel RelWithDebInfo)
  if (_multi_config)
    list(APPEND _config_names ${CMAKE_CONFIGURATION_TYPES})
  else()
    list(APPEND _config_names ${CMAKE_BUILD_TYPE})
  endif()
  list(REMOVE_DUPLICATES _config_names)

  foreach(_config IN LISTS _config_names)
    if (NOT "${_config}" STREQUAL "")
      # Variable names and help text use the upper-cased configuration name.
      string(TOUPPER "${_config}" _config_upper)
      string(STRIP "${${_PREFIX}_${_config_upper}_INIT}" _initial_value)
      set("${_PREFIX}_${_config_upper}" "${_initial_value}"
        CACHE STRING "${_DOCSTRING} during ${_config_upper} builds.")
      mark_as_advanced("${_PREFIX}_${_config_upper}")
    endif()
  endforeach()
endfunction()
|
1982
cmake/Modules_CUDA_fix/upstream/FindCUDA.cmake
Normal file
1982
cmake/Modules_CUDA_fix/upstream/FindCUDA.cmake
Normal file
File diff suppressed because it is too large
Load Diff
106
cmake/Modules_CUDA_fix/upstream/FindCUDA/make2cmake.cmake
Normal file
106
cmake/Modules_CUDA_fix/upstream/FindCUDA/make2cmake.cmake
Normal file
@ -0,0 +1,106 @@
|
||||
# James Bigler, NVIDIA Corp (nvidia.com - jbigler)
|
||||
# Abe Stephens, SCI Institute -- http://www.sci.utah.edu/~abe/FindCuda.html
|
||||
#
|
||||
# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved.
|
||||
#
|
||||
# Copyright (c) 2007-2009
|
||||
# Scientific Computing and Imaging Institute, University of Utah
|
||||
#
|
||||
# This code is licensed under the MIT License. See the FindCUDA.cmake script
|
||||
# for the text of the license.
|
||||
|
||||
# The MIT License
|
||||
#
|
||||
# License for the specific language governing rights and limitations under
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included
|
||||
# in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
# DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
|
||||
#######################################################################
|
||||
# This converts a file written in makefile syntax into one that can be included
|
||||
# by CMake.
|
||||
|
||||
# Input variables
|
||||
#
|
||||
# verbose:BOOL=<> OFF: Be as quiet as possible (default)
|
||||
# ON : Extra output
|
||||
#
|
||||
# input_file:FILEPATH=<> Path to dependency file in makefile format
|
||||
#
|
||||
# output_file:FILEPATH=<> Path to file with dependencies in CMake readable variable
|
||||
#
|
||||
|
||||
# Convert the makefile-syntax dependency file `input_file` into `output_file`,
# a CMake script that sets CUDA_NVCC_DEPEND to the sorted, de-duplicated list
# of dependency files that exist on disk.

# Define both accumulators up front so that the list(), foreach() and
# file(WRITE) commands after the conditional always see a defined (possibly
# empty) value, including when the dependency file is empty and the parsing
# branch below is skipped.
set(dependency_list "")
set(cuda_nvcc_depend "")

file(READ "${input_file}" depend_text)

if (NOT "${depend_text}" STREQUAL "")

  # Turn escaped spaces ("\ ") into plain spaces. The text is passed quoted so
  # it reaches string() as a single argument whatever characters it contains.
  string(REPLACE "\\ " " " depend_text "${depend_text}")

  # This works for the nvcc -M generated dependency files: drop the leading
  # text through " : ", then replace each line break (together with any spaces
  # or backslashes in front of it) with ";" to obtain a CMake list.
  string(REGEX REPLACE "^.* : " "" depend_text "${depend_text}")
  string(REGEX REPLACE "[ \\\\]*\n" ";" depend_text "${depend_text}")

  foreach(file ${depend_text})

    # Strip leading spaces left over from the makefile layout.
    string(REGEX REPLACE "^ +" "" file "${file}")

    # OK, now if we had a UNC path, nvcc has a tendency to only output the first '/'
    # instead of '//'.  Here we will test to see if the file exists, if it doesn't then
    # try to prepend another '/' to the path and test again.  If it still fails remove the
    # path.

    if(NOT EXISTS "${file}")
      if (EXISTS "/${file}")
        set(file "/${file}")
      else()
        if(verbose)
          message(WARNING " Removing non-existent dependency file: ${file}")
        endif()
        set(file "")
      endif()
    endif()

    # Make sure we check to see if we have a file, before asking if it is not a directory.
    # if(NOT IS_DIRECTORY "") will return TRUE.
    if(file AND NOT IS_DIRECTORY "${file}")
      # If softlinks start to matter, we should change this to REALPATH.  For now we need
      # to flatten paths, because nvcc can generate stuff like /bin/../include instead of
      # just /include.
      get_filename_component(file_absolute "${file}" ABSOLUTE)
      list(APPEND dependency_list "${file_absolute}")
    endif()

  endforeach()

endif()

# Remove the duplicate entries and sort them.
list(REMOVE_DUPLICATES dependency_list)
list(SORT dependency_list)

# Emit one quoted path per line inside the generated SET() call.
foreach(file ${dependency_list})
  string(APPEND cuda_nvcc_depend " \"${file}\"\n")
endforeach()

file(WRITE "${output_file}" "# Generated by: make2cmake.cmake\nSET(CUDA_NVCC_DEPEND\n ${cuda_nvcc_depend})\n\n")
|
109
cmake/Modules_CUDA_fix/upstream/FindCUDA/parse_cubin.cmake
Normal file
109
cmake/Modules_CUDA_fix/upstream/FindCUDA/parse_cubin.cmake
Normal file
@ -0,0 +1,109 @@
|
||||
# James Bigler, NVIDIA Corp (nvidia.com - jbigler)
|
||||
# Abe Stephens, SCI Institute -- http://www.sci.utah.edu/~abe/FindCuda.html
|
||||
#
|
||||
# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved.
|
||||
#
|
||||
# Copyright (c) 2007-2009
|
||||
# Scientific Computing and Imaging Institute, University of Utah
|
||||
#
|
||||
# This code is licensed under the MIT License. See the FindCUDA.cmake script
|
||||
# for the text of the license.
|
||||
|
||||
# The MIT License
|
||||
#
|
||||
# License for the specific language governing rights and limitations under
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included
|
||||
# in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
# DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
|
||||
#######################################################################
|
||||
# Parses a .cubin file produced by nvcc and reports statistics about the file.
|
||||
|
||||
|
||||
# Read the file named by ${input_file} and print, via message(), the kernel
# name and the reg / lmem / smem values found in each chunk that starts with
# "code".
file(READ ${input_file} file_text)
|
||||
|
||||
if (NOT "${file_text}" STREQUAL "")
|
||||
|
||||
# Turn every "\ncode" boundary into a list separator so the foreach below
# visits one chunk per code block.
# NOTE(review): ${file_text} is expanded unquoted in both string() calls, so
# any ';' already in the text splits it into several input arguments before
# REPLACE sees it -- confirm the ';' escaping behaves as intended.
string(REPLACE ";" "\\;" file_text ${file_text})
|
||||
string(REPLACE "\ncode" ";code" file_text ${file_text})
|
||||
|
||||
# len is not read again anywhere in this script.
list(LENGTH file_text len)
|
||||
|
||||
foreach(line ${file_text})
|
||||
|
||||
# Only look at "code { }" blocks.
|
||||
if(line MATCHES "^code")
|
||||
|
||||
# Break into individual lines.
|
||||
string(REGEX REPLACE "\n" ";" line ${line})
|
||||
|
||||
foreach(entry ${line})
|
||||
|
||||
# Extract kernel names.
|
||||
if (${entry} MATCHES "[^g]name = ([^ ]+)")
|
||||
# From here on entry holds only the captured kernel name, so the checks
# further down test that name rather than the original line.
set(entry "${CMAKE_MATCH_1}")
|
||||
|
||||
# Check to see if the kernel name starts with "_"
|
||||
set(skip FALSE)
|
||||
# if (${entry} MATCHES "^_")
|
||||
# Skip the rest of this block.
|
||||
# message("Skipping ${entry}")
|
||||
# set(skip TRUE)
|
||||
# else ()
|
||||
message("Kernel: ${entry}")
|
||||
# endif ()
|
||||
|
||||
endif()
|
||||
|
||||
# Skip the rest of the block if necessary
|
||||
# skip is only ever set to FALSE above (the branch that would set TRUE is
# commented out), so this condition always holds.
if(NOT skip)
|
||||
|
||||
# Registers
|
||||
if (${entry} MATCHES "reg([ ]+)=([ ]+)([^ ]+)")
|
||||
set(entry "${CMAKE_MATCH_3}")
|
||||
message("Registers: ${entry}")
|
||||
endif()
|
||||
|
||||
# Local memory
|
||||
if (${entry} MATCHES "lmem([ ]+)=([ ]+)([^ ]+)")
|
||||
set(entry "${CMAKE_MATCH_3}")
|
||||
message("Local: ${entry}")
|
||||
endif()
|
||||
|
||||
# Shared memory
|
||||
if (${entry} MATCHES "smem([ ]+)=([ ]+)([^ ]+)")
|
||||
set(entry "${CMAKE_MATCH_3}")
|
||||
message("Shared: ${entry}")
|
||||
endif()
|
||||
|
||||
# A line starting with "}" ends the block; print an empty message after it.
if (${entry} MATCHES "^}")
|
||||
message("")
|
||||
endif()
|
||||
|
||||
endif()
|
||||
|
||||
|
||||
endforeach()
|
||||
|
||||
endif()
|
||||
|
||||
endforeach()
|
||||
|
||||
else()
|
||||
# message("FOUND NO DEPENDS")
|
||||
endif()
|
303
cmake/Modules_CUDA_fix/upstream/FindCUDA/run_nvcc.cmake
Normal file
303
cmake/Modules_CUDA_fix/upstream/FindCUDA/run_nvcc.cmake
Normal file
@ -0,0 +1,303 @@
|
||||
# James Bigler, NVIDIA Corp (nvidia.com - jbigler)
|
||||
#
|
||||
# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved.
|
||||
#
|
||||
# This code is licensed under the MIT License. See the FindCUDA.cmake script
|
||||
# for the text of the license.
|
||||
|
||||
# The MIT License
|
||||
#
|
||||
# License for the specific language governing rights and limitations under
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included
|
||||
# in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
# DEALINGS IN THE SOFTWARE.
|
||||
|
||||
|
||||
##########################################################################
|
||||
# This file runs the nvcc commands to produce the desired output file along with
|
||||
# the dependency file needed by CMake to compute dependencies. In addition the
|
||||
# file checks the output of each command and if the command fails it deletes the
|
||||
# output files.
|
||||
|
||||
# Input variables
|
||||
#
|
||||
# verbose:BOOL=<> OFF: Be as quiet as possible (default)
|
||||
# ON : Describe each step
|
||||
#
|
||||
# build_configuration:STRING=<> Typically one of Debug, MinSizeRel, Release, or
|
||||
# RelWithDebInfo, but it should match one of the
|
||||
# entries in CUDA_HOST_FLAGS. This is the build
|
||||
# configuration used when compiling the code. If
|
||||
# blank or unspecified Debug is assumed as this is
|
||||
# what CMake does.
|
||||
#
|
||||
# generated_file:STRING=<> File to generate. This argument must be passed in.
|
||||
#
|
||||
# generated_cubin_file:STRING=<> File to generate. This argument must be passed
|
||||
# in if build_cubin is true.
|
||||
|
||||
cmake_policy(PUSH)
|
||||
cmake_policy(SET CMP0007 NEW)
|
||||
cmake_policy(SET CMP0010 NEW)
|
||||
if(NOT generated_file)
|
||||
message(FATAL_ERROR "You must specify generated_file on the command line")
|
||||
endif()
|
||||
|
||||
# Set these up as variables to make reading the generated file easier
|
||||
set(CMAKE_COMMAND "@CMAKE_COMMAND@") # path
|
||||
set(source_file "@source_file@") # path
|
||||
set(NVCC_generated_dependency_file "@NVCC_generated_dependency_file@") # path
|
||||
set(cmake_dependency_file "@cmake_dependency_file@") # path
|
||||
set(CUDA_make2cmake "@CUDA_make2cmake@") # path
|
||||
set(CUDA_parse_cubin "@CUDA_parse_cubin@") # path
|
||||
set(build_cubin @build_cubin@) # bool
|
||||
set(CUDA_HOST_COMPILER "@CUDA_HOST_COMPILER@") # path
|
||||
# We won't actually use these variables for now, but we need to set this, in
|
||||
# order to force this file to be run again if it changes.
|
||||
set(generated_file_path "@generated_file_path@") # path
|
||||
set(generated_file_internal "@generated_file@") # path
|
||||
set(generated_cubin_file_internal "@generated_cubin_file@") # path
|
||||
|
||||
set(CUDA_NVCC_EXECUTABLE "@CUDA_NVCC_EXECUTABLE@") # path
|
||||
set(CUDA_NVCC_FLAGS @CUDA_NVCC_FLAGS@ ;; @CUDA_WRAP_OPTION_NVCC_FLAGS@) # list
|
||||
@CUDA_NVCC_FLAGS_CONFIG@
|
||||
set(nvcc_flags @nvcc_flags@) # list
|
||||
set(CUDA_NVCC_INCLUDE_DIRS [==[@CUDA_NVCC_INCLUDE_DIRS@]==]) # list (needs to be in lua quotes to address backslashes)
|
||||
string(REPLACE "\\" "/" CUDA_NVCC_INCLUDE_DIRS "${CUDA_NVCC_INCLUDE_DIRS}")
|
||||
set(CUDA_NVCC_COMPILE_DEFINITIONS [==[@CUDA_NVCC_COMPILE_DEFINITIONS@]==]) # list (needs to be in lua quotes see #16510 ).
|
||||
set(format_flag "@format_flag@") # string
|
||||
set(cuda_language_flag @cuda_language_flag@) # list
|
||||
|
||||
# Clean up list of include directories and add -I flags
|
||||
list(REMOVE_DUPLICATES CUDA_NVCC_INCLUDE_DIRS)
|
||||
set(CUDA_NVCC_INCLUDE_ARGS)
|
||||
foreach(dir ${CUDA_NVCC_INCLUDE_DIRS})
|
||||
# Prefix each include directory with -I; every directory becomes its own list element.
|
||||
list(APPEND CUDA_NVCC_INCLUDE_ARGS "-I${dir}")
|
||||
endforeach()
|
||||
|
||||
# Clean up list of compile definitions, add -D flags, and append to nvcc_flags
|
||||
list(REMOVE_DUPLICATES CUDA_NVCC_COMPILE_DEFINITIONS)
|
||||
foreach(def ${CUDA_NVCC_COMPILE_DEFINITIONS})
|
||||
list(APPEND nvcc_flags "-D${def}")
|
||||
endforeach()
|
||||
|
||||
if(build_cubin AND NOT generated_cubin_file)
|
||||
message(FATAL_ERROR "You must specify generated_cubin_file on the command line")
|
||||
endif()
|
||||
|
||||
# This is the list of host compilation flags. Either C or CXX should already have
|
||||
# been chosen by FindCUDA.cmake.
|
||||
@CUDA_HOST_FLAGS@
|
||||
|
||||
# Take the compiler flags and package them up to be sent to the compiler via -Xcompiler
|
||||
set(nvcc_host_compiler_flags "")
|
||||
# If we weren't given a build_configuration, use Debug.
|
||||
if(NOT build_configuration)
|
||||
set(build_configuration Debug)
|
||||
endif()
|
||||
string(TOUPPER "${build_configuration}" build_configuration)
|
||||
#message("CUDA_NVCC_HOST_COMPILER_FLAGS = ${CUDA_NVCC_HOST_COMPILER_FLAGS}")
|
||||
foreach(flag ${CMAKE_HOST_FLAGS} ${CMAKE_HOST_FLAGS_${build_configuration}})
|
||||
# Extra quotes are added around each flag to help nvcc parse out flags with spaces.
|
||||
string(APPEND nvcc_host_compiler_flags ",\"${flag}\"")
|
||||
endforeach()
|
||||
if (nvcc_host_compiler_flags)
|
||||
set(nvcc_host_compiler_flags "-Xcompiler" ${nvcc_host_compiler_flags})
|
||||
endif()
|
||||
#message("nvcc_host_compiler_flags = \"${nvcc_host_compiler_flags}\"")
|
||||
# Add the build specific configuration flags
|
||||
list(APPEND CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS_${build_configuration}})
|
||||
|
||||
# Any -ccbin existing in CUDA_NVCC_FLAGS gets highest priority
|
||||
list( FIND CUDA_NVCC_FLAGS "-ccbin" ccbin_found0 )
|
||||
list( FIND CUDA_NVCC_FLAGS "--compiler-bindir" ccbin_found1 )
|
||||
if( ccbin_found0 LESS 0 AND ccbin_found1 LESS 0 AND CUDA_HOST_COMPILER )
|
||||
if (CUDA_HOST_COMPILER STREQUAL "@_CUDA_MSVC_HOST_COMPILER@" AND DEFINED CCBIN)
|
||||
set(CCBIN -ccbin "${CCBIN}")
|
||||
else()
|
||||
set(CCBIN -ccbin "${CUDA_HOST_COMPILER}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# cuda_execute_process - Executes a command with optional command echo and status message.
|
||||
#
|
||||
# status - Status message to print if verbose is true
|
||||
# command - COMMAND argument from the usual execute_process argument structure
|
||||
# ARGN - Remaining arguments are the command with arguments
|
||||
#
|
||||
# CUDA_result - return value from running the command
|
||||
#
|
||||
# Make this a macro instead of a function, so that things like RESULT_VARIABLE
|
||||
# and other return variables are present after executing the process.
|
||||
macro(cuda_execute_process status command)
|
||||
# Macro parameters are substituted as text rather than being real variables,
# so copy the second argument into a variable before comparing it.
set(_command ${command})
|
||||
# The second argument must be the literal keyword COMMAND. The "x" prefix
# keeps both sides of the comparison non-empty.
if(NOT "x${_command}" STREQUAL "xCOMMAND")
|
||||
message(FATAL_ERROR "Malformed call to cuda_execute_process. Missing COMMAND as second argument. (command = ${command})")
|
||||
endif()
|
||||
# `verbose` is not set by this macro; it is read from the enclosing script.
if(verbose)
|
||||
# Print the status message.
execute_process(COMMAND "${CMAKE_COMMAND}" -E echo -- ${status})
|
||||
# Now we need to build up our command string. We are accounting for quotes
|
||||
# and spaces, anything else is left up to the user to fix if they want to
|
||||
# copy and paste a runnable command line.
|
||||
set(cuda_execute_process_string)
|
||||
foreach(arg ${ARGN})
|
||||
# If there are quotes, escape them, so they come through.
|
||||
string(REPLACE "\"" "\\\"" arg ${arg})
|
||||
# Args with spaces need quotes around them to get them to be parsed as a single argument.
|
||||
if(arg MATCHES " ")
|
||||
list(APPEND cuda_execute_process_string "\"${arg}\"")
|
||||
else()
|
||||
list(APPEND cuda_execute_process_string ${arg})
|
||||
endif()
|
||||
endforeach()
|
||||
# Echo the command
|
||||
execute_process(COMMAND ${CMAKE_COMMAND} -E echo ${cuda_execute_process_string})
|
||||
endif()
|
||||
# Run the command
|
||||
# RESULT_VARIABLE stores the command's result in CUDA_result; because this is
# a macro, that variable stays visible to the code that invoked it.
execute_process(COMMAND ${ARGN} RESULT_VARIABLE CUDA_result )
|
||||
endmacro()
|
||||
|
||||
# Delete the target file
|
||||
cuda_execute_process(
|
||||
"Removing ${generated_file}"
|
||||
COMMAND "${CMAKE_COMMAND}" -E remove "${generated_file}"
|
||||
)
|
||||
|
||||
# For CUDA 2.3 and below, -G -M doesn't work, so remove the -G flag
|
||||
# for dependency generation and hope for the best.
|
||||
set(depends_CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}")
|
||||
set(CUDA_VERSION @CUDA_VERSION@)
|
||||
|
||||
# nvcc doesn't define __CUDACC__ for some reason when generating dependency files. This
|
||||
# can cause incorrect dependencies when #including files based on this macro which is
|
||||
# defined in the generating passes of nvcc invocation. We will go ahead and manually
|
||||
# define this for now until a future version fixes this bug.
|
||||
set(CUDACC_DEFINE -D__CUDACC__)
|
||||
|
||||
# Generate the dependency file
|
||||
cuda_execute_process(
|
||||
"Generating dependency file: ${NVCC_generated_dependency_file}"
|
||||
COMMAND "${CUDA_NVCC_EXECUTABLE}"
|
||||
-M
|
||||
${CUDACC_DEFINE}
|
||||
"${source_file}"
|
||||
-o "${NVCC_generated_dependency_file}"
|
||||
${CCBIN}
|
||||
${nvcc_flags}
|
||||
${nvcc_host_compiler_flags}
|
||||
${depends_CUDA_NVCC_FLAGS}
|
||||
-DNVCC
|
||||
${CUDA_NVCC_INCLUDE_ARGS}
|
||||
)
|
||||
|
||||
if(CUDA_result)
|
||||
message(FATAL_ERROR "Error generating ${generated_file}")
|
||||
endif()
|
||||
|
||||
# Generate the cmake readable dependency file to a temp file. Don't put the
|
||||
# quotes just around the filenames for the input_file and output_file variables.
|
||||
# CMake will pass the quotes through and not be able to find the file.
|
||||
cuda_execute_process(
|
||||
"Generating temporary cmake readable file: ${cmake_dependency_file}.tmp"
|
||||
COMMAND "${CMAKE_COMMAND}"
|
||||
-D "input_file:FILEPATH=${NVCC_generated_dependency_file}"
|
||||
-D "output_file:FILEPATH=${cmake_dependency_file}.tmp"
|
||||
-D "verbose=${verbose}"
|
||||
-P "${CUDA_make2cmake}"
|
||||
)
|
||||
|
||||
if(CUDA_result)
|
||||
message(FATAL_ERROR "Error generating ${generated_file}")
|
||||
endif()
|
||||
|
||||
# Copy the file if it is different
|
||||
cuda_execute_process(
|
||||
"Copy if different ${cmake_dependency_file}.tmp to ${cmake_dependency_file}"
|
||||
COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${cmake_dependency_file}.tmp" "${cmake_dependency_file}"
|
||||
)
|
||||
|
||||
if(CUDA_result)
|
||||
message(FATAL_ERROR "Error generating ${generated_file}")
|
||||
endif()
|
||||
|
||||
# Delete the temporary file
|
||||
cuda_execute_process(
|
||||
"Removing ${cmake_dependency_file}.tmp and ${NVCC_generated_dependency_file}"
|
||||
COMMAND "${CMAKE_COMMAND}" -E remove "${cmake_dependency_file}.tmp" "${NVCC_generated_dependency_file}"
|
||||
)
|
||||
|
||||
if(CUDA_result)
|
||||
message(FATAL_ERROR "Error generating ${generated_file}")
|
||||
endif()
|
||||
|
||||
# Generate the code
|
||||
cuda_execute_process(
|
||||
"Generating ${generated_file}"
|
||||
COMMAND "${CUDA_NVCC_EXECUTABLE}"
|
||||
"${source_file}"
|
||||
${cuda_language_flag}
|
||||
${format_flag} -o "${generated_file}"
|
||||
${CCBIN}
|
||||
${nvcc_flags}
|
||||
${nvcc_host_compiler_flags}
|
||||
${CUDA_NVCC_FLAGS}
|
||||
-DNVCC
|
||||
${CUDA_NVCC_INCLUDE_ARGS}
|
||||
)
|
||||
|
||||
if(CUDA_result)
|
||||
# Since nvcc can sometimes leave half done files make sure that we delete the output file.
|
||||
cuda_execute_process(
|
||||
"Removing ${generated_file}"
|
||||
COMMAND "${CMAKE_COMMAND}" -E remove "${generated_file}"
|
||||
)
|
||||
message(FATAL_ERROR "Error generating file ${generated_file}")
|
||||
else()
|
||||
if(verbose)
|
||||
message("Generated ${generated_file} successfully.")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Cubin resource report commands.
|
||||
if( build_cubin )
|
||||
# Run with -cubin to produce resource usage report.
|
||||
cuda_execute_process(
|
||||
"Generating ${generated_cubin_file}"
|
||||
COMMAND "${CUDA_NVCC_EXECUTABLE}"
|
||||
"${source_file}"
|
||||
${CUDA_NVCC_FLAGS}
|
||||
${nvcc_flags}
|
||||
${CCBIN}
|
||||
${nvcc_host_compiler_flags}
|
||||
-DNVCC
|
||||
-cubin
|
||||
-o "${generated_cubin_file}"
|
||||
${CUDA_NVCC_INCLUDE_ARGS}
|
||||
)
|
||||
|
||||
# Execute the parser script.
|
||||
cuda_execute_process(
|
||||
"Executing the parser script"
|
||||
COMMAND "${CMAKE_COMMAND}"
|
||||
-D "input_file:STRING=${generated_cubin_file}"
|
||||
-P "${CUDA_parse_cubin}"
|
||||
)
|
||||
|
||||
endif()
|
||||
|
||||
cmake_policy(POP)
|
@ -0,0 +1,300 @@
|
||||
# Synopsis:
|
||||
# CUDA_SELECT_NVCC_ARCH_FLAGS(out_variable [target_CUDA_architectures])
|
||||
# -- Selects GPU arch flags for nvcc based on target_CUDA_architectures
|
||||
# target_CUDA_architectures : Auto | Common | All | LIST(ARCH_AND_PTX ...)
|
||||
# - "Auto" detects local machine GPU compute arch at runtime.
|
||||
# - "Common" and "All" cover common and entire subsets of architectures
|
||||
# ARCH_AND_PTX : NAME | NUM.NUM | NUM.NUM(NUM.NUM) | NUM.NUM+PTX
|
||||
# NAME: Kepler Kepler+Tesla Maxwell Maxwell+Tegra Pascal Volta Volta+Tegra Turing Ampere Ampere+Tegra Ada Hopper Blackwell Blackwell+Tegra
|
||||
# NUM: Any number. Only those pairs are currently accepted by NVCC though:
|
||||
# 3.5 3.7 5.0 5.2 5.3 6.0 6.2 7.0 7.2 7.5 8.0
|
||||
# Returns LIST of flags to be added to CUDA_NVCC_FLAGS in ${out_variable}
|
||||
# Additionally, sets ${out_variable}_readable to the resulting numeric list
|
||||
# Example:
|
||||
# CUDA_SELECT_NVCC_ARCH_FLAGS(ARCH_FLAGS 3.0 3.5+PTX 5.2(5.0) Maxwell)
|
||||
# LIST(APPEND CUDA_NVCC_FLAGS ${ARCH_FLAGS})
|
||||
#
|
||||
# More info on CUDA architectures: https://en.wikipedia.org/wiki/CUDA
|
||||
#
|
||||
|
||||
# When CUDA is enabled as a first-class language, derive CUDA_VERSION
# (major.minor) from the compiler version reported by CMake.
if(CMAKE_CUDA_COMPILER_LOADED) # CUDA as a language
  if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA"
      AND CMAKE_CUDA_COMPILER_VERSION MATCHES "^([0-9]+\\.[0-9]+)")
    set(CUDA_VERSION "${CMAKE_MATCH_1}")
  endif()
endif()

# See: https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#gpu-feature-list

# This list will be used for CUDA_ARCH_NAME = All option
set(CUDA_KNOWN_GPU_ARCHITECTURES "Kepler" "Maxwell")

# This list will be used for CUDA_ARCH_NAME = Common option (enabled by default)
set(CUDA_COMMON_GPU_ARCHITECTURES "3.5" "5.0")

# This list is used to filter CUDA archs when autodetecting
set(CUDA_ALL_GPU_ARCHITECTURES "3.5" "5.0")

# Each section below extends the three lists for one toolkit generation.
# CUDA_LIMIT_GPU_ARCHITECTURE, when set, is the first compute capability the
# toolkit can NOT target; CUDA_DETECT_INSTALLED_GPUS substitutes the last
# "common" entry for any detected GPU at or above that limit.

if(CUDA_VERSION VERSION_GREATER "10.5")
  list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Ampere")
  list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "8.0")
  list(APPEND CUDA_ALL_GPU_ARCHITECTURES "8.0")

  if(CUDA_VERSION VERSION_LESS "11.1")
    set(CUDA_LIMIT_GPU_ARCHITECTURE "8.0")
    list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "8.0+PTX")
  endif()
endif()

if(NOT CUDA_VERSION VERSION_LESS "11.1")
  list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "8.6")
  list(APPEND CUDA_ALL_GPU_ARCHITECTURES "8.6")
  # NOTE: a misspelled, never-read `CUDA_LIMIT_GPU_ARCHITECUTRE "8.6"` used to
  # be set here. It was dead code and is intentionally NOT replaced by the
  # correctly spelled variable: doing so would leave the limit at 8.6 for
  # CUDA >= 12.0 (no later branch overrides it) and wrongly rewrite every
  # autodetected GPU with compute capability >= 8.6.

  if(CUDA_VERSION VERSION_LESS "11.8")
    set(CUDA_LIMIT_GPU_ARCHITECTURE "8.9")
    list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "8.6+PTX")
  endif()
endif()

if(NOT CUDA_VERSION VERSION_LESS "11.8")
  list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Ada")
  list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Hopper")
  list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "8.9")
  list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "9.0")
  list(APPEND CUDA_ALL_GPU_ARCHITECTURES "8.9")
  list(APPEND CUDA_ALL_GPU_ARCHITECTURES "9.0")

  if(CUDA_VERSION VERSION_LESS "12.0")
    set(CUDA_LIMIT_GPU_ARCHITECTURE "9.0")
    list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "8.9+PTX")
    list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "9.0+PTX")
  endif()
endif()

if(NOT CUDA_VERSION VERSION_LESS "12.0")
  list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "9.0a")
  list(APPEND CUDA_ALL_GPU_ARCHITECTURES "9.0a")
  # 3.5 is dropped from both lists starting with this toolkit version.
  list(REMOVE_ITEM CUDA_COMMON_GPU_ARCHITECTURES "3.5")
  list(REMOVE_ITEM CUDA_ALL_GPU_ARCHITECTURES "3.5")
endif()

if(CUDA_VERSION VERSION_GREATER "12.6")
  list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Blackwell")
  list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "10.0")
  list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "10.0a")
  list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "10.1a")
  list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "12.0")
  list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "12.0a")
  list(APPEND CUDA_ALL_GPU_ARCHITECTURES "10.0")
  list(APPEND CUDA_ALL_GPU_ARCHITECTURES "10.0a")
  list(APPEND CUDA_ALL_GPU_ARCHITECTURES "10.1a")
  list(APPEND CUDA_ALL_GPU_ARCHITECTURES "12.0")
  list(APPEND CUDA_ALL_GPU_ARCHITECTURES "12.0a")
endif()
|
||||
|
||||
|
||||
################################################################################################
|
||||
# A function for automatic detection of GPUs installed (if autodetection is enabled)
|
||||
# Usage:
|
||||
# CUDA_DETECT_INSTALLED_GPUS(OUT_VARIABLE)
|
||||
#
|
||||
# Detects the compute capabilities of the GPUs installed on the build machine.
#
#   OUT_VARIABLE - name of the variable (set in the caller's scope) receiving a
#                  space-separated string of architectures. When detection
#                  fails it receives CUDA_COMMON_GPU_ARCHITECTURES instead.
#
# The raw detection result is cached in CUDA_GPU_DETECT_OUTPUT so the probe
# program is compiled and run only while that cache entry is empty.
function(CUDA_DETECT_INSTALLED_GPUS OUT_VARIABLE)
  if(NOT CUDA_GPU_DETECT_OUTPUT)
    if(CMAKE_CUDA_COMPILER_LOADED) # CUDA as a language
      set(file "${PROJECT_BINARY_DIR}/detect_cuda_compute_capabilities.cu")
    else()
      set(file "${PROJECT_BINARY_DIR}/detect_cuda_compute_capabilities.cpp")
    endif()

    # Probe program: prints "<major>.<minor> " for every visible device and
    # exits non-zero when no device can be queried.
    # The path is quoted so that build directories containing spaces work.
    file(WRITE "${file}" ""
      "#include <cuda_runtime.h>\n"
      "#include <cstdio>\n"
      "int main()\n"
      "{\n"
      " int count = 0;\n"
      " if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n"
      " if (count == 0) return -1;\n"
      " for (int device = 0; device < count; ++device)\n"
      " {\n"
      " cudaDeviceProp prop;\n"
      " if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n"
      " std::printf(\"%d.%d \", prop.major, prop.minor);\n"
      " }\n"
      " return 0;\n"
      "}\n")

    if(CMAKE_CUDA_COMPILER_LOADED) # CUDA as a language
      try_run(run_result compile_result "${PROJECT_BINARY_DIR}" "${file}"
        RUN_OUTPUT_VARIABLE compute_capabilities)
    else()
      try_run(run_result compile_result "${PROJECT_BINARY_DIR}" "${file}"
        CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${CUDA_INCLUDE_DIRS}"
        LINK_LIBRARIES ${CUDA_LIBRARIES}
        RUN_OUTPUT_VARIABLE compute_capabilities)
    endif()

    # Filter unrelated content out of the output.
    string(REGEX MATCHALL "[0-9]+\\.[0-9]+" compute_capabilities "${compute_capabilities}")

    if(run_result EQUAL 0)
      # Map the capability "2.1" to "2.1(2.0)". This must be an exact,
      # per-element match: a plain substring replace would also rewrite
      # "12.1" into "12.1(2.0)" and produce a bogus compute_20/sm_121 pair.
      set(normalized_capabilities "")
      foreach(capability IN LISTS compute_capabilities)
        if(capability STREQUAL "2.1")
          list(APPEND normalized_capabilities "2.1(2.0)")
        else()
          list(APPEND normalized_capabilities "${capability}")
        endif()
      endforeach()
      set(CUDA_GPU_DETECT_OUTPUT "${normalized_capabilities}"
        CACHE INTERNAL "Returned GPU architectures from detect_gpus tool" FORCE)
    endif()
  endif()

  if(NOT CUDA_GPU_DETECT_OUTPUT)
    message(STATUS "Automatic GPU detection failed. Building for common architectures.")
    set(${OUT_VARIABLE} ${CUDA_COMMON_GPU_ARCHITECTURES} PARENT_SCOPE)
  else()
    # Filter based on CUDA version supported archs
    set(CUDA_GPU_DETECT_OUTPUT_FILTERED "")
    separate_arguments(CUDA_GPU_DETECT_OUTPUT)
    foreach(ITEM IN ITEMS ${CUDA_GPU_DETECT_OUTPUT})
      # A GPU at or above the toolkit's limit is replaced with the last
      # entry of the common-architectures list.
      if(CUDA_LIMIT_GPU_ARCHITECTURE AND (ITEM VERSION_GREATER CUDA_LIMIT_GPU_ARCHITECTURE OR
                                          ITEM VERSION_EQUAL CUDA_LIMIT_GPU_ARCHITECTURE))
        list(GET CUDA_COMMON_GPU_ARCHITECTURES -1 NEWITEM)
        string(APPEND CUDA_GPU_DETECT_OUTPUT_FILTERED " ${NEWITEM}")
      else()
        string(APPEND CUDA_GPU_DETECT_OUTPUT_FILTERED " ${ITEM}")
      endif()
    endforeach()

    set(${OUT_VARIABLE} ${CUDA_GPU_DETECT_OUTPUT_FILTERED} PARENT_SCOPE)
  endif()
endfunction()
|
||||
|
||||
|
||||
################################################################################################
|
||||
# Function for selecting GPU arch flags for nvcc based on CUDA architectures from parameter list
|
||||
# Usage:
|
||||
# CUDA_SELECT_NVCC_ARCH_FLAGS(out_variable [list of CUDA compute archs])
|
||||
# Translates a list of CUDA architectures into nvcc -gencode flags.
#
#   out_variable - receives the list of nvcc flags (set in the caller's scope);
#                  ${out_variable}_readable receives a space-separated summary
#                  such as "sm_80 compute_80".
#   ARGN         - "Auto", "Common", "All", or any mix of architecture names
#                  (e.g. Ampere), numeric capabilities (8.0, 9.0a),
#                  CODE(ARCH) pairs (5.2(5.0)), each optionally suffixed with
#                  +PTX. An empty list means "Auto".
function(CUDA_SELECT_NVCC_ARCH_FLAGS out_variable)
  set(CUDA_ARCH_LIST "${ARGN}")

  if("X${CUDA_ARCH_LIST}" STREQUAL "X" )
    set(CUDA_ARCH_LIST "Auto")
  endif()

  set(cuda_arch_bin)
  set(cuda_arch_ptx)

  if("${CUDA_ARCH_LIST}" STREQUAL "All")
    set(CUDA_ARCH_LIST ${CUDA_KNOWN_GPU_ARCHITECTURES})
  elseif("${CUDA_ARCH_LIST}" STREQUAL "Common")
    set(CUDA_ARCH_LIST ${CUDA_COMMON_GPU_ARCHITECTURES})
  elseif("${CUDA_ARCH_LIST}" STREQUAL "Auto")
    CUDA_DETECT_INSTALLED_GPUS(CUDA_ARCH_LIST)
    message(STATUS "Autodetected CUDA architecture(s): ${CUDA_ARCH_LIST}")
  endif()

  # Now process the list and look for names
  string(REGEX REPLACE "[ \t]+" ";" CUDA_ARCH_LIST "${CUDA_ARCH_LIST}")
  list(REMOVE_DUPLICATES CUDA_ARCH_LIST)
  foreach(arch_name ${CUDA_ARCH_LIST})
    set(arch_bin)
    set(arch_ptx)
    set(add_ptx FALSE)
    # Check to see if we are compiling PTX
    if(arch_name MATCHES "(.*)\\+PTX$")
      set(add_ptx TRUE)
      set(arch_name ${CMAKE_MATCH_1})
    endif()
    if(arch_name MATCHES "^([0-9]+\\.[0-9]a?(\\([0-9]+\\.[0-9]\\))?)$")
      set(arch_bin ${CMAKE_MATCH_1})
      set(arch_ptx ${arch_bin})
    else()
      # Look for it in our list of known architectures.
      # The comparisons reference `arch_name` by name rather than expanding
      # it, so a user-supplied value that happens to match another variable's
      # name is not dereferenced a second time by if().
      if(arch_name STREQUAL "Kepler+Tesla")
        set(arch_bin 3.7)
      elseif(arch_name STREQUAL "Kepler")
        set(arch_bin 3.5)
        set(arch_ptx 3.5)
      elseif(arch_name STREQUAL "Maxwell+Tegra")
        set(arch_bin 5.3)
      elseif(arch_name STREQUAL "Maxwell")
        set(arch_bin 5.0 5.2)
        set(arch_ptx 5.2)
      elseif(arch_name STREQUAL "Pascal")
        set(arch_bin 6.0 6.1)
        set(arch_ptx 6.1)
      elseif(arch_name STREQUAL "Volta+Tegra")
        set(arch_bin 7.2)
      elseif(arch_name STREQUAL "Volta")
        set(arch_bin 7.0)
        set(arch_ptx 7.0)
      elseif(arch_name STREQUAL "Turing")
        set(arch_bin 7.5)
        set(arch_ptx 7.5)
      elseif(arch_name STREQUAL "Ampere+Tegra")
        set(arch_bin 8.7)
      elseif(arch_name STREQUAL "Ampere")
        set(arch_bin 8.0 8.6)
        set(arch_ptx 8.0 8.6)
      elseif(arch_name STREQUAL "Ada")
        set(arch_bin 8.9)
        set(arch_ptx 8.9)
      elseif(arch_name STREQUAL "Hopper")
        set(arch_bin 9.0)
        set(arch_ptx 9.0)
      elseif(arch_name STREQUAL "Blackwell+Tegra")
        set(arch_bin 10.1)
      elseif(arch_name STREQUAL "Blackwell")
        set(arch_bin 10.0 12.0)
        set(arch_ptx 10.0 12.0)
      else()
        message(SEND_ERROR "Found Unknown CUDA Architecture Name in CUDA_SELECT_NVCC_ARCH_FLAGS: ${arch_name} ")
      endif()
    endif()
    if(NOT arch_bin)
      message(SEND_ERROR "arch_bin wasn't set for some reason")
    endif()
    list(APPEND cuda_arch_bin ${arch_bin})
    if(add_ptx)
      # Names without a dedicated PTX target fall back to their binary archs.
      if (NOT arch_ptx)
        set(arch_ptx ${arch_bin})
      endif()
      list(APPEND cuda_arch_ptx ${arch_ptx})
    endif()
  endforeach()

  # remove dots and convert to lists
  string(REGEX REPLACE "\\." "" cuda_arch_bin "${cuda_arch_bin}")
  string(REGEX REPLACE "\\." "" cuda_arch_ptx "${cuda_arch_ptx}")
  string(REGEX MATCHALL "[0-9()]+a?" cuda_arch_bin "${cuda_arch_bin}")
  string(REGEX MATCHALL "[0-9]+a?" cuda_arch_ptx "${cuda_arch_ptx}")

  if(cuda_arch_bin)
    list(REMOVE_DUPLICATES cuda_arch_bin)
  endif()
  if(cuda_arch_ptx)
    list(REMOVE_DUPLICATES cuda_arch_ptx)
  endif()

  set(nvcc_flags "")
  set(nvcc_archs_readable "")

  # Tell NVCC to add binaries for the specified GPUs
  foreach(arch ${cuda_arch_bin})
    if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)")
      # User explicitly specified ARCH for the concrete CODE
      list(APPEND nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1})
      list(APPEND nvcc_archs_readable sm_${CMAKE_MATCH_1})
    else()
      # User didn't explicitly specify ARCH for the concrete CODE, we assume ARCH=CODE
      list(APPEND nvcc_flags -gencode arch=compute_${arch},code=sm_${arch})
      list(APPEND nvcc_archs_readable sm_${arch})
    endif()
  endforeach()

  # Tell NVCC to add PTX intermediate code for the specified architectures
  foreach(arch ${cuda_arch_ptx})
    list(APPEND nvcc_flags -gencode arch=compute_${arch},code=compute_${arch})
    list(APPEND nvcc_archs_readable compute_${arch})
  endforeach()

  string(REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable}")
  set(${out_variable} ${nvcc_flags} PARENT_SCOPE)
  set(${out_variable}_readable ${nvcc_archs_readable} PARENT_SCOPE)
endfunction()
|
@ -0,0 +1,386 @@
|
||||
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
|
||||
# file Copyright.txt or https://cmake.org/licensing for details.
|
||||
|
||||
#[=======================================================================[.rst:
|
||||
FindPackageHandleStandardArgs
|
||||
-----------------------------
|
||||
|
||||
This module provides a function intended to be used in :ref:`Find Modules`
|
||||
implementing :command:`find_package(<PackageName>)` calls. It handles the
|
||||
``REQUIRED``, ``QUIET`` and version-related arguments of ``find_package``.
|
||||
It also sets the ``<PackageName>_FOUND`` variable. The package is
|
||||
considered found if all variables listed contain valid results, e.g.
|
||||
valid filepaths.
|
||||
|
||||
.. command:: find_package_handle_standard_args
|
||||
|
||||
There are two signatures::
|
||||
|
||||
find_package_handle_standard_args(<PackageName>
|
||||
(DEFAULT_MSG|<custom-failure-message>)
|
||||
<required-var>...
|
||||
)
|
||||
|
||||
find_package_handle_standard_args(<PackageName>
|
||||
[FOUND_VAR <result-var>]
|
||||
[REQUIRED_VARS <required-var>...]
|
||||
[VERSION_VAR <version-var>]
|
||||
[HANDLE_COMPONENTS]
|
||||
[CONFIG_MODE]
|
||||
[FAIL_MESSAGE <custom-failure-message>]
|
||||
)
|
||||
|
||||
The ``<PackageName>_FOUND`` variable will be set to ``TRUE`` if all
|
||||
the variables ``<required-var>...`` are valid and any optional
|
||||
constraints are satisfied, and ``FALSE`` otherwise. A success or
|
||||
failure message may be displayed based on the results and on
|
||||
whether the ``REQUIRED`` and/or ``QUIET`` option was given to
|
||||
the :command:`find_package` call.
|
||||
|
||||
The options are:
|
||||
|
||||
``(DEFAULT_MSG|<custom-failure-message>)``
|
||||
In the simple signature this specifies the failure message.
|
||||
Use ``DEFAULT_MSG`` to ask for a default message to be computed
|
||||
(recommended). Not valid in the full signature.
|
||||
|
||||
``FOUND_VAR <result-var>``
|
||||
Obsolete. Specifies either ``<PackageName>_FOUND`` or
|
||||
``<PACKAGENAME>_FOUND`` as the result variable. This exists only
|
||||
for compatibility with older versions of CMake and is now ignored.
|
||||
Result variables of both names are always set for compatibility.
|
||||
|
||||
``REQUIRED_VARS <required-var>...``
|
||||
Specify the variables which are required for this package.
|
||||
These may be named in the generated failure message asking the
|
||||
user to set the missing variable values. Therefore these should
|
||||
typically be cache entries such as ``FOO_LIBRARY`` and not output
|
||||
variables like ``FOO_LIBRARIES``.
|
||||
|
||||
``VERSION_VAR <version-var>``
|
||||
Specify the name of a variable that holds the version of the package
|
||||
that has been found. This version will be checked against the
|
||||
(potentially) specified required version given to the
|
||||
:command:`find_package` call, including its ``EXACT`` option.
|
||||
The default messages include information about the required
|
||||
version and the version which has been actually found, both
|
||||
if the version is ok or not.
|
||||
|
||||
``HANDLE_COMPONENTS``
|
||||
Enable handling of package components. In this case, the command
|
||||
will report which components have been found and which are missing,
|
||||
and the ``<PackageName>_FOUND`` variable will be set to ``FALSE``
|
||||
if any of the required components (i.e. not the ones listed after
|
||||
the ``OPTIONAL_COMPONENTS`` option of :command:`find_package`) are
|
||||
missing.
|
||||
|
||||
``CONFIG_MODE``
|
||||
Specify that the calling find module is a wrapper around a
|
||||
call to ``find_package(<PackageName> NO_MODULE)``. This implies
|
||||
a ``VERSION_VAR`` value of ``<PackageName>_VERSION``. The command
|
||||
will automatically check whether the package configuration file
|
||||
was found.
|
||||
|
||||
``FAIL_MESSAGE <custom-failure-message>``
|
||||
Specify a custom failure message instead of using the default
|
||||
generated message. Not recommended.
|
||||
|
||||
Example for the simple signature:
|
||||
|
||||
.. code-block:: cmake
|
||||
|
||||
find_package_handle_standard_args(LibXml2 DEFAULT_MSG
|
||||
LIBXML2_LIBRARY LIBXML2_INCLUDE_DIR)
|
||||
|
||||
The ``LibXml2`` package is considered to be found if both
|
||||
``LIBXML2_LIBRARY`` and ``LIBXML2_INCLUDE_DIR`` are valid.
|
||||
Then also ``LibXml2_FOUND`` is set to ``TRUE``. If it is not found
|
||||
and ``REQUIRED`` was used, it fails with a
|
||||
:command:`message(FATAL_ERROR)`, independent whether ``QUIET`` was
|
||||
used or not. If it is found, success will be reported, including
|
||||
the content of the first ``<required-var>``. On repeated CMake runs,
|
||||
the same message will not be printed again.
|
||||
|
||||
Example for the full signature:
|
||||
|
||||
.. code-block:: cmake
|
||||
|
||||
find_package_handle_standard_args(LibArchive
|
||||
REQUIRED_VARS LibArchive_LIBRARY LibArchive_INCLUDE_DIR
|
||||
VERSION_VAR LibArchive_VERSION)
|
||||
|
||||
In this case, the ``LibArchive`` package is considered to be found if
|
||||
both ``LibArchive_LIBRARY`` and ``LibArchive_INCLUDE_DIR`` are valid.
|
||||
Also the version of ``LibArchive`` will be checked by using the version
|
||||
contained in ``LibArchive_VERSION``. Since no ``FAIL_MESSAGE`` is given,
|
||||
the default messages will be printed.
|
||||
|
||||
Another example for the full signature:
|
||||
|
||||
.. code-block:: cmake
|
||||
|
||||
find_package(Automoc4 QUIET NO_MODULE HINTS /opt/automoc4)
|
||||
find_package_handle_standard_args(Automoc4 CONFIG_MODE)
|
||||
|
||||
In this case, a ``FindAutomoc4.cmake`` module wraps a call to
|
||||
``find_package(Automoc4 NO_MODULE)`` and adds an additional search
|
||||
directory for ``automoc4``. Then the call to
|
||||
``find_package_handle_standard_args`` produces a proper success/failure
|
||||
message.
|
||||
#]=======================================================================]
|
||||
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/FindPackageMessage.cmake)
|
||||
|
||||
# internal helper macro
|
||||
# Internal helper: report a find failure for package ${_NAME}.
# A REQUIRED package aborts configuration; otherwise the message is printed
# as a status line unless the find was requested QUIET.
macro(_FPHSA_FAILURE_MESSAGE _msg)
  if(${_NAME}_FIND_REQUIRED)
    message(FATAL_ERROR "${_msg}")
  elseif(NOT ${_NAME}_FIND_QUIETLY)
    message(STATUS "${_msg}")
  endif()
endmacro()
|
||||
|
||||
|
||||
# internal helper macro to generate the failure message when used in CONFIG_MODE:
|
||||
# Internal helper: build and emit the failure message for CONFIG_MODE.
# Three situations are distinguished, in this order:
#   1. a config file was accepted but another required variable is missing;
#   2. config files were considered but none had a suitable version;
#   3. no config file was found at all.
macro(_FPHSA_HANDLE_FAILURE_CONFIG_MODE)
  if(${_NAME}_CONFIG)
    # <name>_CONFIG is set, but FOUND is false: some other REQUIRED_VARS entry was not found.
    _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: missing:${MISSING_VARS} (found ${${_NAME}_CONFIG} ${VERSION_MSG})")
  elseif(${_NAME}_CONSIDERED_CONFIGS)
    # Config files exist but none matched; list every candidate with its version.
    set(configsText "")
    list(LENGTH ${_NAME}_CONSIDERED_CONFIGS configsCount)
    math(EXPR configsCount "${configsCount} - 1")
    foreach(currentConfigIndex RANGE ${configsCount})
      list(GET ${_NAME}_CONSIDERED_CONFIGS ${currentConfigIndex} filename)
      list(GET ${_NAME}_CONSIDERED_VERSIONS ${currentConfigIndex} version)
      string(APPEND configsText " ${filename} (version ${version})\n")
    endforeach()
    if(${_NAME}_NOT_FOUND_MESSAGE)
      string(APPEND configsText " Reason given by package: ${${_NAME}_NOT_FOUND_MESSAGE}\n")
    endif()
    _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} ${VERSION_MSG}, checked the following files:\n${configsText}")
  else()
    # Simple case: no config file was found at all.
    _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: found neither ${_NAME}Config.cmake nor ${_NAME_LOWER}-config.cmake ${VERSION_MSG}")
  endif()
endmacro()
|
||||
|
||||
|
||||
# Implements the standard REQUIRED / QUIET / version / component handling for
# find modules. Sets <_NAME>_FOUND and <_NAME_UPPER>_FOUND in the caller's
# scope and prints the success or failure message.
#
#   _NAME      - package name as passed to find_package()
#   _FIRST_ARG - either a keyword of the extended signature, or (simple
#                signature) the failure message / DEFAULT_MSG
#   ARGN       - remaining arguments of the chosen signature
function(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FIRST_ARG)

  # Set up the arguments for `cmake_parse_arguments`.
  set(options CONFIG_MODE HANDLE_COMPONENTS)
  set(oneValueArgs FAIL_MESSAGE VERSION_VAR FOUND_VAR)
  set(multiValueArgs REQUIRED_VARS)

  # Check whether we are in 'simple' or 'extended' mode:
  set(_KEYWORDS_FOR_EXTENDED_MODE ${options} ${oneValueArgs} ${multiValueArgs} )
  list(FIND _KEYWORDS_FOR_EXTENDED_MODE "${_FIRST_ARG}" INDEX)

  if(${INDEX} EQUAL -1)
    set(FPHSA_FAIL_MESSAGE ${_FIRST_ARG})
    set(FPHSA_REQUIRED_VARS ${ARGN})
    set(FPHSA_VERSION_VAR)
  else()
    cmake_parse_arguments(FPHSA "${options}" "${oneValueArgs}" "${multiValueArgs}" ${_FIRST_ARG} ${ARGN})

    if(FPHSA_UNPARSED_ARGUMENTS)
      message(FATAL_ERROR "Unknown keywords given to FIND_PACKAGE_HANDLE_STANDARD_ARGS(): \"${FPHSA_UNPARSED_ARGUMENTS}\"")
    endif()

    if(NOT FPHSA_FAIL_MESSAGE)
      set(FPHSA_FAIL_MESSAGE "DEFAULT_MSG")
    endif()

    # In config-mode, we rely on the variable <package>_CONFIG, which is set by find_package()
    # when it successfully found the config-file, including version checking:
    if(FPHSA_CONFIG_MODE)
      list(INSERT FPHSA_REQUIRED_VARS 0 ${_NAME}_CONFIG)
      list(REMOVE_DUPLICATES FPHSA_REQUIRED_VARS)
      set(FPHSA_VERSION_VAR ${_NAME}_VERSION)
    endif()

    if(NOT FPHSA_REQUIRED_VARS)
      message(FATAL_ERROR "No REQUIRED_VARS specified for FIND_PACKAGE_HANDLE_STANDARD_ARGS()")
    endif()
  endif()

  # now that we collected all arguments, process them

  if("x${FPHSA_FAIL_MESSAGE}" STREQUAL "xDEFAULT_MSG")
    set(FPHSA_FAIL_MESSAGE "Could NOT find ${_NAME}")
  endif()

  list(GET FPHSA_REQUIRED_VARS 0 _FIRST_REQUIRED_VAR)

  string(TOUPPER "${_NAME}" _NAME_UPPER)
  string(TOLOWER "${_NAME}" _NAME_LOWER)

  if(FPHSA_FOUND_VAR)
    # Validate FOUND_VAR with a literal comparison. A regex match would
    # interpret characters of the package name (".", "+", ...) as regex
    # syntax. The expected names are stored in variables so that if() only
    # dereferences each operand once.
    set(_FOUND_VAR_MIXED "${_NAME}_FOUND")
    set(_FOUND_VAR_UPPER "${_NAME_UPPER}_FOUND")
    if(FPHSA_FOUND_VAR STREQUAL _FOUND_VAR_MIXED OR FPHSA_FOUND_VAR STREQUAL _FOUND_VAR_UPPER)
      set(_FOUND_VAR ${FPHSA_FOUND_VAR})
    else()
      message(FATAL_ERROR "The argument for FOUND_VAR is \"${FPHSA_FOUND_VAR}\", but only \"${_NAME}_FOUND\" and \"${_NAME_UPPER}_FOUND\" are valid names.")
    endif()
  else()
    set(_FOUND_VAR ${_NAME_UPPER}_FOUND)
  endif()

  # collect all variables which were not found, so they can be printed, so the
  # user knows better what went wrong (#6375)
  set(MISSING_VARS "")
  set(DETAILS "")
  # check if all passed variables are valid
  set(FPHSA_FOUND_${_NAME} TRUE)
  foreach(_CURRENT_VAR ${FPHSA_REQUIRED_VARS})
    if(NOT ${_CURRENT_VAR})
      set(FPHSA_FOUND_${_NAME} FALSE)
      string(APPEND MISSING_VARS " ${_CURRENT_VAR}")
    else()
      string(APPEND DETAILS "[${${_CURRENT_VAR}}]")
    endif()
  endforeach()
  if(FPHSA_FOUND_${_NAME})
    set(${_NAME}_FOUND TRUE)
    set(${_NAME_UPPER}_FOUND TRUE)
  else()
    set(${_NAME}_FOUND FALSE)
    set(${_NAME_UPPER}_FOUND FALSE)
  endif()

  # component handling
  unset(FOUND_COMPONENTS_MSG)
  unset(MISSING_COMPONENTS_MSG)

  if(FPHSA_HANDLE_COMPONENTS)
    foreach(comp ${${_NAME}_FIND_COMPONENTS})
      if(${_NAME}_${comp}_FOUND)

        if(NOT DEFINED FOUND_COMPONENTS_MSG)
          set(FOUND_COMPONENTS_MSG "found components: ")
        endif()
        string(APPEND FOUND_COMPONENTS_MSG " ${comp}")

      else()

        if(NOT DEFINED MISSING_COMPONENTS_MSG)
          set(MISSING_COMPONENTS_MSG "missing components: ")
        endif()
        string(APPEND MISSING_COMPONENTS_MSG " ${comp}")

        # Only a missing REQUIRED component makes the whole package not found.
        if(${_NAME}_FIND_REQUIRED_${comp})
          set(${_NAME}_FOUND FALSE)
          string(APPEND MISSING_VARS " ${comp}")
        endif()

      endif()
    endforeach()
    set(COMPONENT_MSG "${FOUND_COMPONENTS_MSG} ${MISSING_COMPONENTS_MSG}")
    string(APPEND DETAILS "[c${COMPONENT_MSG}]")
  endif()

  # version handling:
  set(VERSION_MSG "")
  set(VERSION_OK TRUE)

  # check with DEFINED here as the requested or found version may be "0"
  if (DEFINED ${_NAME}_FIND_VERSION)
    if(DEFINED ${FPHSA_VERSION_VAR})
      set(_FOUND_VERSION ${${FPHSA_VERSION_VAR}})

      if(${_NAME}_FIND_VERSION_EXACT) # exact version required
        # count the dots in the version string
        string(REGEX REPLACE "[^.]" "" _VERSION_DOTS "${_FOUND_VERSION}")
        # add one dot because there is one dot more than there are components
        string(LENGTH "${_VERSION_DOTS}." _VERSION_DOTS)
        if (_VERSION_DOTS GREATER ${_NAME}_FIND_VERSION_COUNT)
          # Because of the C++ implementation of find_package() ${_NAME}_FIND_VERSION_COUNT
          # is at most 4 here. Therefore a simple lookup table is used.
          if (${_NAME}_FIND_VERSION_COUNT EQUAL 1)
            set(_VERSION_REGEX "[^.]*")
          elseif (${_NAME}_FIND_VERSION_COUNT EQUAL 2)
            set(_VERSION_REGEX "[^.]*\\.[^.]*")
          elseif (${_NAME}_FIND_VERSION_COUNT EQUAL 3)
            set(_VERSION_REGEX "[^.]*\\.[^.]*\\.[^.]*")
          else ()
            set(_VERSION_REGEX "[^.]*\\.[^.]*\\.[^.]*\\.[^.]*")
          endif ()
          # Truncate the found version to as many components as were requested
          # before comparing for equality.
          string(REGEX REPLACE "^(${_VERSION_REGEX})\\..*" "\\1" _VERSION_HEAD "${_FOUND_VERSION}")
          unset(_VERSION_REGEX)
          if (NOT ${_NAME}_FIND_VERSION VERSION_EQUAL _VERSION_HEAD)
            set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is exact version \"${${_NAME}_FIND_VERSION}\"")
            set(VERSION_OK FALSE)
          else ()
            set(VERSION_MSG "(found suitable exact version \"${_FOUND_VERSION}\")")
          endif ()
          unset(_VERSION_HEAD)
        else ()
          if (NOT ${_NAME}_FIND_VERSION VERSION_EQUAL _FOUND_VERSION)
            set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is exact version \"${${_NAME}_FIND_VERSION}\"")
            set(VERSION_OK FALSE)
          else ()
            set(VERSION_MSG "(found suitable exact version \"${_FOUND_VERSION}\")")
          endif ()
        endif ()
        unset(_VERSION_DOTS)

      else() # minimum version specified:
        if (${_NAME}_FIND_VERSION VERSION_GREATER _FOUND_VERSION)
          set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is at least \"${${_NAME}_FIND_VERSION}\"")
          set(VERSION_OK FALSE)
        else ()
          set(VERSION_MSG "(found suitable version \"${_FOUND_VERSION}\", minimum required is \"${${_NAME}_FIND_VERSION}\")")
        endif ()
      endif()

    else()

      # if the package was not found, but a version was given, add that to the output:
      if(${_NAME}_FIND_VERSION_EXACT)
        set(VERSION_MSG "(Required is exact version \"${${_NAME}_FIND_VERSION}\")")
      else()
        set(VERSION_MSG "(Required is at least version \"${${_NAME}_FIND_VERSION}\")")
      endif()

    endif()
  else ()
    # Check with DEFINED as the found version may be 0.
    if(DEFINED ${FPHSA_VERSION_VAR})
      set(VERSION_MSG "(found version \"${${FPHSA_VERSION_VAR}}\")")
    endif()
  endif ()

  if(VERSION_OK)
    string(APPEND DETAILS "[v${${FPHSA_VERSION_VAR}}(${${_NAME}_FIND_VERSION})]")
  else()
    set(${_NAME}_FOUND FALSE)
  endif()


  # print the result:
  if (${_NAME}_FOUND)
    FIND_PACKAGE_MESSAGE(${_NAME} "Found ${_NAME}: ${${_FIRST_REQUIRED_VAR}} ${VERSION_MSG} ${COMPONENT_MSG}" "${DETAILS}")
  else ()

    if(FPHSA_CONFIG_MODE)
      _FPHSA_HANDLE_FAILURE_CONFIG_MODE()
    else()
      if(NOT VERSION_OK)
        _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: ${VERSION_MSG} (found ${${_FIRST_REQUIRED_VAR}})")
      else()
        _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} (missing:${MISSING_VARS}) ${VERSION_MSG}")
      endif()
    endif()

  endif ()

  # Publish the final result under both spellings of the result variable.
  set(${_NAME}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE)
  set(${_NAME_UPPER}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE)
endfunction()
|
47
cmake/Modules_CUDA_fix/upstream/FindPackageMessage.cmake
Normal file
47
cmake/Modules_CUDA_fix/upstream/FindPackageMessage.cmake
Normal file
@ -0,0 +1,47 @@
|
||||
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
|
||||
# file Copyright.txt or https://cmake.org/licensing for details.
|
||||
|
||||
#.rst:
|
||||
# FindPackageMessage
|
||||
# ------------------
|
||||
#
|
||||
#
|
||||
#
|
||||
# FIND_PACKAGE_MESSAGE(<name> "message for user" "find result details")
|
||||
#
|
||||
# This macro is intended to be used in FindXXX.cmake modules files. It
|
||||
# will print a message once for each unique find result. This is useful
|
||||
# for telling the user where a package was found. The first argument
|
||||
# specifies the name (XXX) of the package. The second argument
|
||||
# specifies the message to display. The third argument lists details
|
||||
# about the find result so that if they change the message will be
|
||||
# displayed again. The macro also obeys the QUIET argument to the
|
||||
# find_package command.
|
||||
#
|
||||
# Example:
|
||||
#
|
||||
# ::
|
||||
#
|
||||
# if(X11_FOUND)
|
||||
# FIND_PACKAGE_MESSAGE(X11 "Found X11: ${X11_X11_LIB}"
|
||||
# "[${X11_X11_LIB}][${X11_INCLUDE_DIR}]")
|
||||
# else()
|
||||
# ...
|
||||
# endif()
|
||||
|
||||
# Prints `msg` once per distinct find result for package `pkg`.
#   pkg     - package name; <pkg>_FIND_QUIETLY suppresses all output
#   msg     - text shown to the user
#   details - fingerprint of the find result; the message is shown again
#             only when this differs from the value stored in the cache
function(FIND_PACKAGE_MESSAGE pkg msg details)
  # Quiet finds neither print nor touch the cache.
  if(${pkg}_FIND_QUIETLY)
    return()
  endif()

  # Newlines are stripped before the fingerprint is compared and stored.
  string(REPLACE "\n" "" details "${details}")
  set(DETAILS_VAR FIND_PACKAGE_MESSAGE_DETAILS_${pkg})

  # Same fingerprint as last time: the message was already printed.
  if("${details}" STREQUAL "${${DETAILS_VAR}}")
    return()
  endif()

  message(STATUS "${msg}")

  # Remember the fingerprint in the cache so later runs stay silent.
  set("${DETAILS_VAR}" "${details}"
    CACHE INTERNAL "Details about finding ${pkg}")
endfunction()
|
5
cmake/Modules_CUDA_fix/upstream/README.md
Normal file
5
cmake/Modules_CUDA_fix/upstream/README.md
Normal file
@ -0,0 +1,5 @@
|
||||
If you need to update files under this folder, we recommend you issue PRs
|
||||
against [the CMake mainline branch](https://github.com/Kitware/CMake/blob/master/Modules/FindCUDA.cmake),
|
||||
and then backport it here for earlier CMake compatibility.
|
||||
|
||||
See [this](../README.md) for more details.
|
@ -76,7 +76,7 @@ function(caffe2_print_configuration_summary)
|
||||
message(STATUS " USE_CUSPARSELT : ${USE_CUSPARSELT}")
|
||||
message(STATUS " USE_CUDSS : ${USE_CUDSS}")
|
||||
message(STATUS " USE_CUFILE : ${USE_CUFILE}")
|
||||
message(STATUS " CUDA version : ${CUDAToolkit_VERSION}")
|
||||
message(STATUS " CUDA version : ${CUDA_VERSION}")
|
||||
message(STATUS " USE_FLASH_ATTENTION : ${USE_FLASH_ATTENTION}")
|
||||
message(STATUS " USE_MEM_EFF_ATTENTION : ${USE_MEM_EFF_ATTENTION}")
|
||||
if(${USE_CUDNN})
|
||||
@ -107,8 +107,9 @@ function(caffe2_print_configuration_summary)
|
||||
get_target_property(__tmp torch::cudss INTERFACE_LINK_LIBRARIES)
|
||||
message(STATUS " cuDSS library : ${__tmp}")
|
||||
endif()
|
||||
message(STATUS " CUDA include path : ${CUDAToolkit_INCLUDE_DIRS}")
|
||||
message(STATUS " NVCC executable : ${CUDAToolkit_NVCC_EXECUTABLE}")
|
||||
message(STATUS " nvrtc : ${CUDA_nvrtc_LIBRARY}")
|
||||
message(STATUS " CUDA include path : ${CUDA_INCLUDE_DIRS}")
|
||||
message(STATUS " NVCC executable : ${CUDA_NVCC_EXECUTABLE}")
|
||||
message(STATUS " CUDA compiler : ${CMAKE_CUDA_COMPILER}")
|
||||
message(STATUS " CUDA flags : ${CMAKE_CUDA_FLAGS}")
|
||||
message(STATUS " CUDA host compiler : ${CMAKE_CUDA_HOST_COMPILER}")
|
||||
|
@ -130,7 +130,7 @@ if(@USE_CUDA@)
|
||||
find_library(CAFFE2_NVRTC_LIBRARY caffe2_nvrtc PATHS "${TORCH_INSTALL_PREFIX}/lib")
|
||||
list(APPEND TORCH_CUDA_LIBRARIES ${CAFFE2_NVRTC_LIBRARY})
|
||||
else()
|
||||
list(APPEND TORCH_CUDA_LIBRARIES torch::nvrtc)
|
||||
set(TORCH_CUDA_LIBRARIES ${CUDA_NVRTC_LIB})
|
||||
endif()
|
||||
|
||||
if(@BUILD_SHARED_LIBS@)
|
||||
|
@ -1,6 +1,13 @@
|
||||
# ---[ cuda
|
||||
|
||||
include_guard(GLOBAL)
|
||||
# Poor man's include guard
|
||||
if(TARGET torch::cudart)
|
||||
return()
|
||||
endif()
|
||||
|
||||
# sccache is only supported in CMake master and not in the newest official
|
||||
# release (3.11.3) yet. Hence we need our own Modules_CUDA_fix to enable sccache.
|
||||
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/../Modules_CUDA_fix)
|
||||
|
||||
# We don't want to statically link cudart, because we rely on its dynamic linkage in
|
||||
# python (follow along torch/cuda/__init__.py and usage of cudaGetErrorName).
|
||||
@ -18,16 +25,9 @@ if(NOT MSVC)
|
||||
set(CUDA_USE_STATIC_CUDA_RUNTIME OFF CACHE INTERNAL "")
|
||||
endif()
|
||||
|
||||
# Enable CUDA language support
|
||||
if(CUDA_TOOLKIT_ROOT_DIR AND NOT CUDAToolkit_ROOT)
|
||||
set(CUDAToolkit_ROOT "${CUDA_TOOLKIT_ROOT_DIR}")
|
||||
endif()
|
||||
|
||||
# CMP0074 - find_package will respect <PackageName>_ROOT variables
|
||||
cmake_policy(SET CMP0074 NEW)
|
||||
find_package(CUDAToolkit)
|
||||
|
||||
if(NOT CUDAToolkit_FOUND)
|
||||
# Find CUDA.
|
||||
find_package(CUDA)
|
||||
if(NOT CUDA_FOUND)
|
||||
message(WARNING
|
||||
"PyTorch: CUDA cannot be found. Depending on whether you are building "
|
||||
"PyTorch or a PyTorch dependent library, the next warning / error will "
|
||||
@ -36,10 +36,8 @@ if(NOT CUDAToolkit_FOUND)
|
||||
return()
|
||||
endif()
|
||||
|
||||
if(CUDAToolkit_VERSION VERSION_LESS 11.0)
|
||||
message(FATAL_ERROR "PyTorch requires CUDA 11.0 or above.")
|
||||
endif()
|
||||
|
||||
# Enable CUDA language support
|
||||
set(CUDAToolkit_ROOT "${CUDA_TOOLKIT_ROOT_DIR}")
|
||||
# Pass clang as host compiler, which according to the docs
|
||||
# Must be done before CUDA language is enabled, see
|
||||
# https://cmake.org/cmake/help/v3.15/variable/CMAKE_CUDA_HOST_COMPILER.html
|
||||
@ -52,38 +50,140 @@ if("X${CMAKE_CUDA_STANDARD}" STREQUAL "X" )
|
||||
endif()
|
||||
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
|
||||
|
||||
message(STATUS "PyTorch: CUDA detected: " ${CUDAToolkit_VERSION})
|
||||
message(STATUS "PyTorch: CUDA nvcc is: " ${CUDAToolkit_NVCC_EXECUTABLE})
|
||||
message(STATUS "PyTorch: CUDA toolkit directory: " ${CUDAToolkit_ROOT})
|
||||
# CMP0074 - find_package will respect <PackageName>_ROOT variables
|
||||
cmake_policy(PUSH)
|
||||
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.12.0)
|
||||
cmake_policy(SET CMP0074 NEW)
|
||||
endif()
|
||||
|
||||
# cuda_select_nvcc_arch_flags is required
|
||||
cmake_policy(SET CMP0146 OLD)
|
||||
find_package(CUDA)
|
||||
find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
cmake_policy(POP)
|
||||
|
||||
if(NOT CMAKE_CUDA_COMPILER_VERSION VERSION_EQUAL CUDAToolkit_VERSION)
|
||||
message(FATAL_ERROR "Found two conflicting CUDA versions:\n"
|
||||
"V${CMAKE_CUDA_COMPILER_VERSION} in '${CUDA_INCLUDE_DIRS}' and\n"
|
||||
"V${CUDAToolkit_VERSION} in '${CUDAToolkit_INCLUDE_DIRS}'")
|
||||
endif()
|
||||
|
||||
message(STATUS "PyTorch: CUDA detected: " ${CUDA_VERSION})
|
||||
message(STATUS "PyTorch: CUDA nvcc is: " ${CUDA_NVCC_EXECUTABLE})
|
||||
message(STATUS "PyTorch: CUDA toolkit directory: " ${CUDA_TOOLKIT_ROOT_DIR})
|
||||
if(CUDA_VERSION VERSION_LESS 11.0)
|
||||
message(FATAL_ERROR "PyTorch requires CUDA 11.0 or above.")
|
||||
endif()
|
||||
|
||||
if(CUDA_FOUND)
  # Sometimes, we may mismatch nvcc with the CUDA headers we are
  # compiling with, e.g., if a ccache nvcc is fed to us by CUDA_NVCC_EXECUTABLE
  # but the PATH is not consistent with CUDA_HOME.  It's better safe
  # than sorry: make sure everything is consistent.
  #
  # The check compiles and runs a tiny program that prints the CUDA_VERSION
  # macro from <cuda.h> as "major.minor" and compares the output against
  # CUDA_VERSION_STRING.
  if(MSVC AND CMAKE_GENERATOR MATCHES "Visual Studio")
    # When using Visual Studio, it attempts to lock the whole binary dir when
    # `try_run` is called, which will cause the build to fail.
    string(RANDOM BUILD_SUFFIX)
    set(PROJECT_RANDOM_BINARY_DIR "${PROJECT_BINARY_DIR}/${BUILD_SUFFIX}")
  else()
    set(PROJECT_RANDOM_BINARY_DIR "${PROJECT_BINARY_DIR}")
  endif()

  # Paths are quoted so that build directories containing spaces or
  # semicolons are passed as a single argument.
  set(file "${PROJECT_BINARY_DIR}/detect_cuda_version.cc")
  file(WRITE "${file}" ""
    "#include <cuda.h>\n"
    "#include <cstdio>\n"
    "int main() {\n"
    "  printf(\"%d.%d\", CUDA_VERSION / 1000, (CUDA_VERSION / 10) % 100);\n"
    "  return 0;\n"
    "}\n"
    )

  # The probe has to be executed, so it is skipped when cross-compiling.
  if(NOT CMAKE_CROSSCOMPILING)
    try_run(run_result compile_result "${PROJECT_RANDOM_BINARY_DIR}" "${file}"
      CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${CUDA_INCLUDE_DIRS}"
      LINK_LIBRARIES ${CUDA_LIBRARIES}
      RUN_OUTPUT_VARIABLE cuda_version_from_header
      COMPILE_OUTPUT_VARIABLE output_var
      )
    if(NOT compile_result)
      message(FATAL_ERROR "PyTorch: Couldn't determine version from header: " ${output_var})
    endif()
    message(STATUS "PyTorch: Header version is: " ${cuda_version_from_header})

    # Both operands are quoted: an empty or variable-like value must be
    # compared as a plain string instead of being dereferenced again.
    if(NOT "${cuda_version_from_header}" STREQUAL "${CUDA_VERSION_STRING}")
      # Force CUDA to be processed again next time
      # TODO: I'm not sure if this counts as an implementation detail of
      # FindCUDA
      #
      # Assign to the variable *named* cuda_version_from_findcuda; expanding
      # it here would use its (empty) value as the variable name and leave the
      # version blank in the error message below.
      set(cuda_version_from_findcuda "${CUDA_VERSION_STRING}")
      unset(CUDA_TOOLKIT_ROOT_DIR_INTERNAL CACHE)
      # Not strictly necessary, but for good luck.
      unset(CUDA_VERSION CACHE)
      # Error out
      message(FATAL_ERROR "FindCUDA says CUDA version is ${cuda_version_from_findcuda} (usually determined by nvcc), "
        "but the CUDA headers say the version is ${cuda_version_from_header}.  This often occurs "
        "when you set both CUDA_HOME and CUDA_NVCC_EXECUTABLE to "
        "non-standard locations, without also setting PATH to point to the correct nvcc.  "
        "Perhaps, try re-running this command again with PATH=${CUDA_TOOLKIT_ROOT_DIR}/bin:$PATH.  "
        "See above log messages for more diagnostics, and see https://github.com/pytorch/pytorch/issues/8092 for more details.")
    endif()
  endif()
endif()
|
||||
|
||||
# ---[ CUDA libraries wrapper
|
||||
|
||||
# Find libnvrtc.so and compute a short content hash for it.
#
# CUDA_NVRTC_LIB is cached from CUDA_nvrtc_LIBRARY. CUDA_NVRTC_SHORTHASH is
# set to the first 8 hex digits of the SHA-256 of that file, or to "XXXXXXXX"
# when the hash cannot be computed.
set(CUDA_NVRTC_LIB "${CUDA_nvrtc_LIBRARY}" CACHE FILEPATH "")
if(CUDA_NVRTC_LIB AND NOT CUDA_NVRTC_SHORTHASH)
  find_package(Python COMPONENTS Interpreter)

  # Start from a failure status so that a missing interpreter takes the same
  # fallback path as a failing command.
  set(_retval 1)
  if(Python_Interpreter_FOUND)
    # execute_process() runs at configure time and needs a real executable
    # path; the Python::Interpreter imported target name is not translated
    # into one here, so use Python_EXECUTABLE instead.
    execute_process(
      COMMAND "${Python_EXECUTABLE}" -c
      "import hashlib;hash=hashlib.sha256();hash.update(open('${CUDA_NVRTC_LIB}','rb').read());print(hash.hexdigest()[:8])"
      RESULT_VARIABLE _retval
      OUTPUT_VARIABLE CUDA_NVRTC_SHORTHASH)
  endif()

  if(NOT _retval EQUAL 0)
    message(WARNING "Failed to compute shorthash for libnvrtc.so")
    set(CUDA_NVRTC_SHORTHASH "XXXXXXXX")
  else()
    # Drop the trailing newline emitted by print().
    string(STRIP "${CUDA_NVRTC_SHORTHASH}" CUDA_NVRTC_SHORTHASH)
    message(STATUS "${CUDA_NVRTC_LIB} shorthash is ${CUDA_NVRTC_SHORTHASH}")
  endif()
endif()
|
||||
|
||||
# Create new style imported libraries.
|
||||
# Several of these libraries have a hardcoded path if CAFFE2_STATIC_LINK_CUDA
|
||||
# is set. This path is where sane CUDA installations have their static
|
||||
# libraries installed. This flag should only be used for binary builds, so
|
||||
# end-users should never have this flag set.
|
||||
|
||||
# cuda
|
||||
add_library(caffe2::cuda INTERFACE IMPORTED)
|
||||
set_property(
|
||||
TARGET caffe2::cuda PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::cuda_driver)
|
||||
|
||||
# cudart
|
||||
add_library(torch::cudart INTERFACE IMPORTED)
|
||||
if(CAFFE2_STATIC_LINK_CUDA)
|
||||
target_link_libraries(torch::cudart INTERFACE CUDA::cudart_static)
|
||||
set_property(
|
||||
TARGET torch::cudart PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::cudart_static)
|
||||
else()
|
||||
target_link_libraries(torch::cudart INTERFACE CUDA::cudart)
|
||||
set_property(
|
||||
TARGET torch::cudart PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::cudart)
|
||||
endif()
|
||||
|
||||
|
||||
# cublas
|
||||
add_library(torch::cublas INTERFACE IMPORTED)
|
||||
# NOTE: cublas is always linked dynamically
|
||||
add_library(caffe2::cublas INTERFACE IMPORTED)
|
||||
if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
|
||||
target_link_libraries(torch::cublas INTERFACE CUDA::cublas CUDA::cublasLt CUDA::cudart_static)
|
||||
set_property(
|
||||
TARGET caffe2::cublas PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
# NOTE: cublas is always linked dynamically
|
||||
CUDA::cublas CUDA::cublasLt)
|
||||
set_property(
|
||||
TARGET caffe2::cublas APPEND PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::cudart_static rt)
|
||||
else()
|
||||
target_link_libraries(torch::cublas INTERFACE CUDA::cublas CUDA::cublasLt)
|
||||
set_property(
|
||||
TARGET caffe2::cublas PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::cublas CUDA::cublasLt)
|
||||
endif()
|
||||
|
||||
# cudnn interface
|
||||
@ -155,41 +255,47 @@ endif()
|
||||
if(CAFFE2_USE_CUFILE)
|
||||
add_library(torch::cufile INTERFACE IMPORTED)
|
||||
if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
|
||||
target_link_libraries(torch::cufile INTERFACE CUDA::cuFile_static CUDA::culibos)
|
||||
set_property(
|
||||
TARGET torch::cufile PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::cuFile_static)
|
||||
else()
|
||||
target_link_libraries(torch::cufile INTERFACE CUDA::cuFile CUDA::culibos)
|
||||
set_property(
|
||||
TARGET torch::cufile PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::cuFile)
|
||||
endif()
|
||||
else()
|
||||
message(STATUS "USE_CUFILE is set to 0. Compiling without cuFile support")
|
||||
endif()
|
||||
|
||||
# curand
|
||||
add_library(torch::curand INTERFACE IMPORTED)
|
||||
add_library(caffe2::curand INTERFACE IMPORTED)
|
||||
if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
|
||||
target_link_libraries(torch::curand INTERFACE CUDA::curand_static)
|
||||
set_property(
|
||||
TARGET caffe2::curand PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::curand_static)
|
||||
else()
|
||||
target_link_libraries(torch::curand INTERFACE CUDA::curand)
|
||||
set_property(
|
||||
TARGET caffe2::curand PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::curand)
|
||||
endif()
|
||||
|
||||
# cufft
|
||||
add_library(torch::cufft INTERFACE IMPORTED)
|
||||
add_library(caffe2::cufft INTERFACE IMPORTED)
|
||||
if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
|
||||
target_link_libraries(torch::cufft INTERFACE CUDA::cufft_static_nocallback)
|
||||
set_property(
|
||||
TARGET caffe2::cufft PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::cufft_static_nocallback)
|
||||
else()
|
||||
target_link_libraries(torch::cufft INTERFACE CUDA::cufft)
|
||||
set_property(
|
||||
TARGET caffe2::cufft PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::cufft)
|
||||
endif()
|
||||
|
||||
# nvrtc
|
||||
add_library(torch::nvrtc INTERFACE IMPORTED)
|
||||
if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
|
||||
add_library(caffe2::nvrtc INTERFACE IMPORTED)
|
||||
set_property(
|
||||
TARGET torch::nvrtc PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::nvrtc_static CUDA::cuda_driver)
|
||||
else()
|
||||
set_property(
|
||||
TARGET torch::nvrtc PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::nvrtc CUDA::cuda_driver)
|
||||
endif()
|
||||
TARGET caffe2::nvrtc PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::nvrtc caffe2::cuda)
|
||||
|
||||
# Add onnx namespace definition to nvcc
|
||||
if(ONNX_NAMESPACE)
|
||||
|
@ -317,138 +317,6 @@ macro(torch_xpu_get_arch_list store_var)
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
################################################################################################
# Function for selecting GPU arch flags for nvcc based on CUDA architectures from parameter list
# Usage:
#   torch_cuda_select_nvcc_arch_flags(out_variable [list of CUDA compute archs])
#
# Each list entry is one of:
#   - "All", "Common" or "Auto" (only when it is the whole list);
#   - a numeric capability such as "8.0", "9.0a" or "8.6(8.0)";
#   - a generation name such as "Ampere" or "Hopper";
#   - any of the last two followed by "+PTX" to also emit PTX.
# Entries may be separated by semicolons, spaces or tabs. An empty list is
# treated as "Auto".
#
# Results (set in the caller's scope):
#   <out_variable>           - list of "-gencode arch=...,code=..." arguments
#   <out_variable>_readable  - space separated sm_XX / compute_XX names
function(torch_cuda_select_nvcc_arch_flags out_variable)
  set(CUDA_ARCH_LIST "${ARGN}")

  if("X${CUDA_ARCH_LIST}" STREQUAL "X")
    set(CUDA_ARCH_LIST "Auto")
  endif()

  set(cuda_arch_bin)
  set(cuda_arch_ptx)

  if("${CUDA_ARCH_LIST}" STREQUAL "All")
    set(CUDA_ARCH_LIST ${CUDA_KNOWN_GPU_ARCHITECTURES})
  elseif("${CUDA_ARCH_LIST}" STREQUAL "Common")
    set(CUDA_ARCH_LIST ${CUDA_COMMON_GPU_ARCHITECTURES})
  elseif("${CUDA_ARCH_LIST}" STREQUAL "Auto")
    cuda_detect_installed_gpus(CUDA_ARCH_LIST)
    message(STATUS "Autodetected CUDA architecture(s): ${CUDA_ARCH_LIST}")
  endif()

  # Now process the list and look for names
  string(REGEX REPLACE "[ \t]+" ";" CUDA_ARCH_LIST "${CUDA_ARCH_LIST}")
  # The list can be empty here (e.g. "All" with no known architectures).
  if(CUDA_ARCH_LIST)
    list(REMOVE_DUPLICATES CUDA_ARCH_LIST)
  endif()

  foreach(arch_name ${CUDA_ARCH_LIST})
    set(arch_bin)
    set(arch_ptx)
    set(add_ptx FALSE)

    # Check to see if we are compiling PTX
    if(arch_name MATCHES "(.*)\\+PTX$")
      set(add_ptx TRUE)
      set(arch_name "${CMAKE_MATCH_1}")
    endif()

    if(arch_name MATCHES "^([0-9]+\\.[0-9]a?(\\([0-9]+\\.[0-9]\\))?)$")
      set(arch_bin "${CMAKE_MATCH_1}")
      set(arch_ptx "${arch_bin}")
    else()
      # Look for it in our list of known architectures.
      # The name is quoted on purpose: unquoted, a name that happens to match
      # a defined variable (e.g. a variable called "Volta") would be
      # dereferenced a second time and select the wrong branch.
      if("${arch_name}" STREQUAL "Kepler+Tesla")
        set(arch_bin 3.7)
      elseif("${arch_name}" STREQUAL "Kepler")
        set(arch_bin 3.5)
        set(arch_ptx 3.5)
      elseif("${arch_name}" STREQUAL "Maxwell+Tegra")
        set(arch_bin 5.3)
      elseif("${arch_name}" STREQUAL "Maxwell")
        set(arch_bin 5.0 5.2)
        set(arch_ptx 5.2)
      elseif("${arch_name}" STREQUAL "Pascal")
        set(arch_bin 6.0 6.1)
        set(arch_ptx 6.1)
      elseif("${arch_name}" STREQUAL "Volta+Tegra")
        set(arch_bin 7.2)
      elseif("${arch_name}" STREQUAL "Volta")
        set(arch_bin 7.0)
        set(arch_ptx 7.0)
      elseif("${arch_name}" STREQUAL "Turing")
        set(arch_bin 7.5)
        set(arch_ptx 7.5)
      elseif("${arch_name}" STREQUAL "Ampere+Tegra")
        set(arch_bin 8.7)
      elseif("${arch_name}" STREQUAL "Ampere")
        set(arch_bin 8.0 8.6)
        set(arch_ptx 8.0 8.6)
      elseif("${arch_name}" STREQUAL "Ada")
        set(arch_bin 8.9)
        set(arch_ptx 8.9)
      elseif("${arch_name}" STREQUAL "Hopper")
        set(arch_bin 9.0)
        set(arch_ptx 9.0)
      elseif("${arch_name}" STREQUAL "Blackwell+Tegra")
        set(arch_bin 10.1)
      elseif("${arch_name}" STREQUAL "Blackwell")
        set(arch_bin 10.0 12.0)
        set(arch_ptx 10.0 12.0)
      else()
        message(SEND_ERROR "Found Unknown CUDA Architecture Name in CUDA_SELECT_NVCC_ARCH_FLAGS: ${arch_name} ")
      endif()
    endif()

    if(NOT arch_bin)
      message(SEND_ERROR "arch_bin wasn't set for some reason")
    endif()
    list(APPEND cuda_arch_bin ${arch_bin})

    if(add_ptx)
      # Names without a dedicated PTX entry fall back to their binary archs.
      if(NOT arch_ptx)
        set(arch_ptx ${arch_bin})
      endif()
      list(APPEND cuda_arch_ptx ${arch_ptx})
    endif()
  endforeach()

  # remove dots and convert to lists
  string(REGEX REPLACE "\\." "" cuda_arch_bin "${cuda_arch_bin}")
  string(REGEX REPLACE "\\." "" cuda_arch_ptx "${cuda_arch_ptx}")
  string(REGEX MATCHALL "[0-9()]+a?" cuda_arch_bin "${cuda_arch_bin}")
  string(REGEX MATCHALL "[0-9]+a?" cuda_arch_ptx "${cuda_arch_ptx}")

  if(cuda_arch_bin)
    list(REMOVE_DUPLICATES cuda_arch_bin)
  endif()
  if(cuda_arch_ptx)
    list(REMOVE_DUPLICATES cuda_arch_ptx)
  endif()

  set(nvcc_flags "")
  set(nvcc_archs_readable "")

  # Tell NVCC to add binaries for the specified GPUs
  foreach(arch ${cuda_arch_bin})
    if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)")
      # User explicitly specified ARCH for the concrete CODE
      list(APPEND nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1})
      list(APPEND nvcc_archs_readable sm_${CMAKE_MATCH_1})
    else()
      # User didn't explicitly specify ARCH for the concrete CODE, we assume ARCH=CODE
      list(APPEND nvcc_flags -gencode arch=compute_${arch},code=sm_${arch})
      list(APPEND nvcc_archs_readable sm_${arch})
    endif()
  endforeach()

  # Tell NVCC to add PTX intermediate code for the specified architectures
  foreach(arch ${cuda_arch_ptx})
    list(APPEND nvcc_flags -gencode arch=compute_${arch},code=compute_${arch})
    list(APPEND nvcc_archs_readable compute_${arch})
  endforeach()

  string(REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable}")
  set(${out_variable} ${nvcc_flags} PARENT_SCOPE)
  set(${out_variable}_readable ${nvcc_archs_readable} PARENT_SCOPE)
endfunction()
|
||||
##############################################################################
|
||||
# Get the NVCC arch flags specified by TORCH_CUDA_ARCH_LIST and CUDA_ARCH_NAME.
|
||||
# Usage:
|
||||
@ -468,7 +336,8 @@ macro(torch_cuda_get_nvcc_gencode_flag store_var)
|
||||
set(TORCH_CUDA_ARCH_LIST TORCH_CUDA_ARCH_LIST ${CUDA_ARCH_NAME})
|
||||
endif()
|
||||
|
||||
torch_cuda_select_nvcc_arch_flags(${store_var} ${TORCH_CUDA_ARCH_LIST})
|
||||
# Invoke cuda_select_nvcc_arch_flags from proper cmake FindCUDA.
|
||||
cuda_select_nvcc_arch_flags(${store_var} ${TORCH_CUDA_ARCH_LIST})
|
||||
endmacro()
|
||||
|
||||
|
||||
|
@ -2417,6 +2417,7 @@ def _get_cuda_arch_flags(cflags: Optional[list[str]] = None) -> list[str]:
|
||||
# The default is sm_30 for CUDA 9.x and 10.x
|
||||
# First check for an env var (same as used by the main setup.py)
|
||||
# Can be one or more architectures, e.g. "6.1" or "3.5;5.2;6.0;6.1;7.0+PTX"
|
||||
# See cmake/Modules_CUDA_fix/upstream/FindCUDA/select_compute_arch.cmake
|
||||
_arch_list = os.environ.get('TORCH_CUDA_ARCH_LIST', None)
|
||||
|
||||
# If not given, determine what's best for the GPU / CUDA version that can be found
|
||||
|
Reference in New Issue
Block a user