mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 12:54:11 +08:00
<!-- copilot:summary --> ### <samp>🤖 Generated by Copilot at 27084ed</samp> This pull request simplifies and cleans up the code that uses the cuDNN library for convolution, batch normalization, CTC loss, and quantized operations. It removes the unnecessary checks and conditions for older cuDNN versions and the experimental cuDNN v8 API, and ~~replaces them with the stable `cudnn_frontend` API that requires cuDNN v8 or higher. It also adds the dependency and configuration for the `cudnn_frontend` library in the cmake and bazel files.~~ Correction: The v7 API will still be available with this PR, and can still be used, without any changes to the defaults. This change simply always _builds_ the v8 API, and removes the case where _only_ the v7 API is built. This is a re-land of https://github.com/pytorch/pytorch/pull/91527 Pull Request resolved: https://github.com/pytorch/pytorch/pull/95722 Approved by: https://github.com/malfet, https://github.com/atalman
1230 lines
49 KiB
CMake
1230 lines
49 KiB
CMake
cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
|
|
#cmake_policy(SET CMP0022 NEW)
|
|
#cmake_policy(SET CMP0023 NEW)
|
|
|
|
# Use compiler ID "AppleClang" instead of "Clang" for XCode.
|
|
# Not setting this sometimes makes XCode C compiler gets detected as "Clang",
|
|
# even when the C++ one is detected as "AppleClang".
|
|
cmake_policy(SET CMP0010 NEW)
|
|
cmake_policy(SET CMP0025 NEW)
|
|
|
|
# Enables CMake to set LTO on compilers other than Intel.
|
|
cmake_policy(SET CMP0069 NEW)
|
|
# Enable the policy for CMake subprojects.
|
|
# protobuf currently causes issues
|
|
#set(CMAKE_POLICY_DEFAULT_CMP0069 NEW)
|
|
|
|
# Suppress warning flags in default MSVC configuration. It's not
|
|
# mandatory that we do this (and we don't if cmake is old), but it's
|
|
# nice when it's possible, and it's possible on our Windows configs.
|
|
cmake_policy(SET CMP0092 NEW)
|
|
|
|
# ---[ Project and semantic versioning.
|
|
project(Torch CXX C)
|
|
|
|
# Record in LINUX (TRUE/FALSE) whether the target system is Linux.
# CMAKE_SYSTEM_NAME is passed by name: if() dereferences unquoted names on its
# own, whereas expanding it first would let CMake dereference the *result* a
# second time if a variable with that name (e.g. "Linux") happened to exist.
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
  set(LINUX TRUE)
else()
  set(LINUX FALSE)
endif()
|
|
|
|
set(CMAKE_INSTALL_MESSAGE NEVER)
|
|
|
|
# check and set CMAKE_CXX_STANDARD
|
|
# Warn when CMAKE_CXX_FLAGS already carries a "-std=c++" setting, then declare
# the C++ and C standards as cache entries.
string(FIND "${CMAKE_CXX_FLAGS}" "-std=c++" env_cxx_standard)
if(env_cxx_standard GREATER -1)
  # message() joins its arguments with no separator, so the first fragment
  # carries the trailing space that separates the two sentences.
  message(
    WARNING "C++ standard version definition detected in environment variable. "
    "PyTorch requires -std=c++17. Please remove -std=c++ settings in your environment.")
endif()
set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ standard whose features are requested to build this target.")
set(CMAKE_C_STANDARD 11 CACHE STRING "The C standard whose features are requested to build this target.")
|
|
|
|
# ---[ Utils
|
|
include(cmake/public/utils.cmake)
|
|
|
|
# --- [ Check that minimal gcc version is 9.4+
|
|
# Abort configuration when GCC is older than 9.4.
# The message mode must be FATAL_ERROR: "FATAL" is not a recognised mode, so
# it would be printed as part of an ordinary notice and processing would
# continue with the unsupported compiler.
if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.4)
  message(FATAL_ERROR "GCC-9.4 or newer is required to compile PyTorch, but found ${CMAKE_CXX_COMPILER_VERSION}")
endif()
|
|
|
|
# On Linux, forward the libstdc++ ABI selection to the C++ and CUDA compile
# flags. GLIBCXX_USE_CXX11_ABI is not set in this file; presumably
# cmake/CheckAbi.cmake defines it -- confirm there.
if(LINUX)
  include(cmake/CheckAbi.cmake)
  string(APPEND CMAKE_CXX_FLAGS " -D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI}")
  string(APPEND CMAKE_CUDA_FLAGS " -D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI}")
  # The variable is passed by name so that an unset/empty value evaluates to
  # false instead of producing a malformed "if( EQUAL 1)".
  if(GLIBCXX_USE_CXX11_ABI EQUAL 1)
    # CMAKE_CXX_STANDARD_REQUIRED is the variable CMake reads to initialise
    # the CXX_STANDARD_REQUIRED target property; a plain variable named
    # CXX_STANDARD_REQUIRED is not consulted by CMake.
    set(CMAKE_CXX_STANDARD_REQUIRED ON)
  else()
    # Please note this is required in order to ensure compatibility between gcc 9 and gcc 7
    # This could be removed when all Linux PyTorch binary builds are compiled by the same toolchain again
    include(CheckCXXCompilerFlag)
    append_cxx_flag_if_supported("-fabi-version=11" CMAKE_CXX_FLAGS)
  endif()
endif()
|
|
|
|
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
|
set(CMAKE_LINK_WHAT_YOU_USE TRUE)
|
|
|
|
# One variable that determines whether the current cmake process is being run
|
|
# with the main Caffe2 library. This is useful for building modules - if
|
|
# modules are built with the main Caffe2 library then one does not need to do
|
|
# find caffe2 in the cmake script. One can usually guard it in some way like
|
|
# if(NOT CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO)
|
|
# find_package(Caffe2 REQUIRED)
|
|
# endif()
|
|
set(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO ON)
|
|
|
|
# Googletest's cmake files are going to set it on once they are processed. Let's
|
|
# set it at the very beginning so that the entire build is deterministic.
|
|
set(THREADS_PREFER_PTHREAD_FLAG ON)
|
|
|
|
# First configure only: remember whether the caller supplied BLAS explicitly,
# and persist that answer in the cache.
if(NOT DEFINED BLAS_SET_BY_USER)
  if(NOT DEFINED BLAS)
    message(STATUS "Not forcing any particular BLAS to be found")
    set(BLAS_SET_BY_USER FALSE)
  else()
    set(BLAS_SET_BY_USER TRUE)
  endif()
  set(BLAS_SET_BY_USER ${BLAS_SET_BY_USER} CACHE STRING "Marks whether BLAS was manually set by user or auto-detected")
endif()
|
|
|
|
# Apple specific
|
|
# Apple specific: find-order tweaks, compiler version probe, RPATH, and
# detection of the MPSGraph framework (result in MPS_FOUND).
if(APPLE)
  # These lines are an attempt to make find_package(cuda) pick up
  # libcuda.dylib, and not cuda.framework. It doesn't work all
  # the time, but it seems to help for some users.
  # TODO: replace this with a more robust fix
  set(CMAKE_FIND_FRAMEWORK LAST)
  set(CMAKE_FIND_APPBUNDLE LAST)

  # Get clang version on macOS
  execute_process(COMMAND ${CMAKE_CXX_COMPILER} --version OUTPUT_VARIABLE clang_full_version_string)
  # The compiler output is quoted so that it is always passed as exactly one
  # argument: unquoted, an empty output leaves string(REGEX REPLACE) without
  # its input argument, and any ';' in the output would split it.
  string(REGEX REPLACE "Apple (.*) version ([0-9]+\\.[0-9]+).*" "\\2" CLANG_VERSION_STRING "${clang_full_version_string}")
  message(STATUS "CLANG_VERSION_STRING: ${CLANG_VERSION_STRING}")

  # RPATH stuff
  set(CMAKE_MACOSX_RPATH ON)
  if(NOT IOS)
    # Determine if we can link against MPSGraph
    set(MPS_FOUND OFF)
    execute_process(
      COMMAND bash -c "xcrun --sdk macosx --show-sdk-version"
      RESULT_VARIABLE _exit_code
      OUTPUT_VARIABLE _macosx_sdk_version
      OUTPUT_STRIP_TRAILING_WHITESPACE)
    if(_exit_code EQUAL 0)
      # MPS is only considered when the SDK reports version 12.3 or newer.
      set(_MPS_supported_os_version OFF)
      if(_macosx_sdk_version VERSION_GREATER_EQUAL 12.3)
        set(_MPS_supported_os_version ON)
      endif()
      message(STATUS "sdk version: ${_macosx_sdk_version}, mps supported: ${_MPS_supported_os_version}")
      execute_process(
        COMMAND bash -c "xcrun --sdk macosx --show-sdk-path"
        OUTPUT_VARIABLE _macosx_sdk_path
        OUTPUT_STRIP_TRAILING_WHITESPACE)
      set(_SDK_SEARCH_PATH "${_macosx_sdk_path}/System/Library/Frameworks/")
      set(_FRAMEWORK_SEARCH_PATH "/System/Library/Frameworks/")

      # The framework must be present both on the running system and in the SDK.
      find_library(_MPS_fwrk_path_ NAMES MetalPerformanceShadersGraph MetalPerformanceShaders PATHS ${_FRAMEWORK_SEARCH_PATH} NO_DEFAULT_PATH)
      find_library(_MPS_sdk_path_ NAMES MetalPerformanceShadersGraph MetalPerformanceShaders PATHS ${_SDK_SEARCH_PATH} NO_DEFAULT_PATH)

      if(_MPS_supported_os_version AND _MPS_fwrk_path_ AND _MPS_sdk_path_)
        set(MPS_FOUND ON)
        message(STATUS "MPSGraph framework found")
      else()
        message(STATUS "MPSGraph framework not found")
      endif()
    else()
      message(STATUS "MPS: unable to get MacOS sdk version")
      message(STATUS "MPSGraph framework not found")
    endif()
  endif()
endif()
|
|
|
|
# Classify the target processor: exactly one of CPU_INTEL / CPU_AARCH64 is ON
# for a recognised architecture, and both are OFF otherwise.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(AMD64|x86_64)")
  set(CPU_AARCH64 OFF)
  set(CPU_INTEL ON)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64)")
  set(CPU_AARCH64 ON)
  set(CPU_INTEL OFF)
else()
  set(CPU_AARCH64 OFF)
  set(CPU_INTEL OFF)
endif()
|
|
|
|
|
|
# For non-supported platforms, turn USE_DISTRIBUTED off by default.
|
|
# It is not tested and likely won't work without additional changes.
|
|
# Anywhere other than Linux and Windows, seed the USE_DISTRIBUTED cache entry
# with OFF (an existing cache value is left untouched by set(... CACHE ...)).
if(NOT (LINUX OR WIN32))
  set(USE_DISTRIBUTED OFF CACHE STRING "Use distributed")
  # On macOS, if USE_DISTRIBUTED is enabled (specified by the user),
  # then make Gloo build with the libuv transport.
  if(USE_DISTRIBUTED AND APPLE)
    set(USE_LIBUV ON CACHE STRING "")
  endif()
endif()
|
|
|
|
# ---[ Options.
|
|
# Note to developers: if you add an option below, make sure you also add it to
|
|
# cmake/Summary.cmake so that the summary prints out the option values.
|
|
include(CMakeDependentOption)
|
|
option(ATEN_NO_TEST "Do not build ATen test binaries" OFF)
|
|
option(BUILD_BINARY "Build C++ binaries" OFF)
|
|
option(BUILD_DOCS "Build Caffe2 documentation" OFF)
|
|
option(BUILD_CUSTOM_PROTOBUF "Build and use Caffe2's own protobuf under third_party" ON)
|
|
option(BUILD_PYTHON "Build Python binaries" ON)
|
|
option(BUILD_CAFFE2 "Master flag to build Caffe2" OFF)
|
|
option(BUILD_LITE_INTERPRETER "Master flag to build Lite Interpreter" OFF)
|
|
cmake_dependent_option(
|
|
BUILD_CAFFE2_OPS "Build Caffe2 operators" ON
|
|
"BUILD_CAFFE2" OFF)
|
|
option(BUILD_SHARED_LIBS "Build libcaffe2.so" ON)
|
|
cmake_dependent_option(
|
|
CAFFE2_LINK_LOCAL_PROTOBUF "If set, build protobuf inside libcaffe2.so." ON
|
|
"BUILD_SHARED_LIBS AND BUILD_CUSTOM_PROTOBUF" OFF)
|
|
cmake_dependent_option(
|
|
CAFFE2_USE_MSVC_STATIC_RUNTIME "Using MSVC static runtime libraries" ON
|
|
"NOT BUILD_SHARED_LIBS" OFF)
|
|
option(BUILD_TEST "Build C++ test binaries (need gtest and gbenchmark)" OFF)
|
|
option(BUILD_AOT_INDUCTOR_TEST "Build C++ test binaries for aot-inductor" OFF)
|
|
option(BUILD_STATIC_RUNTIME_BENCHMARK "Build C++ binaries for static runtime benchmarks (need gbenchmark)" OFF)
|
|
option(BUILD_TENSOREXPR_BENCHMARK "Build C++ binaries for tensorexpr benchmarks (need gbenchmark)" OFF)
|
|
option(BUILD_MOBILE_BENCHMARK "Build C++ test binaries for mobile (ARM) targets(need gtest and gbenchmark)" OFF)
|
|
option(BUILD_MOBILE_TEST "Build C++ test binaries for mobile (ARM) targets(need gtest and gbenchmark)" OFF)
|
|
option(BUILD_JNI "Build JNI bindings" OFF)
|
|
option(BUILD_MOBILE_AUTOGRAD "Build autograd function in mobile build (in development)" OFF)
|
|
cmake_dependent_option(
|
|
INSTALL_TEST "Install test binaries if BUILD_TEST is on" ON
|
|
"BUILD_TEST" OFF)
|
|
option(USE_CPP_CODE_COVERAGE "Compile C/C++ with code coverage flags" OFF)
|
|
option(USE_COLORIZE_OUTPUT "Colorize output during compilation" ON)
|
|
option(USE_ASAN "Use Address+Undefined Sanitizers" OFF)
|
|
option(USE_TSAN "Use Thread Sanitizer" OFF)
|
|
option(USE_CUDA "Use CUDA" ON)
|
|
cmake_dependent_option(
|
|
BUILD_LAZY_CUDA_LINALG "Build cuda linalg ops as separate library" ON "USE_CUDA AND LINUX AND BUILD_PYTHON" OFF)
|
|
cmake_dependent_option(USE_ROCM "Use ROCm" ON "LINUX" OFF)
|
|
option(CAFFE2_STATIC_LINK_CUDA "Statically link CUDA libraries" OFF)
|
|
cmake_dependent_option(
|
|
USE_CUDNN "Use cuDNN" ON
|
|
"USE_CUDA" OFF)
|
|
cmake_dependent_option(
|
|
USE_STATIC_CUDNN "Use cuDNN static libraries" OFF
|
|
"USE_CUDNN" OFF)
|
|
cmake_dependent_option(
|
|
USE_CUSPARSELT "Use cuSPARSELt" ON
|
|
"USE_CUDA" OFF)
|
|
option(USE_FBGEMM "Use FBGEMM (quantized 8-bit server operators)" ON)
|
|
option(USE_KINETO "Use Kineto profiling library" ON)
|
|
option(USE_CUPTI_SO "Use CUPTI as a shared library" ON)
|
|
option(USE_FAKELOWP "Use FakeLowp operators" OFF)
|
|
option(USE_FFMPEG "Use ffmpeg" OFF)
|
|
option(USE_GFLAGS "Use GFLAGS" OFF)
|
|
option(USE_GLOG "Use GLOG" OFF)
|
|
option(USE_LEVELDB "Use LEVELDB" OFF)
|
|
option(USE_LITE_PROTO "Use lite protobuf instead of full." OFF)
|
|
option(USE_LMDB "Use LMDB" OFF)
|
|
option(USE_MAGMA "Use MAGMA" ON)
|
|
option(USE_METAL "Use Metal for Caffe2 iOS build" ON)
|
|
option(USE_PYTORCH_METAL "Use Metal for PyTorch iOS build" OFF)
|
|
option(USE_PYTORCH_METAL_EXPORT "Export Metal models on MacOSX desktop" OFF)
|
|
option(USE_NATIVE_ARCH "Use -march=native" OFF)
|
|
cmake_dependent_option(
|
|
USE_MPS "Use MPS for macOS build" ON
|
|
"MPS_FOUND" OFF)
|
|
cmake_dependent_option(
|
|
USE_NCCL "Use NCCL" ON
|
|
"USE_CUDA OR USE_ROCM;UNIX;NOT APPLE" OFF)
|
|
cmake_dependent_option(USE_RCCL "Use RCCL" ON
|
|
USE_NCCL OFF)
|
|
cmake_dependent_option(
|
|
USE_STATIC_NCCL "Use static NCCL" OFF
|
|
"USE_NCCL" OFF)
|
|
cmake_dependent_option(
|
|
USE_SYSTEM_NCCL "Use system-wide NCCL" OFF
|
|
"USE_NCCL" OFF)
|
|
option(USE_NNAPI "Use NNAPI" OFF)
|
|
option(USE_NNPACK "Use NNPACK" ON)
|
|
cmake_dependent_option(
|
|
USE_NUMA "Use NUMA. Only available on Linux." ON
|
|
"LINUX" OFF)
|
|
cmake_dependent_option(
|
|
USE_NVRTC "Use NVRTC. Only available if USE_CUDA is on." OFF
|
|
"USE_CUDA" OFF)
|
|
option(USE_NUMPY "Use NumPy" ON)
|
|
option(USE_OBSERVERS "Use observers module." OFF)
|
|
option(USE_OPENCL "Use OpenCL" OFF)
|
|
option(USE_OPENCV "Use OpenCV" OFF)
|
|
option(USE_OPENMP "Use OpenMP for parallel code" ON)
|
|
option(USE_PRECOMPILED_HEADERS "Use pre-compiled headers to accelerate build." OFF)
|
|
|
|
option(USE_PROF "Use profiling" OFF)
|
|
option(USE_QNNPACK "Use QNNPACK (quantized 8-bit operators)" ON)
|
|
option(USE_PYTORCH_QNNPACK "Use ATen/QNNPACK (quantized 8-bit operators)" ON)
|
|
option(USE_REDIS "Use Redis" OFF)
|
|
option(USE_ROCKSDB "Use RocksDB" OFF)
|
|
option(USE_SNPE "Use Qualcomm's SNPE library" OFF)
|
|
option(USE_SYSTEM_EIGEN_INSTALL
|
|
"Use system Eigen instead of the one under third_party" OFF)
|
|
option(USE_TENSORRT "Using Nvidia TensorRT library" OFF)
|
|
cmake_dependent_option(
|
|
USE_VALGRIND "Use Valgrind. Only available on Linux." ON
|
|
"LINUX" OFF)
|
|
|
|
if(NOT DEFINED USE_VULKAN)
|
|
cmake_dependent_option(
|
|
USE_VULKAN "Use Vulkan GPU backend" ON
|
|
"ANDROID" OFF)
|
|
endif()
|
|
|
|
option(USE_SLEEF_FOR_ARM_VEC256 "Use sleef for arm" OFF)
|
|
option(USE_SOURCE_DEBUG_ON_MOBILE "Enable " ON)
|
|
option(USE_LITE_INTERPRETER_PROFILER "Enable " ON)
|
|
option(USE_VULKAN_FP16_INFERENCE "Vulkan - Use fp16 inference" OFF)
|
|
option(USE_VULKAN_RELAXED_PRECISION "Vulkan - Use relaxed precision math in the kernels (mediump)" OFF)
|
|
# option USE_XNNPACK: try to enable xnnpack by default.
|
|
option(USE_XNNPACK "Use XNNPACK" ON)
|
|
option(USE_ZMQ "Use ZMQ" OFF)
|
|
option(USE_ZSTD "Use ZSTD" OFF)
|
|
option(USE_ROCM_KERNEL_ASSERT "Use Kernel Assert for ROCm" OFF)
|
|
# Ensure that an ITT build is the default for x86 CPUs
|
|
cmake_dependent_option(
|
|
USE_ITT "Use Intel(R) VTune Profiler ITT functionality" ON
|
|
"CPU_INTEL" OFF)
|
|
# Ensure that an MKLDNN build is the default for x86 CPUs
|
|
# but optional for AArch64 (dependent on -DUSE_MKLDNN).
|
|
cmake_dependent_option(
|
|
USE_MKLDNN "Use MKLDNN. Only available on x86, x86_64, and AArch64." "${CPU_INTEL}"
|
|
"CPU_INTEL OR CPU_AARCH64" OFF)
|
|
cmake_dependent_option(
|
|
USE_MKLDNN_ACL "Use Compute Library for the Arm architecture." OFF
|
|
"USE_MKLDNN AND CPU_AARCH64" OFF)
|
|
set(MKLDNN_ENABLE_CONCURRENT_EXEC ${USE_MKLDNN})
|
|
cmake_dependent_option(
|
|
USE_MKLDNN_CBLAS "Use CBLAS in MKLDNN" OFF
|
|
"USE_MKLDNN" OFF)
|
|
option(USE_STATIC_MKL "Prefer to link with MKL statically (Unix only)" OFF)
|
|
option(USE_DISTRIBUTED "Use distributed" ON)
|
|
cmake_dependent_option(
|
|
USE_MPI "Use MPI for Caffe2. Only available if USE_DISTRIBUTED is on." ON
|
|
"USE_DISTRIBUTED" OFF)
|
|
cmake_dependent_option(
|
|
USE_UCC "Use UCC. Only available if USE_DISTRIBUTED is on." OFF
|
|
"USE_DISTRIBUTED" OFF)
|
|
cmake_dependent_option(
|
|
USE_SYSTEM_UCC "Use system-wide UCC" OFF
|
|
"USE_UCC" OFF)
|
|
cmake_dependent_option(
|
|
USE_C10D_UCC "USE C10D UCC" ON "USE_DISTRIBUTED;USE_UCC" OFF)
|
|
cmake_dependent_option(
|
|
USE_GLOO "Use Gloo. Only available if USE_DISTRIBUTED is on." ON
|
|
"USE_DISTRIBUTED" OFF)
|
|
cmake_dependent_option(
|
|
USE_GLOO_WITH_OPENSSL "Use Gloo with OpenSSL. Only available if USE_GLOO is on." OFF
|
|
"USE_GLOO AND LINUX AND NOT INTERN_BUILD_MOBILE" OFF)
|
|
cmake_dependent_option(
|
|
USE_C10D_GLOO "USE C10D GLOO" ON "USE_DISTRIBUTED;USE_GLOO" OFF)
|
|
cmake_dependent_option(
|
|
USE_C10D_NCCL "USE C10D NCCL" ON "USE_DISTRIBUTED;USE_NCCL" OFF)
|
|
cmake_dependent_option(
|
|
USE_C10D_MPI "USE C10D MPI" ON "USE_DISTRIBUTED;USE_MPI" OFF)
|
|
cmake_dependent_option(
|
|
USE_TENSORPIPE "Use TensorPipe. Only available if USE_DISTRIBUTED is on." ON
|
|
"USE_DISTRIBUTED" OFF)
|
|
option(USE_TBB "Use TBB (Deprecated)" OFF)
|
|
cmake_dependent_option(
|
|
USE_SYSTEM_TBB "Use system-provided Intel TBB." OFF "USE_TBB" OFF)
|
|
option(ONNX_ML "Enable traditional ONNX ML API." ON)
|
|
option(HAVE_SOVERSION "Whether to add SOVERSION to the shared objects" OFF)
|
|
option(BUILD_LIBTORCH_CPU_WITH_DEBUG "Enable RelWithDebInfo for libtorch_cpu target only" OFF)
|
|
cmake_dependent_option(USE_CCACHE "Attempt using CCache to wrap the compilation" ON "UNIX" OFF)
|
|
option(WERROR "Build with -Werror supported by the compiler" OFF)
|
|
option(DEBUG_CUDA "When compiling DEBUG, also attempt to compile CUDA with debug flags (may cause nvcc to OOM)" OFF)
|
|
option(USE_COREML_DELEGATE "Use the CoreML backend through delegate APIs" OFF)
|
|
option(USE_PER_OPERATOR_HEADERS "Whether ATen should generate separate headers for each operator" ON)
|
|
cmake_dependent_option(
|
|
BUILD_LAZY_TS_BACKEND "Build the lazy Torchscript backend, not compatible with mobile builds" ON
|
|
"NOT INTERN_BUILD_MOBILE" OFF)
|
|
cmake_dependent_option(
|
|
BUILD_FUNCTORCH "Build Functorch" ON "BUILD_PYTHON" OFF)
|
|
|
|
option(USE_MIMALLOC "Use mimalloc" OFF)
|
|
# Enable third party mimalloc library to improve memory allocation performance on Windows.
|
|
if(WIN32)
|
|
set(USE_MIMALLOC ON)
|
|
endif()
|
|
|
|
# When USE_CCACHE is on and a ccache executable can be located, register it as
# the compiler launcher for C, C++ and CUDA.
if(USE_CCACHE)
  find_program(CCACHE_PROGRAM ccache)
  if(NOT CCACHE_PROGRAM)
    message(STATUS "Could not find ccache. Consider installing ccache to speed up compilation.")
  else()
    foreach(launcher_lang C CXX CUDA)
      set(CMAKE_${launcher_lang}_COMPILER_LAUNCHER "${CCACHE_PROGRAM}" CACHE STRING "${launcher_lang} compiler launcher")
    endforeach()
  endif()
endif()
|
|
|
|
# Since TensorPipe does not support Windows, set it to OFF when WIN32 detected
|
|
# On Windows platform, if user does not install libuv in build conda env and
|
|
# does not set libuv_ROOT environment variable. Set USE_DISTRIBUTED to OFF.
|
|
# Windows: TensorPipe is always switched off. If distributed is requested and
# the libuv_ROOT environment variable is absent, look for libuv under the
# conda prefixes; export libuv_ROOT when found, otherwise disable distributed.
if(WIN32)
  set(USE_TENSORPIPE OFF)
  message(WARNING "TensorPipe cannot be used on Windows. Set it to OFF")

  if(USE_DISTRIBUTED AND NOT DEFINED ENV{libuv_ROOT})
    find_library(
      libuv_tmp_LIBRARY
      NAMES uv libuv
      HINTS $ENV{CONDA_PREFIX}\\Library $ENV{PREFIX}\\Library
      PATH_SUFFIXES lib
      NO_DEFAULT_PATH)
    if(libuv_tmp_LIBRARY)
      # Two levels above the library file, i.e. the prefix that contains lib/.
      set(ENV{libuv_ROOT} ${libuv_tmp_LIBRARY}/../../)
    else()
      set(USE_DISTRIBUTED OFF)
      set(USE_GLOO OFF)
      message(
        WARNING "Libuv is not installed in current conda env. Set USE_DISTRIBUTED to OFF. "
        "Please run command 'conda install -c conda-forge libuv=1.39' to install libuv.")
    endif()
  endif()
endif()
|
|
|
|
if(USE_GLOO_WITH_OPENSSL)
|
|
set(USE_TCP_OPENSSL_LOAD ON CACHE STRING "")
|
|
endif()
|
|
|
|
# Linux distributions do not want too many embedded sources, in that sense we
|
|
# need to be able to build pytorch with an (almost) empty third_party
|
|
# directory.
|
|
# USE_SYSTEM_LIBS is a shortcut variable to toggle all the # USE_SYSTEM_*
|
|
# variables on. Individual USE_SYSTEM_* variables can be toggled with
|
|
# USE_SYSTEM_LIBS being "OFF".
|
|
option(USE_SYSTEM_LIBS "Use all available system-provided libraries." OFF)
|
|
option(USE_SYSTEM_CPUINFO "Use system-provided cpuinfo." OFF)
|
|
option(USE_SYSTEM_SLEEF "Use system-provided sleef." OFF)
|
|
option(USE_SYSTEM_GLOO "Use system-provided gloo." OFF)
|
|
option(USE_SYSTEM_FP16 "Use system-provided fp16." OFF)
|
|
option(USE_SYSTEM_PYBIND11 "Use system-provided PyBind11." OFF)
|
|
option(USE_SYSTEM_PTHREADPOOL "Use system-provided pthreadpool." OFF)
|
|
option(USE_SYSTEM_PSIMD "Use system-provided psimd." OFF)
|
|
option(USE_SYSTEM_FXDIV "Use system-provided fxdiv." OFF)
|
|
option(USE_SYSTEM_BENCHMARK "Use system-provided google benchmark." OFF)
|
|
option(USE_SYSTEM_ONNX "Use system-provided onnx." OFF)
|
|
option(USE_SYSTEM_XNNPACK "Use system-provided xnnpack." OFF)
|
|
option(USE_SYSTEM_ZSTD "Use system-provided zstd." OFF)
|
|
option(USE_GOLD_LINKER "Use ld.gold to link" OFF)
|
|
# USE_SYSTEM_LIBS fan-out: switch on every individual system-library toggle,
# turn off the bundled protobuf build, and switch on the NCCL/TBB/ZSTD system
# variants only when the corresponding feature is itself enabled.
if(USE_SYSTEM_LIBS)
  foreach(system_lib_toggle
      USE_SYSTEM_CPUINFO
      USE_SYSTEM_SLEEF
      USE_SYSTEM_GLOO
      USE_SYSTEM_EIGEN_INSTALL
      USE_SYSTEM_FP16
      USE_SYSTEM_PTHREADPOOL
      USE_SYSTEM_PSIMD
      USE_SYSTEM_FXDIV
      USE_SYSTEM_BENCHMARK
      USE_SYSTEM_ONNX
      USE_SYSTEM_XNNPACK
      USE_SYSTEM_PYBIND11)
    set(${system_lib_toggle} ON)
  endforeach()
  set(BUILD_CUSTOM_PROTOBUF OFF)

  if(USE_NCCL)
    set(USE_SYSTEM_NCCL ON)
  endif()
  if(USE_TBB)
    set(USE_SYSTEM_TBB ON)
  endif()
  if(USE_ZSTD)
    set(USE_SYSTEM_ZSTD ON)
  endif()
endif()
|
|
|
|
# Used when building Caffe2 through setup.py
|
|
option(BUILDING_WITH_TORCH_LIBS "Tell cmake if Caffe2 is being built alongside torch libs" ON)
|
|
|
|
# /Z7 override option
|
|
# When generating debug symbols, CMake default to use the flag /Zi.
|
|
# However, it is not compatible with sccache. So we rewrite it off.
|
|
# But some users don't use sccache; this override is for them.
|
|
cmake_dependent_option(
|
|
MSVC_Z7_OVERRIDE "Work around sccache bug by replacing /Zi and /ZI with /Z7 when using MSVC (if you are not using sccache, you can turn this OFF)" ON
|
|
"MSVC" OFF)
|
|
|
|
# Default ONNX namespace: "onnx" when the system ONNX is used, "onnx_torch"
# otherwise. Stored as a cache entry, so an existing value is kept.
if(USE_SYSTEM_ONNX)
  set(ONNX_NAMESPACE "onnx" CACHE STRING "A namespace for ONNX; needed to build with other frameworks that share ONNX.")
else()
  set(ONNX_NAMESPACE "onnx_torch" CACHE STRING "A namespace for ONNX; needed to build with other frameworks that share ONNX.")
endif()
|
|
set(SELECTED_OP_LIST "" CACHE STRING
|
|
"Path to the yaml file that contains the list of operators to include for custom build. Include all operators by default.")
|
|
option(
|
|
STATIC_DISPATCH_BACKEND
|
|
"Name of the backend for which static dispatch code is generated, e.g.: CPU."
|
|
"")
|
|
option(USE_LIGHTWEIGHT_DISPATCH "Enable codegen unboxing for ATen ops, need to work with static dispatch in order to work properly." OFF)
|
|
if(USE_LIGHTWEIGHT_DISPATCH AND NOT STATIC_DISPATCH_BACKEND)
|
|
message(FATAL_ERROR "Need to enable static dispatch after enabling USE_LIGHTWEIGHT_DISPATCH.")
|
|
endif()
|
|
option(
|
|
TRACING_BASED
|
|
"Master flag to build Lite Interpreter with tracing build option"
|
|
OFF)
|
|
option(BUILD_EXECUTORCH "Master flag to build Executorch" ON)
|
|
# This is a fix for a rare build issue on Ubuntu:
|
|
# symbol lookup error: miniconda3/envs/pytorch-py3.7/lib/libmkl_intel_lp64.so: undefined symbol: mkl_blas_dsyrk
|
|
# https://software.intel.com/en-us/articles/symbol-lookup-error-when-linking-intel-mkl-with-gcc-on-ubuntu
|
|
if(LINUX)
|
|
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-as-needed")
|
|
endif()
|
|
|
|
# MSVC-only adjustments to the global compiler and linker flag variables.
if(MSVC)
  # MSVC by default does not apply the correct __cplusplus version as specified by the C++ standard
  # because MSVC is not a completely compliant implementation. This option forces MSVC to use the
  # appropriate value given the requested --std option. This fixes a compilation issue mismatch
  # between GCC/Clang and MSVC.
  #
  # See:
  # * https://learn.microsoft.com/en-us/cpp/build/reference/zc-cplusplus?view=msvc-170
  # * https://en.cppreference.com/w/cpp/preprocessor/replace#Predefined_macros
  string(APPEND CMAKE_CXX_FLAGS " /Zc:__cplusplus")
  string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler /Zc:__cplusplus")

  set(CMAKE_NINJA_CMCLDEPS_RC OFF)

  # Pass 1: per-language, per-configuration compiler flags.
  foreach(flag_var
      CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
      CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO
      CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
      CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
    # Replace /Zi and /ZI with /Z7
    if(MSVC_Z7_OVERRIDE)
      if(${flag_var} MATCHES "/Z[iI]")
        string(REGEX REPLACE "/Z[iI]" "/Z7" ${flag_var} "${${flag_var}}")
      endif()
    endif()

    # Select the runtime library flavour: /MT when the static runtime is
    # requested, /MD otherwise.
    if(${CAFFE2_USE_MSVC_STATIC_RUNTIME})
      if(${flag_var} MATCHES "/MD")
        string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
      endif()
    else()
      if(${flag_var} MATCHES "/MT")
        string(REGEX REPLACE "/MT" "/MD" ${flag_var} "${${flag_var}}")
      endif()
    endif()

    # /bigobj increases number of sections in .obj file, which is needed to link
    # against libraries in Python 2.7 under Windows
    # For Visual Studio generators, if /MP is not added, then we may need
    # to add /MP to the flags.
    # For other generators like ninja, we don't need to add /MP because it is
    # already handled by the generator itself.
    if(CMAKE_GENERATOR MATCHES "Visual Studio" AND NOT ${flag_var} MATCHES "/MP")
      string(APPEND ${flag_var} " /MP")
    endif()
    string(APPEND ${flag_var} " /bigobj")
  endforeach()

  # Pass 2: strip every debug-information flag from the base, Release and
  # MinSizeRel flag sets.
  foreach(flag_var
      CMAKE_C_FLAGS CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_MINSIZEREL
      CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_MINSIZEREL)
    if(${flag_var} MATCHES "/Z[iI7]")
      string(REGEX REPLACE "/Z[iI7]" "" ${flag_var} "${${flag_var}}")
    endif()
  endforeach()

  # Pass 3: linker flags of the debug-carrying configurations.
  foreach(flag_var
      CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO
      CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO
      CMAKE_SHARED_LINKER_FLAGS_DEBUG CMAKE_STATIC_LINKER_FLAGS_DEBUG
      CMAKE_EXE_LINKER_FLAGS_DEBUG CMAKE_MODULE_LINKER_FLAGS_DEBUG)
    # Switch off incremental linking in debug/relwithdebinfo builds
    if(${flag_var} MATCHES "/INCREMENTAL" AND NOT ${flag_var} MATCHES "/INCREMENTAL:NO")
      string(REGEX REPLACE "/INCREMENTAL" "/INCREMENTAL:NO" ${flag_var} "${${flag_var}}")
    endif()
  endforeach()

  # Pass 4: add /ignore switches for linker warnings 4049, 4217 and 4099 to
  # every linker flag set.
  foreach(flag_var
      CMAKE_SHARED_LINKER_FLAGS CMAKE_STATIC_LINKER_FLAGS
      CMAKE_EXE_LINKER_FLAGS CMAKE_MODULE_LINKER_FLAGS)
    string(APPEND ${flag_var} " /ignore:4049 /ignore:4217 /ignore:4099")
  endforeach()

  # https://github.com/pytorch/pytorch/issues/91933: Don't set the manifest filename
  # explicitly helps fix the linker error when linking torch_python.dll. The manifest
  # file would still be there in the correct format torch_python.dll.manifest
  if(CMAKE_SHARED_LINKER_FLAGS MATCHES "/MANIFESTFILE:.*\\.manifest")
    string(REGEX REPLACE "/MANIFESTFILE:.*\\.manifest" "" CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS}")
  endif()

  # Try harder
  string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler /w -w")

  string(APPEND CMAKE_CXX_FLAGS " /FS")
  string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler /FS")
endif()
|
|
|
|
string(APPEND CMAKE_CUDA_FLAGS " -Xfatbin -compress-all")
|
|
|
|
# Set INTERN_BUILD_MOBILE for all mobile builds. Components that are not
|
|
# applicable to mobile are disabled by this variable.
|
|
# Setting `BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN` environment variable can
|
|
# force it to do mobile build with host toolchain - which is useful for testing
|
|
# purpose.
|
|
# Mobile builds (Android, iOS, or a host-toolchain build forced through the
# BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN environment variable).
if(ANDROID OR IOS OR DEFINED ENV{BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN})
  set(INTERN_BUILD_MOBILE ON)
  message(WARNING "INTERN_BUILD_MOBILE is on, disabling BUILD_LAZY_TS_BACKEND")
  set(BUILD_LAZY_TS_BACKEND OFF)

  # Set -ffunction-sections and -fdata-sections so that each method has its own
  # text section. This allows the linker to remove unused section when the flag
  # -Wl,-gc-sections is provided at link time.
  foreach(mobile_lang_flags CMAKE_CXX_FLAGS CMAKE_C_FLAGS)
    string(APPEND ${mobile_lang_flags} " -ffunction-sections -fdata-sections")
  endforeach()

  # Please note that the use of the following flags is required when linking
  # against libtorch_cpu.a for mobile builds.
  # -Wl,--whole-archive -ltorch_cpu -Wl,--no-whole-archive
  #
  # This allows global constructors to be included and run. Global
  # constructors are used for operator/kernel registration with the
  # PyTorch Dispatcher.

  if(DEFINED ENV{BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN})
    # C10_MOBILE is derived from Android/iOS toolchain macros in
    # c10/macros/Macros.h, so it needs to be explicitly set here.
    string(APPEND CMAKE_CXX_FLAGS " -DC10_MOBILE")
  endif()

  if(DEFINED ENV{PYTORCH_MOBILE_TRIM_DISPATCH_KEY_SET})
    # If PYTORCH_MOBILE_TRIM_DISPATCH_KEY_SET is defined (env var),
    # then define C10_MOBILE_TRIM_DISPATCH_KEYS, which limits the
    # number of dispatch keys in OperatorEntry::dispatchTable_
    # to reduce peak memory during library initialization.
    string(APPEND CMAKE_CXX_FLAGS " -DC10_MOBILE_TRIM_DISPATCH_KEYS")
  endif()
endif()
|
|
|
|
# INTERN_BUILD_ATEN_OPS is used to control whether to build ATen/TH operators.
|
|
set(INTERN_BUILD_ATEN_OPS ON)
|
|
|
|
if(NOT DEFINED USE_BLAS)
|
|
set(USE_BLAS ON)
|
|
endif()
|
|
|
|
# Build libtorch mobile library, which contains ATen/TH ops and native support for
|
|
# TorchScript model, but doesn't contain not-yet-unified caffe2 ops;
|
|
# Feature overrides applied to every mobile build.
if(INTERN_BUILD_MOBILE)
  # Static custom-op builds get -DNO_EXPORT.
  if(NOT BUILD_SHARED_LIBS AND NOT "${SELECTED_OP_LIST}" STREQUAL "")
    string(APPEND CMAKE_CXX_FLAGS " -DNO_EXPORT")
  endif()

  # Autograd is disabled unless BUILD_MOBILE_AUTOGRAD asks for it.
  if(NOT BUILD_MOBILE_AUTOGRAD)
    set(INTERN_DISABLE_AUTOGRAD ON)
  else()
    set(INTERN_DISABLE_AUTOGRAD OFF)
  endif()

  # Components that are switched off for mobile.
  foreach(mobile_disabled_flag
      BUILD_PYTHON
      BUILD_FUNCTORCH
      BUILD_CAFFE2_OPS
      USE_DISTRIBUTED
      USE_FBGEMM
      USE_QNNPACK)
    set(${mobile_disabled_flag} OFF)
  endforeach()
  set(NO_API ON)
  set(INTERN_DISABLE_ONNX ON)

  # INTERN_USE_EIGEN_BLAS mirrors USE_BLAS.
  if(NOT USE_BLAS)
    set(INTERN_USE_EIGEN_BLAS OFF)
  else()
    set(INTERN_USE_EIGEN_BLAS ON)
  endif()

  # Disable developing mobile interpreter for actual mobile build.
  # Enable it elsewhere to capture build error.
  set(INTERN_DISABLE_MOBILE_INTERP ON)
endif()
|
|
|
|
# ---[ Version numbers for generated libraries
|
|
# Derive the Torch version. Precedence for TORCH_BUILD_VERSION:
#   1. the PYTORCH_BUILD_VERSION environment variable, when defined;
#   2. an existing non-empty TORCH_BUILD_VERSION cache value;
#   3. the contents of version.txt (TORCH_DEFAULT_VERSION), else "0.0.0".
file(READ version.txt TORCH_DEFAULT_VERSION)
# Strip trailing newline
string(REGEX REPLACE "\n$" "" TORCH_DEFAULT_VERSION "${TORCH_DEFAULT_VERSION}")
if("${TORCH_DEFAULT_VERSION} " STREQUAL " ")
  message(WARNING "Could not get version from base 'version.txt'")
  # If we can't get the version from the version file we should probably
  # set it to something nonsensical like 0.0.0
  # No comma after the variable name: set() takes whitespace-separated
  # arguments, so "TORCH_DEFAULT_VERSION," would name a different variable and
  # leave TORCH_DEFAULT_VERSION empty.
  set(TORCH_DEFAULT_VERSION "0.0.0")
endif()
set(TORCH_BUILD_VERSION "${TORCH_DEFAULT_VERSION}" CACHE STRING "Torch build version")
if(DEFINED ENV{PYTORCH_BUILD_VERSION})
  set(TORCH_BUILD_VERSION "$ENV{PYTORCH_BUILD_VERSION}"
    CACHE STRING "Torch build version" FORCE)
endif()
if(NOT TORCH_BUILD_VERSION)
  # An empty string was specified so force version to the default
  set(TORCH_BUILD_VERSION "${TORCH_DEFAULT_VERSION}"
    CACHE STRING "Torch build version" FORCE)
endif()
# caffe2_parse_version_str is not defined in this file (presumably it comes
# from cmake/public/utils.cmake and sets the <PREFIX>_VERSION_MAJOR/MINOR
# variables used below -- confirm there).
caffe2_parse_version_str(TORCH ${TORCH_BUILD_VERSION})
caffe2_parse_version_str(CAFFE2 ${TORCH_BUILD_VERSION})
set(TORCH_SOVERSION "${TORCH_VERSION_MAJOR}.${TORCH_VERSION_MINOR}")
|
|
|
|
# ---[ CMake scripts + modules
|
|
list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)
|
|
|
|
# ---[ CMake build directories
|
|
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
|
|
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
|
|
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
|
|
|
enable_testing()
|
|
|
|
# ---[ Build variables set within the cmake tree
|
|
include(cmake/BuildVariables.cmake)
|
|
set(CAFFE2_ALLOWLIST "" CACHE STRING "A allowlist file of files that one should build.")
|
|
|
|
# Set default build type
|
|
if(NOT CMAKE_BUILD_TYPE)
|
|
message(STATUS "Build type not set - defaulting to Release")
|
|
set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build from: Debug Release RelWithDebInfo MinSizeRel Coverage." FORCE)
|
|
endif()
|
|
|
|
# Cross compiling for arm64 or x86_64 on macOS: the target is Darwin but not
# iOS, and CMAKE_OSX_ARCHITECTURES names exactly one of the two architectures.
if(NOT IOS
   AND CMAKE_SYSTEM_NAME STREQUAL "Darwin"
   AND CMAKE_OSX_ARCHITECTURES MATCHES "^(x86_64|arm64)$")
  set(CROSS_COMPILING_MACOSX TRUE)

  # A universal protoc is needed so the protobuf build does not fail.
  # CMAKE_TRY_COMPILE_TARGET_TYPE is set to STATIC_LIBRARY (instead of an
  # executable) so CMake's compiler check succeeds when cross-compiling.
  set(protoc_build_command
    "./scripts/build_host_protoc.sh --other-flags -DCMAKE_OSX_ARCHITECTURES=\"x86_64;arm64\" -DCMAKE_TRY_COMPILE_TARGET_TYPE=STATIC_LIBRARY -DCMAKE_C_COMPILER_WORKS=1 -DCMAKE_CXX_COMPILER_WORKS=1")

  # The command goes through a temporary script file because CMake COMMAND
  # dislikes double quotes in commands. The script is written first and then
  # copied into scripts/ with owner read/write/execute permissions.
  file(WRITE
    "${PROJECT_SOURCE_DIR}/tmp_protoc_script.sh"
    "#!/bin/bash\n${protoc_build_command}")
  file(COPY
    "${PROJECT_SOURCE_DIR}/tmp_protoc_script.sh"
    DESTINATION "${PROJECT_SOURCE_DIR}/scripts/"
    FILE_PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ)
  execute_process(
    COMMAND ./scripts/tmp_protoc_script.sh
    WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}"
    RESULT_VARIABLE BUILD_HOST_PROTOC_RESULT)
  # Both copies of the temporary script are removed regardless of the outcome.
  file(REMOVE
    "${PROJECT_SOURCE_DIR}/tmp_protoc_script.sh"
    "${PROJECT_SOURCE_DIR}/scripts/tmp_protoc_script.sh")
  if(NOT BUILD_HOST_PROTOC_RESULT EQUAL "0")
    message(FATAL_ERROR "Could not compile universal protoc.")
  endif()

  # Point both protoc variables at the freshly built host binary.
  set(PROTOBUF_PROTOC_EXECUTABLE
    "${PROJECT_SOURCE_DIR}/build_host_protoc/bin/protoc")
  set(CAFFE2_CUSTOM_PROTOC_EXECUTABLE
    "${PROJECT_SOURCE_DIR}/build_host_protoc/bin/protoc")
endif()
|
|
|
|
# ---[ Misc checks to cope with various compiler modes
include("cmake/MiscCheck.cmake")

# External projects
# Loads CMake's ExternalProject module.
include(ExternalProject)
|
|
|
|
# ---[ Dependencies
# ---[ FBGEMM doesn't work on 32-bit x86. CMAKE_SYSTEM_PROCESSOR can report
# x86_64 even for a 32-bit build, so the pointer size is checked as well.
if(USE_FBGEMM)
  if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86"
     OR (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND CMAKE_SIZEOF_VOID_P EQUAL 4))
    set(USE_FBGEMM OFF)
  endif()
endif()

# Turned off unconditionally at this point.
set(BUILD_ONEDNN_GRAPH OFF)
|
|
|
|
# The source code is in utf-8 encoding; tell MSVC so when it accepts the flag.
if(MSVC)
  append_cxx_flag_if_supported("/utf-8" CMAKE_CXX_FLAGS)
endif()

# Third-party dependency setup lives in its own file.
include("cmake/Dependencies.cmake")
|
|
|
|
# These options are declared down here because CMAKE_CUDA_COMPILER_VERSION is
# not available until now.
# NOTE(review): both conditions test a variable named ROCM, while the rest of
# this file tests USE_ROCM -- confirm which one is intended.
cmake_dependent_option(
  USE_FLASH_ATTENTION
  "Whether to build the flash_attention kernel for scaled dot product attention"
  ON
  "USE_CUDA AND NOT ROCM AND NOT MSVC AND NOT CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.6"
  OFF)

# Flash Attention2 will error while building for sm52 while Mem Eff Attention won't
cmake_dependent_option(
  USE_MEM_EFF_ATTENTION
  "Enable memory-efficient attention for scaled dot product attention"
  ON
  "USE_CUDA AND NOT ROCM AND NOT CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.6"
  OFF)
|
|
|
|
# With DEBUG_CUDA, the Debug and RelWithDebInfo CUDA flags get -lineinfo and,
# on toolkits older than 12.1, --source-in-ptx.
if(DEBUG_CUDA)
  foreach(cuda_debug_config DEBUG RELWITHDEBINFO)
    string(APPEND CMAKE_CUDA_FLAGS_${cuda_debug_config} " -lineinfo")
  endforeach()
  # CUDA-12.1 crashes when trying to compile with --source-in-ptx
  # See https://github.com/pytorch/pytorch/issues/102372#issuecomment-1572526893
  if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 12.1)
    foreach(cuda_debug_config DEBUG RELWITHDEBINFO)
      string(APPEND CMAKE_CUDA_FLAGS_${cuda_debug_config} " --source-in-ptx")
    endforeach()
  endif()
endif()
|
|
|
|
|
|
# Each enabled option below adds a preprocessor definition to CMAKE_CXX_FLAGS.
# For these three the macro has the same name as the option.
foreach(backend_option USE_FBGEMM USE_QNNPACK USE_PYTORCH_QNNPACK)
  if(${backend_option})
    string(APPEND CMAKE_CXX_FLAGS " -D${backend_option}")
  endif()
endforeach()

# Here the macro name differs from the option name.
if(USE_SLEEF_FOR_ARM_VEC256)
  string(APPEND CMAKE_CXX_FLAGS " -DAT_BUILD_ARM_VEC256_WITH_SLEEF")
endif()

if(USE_XNNPACK)
  string(APPEND CMAKE_CXX_FLAGS " -DUSE_XNNPACK")
endif()
|
|
|
|
# Vulkan: two definitions are always added, and one more for each enabled
# sub-option (the macro has the same name as the sub-option).
if(USE_VULKAN)
  string(APPEND CMAKE_CXX_FLAGS " -DUSE_VULKAN" " -DUSE_VULKAN_API")

  foreach(vulkan_option USE_VULKAN_FP16_INFERENCE USE_VULKAN_RELAXED_PRECISION)
    if(${vulkan_option})
      string(APPEND CMAKE_CXX_FLAGS " -D${vulkan_option}")
    endif()
  endforeach()
endif()
|
|
|
|
# Options whose preprocessor macro has the same name as the option.
foreach(feature_option
    BUILD_LITE_INTERPRETER
    TRACING_BASED
    USE_PYTORCH_METAL
    USE_PYTORCH_METAL_EXPORT)
  if(${feature_option})
    string(APPEND CMAKE_CXX_FLAGS " -D${feature_option}")
  endif()
endforeach()

# Options that map to a differently named macro.
if(USE_SOURCE_DEBUG_ON_MOBILE)
  string(APPEND CMAKE_CXX_FLAGS " -DSYMBOLICATE_MOBILE_DEBUG_HANDLE")
endif()

# Only defined when both the lite interpreter and its profiler are enabled.
if(BUILD_LITE_INTERPRETER AND USE_LITE_INTERPRETER_PROFILER)
  string(APPEND CMAKE_CXX_FLAGS " -DEDGE_PROFILER_USE_KINETO")
endif()

if(USE_COREML_DELEGATE)
  string(APPEND CMAKE_CXX_FLAGS " -DUSE_COREML_DELEGATE")
endif()
|
|
|
|
# ---[ Allowlist file if allowlist is specified
# The handling itself lives in cmake/Allowlist.cmake.
include("cmake/Allowlist.cmake")
|
|
|
|
# ---[ Set link flag, handle additional deps for gcc 4.8 and above
# GCC builds, except on Android, add gcc_s and gcc to the dependency libraries.
if(CMAKE_COMPILER_IS_GNUCXX)
  if(NOT ANDROID)
    message(STATUS
      "GCC ${CMAKE_CXX_COMPILER_VERSION}: Adding gcc and gcc_s libs to link line")
    list(APPEND Caffe2_DEPENDENCY_LIBS gcc_s gcc)
  endif()
endif()
|
|
|
|
# ---[ Build flags
# Re-include to override append_cxx_flag_if_supported from third_party/FBGEMM,
# so the calls below use this project's definition.
include("cmake/public/utils.cmake")
|
|
if(NOT MSVC)
|
|
# Baseline optimisation, PIC and warning flags, added unconditionally.
# Eigen fails to build with some versions, so convert this to a warning
# Details at http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1459
string(APPEND CMAKE_CXX_FLAGS " -O2 -fPIC" " -Wall" " -Wextra")

# Flags offered to append_cxx_flag_if_supported one by one, in this order.
foreach(torch_cxx_flag
    -Werror=return-type
    -Werror=non-virtual-dtor
    -Werror=braced-scalar-init
    -Werror=range-loop-construct
    -Werror=bool-operation
    -Wnarrowing
    -Wno-missing-field-initializers
    -Wno-type-limits
    -Wno-array-bounds
    -Wno-unknown-pragmas
    -Wno-unused-parameter
    -Wno-unused-function
    -Wno-unused-result
    -Wno-strict-overflow
    -Wno-strict-aliasing
    -Wno-stringop-overflow
    -Wvla-extension
    -Wsuggest-override
    -Wnewline-eof
    -Winconsistent-missing-override
    -Winconsistent-missing-destructor-override)
  append_cxx_flag_if_supported("${torch_cxx_flag}" CMAKE_CXX_FLAGS)
endforeach()

# Compiler-specific additions, appended without a support check.
if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
  string(APPEND CMAKE_CXX_FLAGS " -Wno-pass-failed")
endif()
if(CMAKE_COMPILER_IS_GNUCXX)
  # Suppress "The ABI for passing parameters with 64-byte alignment has changed in GCC 4.6"
  string(APPEND CMAKE_CXX_FLAGS " -Wno-psabi")
endif()
|
|
|
|
# Use ld.gold if available, fall back to ld.bfd (the default ld) if not
if(USE_GOLD_LINKER)
  if(USE_DISTRIBUTED AND USE_MPI)
    # Same issue as here with default MPI on Ubuntu
    # https://bugs.launchpad.net/ubuntu/+source/deal.ii/+bug/1841577
    # Only a warning is issued; USE_GOLD_LINKER itself is left unchanged here.
    message(WARNING "Refusing to use gold when USE_MPI=1")
  else()
    # Ask the C compiler driver which linker -fuse-ld=gold resolves to.
    execute_process(
      COMMAND "${CMAKE_C_COMPILER}" -fuse-ld=gold -Wl,--version
      ERROR_QUIET
      OUTPUT_VARIABLE LD_VERSION)
    if("${LD_VERSION}" MATCHES "GNU gold")
      message(STATUS "ld.gold is available, using it to link")
      foreach(gold_linker_flags_var
          CMAKE_EXE_LINKER_FLAGS
          CMAKE_SHARED_LINKER_FLAGS
          CMAKE_MODULE_LINKER_FLAGS)
        string(APPEND ${gold_linker_flags_var} " -fuse-ld=gold")
      endforeach()
    else()
      message(WARNING "USE_GOLD_LINKER was set but ld.gold isn't available, turning it off")
      set(USE_GOLD_LINKER OFF)
    endif()
  endif()
endif()
|
|
|
|
# Further flags, each offered to append_cxx_flag_if_supported in this order.
foreach(torch_cxx_flag
    -Wno-error=pedantic
    -Wno-error=old-style-cast
    -Wno-error=inconsistent-missing-override
    -Wno-error=inconsistent-missing-destructor-override
    -Wconstant-conversion
    -Wno-invalid-partial-specialization
    -Wno-aligned-allocation-unavailable
    -Wno-missing-braces
    -Qunused-arguments)
  append_cxx_flag_if_supported("${torch_cxx_flag}" CMAKE_CXX_FLAGS)
endforeach()
|
|
|
|
# Coloured compiler diagnostics.
# The option is tested by name: if() dereferences an unquoted variable itself.
# Writing if(${USE_COLORIZE_OUTPUT}) would expand the value first, which
# collapses to a bare if() when the option is unset and re-interprets the
# value as a variable name otherwise.
if(USE_COLORIZE_OUTPUT)
  # Why compiler checks are necessary even when `try_compile` is used:
  # a ccache bug can incorrectly identify `-fcolor-diagnostics` as supported
  # by GCC, see https://github.com/ccache/ccache/issues/740 (for older ccache)
  # and https://github.com/ccache/ccache/issues/1275 (for newer ones)
  if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
    append_cxx_flag_if_supported("-fdiagnostics-color=always" CMAKE_CXX_FLAGS)
  else()
    append_cxx_flag_if_supported("-fcolor-diagnostics" CMAKE_CXX_FLAGS)
  endif()
endif()
|
|
|
|
append_cxx_flag_if_supported("-faligned-new" CMAKE_CXX_FLAGS)

# Optional warnings-as-errors.
if(WERROR)
  append_cxx_flag_if_supported("-Werror" CMAKE_CXX_FLAGS)
  # NOTE(review): COMPILER_SUPPORT_WERROR is not assigned anywhere in the
  # visible part of this file. Confirm that append_cxx_flag_if_supported
  # defines it; otherwise WERROR is always reset to FALSE here.
  if(NOT COMPILER_SUPPORT_WERROR)
    set(WERROR FALSE)
  endif()
endif()
append_cxx_flag_if_supported("-Wno-unused-but-set-variable" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wno-maybe-uninitialized" CMAKE_CXX_FLAGS)

# Debug-configuration flags: keep frame pointers and disable optimisation.
append_cxx_flag_if_supported("-fstandalone-debug" CMAKE_CXX_FLAGS_DEBUG)
string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fno-omit-frame-pointer -O0")
# CMake has no CMAKE_LINKER_FLAGS_DEBUG variable, so appending to it alone
# never reaches a link line. The per-target-type variables CMake actually
# reads are CMAKE_{EXE,SHARED,MODULE}_LINKER_FLAGS_DEBUG.
foreach(debug_linker_flags_var
    CMAKE_EXE_LINKER_FLAGS_DEBUG
    CMAKE_SHARED_LINKER_FLAGS_DEBUG
    CMAKE_MODULE_LINKER_FLAGS_DEBUG)
  string(APPEND ${debug_linker_flags_var} " -fno-omit-frame-pointer -O0")
endforeach()
# The legacy name is still populated in case other project files read it.
string(APPEND CMAKE_LINKER_FLAGS_DEBUG " -fno-omit-frame-pointer -O0")

append_cxx_flag_if_supported("-fno-math-errno" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-fno-trapping-math" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Werror=format" CMAKE_CXX_FLAGS)
|
|
else()
|
|
# Directory-wide definitions for the MSVC branch:
#   WIN32_LEAN_AND_MEAN   - skip unwanted includes from windows.h
#   _UCRT_LEGACY_INFINITY - Windows SDK broke compatibility since version
#                           25131, but introduced this define for backward
#                           compatibility.
#   NOMINMAX              - disable min/max macros
add_compile_definitions(WIN32_LEAN_AND_MEAN _UCRT_LEGACY_INFINITY NOMINMAX)

# Turn off these warnings on Windows.
#   4624 - destructor was implicitly defined as delete
#   4068 - unknown pragma
#   4067 - unexpected tokens following preprocessor directive - expected a newline
#   4267 - conversion from 'size_t' to 'unsigned int', possible loss of data
#   4661 - no suitable definition provided for explicit template instantiation request
#   4717 - recursive on all control paths, function will cause runtime stack overflow
#   4244 - conversion from '_Ty' to '_Ty', possible loss of data
#   4804 - unsafe use of type 'bool' in operation
#   4273 - inconsistent dll linkage
foreach(msvc_warning_id 4624 4068 4067 4267 4661 4717 4244 4804 4273)
  append_cxx_flag_if_supported("/wd${msvc_warning_id}" CMAKE_CXX_FLAGS)
endforeach()
|
|
endif()
|
|
|
|
|
|
# On aarch64, check whether the toolchain's <arm_neon.h> provides the
# vst1q_f32_x2 / vld1q_f32_x2 intrinsics. When a check fails, a MISSING_ARM_*
# macro is added to CMAKE_CXX_FLAGS.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
  include(CheckCSourceCompiles)

  # Store intrinsic.
  check_c_source_compiles("#include <arm_neon.h>
int main() {
  float a[] = {1.0, 1.0};
  float32x4x2_t v;
  v.val[0] = vcombine_f32 (vcreate_f32 (0UL), vcreate_f32 (0UL));
  v.val[1] = vcombine_f32 (vcreate_f32 (0UL), vcreate_f32 (0UL));
  vst1q_f32_x2(a, v);
  return 0;
}" HAS_VST1)
  if(NOT HAS_VST1)
    string(APPEND CMAKE_CXX_FLAGS " -DMISSING_ARM_VST1")
  endif()

  # Load intrinsic.
  check_c_source_compiles("#include <arm_neon.h>
int main() {
  float a[] = {1.0, 1.0};
  vld1q_f32_x2(a);
  return 0;
}" HAS_VLD1)
  if(NOT HAS_VLD1)
    string(APPEND CMAKE_CXX_FLAGS " -DMISSING_ARM_VLD1")
  endif()
endif()
|
|
|
|
|
|
# Add code coverage flags to supported compilers
# GNU uses gcov-style instrumentation; Clang-family compilers use
# source-based coverage. Any other compiler is rejected.
if(USE_CPP_CODE_COVERAGE)
  if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
    string(APPEND CMAKE_C_FLAGS " --coverage -fprofile-abs-path")
    string(APPEND CMAKE_CXX_FLAGS " --coverage -fprofile-abs-path")
  elseif("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
    string(APPEND CMAKE_C_FLAGS " -fprofile-instr-generate -fcoverage-mapping")
    string(APPEND CMAKE_CXX_FLAGS " -fprofile-instr-generate -fcoverage-mapping")
  else()
    # `ERROR` is not a message() mode: CMake would print it as part of an
    # ordinary notice and carry on without coverage. FATAL_ERROR makes the
    # unsupported configuration an actual configure failure.
    message(FATAL_ERROR "Code coverage for compiler ${CMAKE_CXX_COMPILER_ID} is unsupported")
  endif()
endif()
|
|
|
|
# Apple-specific flags.
if(APPLE)
  if(USE_MPS)
    # Define USE_MPS for both Objective-C++ and C++ sources; the Objective-C++
    # flags additionally get -fno-objc-arc.
    string(APPEND CMAKE_OBJCXX_FLAGS " -DUSE_MPS -fno-objc-arc")
    string(APPEND CMAKE_CXX_FLAGS " -DUSE_MPS")
    # The frameworks are passed to shared-library links with -weak_framework.
    string(APPEND CMAKE_SHARED_LINKER_FLAGS
      " -weak_framework Foundation"
      " -weak_framework MetalPerformanceShaders"
      " -weak_framework MetalPerformanceShadersGraph"
      " -weak_framework Metal")
    # To suppress MPSGraph availability warnings
    append_cxx_flag_if_supported("-Wno-unguarded-availability-new" CMAKE_OBJCXX_FLAGS)
  endif()
  foreach(apple_warning_flag -Wno-unused-private-field -Wno-missing-braces)
    append_cxx_flag_if_supported("${apple_warning_flag}" CMAKE_CXX_FLAGS)
  endforeach()
endif()
|
|
|
|
# Emscripten builds get an extra warning suppression, the EMSCRIPTEN macro and
# the DISABLE_EXCEPTION_CATCHING=0 setting.
if(EMSCRIPTEN)
  string(APPEND CMAKE_CXX_FLAGS
    " -Wno-implicit-function-declaration"
    " -DEMSCRIPTEN"
    " -s DISABLE_EXCEPTION_CATCHING=0")
endif()

# Offered on every platform, unlike the same flag inside the non-MSVC branch.
append_cxx_flag_if_supported("-Wno-stringop-overflow" CMAKE_CXX_FLAGS)
|
|
|
|
# Android builds without ANDROID_DEBUG_SYMBOLS: GCC gets -s and Clang gets -g0
# in the C++ flags; any other compiler gets -s in the executable linker flags.
if(ANDROID AND NOT ANDROID_DEBUG_SYMBOLS)
  if(CMAKE_COMPILER_IS_GNUCXX)
    string(APPEND CMAKE_CXX_FLAGS " -s")
  elseif("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
    string(APPEND CMAKE_CXX_FLAGS " -g0")
  else()
    string(APPEND CMAKE_EXE_LINKER_FLAGS " -s")
  endif()
endif()
|
|
|
|
# Non-Apple UNIX platforms add dl to the dependency libraries.
if(UNIX AND NOT APPLE)
  list(APPEND Caffe2_DEPENDENCY_LIBS dl)
endif()
|
|
|
|
# Prefix path to Caffe2 headers.
# If a directory containing installed Caffe2 headers was inadvertently added
# to the list of include directories, prefixing PROJECT_SOURCE_DIR means this
# source tree always takes precedence.
include_directories(BEFORE "${PROJECT_SOURCE_DIR}")

# Prefix path to generated Caffe2 headers.
# These need to take precedence over their empty counterparts located in
# PROJECT_SOURCE_DIR.
include_directories(BEFORE "${PROJECT_BINARY_DIR}")

# ATen sources: the source-tree directory, then the build-tree directory, each
# prepended in turn.
include_directories(BEFORE "${PROJECT_SOURCE_DIR}/aten/src/")
include_directories(BEFORE "${CMAKE_BINARY_DIR}/aten/src/")
|
|
|
|
# Optional mimalloc allocator, built from third_party/mimalloc.
if(USE_MIMALLOC)
  # These MI_* switches are set to OFF before the subdirectory is added, so
  # they are visible while third_party/mimalloc is being configured.
  set(MI_OVERRIDE OFF)
  set(MI_BUILD_SHARED OFF)
  set(MI_BUILD_OBJECT OFF)
  set(MI_BUILD_TESTS OFF)

  add_definitions(-DUSE_MIMALLOC)
  add_subdirectory("third_party/mimalloc")
  include_directories("third_party/mimalloc/include")
endif()
|
|
|
|
# ---[ Main build
# c10 is added first, then caffe2.
add_subdirectory("c10")
add_subdirectory("caffe2")
|
|
|
|
# --[ Documentation
# With BUILD_DOCS, Doxygen is required. Two Doxyfiles are generated from their
# dot-prefixed templates under docs/caffe2, and one always-built target is
# registered for each of them.
if(BUILD_DOCS)
  # check if Doxygen is installed
  find_package(Doxygen)
  if(NOT DOXYGEN_FOUND)
    message(FATAL_ERROR "Doxygen needs to be installed to generate the documentation")
  endif()

  message("Generating documentation")

  set(DOXYGEN_C_IN "${CMAKE_CURRENT_SOURCE_DIR}/docs/caffe2/.Doxyfile-c")
  set(DOXYGEN_C_OUT "${CMAKE_CURRENT_SOURCE_DIR}/docs/caffe2/Doxyfile-c")
  set(DOXYGEN_P_IN "${CMAKE_CURRENT_SOURCE_DIR}/docs/caffe2/.Doxyfile-python")
  set(DOXYGEN_P_OUT "${CMAKE_CURRENT_SOURCE_DIR}/docs/caffe2/Doxyfile-python")

  # Start from an empty docs directory in the build tree.
  if(EXISTS "${CMAKE_CURRENT_BINARY_DIR}/docs")
    file(REMOVE_RECURSE "${CMAKE_CURRENT_BINARY_DIR}/docs")
  endif()
  file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/docs")

  configure_file("${DOXYGEN_C_IN}" "${DOXYGEN_C_OUT}" @ONLY)
  configure_file("${DOXYGEN_P_IN}" "${DOXYGEN_P_OUT}" @ONLY)

  add_custom_target(doc_doxygen_c ALL
    COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_C_OUT}
    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
    COMMENT "Generating C++ API documentation with Doxygen"
    VERBATIM)

  add_custom_target(doc_doxygen_python ALL
    COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_P_OUT}
    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
    COMMENT "Generating Python API documentation with Doxygen"
    VERBATIM)
endif()
|
|
|
|
# ---[ CMake related files
# Uninstall option.
# The target is only created when no target with that name exists yet. It runs
# the uninstall script generated from cmake/cmake_uninstall.cmake.in.
if(NOT TARGET caffe2_uninstall)
  configure_file(
    "${CMAKE_CURRENT_SOURCE_DIR}/cmake/cmake_uninstall.cmake.in"
    "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake"
    IMMEDIATE @ONLY)

  add_custom_target(caffe2_uninstall
    COMMAND ${CMAKE_COMMAND} -P
            ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake)
endif()
|
|
|
|
# ---[ Make configuration files for cmake to allow dependent libraries
# easier access to Caffe2.

# Warn unless glog and gflags are both enabled and protobuf is not the custom
# build.
if(NOT (USE_GLOG AND USE_GFLAGS) OR BUILD_CUSTOM_PROTOBUF)
  message(WARNING
    "Generated cmake files are only fully tested if one builds "
    "with system glog, gflags, and protobuf. Other settings may "
    "generate files that are not well tested.")
endif()

# Intentionally empty for now.
if(USE_CUDA OR USE_ROCM)
  # TODO: check if we should include other cuda dependency libraries
  # to the interface as well.
endif()
|
|
|
|
# Note(jiayq): when building static libraries, all PRIVATE dependencies
# will also become interface libraries, and as a result if there are any
# dependency libraries that are not exported, the following install export
# script will fail. As a result, we will only provide the targets cmake
# files for shared lib installation. For more info, read:
# https://cmake.org/pipermail/cmake/2016-May/063400.html
if(BUILD_SHARED_LIBS)
  # Package config file, generated from its template and installed as-is.
  configure_file(
    "${PROJECT_SOURCE_DIR}/cmake/Caffe2Config.cmake.in"
    "${PROJECT_BINARY_DIR}/Caffe2Config.cmake"
    @ONLY)
  install(
    FILES "${PROJECT_BINARY_DIR}/Caffe2Config.cmake"
    DESTINATION share/cmake/Caffe2
    COMPONENT dev)

  # Helper scripts from cmake/public.
  install(
    FILES
      "${PROJECT_SOURCE_DIR}/cmake/public/cuda.cmake"
      "${PROJECT_SOURCE_DIR}/cmake/public/glog.cmake"
      "${PROJECT_SOURCE_DIR}/cmake/public/gflags.cmake"
      "${PROJECT_SOURCE_DIR}/cmake/public/mkl.cmake"
      "${PROJECT_SOURCE_DIR}/cmake/public/mkldnn.cmake"
      "${PROJECT_SOURCE_DIR}/cmake/public/protobuf.cmake"
      "${PROJECT_SOURCE_DIR}/cmake/public/utils.cmake"
      "${PROJECT_SOURCE_DIR}/cmake/public/LoadHIP.cmake"
    DESTINATION share/cmake/Caffe2/public
    COMPONENT dev)

  # CUDA-related modules installed next to the config file.
  install(
    DIRECTORY "${PROJECT_SOURCE_DIR}/cmake/Modules_CUDA_fix"
    DESTINATION share/cmake/Caffe2/
    COMPONENT dev)
  install(
    FILES
      "${PROJECT_SOURCE_DIR}/cmake/Modules/FindCUDAToolkit.cmake"
      "${PROJECT_SOURCE_DIR}/cmake/Modules/FindCUSPARSELT.cmake"
    DESTINATION share/cmake/Caffe2/
    COMPONENT dev)

  # Exported targets file.
  install(
    EXPORT Caffe2Targets
    DESTINATION share/cmake/Caffe2
    FILE Caffe2Targets.cmake
    COMPONENT dev)
else()
  message(WARNING
    "Generated cmake files are only available when building "
    "shared libs.")
endif()
|
|
|
|
# ---[ Modules
# If the master flag for building Caffe2 is disabled, we also disable the
# build for Caffe2 related operator modules.
if(BUILD_CAFFE2)
  add_subdirectory("modules")
endif()

# ---[ Binaries
# Binaries will be built after the Caffe2 main libraries and the modules
# are built. For the binaries, they will be linked to the Caffe2 main
# libraries, as well as all the modules that are built with Caffe2 (the ones
# built in the previous Modules section above).
if(BUILD_BINARY)
  add_subdirectory("binaries")
endif()
|
|
|
|
# ---[ JNI
# With BUILD_JNI, the Android bindings in android/pytorch_android are built.
if(BUILD_JNI)
  # Both variables are set before the subdirectory is added.
  set(BUILD_LIBTORCH_WITH_JNI 1)
  set(FBJNI_SKIP_TESTS 1)
  if(NOT MSVC)
    string(APPEND CMAKE_CXX_FLAGS " -Wno-unused-variable")
  endif()
  add_subdirectory("android/pytorch_android")
endif()
|
|
|
|
# Load the summary helpers and print the configuration summary.
include("cmake/Summary.cmake")
caffe2_print_configuration_summary()

# functorch is an optional subdirectory.
if(BUILD_FUNCTORCH)
  add_subdirectory("functorch")
endif()
|
|
|
|
# Parse custom debug info
# USE_CUSTOM_DEBINFO lists source files that are to be compiled with -g.
if(DEFINED USE_CUSTOM_DEBINFO)
  # Semicolons become spaces for the status message; runs of spaces are then
  # turned back into semicolons to form a CMake list.
  string(REPLACE ";" " " SOURCE_FILES "${USE_CUSTOM_DEBINFO}")
  message(STATUS "Source files with custom debug infos: ${SOURCE_FILES}")
  string(REGEX REPLACE " +" ";" SOURCE_FILES_LIST "${SOURCE_FILES}")

  # Set the COMPILE_FLAGS property for each source file
  foreach(SOURCE_FILE ${SOURCE_FILES_LIST})
    # We have to specify the scope here. We do this by specifying the
    # targets we care about and caffe2/ for all test targets defined there
    set(ALL_PT_TARGETS "torch_python;c10;torch_cpu;torch")
    set_source_files_properties(
      ${SOURCE_FILE}
      DIRECTORY "caffe2/"
      TARGET_DIRECTORY ${ALL_PT_TARGETS}
      PROPERTIES COMPILE_FLAGS "-g")
  endforeach()

  # Link everything with debug info when any file is in debug mode
  string(APPEND CMAKE_EXE_LINKER_FLAGS " -g")
  string(APPEND CMAKE_SHARED_LINKER_FLAGS " -g")
endif()
|