[ghstack-poisoned]
Scott Wolchok
2025-07-25 12:02:07 -07:00
parent fc63aee079
commit dbd090ee94
95 changed files with 6502 additions and 4956 deletions

View File

@ -246,7 +246,8 @@ cmake_dependent_option(USE_STATIC_CUDNN "Use cuDNN static libraries" OFF
"USE_CUDNN" OFF)
cmake_dependent_option(USE_CUSPARSELT "Use cuSPARSELt" ON "USE_CUDA" OFF)
cmake_dependent_option(USE_CUDSS "Use cuDSS" ON "USE_CUDA" OFF)
# USE_ROCM is guarded against in Dependencies.cmake because USE_ROCM is not properly defined here
# USE_ROCM is guarded against in Dependencies.cmake because USE_ROCM is not
# properly defined here
cmake_dependent_option(USE_CUFILE "Use cuFile" ON "USE_CUDA AND NOT WIN32" OFF)
option(USE_FBGEMM "Use FBGEMM (quantized 8-bit server operators)" ON)
option(USE_KINETO "Use Kineto profiling library" ON)
@ -262,8 +263,7 @@ option(USE_NATIVE_ARCH "Use -march=native" OFF)
cmake_dependent_option(USE_MPS "Use MPS for macOS build" ON "MPS_FOUND" OFF)
cmake_dependent_option(USE_NCCL "Use NCCL" ON
"USE_CUDA OR USE_ROCM;UNIX;NOT APPLE" OFF)
cmake_dependent_option(USE_XCCL "Use XCCL" ON
"USE_XPU;UNIX;NOT APPLE" OFF)
cmake_dependent_option(USE_XCCL "Use XCCL" ON "USE_XPU;UNIX;NOT APPLE" OFF)
cmake_dependent_option(USE_RCCL "Use RCCL" ON USE_NCCL OFF)
cmake_dependent_option(USE_STATIC_NCCL "Use static NCCL" OFF "USE_NCCL" OFF)
cmake_dependent_option(USE_SYSTEM_NCCL "Use system-wide NCCL" OFF "USE_NCCL"
@ -287,10 +287,9 @@ option(USE_PROF "Use profiling" OFF)
option(USE_PYTORCH_QNNPACK "Use ATen/QNNPACK (quantized 8-bit operators)" ON)
option(USE_SNPE "Use Qualcomm's SNPE library" OFF)
option(USE_SYSTEM_EIGEN_INSTALL
"Use system Eigen instead of the one under third_party" OFF)
cmake_dependent_option(
USE_VALGRIND "Use Valgrind. Only available on Linux." ON
"LINUX" OFF)
"Use system Eigen instead of the one under third_party" OFF)
cmake_dependent_option(USE_VALGRIND "Use Valgrind. Only available on Linux." ON
"LINUX" OFF)
if(NOT DEFINED USE_VULKAN)
cmake_dependent_option(USE_VULKAN "Use Vulkan GPU backend" ON "ANDROID" OFF)
@ -298,9 +297,8 @@ endif()
option(USE_SOURCE_DEBUG_ON_MOBILE "Enable" ON)
option(USE_LITE_INTERPRETER_PROFILER "Enable" ON)
cmake_dependent_option(
USE_LITE_AOTI "Include AOTI sources" OFF
"BUILD_LITE_INTERPRETER" OFF)
cmake_dependent_option(USE_LITE_AOTI "Include AOTI sources" OFF
"BUILD_LITE_INTERPRETER" OFF)
option(USE_VULKAN_FP16_INFERENCE "Vulkan - Use fp16 inference" OFF)
option(USE_VULKAN_RELAXED_PRECISION
"Vulkan - Use relaxed precision math in the kernels (mediump)" OFF)
@ -333,25 +331,27 @@ cmake_dependent_option(USE_SYSTEM_UCC "Use system-wide UCC" OFF "USE_UCC" OFF)
cmake_dependent_option(USE_C10D_UCC "USE C10D UCC" ON "USE_DISTRIBUTED;USE_UCC"
OFF)
cmake_dependent_option(
USE_GLOO "Use Gloo. Only available if USE_DISTRIBUTED is on." ON
"USE_DISTRIBUTED" OFF)
USE_GLOO "Use Gloo. Only available if USE_DISTRIBUTED is on." ON
"USE_DISTRIBUTED" OFF)
cmake_dependent_option(
USE_GLOO_WITH_OPENSSL "Use Gloo with OpenSSL. Only available if USE_GLOO is on." OFF
"USE_GLOO AND LINUX AND NOT INTERN_BUILD_MOBILE" OFF)
USE_GLOO_WITH_OPENSSL
"Use Gloo with OpenSSL. Only available if USE_GLOO is on." OFF
"USE_GLOO AND LINUX AND NOT INTERN_BUILD_MOBILE" OFF)
cmake_dependent_option(
USE_GLOO_IBVERBS "Use Gloo with ibverbs backend. Only available if USE_GLOO is on." OFF
"USE_GLOO AND LINUX AND NOT INTERN_BUILD_MOBILE" OFF)
USE_GLOO_IBVERBS
"Use Gloo with ibverbs backend. Only available if USE_GLOO is on." OFF
"USE_GLOO AND LINUX AND NOT INTERN_BUILD_MOBILE" OFF)
cmake_dependent_option(USE_C10D_GLOO "USE C10D GLOO" ON
"USE_DISTRIBUTED;USE_GLOO" OFF)
cmake_dependent_option(USE_C10D_NCCL "USE C10D NCCL" ON
"USE_DISTRIBUTED;USE_NCCL" OFF)
cmake_dependent_option(USE_C10D_XCCL "USE C10D XCCL" ON
"USE_DISTRIBUTED;USE_XCCL" OFF)
cmake_dependent_option(USE_C10D_MPI "USE C10D MPI" ON "USE_DISTRIBUTED;USE_MPI"
OFF)
cmake_dependent_option(
USE_C10D_GLOO "USE C10D GLOO" ON "USE_DISTRIBUTED;USE_GLOO" OFF)
cmake_dependent_option(
USE_C10D_NCCL "USE C10D NCCL" ON "USE_DISTRIBUTED;USE_NCCL" OFF)
cmake_dependent_option(
USE_C10D_XCCL "USE C10D XCCL" ON "USE_DISTRIBUTED;USE_XCCL" OFF)
cmake_dependent_option(
USE_C10D_MPI "USE C10D MPI" ON "USE_DISTRIBUTED;USE_MPI" OFF)
cmake_dependent_option(
USE_TENSORPIPE "Use TensorPipe. Only available if USE_DISTRIBUTED is on." ON
"USE_DISTRIBUTED AND NOT WIN32" OFF)
USE_TENSORPIPE "Use TensorPipe. Only available if USE_DISTRIBUTED is on." ON
"USE_DISTRIBUTED AND NOT WIN32" OFF)
option(ONNX_ML "Enable traditional ONNX ML API." ON)
option(HAVE_SOVERSION "Whether to add SOVERSION to the shared objects" OFF)
option(BUILD_LIBTORCH_CPU_WITH_DEBUG
@ -373,8 +373,9 @@ cmake_dependent_option(
cmake_dependent_option(BUILD_FUNCTORCH "Build Functorch" ON "BUILD_PYTHON" OFF)
cmake_dependent_option(BUILD_BUNDLE_PTXAS "Bundle PTX into torch/bin fodler"
OFF "USE_CUDA" OFF)
cmake_dependent_option(USE_KLEIDIAI "Use KleidiAI for the ARM CPU & AARCH64 architecture." ON
"CPU_AARCH64" OFF)
cmake_dependent_option(
USE_KLEIDIAI "Use KleidiAI for the ARM CPU & AARCH64 architecture." ON
"CPU_AARCH64" OFF)
option(USE_MIMALLOC "Use mimalloc" OFF)
# Enable third party mimalloc library to improve memory allocation performance
@ -384,10 +385,9 @@ if(WIN32)
set(USE_MIMALLOC ON)
# Not enable USE_MIMALLOC_ON_MKL due to it caused issue:
# https://github.com/pytorch/pytorch/issues/138994
# Will turn on when we can fix USE_STATIC_MKL lost functionality:
# https://github.com/pytorch/pytorch/pull/138996
# set(USE_MIMALLOC_ON_MKL ON)
# https://github.com/pytorch/pytorch/issues/138994 Will turn on when we can
# fix USE_STATIC_MKL lost functionality:
# https://github.com/pytorch/pytorch/pull/138996 set(USE_MIMALLOC_ON_MKL ON)
endif()
if(USE_CCACHE)
@ -543,8 +543,7 @@ if(LINUX)
string(STRIP "${ENV_LDFLAGS}" ENV_LDFLAGS)
# Do not append linker flags passed via env var if they already there
if(NOT ${CMAKE_SHARED_LINKER_FLAGS} MATCHES "${ENV_LDFLAGS}")
set(CMAKE_SHARED_LINKER_FLAGS
"${CMAKE_SHARED_LINKER_FLAGS} ${ENV_LDFLAGS}")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${ENV_LDFLAGS}")
endif()
endif()
@ -698,10 +697,10 @@ if(ANDROID
endif()
if(USE_KLEIDIAI AND CMAKE_C_COMPILER_VERSION)
if(CMAKE_C_COMPILER_VERSION VERSION_LESS 11)
set(USE_KLEIDIAI OFF)
message(WARNING "Disabling KleidiAI: Requires at least GCC 11 or Clang 11")
endif()
if(CMAKE_C_COMPILER_VERSION VERSION_LESS 11)
set(USE_KLEIDIAI OFF)
message(WARNING "Disabling KleidiAI: Requires at least GCC 11 or Clang 11")
endif()
endif()
# INTERN_BUILD_ATEN_OPS is used to control whether to build ATen/TH operators.
@ -835,7 +834,7 @@ include(ExternalProject)
# ---[ Dependencies ---[ FBGEMM doesn't work on x86 32bit and
# CMAKE_SYSTEM_PROCESSOR thinks its 64bit
if(USE_FBGEMM
AND((CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND CMAKE_SIZEOF_VOID_P EQUAL
AND ((CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND CMAKE_SIZEOF_VOID_P EQUAL
4)
OR CMAKE_SYSTEM_PROCESSOR STREQUAL "x86"))
set(USE_FBGEMM OFF)
@ -877,8 +876,10 @@ cmake_dependent_option(
cmake_dependent_option(
USE_MEM_EFF_ATTENTION
"Enable memory-efficient attention for scaled dot product attention.\
Will be disabled if not supported by the platform" ON
"USE_CUDA OR USE_ROCM" OFF)
Will be disabled if not supported by the platform"
ON
"USE_CUDA OR USE_ROCM"
OFF)
#
# Cannot be put into Dependencies.cmake due circular dependency:
@ -910,7 +911,8 @@ if(USE_PYTORCH_QNNPACK)
endif()
# Enable sleef on macOS with Apple silicon by default
if((${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") AND ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "arm64"))
if((${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") AND ("${CMAKE_SYSTEM_PROCESSOR}"
STREQUAL "arm64"))
message(STATUS "Running on macOS with Apple silicon")
string(APPEND CMAKE_CXX_FLAGS " -DAT_BUILD_ARM_VEC256_WITH_SLEEF")
add_definitions(-DAT_BUILD_ARM_VEC256_WITH_SLEEF)
@ -918,12 +920,11 @@ endif()
# Enable sleef on Arm(R) architecture by default (except Android)
if((NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Android")
AND("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "aarch64"))
AND ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "aarch64"))
string(APPEND CMAKE_CXX_FLAGS " -DAT_BUILD_ARM_VEC256_WITH_SLEEF")
add_definitions(-DAT_BUILD_ARM_VEC256_WITH_SLEEF)
endif()
if(USE_XNNPACK)
string(APPEND CMAKE_CXX_FLAGS " -DUSE_XNNPACK")
endif()
@ -1073,9 +1074,13 @@ if(NOT MSVC)
endif()
append_cxx_flag_if_supported("-Wno-maybe-uninitialized" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-fstandalone-debug" CMAKE_CXX_FLAGS_DEBUG)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" AND CMAKE_CXX_COMPILER_ID MATCHES "GNU")
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" AND CMAKE_CXX_COMPILER_ID MATCHES
"GNU")
if(CMAKE_BUILD_TYPE MATCHES Debug)
message(Warning "Applying -Og optimization for aarch64 GCC debug build to workaround ICE")
message(
Warning
"Applying -Og optimization for aarch64 GCC debug build to workaround ICE"
)
endif()
string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fno-omit-frame-pointer -Og")
string(APPEND CMAKE_LINKER_FLAGS_DEBUG " -fno-omit-frame-pointer -Og")
@ -1086,9 +1091,11 @@ if(NOT MSVC)
append_cxx_flag_if_supported("-fno-math-errno" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-fno-trapping-math" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Werror=format" CMAKE_CXX_FLAGS)
if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 13)
if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION
VERSION_GREATER_EQUAL 13)
append_cxx_flag_if_supported("-Wno-dangling-reference" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wno-error=dangling-reference" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wno-error=dangling-reference"
CMAKE_CXX_FLAGS)
endif()
else()
# Define export functions for AOTI.
@ -1241,7 +1248,7 @@ if(USE_MIMALLOC AND USE_MIMALLOC_ON_MKL)
endif()
# ---[ Main build
add_subdirectory(torch/headeronly) # headeronly headers
add_subdirectory(torch/headeronly) # headeronly headers
add_subdirectory(c10)
add_subdirectory(caffe2)
@ -1260,7 +1267,7 @@ endif()
# access to Caffe2.
if((NOT USE_GLOG)
OR(NOT USE_GFLAGS)
OR (NOT USE_GFLAGS)
OR BUILD_CUSTOM_PROTOBUF)
message(WARNING "Generated cmake files are only fully tested if one builds "
"with system glog, gflags, and protobuf. Other settings may "
@ -1368,7 +1375,8 @@ if(DEFINED USE_CUSTOM_DEBINFO)
# care about and caffe2/ for all test targets defined there
if(BUILD_LIBTORCHLESS)
caffe2_update_option(USE_CUDA OFF)
set(ALL_PT_TARGETS "torch_python;${C10_LIB};${TORCH_CPU_LIB};${TORCH_LIB}")
set(ALL_PT_TARGETS
"torch_python;${C10_LIB};${TORCH_CPU_LIB};${TORCH_LIB}")
else()
# @todo test if we can remove this
set(ALL_PT_TARGETS "torch_python;c10;torch_cpu;torch")
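Most of the hunks above are mechanical re-wraps of cmake_dependent_option() calls; their meaning is unchanged. As a reminder of the semantics being preserved, here is a minimal, self-contained sketch (an illustration only, not part of this commit; the option names are hypothetical). The option becomes a user-settable cache entry only while every condition in the depends list holds, and is otherwise forced to the fallback value.

# Illustrative sketch of the cmake_dependent_option() pattern (hypothetical names).
cmake_minimum_required(VERSION 3.22)
project(dependent_option_demo NONE)
include(CMakeDependentOption)

option(USE_BACKEND "Enable the hypothetical backend" ON)

# USE_BACKEND_TESTS is a real, user-settable option only while USE_BACKEND is ON
# and we are not cross-compiling; otherwise it is hidden and forced to OFF.
cmake_dependent_option(
  USE_BACKEND_TESTS "Build tests for the hypothetical backend" ON
  "USE_BACKEND;NOT CMAKE_CROSSCOMPILING" OFF)

message(STATUS "USE_BACKEND_TESTS = ${USE_BACKEND_TESTS}")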

View File

@ -1,8 +1,7 @@
cmake_minimum_required(VERSION 3.5)
option(BUILD_LITE_INTERPRETER "Master flag to build pytorch_jni_lite" ON)
message(
STATUS
"BUILD_LITE_INTERPRETER (pytorch_jni_lite): ${BUILD_LITE_INTERPRETER}")
STATUS "BUILD_LITE_INTERPRETER (pytorch_jni_lite): ${BUILD_LITE_INTERPRETER}")
if(BUILD_LITE_INTERPRETER)
project(pytorch_jni_lite CXX)
@ -14,7 +13,10 @@ endif()
include(GNUInstallDirs)
set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ standard whose features are requested to build this target.")
set(CMAKE_CXX_STANDARD
17
CACHE STRING
"The C++ standard whose features are requested to build this target.")
set(CMAKE_VERBOSE_MAKEFILE ON)
message(STATUS "ANDROID_STL:${ANDROID_STL}")
@ -35,15 +37,18 @@ set(pytorch_android_DIR ${CMAKE_CURRENT_LIST_DIR}/src/main/cpp)
if(ANDROID_ABI)
set(USE_VULKAN ON)
set(libtorch_include_DIR ${pytorch_android_DIR}/libtorch_include/${ANDROID_ABI})
set(libtorch_include_DIR
${pytorch_android_DIR}/libtorch_include/${ANDROID_ABI})
set(BUILD_SUBDIR ${ANDROID_ABI})
elseif(BUILD_LIBTORCH_WITH_JNI)
# Don't need LIBTORCH_HOME if we're building from within PyTorch.
else()
# Building against a pre-built libtorch.
if(NOT LIBTORCH_HOME)
message(FATAL_ERROR
"pytorch_android requires LIBTORCH_HOME to be defined for non-Android builds.")
message(
FATAL_ERROR
"pytorch_android requires LIBTORCH_HOME to be defined for non-Android builds."
)
endif()
set(libtorch_include_DIR ${LIBTORCH_HOME}/include)
link_directories(${LIBTORCH_HOME}/lib)
@ -52,39 +57,32 @@ endif()
message(STATUS "libtorch dir:${libtorch_DIR}")
configure_file(
${pytorch_android_DIR}/cmake_macros.h.in
${pytorch_android_DIR}/cmake_macros.h)
configure_file(${pytorch_android_DIR}/cmake_macros.h.in
${pytorch_android_DIR}/cmake_macros.h)
if(BUILD_LITE_INTERPRETER)
file(GLOB pytorch_android_SOURCES
${pytorch_android_DIR}/pytorch_jni_lite.cpp
${pytorch_android_DIR}/pytorch_jni_common.cpp
${pytorch_android_DIR}/pytorch_jni_common.h
)
file(GLOB pytorch_android_SOURCES ${pytorch_android_DIR}/pytorch_jni_lite.cpp
${pytorch_android_DIR}/pytorch_jni_common.cpp
${pytorch_android_DIR}/pytorch_jni_common.h)
else()
file(GLOB pytorch_android_SOURCES
${pytorch_android_DIR}/pytorch_jni_jit.cpp
${pytorch_android_DIR}/pytorch_jni_common.cpp
${pytorch_android_DIR}/pytorch_jni_common.h
)
file(GLOB pytorch_android_SOURCES ${pytorch_android_DIR}/pytorch_jni_jit.cpp
${pytorch_android_DIR}/pytorch_jni_common.cpp
${pytorch_android_DIR}/pytorch_jni_common.h)
endif()
add_library(${PYTORCH_JNI_TARGET} SHARED ${pytorch_android_SOURCES})
if(APPLE)
# Need to add rpath so dlopen can find dependencies.
add_custom_command(TARGET pytorch_jni
POST_BUILD COMMAND
${CMAKE_INSTALL_NAME_TOOL} -add_rpath "@loader_path"
$<TARGET_FILE:pytorch_jni>)
add_custom_command(
TARGET pytorch_jni
POST_BUILD
COMMAND ${CMAKE_INSTALL_NAME_TOOL} -add_rpath "@loader_path"
$<TARGET_FILE:pytorch_jni>)
endif()
target_compile_options(${PYTORCH_JNI_TARGET} PRIVATE
-fexceptions
)
target_compile_options(${PYTORCH_JNI_TARGET} PRIVATE -fexceptions)
target_include_directories(${PYTORCH_JNI_TARGET} BEFORE
PUBLIC $<BUILD_INTERFACE:${libtorch_include_DIR}>)
PUBLIC $<BUILD_INTERFACE:${libtorch_include_DIR}>)
set(fbjni_DIR ${CMAKE_CURRENT_LIST_DIR}/../libs/fbjni/)
set(fbjni_BUILD_DIR ${CMAKE_BINARY_DIR}/fbjni/${BUILD_SUBDIR})
@ -102,8 +100,9 @@ if(ANDROID_ABI)
function(import_static_lib name)
add_library(${name} STATIC IMPORTED)
set_property(
TARGET ${name}
PROPERTY IMPORTED_LOCATION
TARGET ${name}
PROPERTY
IMPORTED_LOCATION
${CMAKE_CURRENT_LIST_DIR}/src/main/jniLibs/${ANDROID_ABI}/${name}.a)
endfunction(import_static_lib)
@ -135,17 +134,10 @@ if(ANDROID_ABI)
libpthreadpool
libeigen_blas
libcpuinfo
libclog
)
libclog)
else()
# Prefer dynamic linking on the host
set(pytorch_jni_LIBS
fbjni
torch
torch_cpu
c10
cpuinfo
)
set(pytorch_jni_LIBS fbjni torch torch_cpu c10 cpuinfo)
if(USE_NNPACK)
list(APPEND pytorch_jni_LIBS nnpack)
@ -173,12 +165,16 @@ endif()
target_link_libraries(${PYTORCH_JNI_TARGET} ${pytorch_jni_LIBS})
install(TARGETS ${PYTORCH_JNI_TARGET}
install(
TARGETS ${PYTORCH_JNI_TARGET}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) #For windows
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) # For windows
if(MSVC)
install(FILES $<TARGET_PDB_FILE:pytorch_jni> DESTINATION ${CMAKE_INSTALL_LIBDIR} OPTIONAL)
install(
FILES $<TARGET_PDB_FILE:pytorch_jni>
DESTINATION ${CMAKE_INSTALL_LIBDIR}
OPTIONAL)
install(TARGETS ${PYTORCH_JNI_TARGET} DESTINATION ${CMAKE_INSTALL_LIBDIR})
endif()
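The install(TARGETS ...) call above is re-wrapped into one clause per artifact kind. A minimal standalone sketch of that multi-destination form, with a hypothetical library name (an illustration, not taken from the diff):

# Hypothetical shared library installed with per-artifact destinations.
cmake_minimum_required(VERSION 3.22)
project(install_demo CXX)
include(GNUInstallDirs)

# Generate a trivial source so the example configures on its own.
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/demo.cpp "int demo() { return 0; }\n")
add_library(demo SHARED ${CMAKE_CURRENT_BINARY_DIR}/demo.cpp)

install(
  TARGETS demo
  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} # static/import libraries
  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} # .so/.dylib on UNIX-like systems
  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) # .dll (and executables) on Windows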

View File

@ -1,21 +1,19 @@
cmake_minimum_required(VERSION 3.5)
project(pytorch_vision_jni CXX)
set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ standard whose features are requested to build this target.")
set(CMAKE_CXX_STANDARD
17
CACHE STRING
"The C++ standard whose features are requested to build this target.")
set(CMAKE_VERBOSE_MAKEFILE ON)
set(pytorch_vision_cpp_DIR ${CMAKE_CURRENT_LIST_DIR}/src/main/cpp)
file(GLOB pytorch_vision_SOURCES
${pytorch_vision_cpp_DIR}/pytorch_vision_jni.cpp
)
${pytorch_vision_cpp_DIR}/pytorch_vision_jni.cpp)
add_library(pytorch_vision_jni SHARED
${pytorch_vision_SOURCES}
)
add_library(pytorch_vision_jni SHARED ${pytorch_vision_SOURCES})
target_compile_options(pytorch_vision_jni PRIVATE
-fexceptions
)
target_compile_options(pytorch_vision_jni PRIVATE -fexceptions)
set(BUILD_SUBDIR ${ANDROID_ABI})

View File

@ -5,17 +5,17 @@ endif()
# Find modules
if(NOT INTERN_BUILD_MOBILE)
list(APPEND CMAKE_MODULE_PATH /usr/lib/x86_64-linux-gnu/)
list(APPEND CMAKE_LIBRARY_PATH /usr/lib/x86_64-linux-gnu/ /usr/lib/aarch64-linux-gnu/)
list(APPEND CMAKE_LIBRARY_PATH /usr/lib/x86_64-linux-gnu/
/usr/lib/aarch64-linux-gnu/)
endif()
list(APPEND CMAKE_MODULE_PATH
${CMAKE_CURRENT_SOURCE_DIR}/../cmake/Modules
${CMAKE_CURRENT_SOURCE_DIR}/../cmake/public
${CMAKE_CURRENT_SOURCE_DIR}/../cmake/Modules_CUDA_fix)
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../cmake/Modules
${CMAKE_CURRENT_SOURCE_DIR}/../cmake/public
${CMAKE_CURRENT_SOURCE_DIR}/../cmake/Modules_CUDA_fix)
cmake_policy(SET CMP0012 NEW)
#############################################
# ##############################################################################
set(ATen_CPU_SRCS)
set(ATen_MTIA_SRCS)
@ -47,20 +47,25 @@ set(ATen_CUDA_DEPENDENCY_LIBS)
set(ATen_HIP_DEPENDENCY_LIBS)
set(ATen_PUBLIC_CUDA_DEPENDENCY_LIBS)
set(ATen_PUBLIC_HIP_DEPENDENCY_LIBS)
set(ATEN_INSTALL_BIN_SUBDIR "bin" CACHE PATH "ATen install binary subdirectory")
set(ATEN_INSTALL_LIB_SUBDIR "lib" CACHE PATH "ATen install library subdirectory")
set(ATEN_INSTALL_INCLUDE_SUBDIR "include" CACHE PATH "ATen install include subdirectory")
set(ATEN_INSTALL_BIN_SUBDIR
"bin"
CACHE PATH "ATen install binary subdirectory")
set(ATEN_INSTALL_LIB_SUBDIR
"lib"
CACHE PATH "ATen install library subdirectory")
set(ATEN_INSTALL_INCLUDE_SUBDIR
"include"
CACHE PATH "ATen install include subdirectory")
set(MEM_EFF_ATTENTION_CUDA_SOURCES)
set(TH_LINK_STYLE STATIC)
set(TH_CPU_INCLUDE
${CMAKE_CURRENT_SOURCE_DIR}/src
${CMAKE_CURRENT_BINARY_DIR}/src
${CMAKE_BINARY_DIR}/aten/src)
set(TH_CPU_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/src
${CMAKE_CURRENT_BINARY_DIR}/src ${CMAKE_BINARY_DIR}/aten/src)
list(APPEND ATen_CPU_INCLUDE ${TH_CPU_INCLUDE})
if(USE_VULKAN)
list(APPEND ATen_CPU_INCLUDE ${CMAKE_BINARY_DIR}/vulkan ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/VulkanMemoryAllocator)
list(APPEND ATen_CPU_INCLUDE ${CMAKE_BINARY_DIR}/vulkan
${CMAKE_CURRENT_SOURCE_DIR}/../third_party/VulkanMemoryAllocator)
endif()
# Find the HIP package, set the HIP paths, load the HIP CMake.
@ -73,12 +78,15 @@ endif()
# Both CUDA and ROCM are enabled and found. Report an error.
if(USE_CUDA AND USE_ROCM)
message(FATAL_ERROR "Both CUDA and ROCm are enabled and found. PyTorch can only be built with either of them. Please turn one off by using either USE_CUDA=OFF or USE_ROCM=OFF.")
message(
FATAL_ERROR
"Both CUDA and ROCm are enabled and found. PyTorch can only be built with either of them. Please turn one off by using either USE_CUDA=OFF or USE_ROCM=OFF."
)
endif()
if(USE_ROCM)
# TODO: AT_HIP_ENABLED (change this once we represent HIP as HIP in
# ATen proper)
# TODO: AT_HIP_ENABLED (change this once we represent HIP as HIP in ATen
# proper)
set(AT_CUDA_ENABLED 1)
add_subdirectory(src/THH)
message("ROCm is enabled.")
@ -109,45 +117,118 @@ else()
set(AT_HIPSPARSELT_ENABLED 0)
endif()
list(APPEND ATen_CPU_INCLUDE
${CMAKE_CURRENT_SOURCE_DIR}/src)
list(APPEND ATen_CPU_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/src)
add_subdirectory(src/ATen)
# Pass source, includes, and libs to parent
set(ATen_CPU_SRCS ${ATen_CPU_SRCS} PARENT_SCOPE)
set(ATen_CORE_SRCS ${ATen_CORE_SRCS} PARENT_SCOPE)
set(ATen_MTIA_SRCS ${ATen_MTIA_SRCS} PARENT_SCOPE)
set(ATen_XPU_SRCS ${ATen_XPU_SRCS} PARENT_SCOPE)
set(ATen_XPU_INCLUDE ${ATen_XPU_INCLUDE} PARENT_SCOPE)
set(ATen_CUDA_CU_SRCS ${ATen_CUDA_CU_SRCS} PARENT_SCOPE)
set(ATen_CUDA_CPP_SRCS ${ATen_CUDA_CPP_SRCS} PARENT_SCOPE)
set(ATen_CUDA_LINALG_SRCS ${ATen_CUDA_LINALG_SRCS} PARENT_SCOPE)
set(ATen_CUDA_SRCS_W_SORT_BY_KEY ${ATen_CUDA_SRCS_W_SORT_BY_KEY} PARENT_SCOPE)
set(ATen_CUDA_CU_SRCS_W_SORT_BY_KEY ${ATen_CUDA_CU_SRCS_W_SORT_BY_KEY} PARENT_SCOPE)
set(ATen_HIP_SRCS ${ATen_HIP_SRCS} PARENT_SCOPE)
set(ATen_MPS_SRCS ${ATen_MPS_SRCS} PARENT_SCOPE)
set(ATen_MPS_TEST_SRCS ${ATen_MPS_TEST_SRCS} PARENT_SCOPE)
set(ATen_HIP_SRCS_W_SORT_BY_KEY ${ATen_HIP_SRCS_W_SORT_BY_KEY} PARENT_SCOPE)
set(ATen_XPU_SRCS ${ATen_XPU_SRCS} PARENT_SCOPE)
set(ATen_XPU_TEST_SRCS ${ATen_XPU_TEST_SRCS} PARENT_SCOPE)
set(ATen_NVRTC_STUB_SRCS ${ATen_NVRTC_STUB_SRCS} PARENT_SCOPE)
set(ATen_CPU_TEST_SRCS ${ATen_CPU_TEST_SRCS} PARENT_SCOPE)
set(ATen_CUDA_TEST_SRCS ${ATen_CUDA_TEST_SRCS} PARENT_SCOPE)
set(ATen_HIP_TEST_SRCS ${ATen_HIP_TEST_SRCS} PARENT_SCOPE)
set(ATen_VULKAN_TEST_SRCS ${ATen_VULKAN_TEST_SRCS} PARENT_SCOPE)
set(ATen_MOBILE_BENCHMARK_SRCS ${ATen_MOBILE_BENCHMARK_SRCS} PARENT_SCOPE)
set(ATen_MOBILE_TEST_SRCS ${ATen_MOBILE_TEST_SRCS} PARENT_SCOPE)
set(ATen_VEC_TEST_SRCS ${ATen_VEC_TEST_SRCS} PARENT_SCOPE)
set(ATen_CPU_INCLUDE ${ATen_CPU_INCLUDE} PARENT_SCOPE)
set(ATen_CUDA_INCLUDE ${ATen_CUDA_INCLUDE} PARENT_SCOPE)
set(ATen_HIP_INCLUDE ${ATen_HIP_INCLUDE} PARENT_SCOPE)
set(ATen_XPU_INCLUDE ${ATen_XPU_INCLUDE} PARENT_SCOPE)
set(ATen_THIRD_PARTY_INCLUDE ${ATen_THIRD_PARTY_INCLUDE} PARENT_SCOPE)
set(ATen_CPU_DEPENDENCY_LIBS ${ATen_CPU_DEPENDENCY_LIBS} PARENT_SCOPE)
set(ATen_XPU_DEPENDENCY_LIBS ${ATen_XPU_DEPENDENCY_LIBS} PARENT_SCOPE)
set(ATen_CUDA_DEPENDENCY_LIBS ${ATen_CUDA_DEPENDENCY_LIBS} PARENT_SCOPE)
set(ATen_HIP_DEPENDENCY_LIBS ${ATen_HIP_DEPENDENCY_LIBS} PARENT_SCOPE)
set(ATen_CORE_TEST_SRCS ${ATen_CORE_TEST_SRCS} PARENT_SCOPE)
set(FLASH_ATTENTION_CUDA_SOURCES ${FLASH_ATTENTION_CUDA_SOURCES} PARENT_SCOPE)
set(MEM_EFF_ATTENTION_CUDA_SOURCES ${MEM_EFF_ATTENTION_CUDA_SOURCES} PARENT_SCOPE)
set(ATen_ATTENTION_KERNEL_SRCS ${ATen_ATTENTION_KERNEL_SRCS} PARENT_SCOPE)
set(ATen_CPU_SRCS
${ATen_CPU_SRCS}
PARENT_SCOPE)
set(ATen_CORE_SRCS
${ATen_CORE_SRCS}
PARENT_SCOPE)
set(ATen_MTIA_SRCS
${ATen_MTIA_SRCS}
PARENT_SCOPE)
set(ATen_XPU_SRCS
${ATen_XPU_SRCS}
PARENT_SCOPE)
set(ATen_XPU_INCLUDE
${ATen_XPU_INCLUDE}
PARENT_SCOPE)
set(ATen_CUDA_CU_SRCS
${ATen_CUDA_CU_SRCS}
PARENT_SCOPE)
set(ATen_CUDA_CPP_SRCS
${ATen_CUDA_CPP_SRCS}
PARENT_SCOPE)
set(ATen_CUDA_LINALG_SRCS
${ATen_CUDA_LINALG_SRCS}
PARENT_SCOPE)
set(ATen_CUDA_SRCS_W_SORT_BY_KEY
${ATen_CUDA_SRCS_W_SORT_BY_KEY}
PARENT_SCOPE)
set(ATen_CUDA_CU_SRCS_W_SORT_BY_KEY
${ATen_CUDA_CU_SRCS_W_SORT_BY_KEY}
PARENT_SCOPE)
set(ATen_HIP_SRCS
${ATen_HIP_SRCS}
PARENT_SCOPE)
set(ATen_MPS_SRCS
${ATen_MPS_SRCS}
PARENT_SCOPE)
set(ATen_MPS_TEST_SRCS
${ATen_MPS_TEST_SRCS}
PARENT_SCOPE)
set(ATen_HIP_SRCS_W_SORT_BY_KEY
${ATen_HIP_SRCS_W_SORT_BY_KEY}
PARENT_SCOPE)
set(ATen_XPU_SRCS
${ATen_XPU_SRCS}
PARENT_SCOPE)
set(ATen_XPU_TEST_SRCS
${ATen_XPU_TEST_SRCS}
PARENT_SCOPE)
set(ATen_NVRTC_STUB_SRCS
${ATen_NVRTC_STUB_SRCS}
PARENT_SCOPE)
set(ATen_CPU_TEST_SRCS
${ATen_CPU_TEST_SRCS}
PARENT_SCOPE)
set(ATen_CUDA_TEST_SRCS
${ATen_CUDA_TEST_SRCS}
PARENT_SCOPE)
set(ATen_HIP_TEST_SRCS
${ATen_HIP_TEST_SRCS}
PARENT_SCOPE)
set(ATen_VULKAN_TEST_SRCS
${ATen_VULKAN_TEST_SRCS}
PARENT_SCOPE)
set(ATen_MOBILE_BENCHMARK_SRCS
${ATen_MOBILE_BENCHMARK_SRCS}
PARENT_SCOPE)
set(ATen_MOBILE_TEST_SRCS
${ATen_MOBILE_TEST_SRCS}
PARENT_SCOPE)
set(ATen_VEC_TEST_SRCS
${ATen_VEC_TEST_SRCS}
PARENT_SCOPE)
set(ATen_CPU_INCLUDE
${ATen_CPU_INCLUDE}
PARENT_SCOPE)
set(ATen_CUDA_INCLUDE
${ATen_CUDA_INCLUDE}
PARENT_SCOPE)
set(ATen_HIP_INCLUDE
${ATen_HIP_INCLUDE}
PARENT_SCOPE)
set(ATen_XPU_INCLUDE
${ATen_XPU_INCLUDE}
PARENT_SCOPE)
set(ATen_THIRD_PARTY_INCLUDE
${ATen_THIRD_PARTY_INCLUDE}
PARENT_SCOPE)
set(ATen_CPU_DEPENDENCY_LIBS
${ATen_CPU_DEPENDENCY_LIBS}
PARENT_SCOPE)
set(ATen_XPU_DEPENDENCY_LIBS
${ATen_XPU_DEPENDENCY_LIBS}
PARENT_SCOPE)
set(ATen_CUDA_DEPENDENCY_LIBS
${ATen_CUDA_DEPENDENCY_LIBS}
PARENT_SCOPE)
set(ATen_HIP_DEPENDENCY_LIBS
${ATen_HIP_DEPENDENCY_LIBS}
PARENT_SCOPE)
set(ATen_CORE_TEST_SRCS
${ATen_CORE_TEST_SRCS}
PARENT_SCOPE)
set(FLASH_ATTENTION_CUDA_SOURCES
${FLASH_ATTENTION_CUDA_SOURCES}
PARENT_SCOPE)
set(MEM_EFF_ATTENTION_CUDA_SOURCES
${MEM_EFF_ATTENTION_CUDA_SOURCES}
PARENT_SCOPE)
set(ATen_ATTENTION_KERNEL_SRCS
${ATen_ATTENTION_KERNEL_SRCS}
PARENT_SCOPE)
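The long run of set(... PARENT_SCOPE) calls above is a pure re-wrap; each one still pushes a list built in aten/ up to the enclosing directory. The sketch below (hypothetical names, runnable with cmake -P) shows the same scoping rule using a function, which creates a new variable scope just as add_subdirectory() does:

# Without PARENT_SCOPE, a set() inside a function (or a subdirectory)
# disappears when that scope ends; with it, the caller sees the value.
cmake_minimum_required(VERSION 3.22)

function(collect_sources out_var)
  set(local_srcs a.cpp b.cpp)
  set(${out_var}
      ${local_srcs}
      PARENT_SCOPE)
endfunction()

collect_sources(DEMO_SRCS)
message(STATUS "collected: ${DEMO_SRCS}") # prints a.cpp;b.cpp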

File diff suppressed because it is too large.

File diff suppressed because it is too large.

View File

@ -1,15 +1,15 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
# This source code is licensed under the BSD-style license found in the LICENSE
# file in the root directory of this source tree.
cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR)
project(cpuinfo-download NONE)
include(ExternalProject)
ExternalProject_Add(cpuinfo
ExternalProject_Add(
cpuinfo
GIT_REPOSITORY https://github.com/pytorch/cpuinfo.git
GIT_TAG master
SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/cpuinfo"
@ -17,5 +17,4 @@ ExternalProject_Add(cpuinfo
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
TEST_COMMAND "")
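This file, and the near-identical FP16, FXdiv, googlebenchmark, googletest, psimd, and pthreadpool files that follow, all use the same download-only ExternalProject_Add() recipe: fetch the source and explicitly blank out every other step so the dependency is built elsewhere. A hedged standalone sketch of that recipe (placeholder repository URL and paths):

# Download-only external project: fetch the dependency, but do not
# configure, build, install, or test it here.
cmake_minimum_required(VERSION 3.22)
project(somedep-download NONE)
include(ExternalProject)

ExternalProject_Add(
  somedep
  GIT_REPOSITORY https://example.com/somedep.git # placeholder URL
  GIT_TAG main
  SOURCE_DIR "${CMAKE_BINARY_DIR}/somedep-src"
  BINARY_DIR "${CMAKE_BINARY_DIR}/somedep-build"
  CONFIGURE_COMMAND ""
  BUILD_COMMAND ""
  INSTALL_COMMAND ""
  TEST_COMMAND "")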

View File

@ -1,15 +1,15 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
# This source code is licensed under the BSD-style license found in the LICENSE
# file in the root directory of this source tree.
cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR)
project(fp16-download NONE)
include(ExternalProject)
ExternalProject_Add(fp16
ExternalProject_Add(
fp16
GIT_REPOSITORY https://github.com/Maratyszcza/FP16.git
GIT_TAG master
SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/fp16"
@ -17,5 +17,4 @@ ExternalProject_Add(fp16
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
TEST_COMMAND "")

View File

@ -1,15 +1,15 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
# This source code is licensed under the BSD-style license found in the LICENSE
# file in the root directory of this source tree.
cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR)
project(fxdiv-download NONE)
include(ExternalProject)
ExternalProject_Add(fxdiv
ExternalProject_Add(
fxdiv
GIT_REPOSITORY https://github.com/Maratyszcza/FXdiv.git
GIT_TAG master
SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/fxdiv"
@ -17,5 +17,4 @@ ExternalProject_Add(fxdiv
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
TEST_COMMAND "")

View File

@ -1,21 +1,21 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
# This source code is licensed under the BSD-style license found in the LICENSE
# file in the root directory of this source tree.
cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR)
project(googlebenchmark-download NONE)
include(ExternalProject)
ExternalProject_Add(googlebenchmark
ExternalProject_Add(
googlebenchmark
URL https://github.com/google/benchmark/archive/v1.4.1.zip
URL_HASH SHA256=61ae07eb5d4a0b02753419eb17a82b7d322786bb36ab62bd3df331a4d47c00a7
URL_HASH
SHA256=61ae07eb5d4a0b02753419eb17a82b7d322786bb36ab62bd3df331a4d47c00a7
SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googlebenchmark"
BINARY_DIR "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark"
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
TEST_COMMAND "")

View File

@ -1,21 +1,21 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
# This source code is licensed under the BSD-style license found in the LICENSE
# file in the root directory of this source tree.
cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR)
project(googletest-download NONE)
include(ExternalProject)
ExternalProject_Add(googletest
ExternalProject_Add(
googletest
URL https://github.com/google/googletest/archive/release-1.10.0.zip
URL_HASH SHA256=94c634d499558a76fa649edb13721dce6e98fb1e7018dfaeba3cd7a083945e91
URL_HASH
SHA256=94c634d499558a76fa649edb13721dce6e98fb1e7018dfaeba3cd7a083945e91
SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googletest"
BINARY_DIR "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest"
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
TEST_COMMAND "")

View File

@ -1,15 +1,15 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
# This source code is licensed under the BSD-style license found in the LICENSE
# file in the root directory of this source tree.
cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR)
project(psimd-download NONE)
include(ExternalProject)
ExternalProject_Add(psimd
ExternalProject_Add(
psimd
GIT_REPOSITORY https://github.com/Maratyszcza/psimd.git
GIT_TAG master
SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/psimd"
@ -17,5 +17,4 @@ ExternalProject_Add(psimd
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
TEST_COMMAND "")

View File

@ -1,15 +1,15 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
# This source code is licensed under the BSD-style license found in the LICENSE
# file in the root directory of this source tree.
cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR)
project(pthreadpool-download NONE)
include(ExternalProject)
ExternalProject_Add(pthreadpool
ExternalProject_Add(
pthreadpool
GIT_REPOSITORY https://github.com/Maratyszcza/pthreadpool.git
GIT_TAG master
SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/pthreadpool"
@ -17,5 +17,4 @@ ExternalProject_Add(pthreadpool
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
TEST_COMMAND "")

View File

@ -1,63 +1,92 @@
# generate a list of kernels, but not actually emit files at config stage
execute_process(
COMMAND python3 ${CMAKE_SOURCE_DIR}/third_party/composable_kernel/example/ck_tile/01_fmha/generate.py
--api fwd --receipt 600 --list_blobs ${CMAKE_CURRENT_LIST_DIR}/fwd_blob_list.txt
RESULT_VARIABLE ret
)
COMMAND
python3
${CMAKE_SOURCE_DIR}/third_party/composable_kernel/example/ck_tile/01_fmha/generate.py
--api fwd --receipt 600 --list_blobs
${CMAKE_CURRENT_LIST_DIR}/fwd_blob_list.txt
RESULT_VARIABLE ret)
if(ret AND NOT ret EQUAL 0)
message( FATAL_ERROR "CK Tile FMHA FAILED to generate a list of FWD kernels via Python.")
message(
FATAL_ERROR
"CK Tile FMHA FAILED to generate a list of FWD kernels via Python.")
endif()
execute_process(
COMMAND python3 ${CMAKE_SOURCE_DIR}/third_party/composable_kernel/example/ck_tile/01_fmha/generate.py
--api bwd --receipt 600 --list_blobs ${CMAKE_CURRENT_LIST_DIR}/bwd_blob_list.txt
RESULT_VARIABLE ret
)
COMMAND
python3
${CMAKE_SOURCE_DIR}/third_party/composable_kernel/example/ck_tile/01_fmha/generate.py
--api bwd --receipt 600 --list_blobs
${CMAKE_CURRENT_LIST_DIR}/bwd_blob_list.txt
RESULT_VARIABLE ret)
if(ret AND NOT ret EQUAL 0)
message( FATAL_ERROR "CK Tile FMHA FAILED to generate a list of BWD kernels via Python.")
message(
FATAL_ERROR
"CK Tile FMHA FAILED to generate a list of BWD kernels via Python.")
endif()
# Generate the files for both fwd and bwd
execute_process(COMMAND python3 ${CMAKE_SOURCE_DIR}/third_party/composable_kernel/example/ck_tile/01_fmha/generate.py --api fwd --receipt 600 --output_dir ${CMAKE_CURRENT_LIST_DIR}
)
execute_process(
COMMAND
python3
${CMAKE_SOURCE_DIR}/third_party/composable_kernel/example/ck_tile/01_fmha/generate.py
--api fwd --receipt 600 --output_dir ${CMAKE_CURRENT_LIST_DIR})
if(ret AND NOT ret EQUAL 0)
message( FATAL_ERROR "CK Tile FMHA FAILED to generate FWD kernels.")
message(FATAL_ERROR "CK Tile FMHA FAILED to generate FWD kernels.")
endif()
execute_process(COMMAND python3 ${CMAKE_SOURCE_DIR}/third_party/composable_kernel/example/ck_tile/01_fmha/generate.py --api bwd --receipt 600 --output_dir ${CMAKE_CURRENT_LIST_DIR}
RESULT_VARIABLE ret
)
execute_process(
COMMAND
python3
${CMAKE_SOURCE_DIR}/third_party/composable_kernel/example/ck_tile/01_fmha/generate.py
--api bwd --receipt 600 --output_dir ${CMAKE_CURRENT_LIST_DIR}
RESULT_VARIABLE ret)
if(ret AND NOT ret EQUAL 0)
message( FATAL_ERROR "CK Tile FMHA FAILED to generate BWD kernels.")
message(FATAL_ERROR "CK Tile FMHA FAILED to generate BWD kernels.")
endif()
# Change make_kernel to make_kernel_pt for fwd
execute_process(
COMMAND bash -c "${CMAKE_CURRENT_LIST_DIR}/add_make_kernel_pt.sh ${CMAKE_CURRENT_LIST_DIR}/fwd_blob_list.txt"
COMMAND
bash -c
"${CMAKE_CURRENT_LIST_DIR}/add_make_kernel_pt.sh ${CMAKE_CURRENT_LIST_DIR}/fwd_blob_list.txt"
RESULT_VARIABLE ret)
if(ret AND NOT ret EQUAL 0)
message( FATAL_ERROR "CK Tile FMHA FAILED to change make_kernel to make_kernel_pt for the fwd pass")
message(
FATAL_ERROR
"CK Tile FMHA FAILED to change make_kernel to make_kernel_pt for the fwd pass"
)
endif()
# Change make_kernel to make_kernel_pt for bwd
execute_process(
COMMAND bash -c "${CMAKE_CURRENT_LIST_DIR}/add_make_kernel_pt.sh ${CMAKE_CURRENT_LIST_DIR}/bwd_blob_list.txt"
COMMAND
bash -c
"${CMAKE_CURRENT_LIST_DIR}/add_make_kernel_pt.sh ${CMAKE_CURRENT_LIST_DIR}/bwd_blob_list.txt"
RESULT_VARIABLE ret)
if(ret AND NOT ret EQUAL 0)
message( FATAL_ERROR "CK Tile FMHA FAILED to change make_kernel to make_kernel_pt for the bwd pass")
message(
FATAL_ERROR
"CK Tile FMHA FAILED to change make_kernel to make_kernel_pt for the bwd pass"
)
endif()
# Change file extensions to .hip
execute_process(COMMAND bash -c "for file in ${CMAKE_CURRENT_LIST_DIR}/*.cpp; do mv -- \"$file\" \"\${file%.cpp}.hip\"; done"
RESULT_VARIABLE ret
)
execute_process(
COMMAND
bash -c
"for file in ${CMAKE_CURRENT_LIST_DIR}/*.cpp; do mv -- \"$file\" \"\${file%.cpp}.hip\"; done"
RESULT_VARIABLE ret)
if(ret AND NOT ret EQUAL 0)
message( FATAL_ERROR "CK Tile FMHA FAILED to change the generated instances extensions from .cpp to .hpp")
message(
FATAL_ERROR
"CK Tile FMHA FAILED to change the generated instances extensions from .cpp to .hpp"
)
endif()
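Every step above follows the same configure-time pattern: run a generator script with execute_process() and abort the configure if RESULT_VARIABLE reports a failure. A minimal sketch of just that error-checking idiom (the echoed command is a placeholder, not the composable_kernel generator):

# Run a command at configure time (or with cmake -P) and fail fast on error.
execute_process(
  COMMAND ${CMAKE_COMMAND} -E echo "placeholder generator step"
  RESULT_VARIABLE ret)

# RESULT_VARIABLE holds "0" on success, an exit code or error string otherwise.
if(ret AND NOT ret EQUAL 0)
  message(FATAL_ERROR "placeholder generator step failed: ${ret}")
endif()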

View File

@ -2,19 +2,26 @@ include(CMakePrintHelpers)
# Generate AITER/CK Asm code
execute_process(
COMMAND python3 ${CMAKE_SOURCE_DIR}/third_party/aiter/csrc/py_itfs_cu/fmha_v3_bwd_kernel_generate.py --receipt 1 --output_dir ${CMAKE_CURRENT_LIST_DIR}
RESULT_VARIABLE ret
)
COMMAND
python3
${CMAKE_SOURCE_DIR}/third_party/aiter/csrc/py_itfs_cu/fmha_v3_bwd_kernel_generate.py
--receipt 1 --output_dir ${CMAKE_CURRENT_LIST_DIR}
RESULT_VARIABLE ret)
if(ret AND NOT ret EQUAL 0)
message( FATAL_ERROR "Failed to generate FAv3 CK Kernels")
message(FATAL_ERROR "Failed to generate FAv3 CK Kernels")
endif()
execute_process(
COMMAND python3 ${CMAKE_SOURCE_DIR}/third_party/aiter/csrc/cpp_itfs/mha_bwd_generate.py --receipt 3 --output_dir ${CMAKE_CURRENT_LIST_DIR}
RESULT_VARIABLE ret
)
COMMAND
python3
${CMAKE_SOURCE_DIR}/third_party/aiter/csrc/cpp_itfs/mha_bwd_generate.py
--receipt 3 --output_dir ${CMAKE_CURRENT_LIST_DIR}
RESULT_VARIABLE ret)
# Change file extensions to .hip
execute_process(COMMAND bash -c "for file in ${CMAKE_CURRENT_LIST_DIR}/*.cpp; do mv -- \"$file\" \"\${file%.cpp}.hip\"; done")
execute_process(
COMMAND
bash -c
"for file in ${CMAKE_CURRENT_LIST_DIR}/*.cpp; do mv -- \"$file\" \"\${file%.cpp}.hip\"; done"
)

View File

@ -3,19 +3,21 @@ if(PYTORCH_NNAPI_STANDALONE)
cmake_minimum_required(VERSION 3.5 FATAL_ERROR)
project(pytorch_nnapi)
set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ standard whose features are requested to build this target.")
set(CMAKE_CXX_STANDARD
14
CACHE
STRING
"The C++ standard whose features are requested to build this target.")
find_package(Torch REQUIRED)
set(NNAPI_SRCS
nnapi_bind.cpp
nnapi_wrapper.cpp
nnapi_model_loader.cpp
)
set(NNAPI_SRCS nnapi_bind.cpp nnapi_wrapper.cpp nnapi_model_loader.cpp)
add_library(pytorch_nnapi SHARED ${NNAPI_SRCS})
target_link_libraries(pytorch_nnapi torch)
else()
# Building within the PyTorch tree.
file(GLOB ATen_NNAPI_SRCS "*.cpp")
set(ATen_NNAPI_SRCS ${ATen_NNAPI_SRCS} PARENT_SCOPE)
set(ATen_NNAPI_SRCS
${ATen_NNAPI_SRCS}
PARENT_SCOPE)
endif()

View File

@ -1,9 +1,16 @@
file(GLOB_RECURSE ATen_QUANTIZED_HEADERS "*.h")
file(GLOB_RECURSE ATen_QUANTIZED_SRCS "*.cpp")
file(GLOB_RECURSE ATen_QUANTIZED_TEST_SRCS "*_test.cpp")
EXCLUDE(ATen_QUANTIZED_SRCS "${ATen_QUANTIZED_SRCS}" ${ATen_QUANTIZED_TEST_SRCS})
exclude(ATen_QUANTIZED_SRCS "${ATen_QUANTIZED_SRCS}"
${ATen_QUANTIZED_TEST_SRCS})
# Pass to parent
set(ATen_QUANTIZED_HEADERS ${ATen_QUANTIZED_HEADERS} PARENT_SCOPE)
set(ATen_QUANTIZED_SRCS ${ATen_QUANTIZED_SRCS} PARENT_SCOPE)
set(ATen_QUANTIZED_TEST_SRCS ${ATen_QUANTIZED_TEST_SRCS} PARENT_SCOPE)
set(ATen_QUANTIZED_HEADERS
${ATen_QUANTIZED_HEADERS}
PARENT_SCOPE)
set(ATen_QUANTIZED_SRCS
${ATen_QUANTIZED_SRCS}
PARENT_SCOPE)
set(ATen_QUANTIZED_TEST_SRCS
${ATen_QUANTIZED_TEST_SRCS}
PARENT_SCOPE)

View File

@ -4,7 +4,9 @@ if(MSVC)
endif()
endif(MSVC)
list(APPEND ATen_CPU_TEST_SRCS
list(
APPEND
ATen_CPU_TEST_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/Dict_test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Dimname_test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/MaybeOwned_test.cpp
@ -49,11 +51,12 @@ list(APPEND ATen_CPU_TEST_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/weakref_test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/wrapdim_test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/xla_tensor_test.cpp
# Fix this.
# ${CMAKE_CURRENT_SOURCE_DIR}/xnnpack_test.cpp
)
# Fix this. ${CMAKE_CURRENT_SOURCE_DIR}/xnnpack_test.cpp
)
list(APPEND ATen_CUDA_TEST_SRCS
list(
APPEND
ATen_CUDA_TEST_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/cuda_allocator_test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cuda_apply_test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cuda_atomic_ops_test.cu
@ -76,10 +79,12 @@ list(APPEND ATen_CUDA_TEST_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/cuda_vectorized_test.cu)
if(CAFFE2_USE_CUDNN)
list(APPEND ATen_CUDA_TEST_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/cuda_cudnn_test.cpp)
${CMAKE_CURRENT_SOURCE_DIR}/cuda_cudnn_test.cpp)
endif()
list(APPEND ATen_HIP_TEST_SRCS
list(
APPEND
ATen_HIP_TEST_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/hip/hip_apply_test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/hip/hip_complex_math_test.hip
${CMAKE_CURRENT_SOURCE_DIR}/hip/hip_complex_test.hip
@ -92,45 +97,62 @@ list(APPEND ATen_HIP_TEST_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/hip/hip_packedtensoraccessor_test.hip
${CMAKE_CURRENT_SOURCE_DIR}/hip/hip_vectorized_test.hip)
# TODO: fix and enable these
# ${CMAKE_CURRENT_SOURCE_DIR}/hip/hip_tensor_interop_test.cpp
# ${CMAKE_CURRENT_SOURCE_DIR}/hip/hip_stream_test.cpp
# ${CMAKE_CURRENT_SOURCE_DIR}/hip/hip_tensor_interop_test.cpp
# ${CMAKE_CURRENT_SOURCE_DIR}/hip/hip_stream_test.cpp
list(APPEND ATen_VULKAN_TEST_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/vulkan_api_test.cpp)
${CMAKE_CURRENT_SOURCE_DIR}/vulkan_api_test.cpp)
list(APPEND ATen_MOBILE_TEST_SRCS
list(
APPEND
ATen_MOBILE_TEST_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/cpu_caching_allocator_test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cpu_profiling_allocator_test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/quantized_test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/vec_test_all_types.cpp)
list(APPEND ATen_VEC_TEST_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/vec_test_all_types.cpp
)
${CMAKE_CURRENT_SOURCE_DIR}/vec_test_all_types.cpp)
list(APPEND ATen_MPS_TEST_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/mps_test_print.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mps_test_allocator.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mps_test_metal_library.cpp)
list(APPEND ATen_MPS_TEST_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/mps_test_print.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mps_test_allocator.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mps_test_metal_library.cpp)
if(APPLE AND USE_MPS)
list(APPEND ATen_MPS_TEST_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/mps_test_objc_interface.mm)
${CMAKE_CURRENT_SOURCE_DIR}/mps_test_objc_interface.mm)
endif()
list(APPEND ATen_XPU_TEST_SRCS
list(
APPEND
ATen_XPU_TEST_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/xpu_caching_host_allocator_test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/xpu_device_test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/xpu_event_test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/xpu_generator_test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/xpu_reportMemoryUsage_test.cpp
)
${CMAKE_CURRENT_SOURCE_DIR}/xpu_reportMemoryUsage_test.cpp)
# ---[ Send the lists to the parent scope.
set(ATen_CPU_TEST_SRCS ${ATen_CPU_TEST_SRCS} PARENT_SCOPE)
set(ATen_CUDA_TEST_SRCS ${ATen_CUDA_TEST_SRCS} PARENT_SCOPE)
set(ATen_HIP_TEST_SRCS ${ATen_HIP_TEST_SRCS} PARENT_SCOPE)
set(ATen_VULKAN_TEST_SRCS ${ATen_VULKAN_TEST_SRCS} PARENT_SCOPE)
set(ATen_MOBILE_TEST_SRCS ${ATen_MOBILE_TEST_SRCS} PARENT_SCOPE)
set(ATen_VEC_TEST_SRCS ${ATen_VEC_TEST_SRCS} PARENT_SCOPE)
set(ATen_MPS_TEST_SRCS ${ATen_MPS_TEST_SRCS} PARENT_SCOPE)
set(ATen_XPU_TEST_SRCS ${ATen_XPU_TEST_SRCS} PARENT_SCOPE)
set(ATen_CPU_TEST_SRCS
${ATen_CPU_TEST_SRCS}
PARENT_SCOPE)
set(ATen_CUDA_TEST_SRCS
${ATen_CUDA_TEST_SRCS}
PARENT_SCOPE)
set(ATen_HIP_TEST_SRCS
${ATen_HIP_TEST_SRCS}
PARENT_SCOPE)
set(ATen_VULKAN_TEST_SRCS
${ATen_VULKAN_TEST_SRCS}
PARENT_SCOPE)
set(ATen_MOBILE_TEST_SRCS
${ATen_MOBILE_TEST_SRCS}
PARENT_SCOPE)
set(ATen_VEC_TEST_SRCS
${ATen_VEC_TEST_SRCS}
PARENT_SCOPE)
set(ATen_MPS_TEST_SRCS
${ATen_MPS_TEST_SRCS}
PARENT_SCOPE)
set(ATen_XPU_TEST_SRCS
${ATen_XPU_TEST_SRCS}
PARENT_SCOPE)

View File

@ -4,7 +4,7 @@ include_directories(${ATEN_INCLUDE_DIR})
# C++17
if(not MSVC)
set(CMAKE_CXX_FLAGS "--std=c++17 ${CMAKE_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS "--std=c++17 ${CMAKE_CXX_FLAGS}")
endif()
add_executable(main main.cpp)
target_link_libraries(main ${ATEN_LIBRARIES})

View File

@ -1,8 +1,6 @@
set(ATen_CUDA_INCLUDE ${ATen_CUDA_INCLUDE}
"${CMAKE_CURRENT_SOURCE_DIR}"
PARENT_SCOPE)
set(ATen_CUDA_INCLUDE
${ATen_CUDA_INCLUDE} "${CMAKE_CURRENT_SOURCE_DIR}"
PARENT_SCOPE)
install(FILES
THCAtomics.cuh
THCDeviceUtils.cuh
DESTINATION "${ATEN_INSTALL_INCLUDE_SUBDIR}/THC")
install(FILES THCAtomics.cuh THCDeviceUtils.cuh
DESTINATION "${ATEN_INSTALL_INCLUDE_SUBDIR}/THC")

View File

@ -1,10 +1,20 @@
list(APPEND STATIC_RUNTIME_BENCHMARK_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/deep_wide_pt.cc)
list(APPEND STATIC_RUNTIME_BENCHMARK_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/deep_wide_pt_bench.cc)
set(STATIC_RUNTIME_BENCHMARK_SRCS ${STATIC_RUNTIME_BENCHMARK_SRCS} PARENT_SCOPE)
list(APPEND STATIC_RUNTIME_BENCHMARK_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/deep_wide_pt.cc)
list(APPEND STATIC_RUNTIME_BENCHMARK_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/deep_wide_pt_bench.cc)
set(STATIC_RUNTIME_BENCHMARK_SRCS
${STATIC_RUNTIME_BENCHMARK_SRCS}
PARENT_SCOPE)
list(APPEND STATIC_RUNTIME_TEST_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/deep_wide_pt.cc)
list(APPEND STATIC_RUNTIME_TEST_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/deep_wide_pt.cc)
list(APPEND STATIC_RUNTIME_TEST_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/test_utils.cc)
list(APPEND STATIC_RUNTIME_TEST_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/test_static_runtime.cc)
list(APPEND STATIC_RUNTIME_TEST_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/test_static_module.cc)
list(APPEND STATIC_RUNTIME_TEST_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/test_generated_ops.cc)
set(STATIC_RUNTIME_TEST_SRCS ${STATIC_RUNTIME_TEST_SRCS} PARENT_SCOPE)
list(APPEND STATIC_RUNTIME_TEST_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/test_static_runtime.cc)
list(APPEND STATIC_RUNTIME_TEST_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/test_static_module.cc)
list(APPEND STATIC_RUNTIME_TEST_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/test_generated_ops.cc)
set(STATIC_RUNTIME_TEST_SRCS
${STATIC_RUNTIME_TEST_SRCS}
PARENT_SCOPE)

View File

@ -8,12 +8,12 @@ if(INTERN_BUILD_MOBILE)
endif()
caffe2_binary_target("parallel_info.cc")
target_include_directories(parallel_info PUBLIC
${CMAKE_BINARY_DIR}/aten/src) # provides "ATen/TypeExtendedInterface.h" to ATen.h
target_include_directories(parallel_info PUBLIC ${CMAKE_BINARY_DIR}/aten/src
)# provides "ATen/TypeExtendedInterface.h" to ATen.h
caffe2_binary_target("record_function_benchmark.cc")
target_include_directories(record_function_benchmark PUBLIC
${CMAKE_BINARY_DIR}/aten/src)
target_include_directories(record_function_benchmark
PUBLIC ${CMAKE_BINARY_DIR}/aten/src)
caffe2_binary_target("speed_benchmark_torch.cc")
caffe2_binary_target("compare_models_torch.cc")

View File

@ -1,55 +1,65 @@
cmake_minimum_required(VERSION 3.27 FATAL_ERROR)
project(c10 CXX)
set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ standard whose features are requested to build this target.")
set(CMAKE_CXX_STANDARD
17
CACHE STRING
"The C++ standard whose features are requested to build this target.")
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
# Main build file for the C10 library.
#
# Note that the C10 library should maintain minimal dependencies - especially,
# it should not depend on any library that is implementation specific or
# backend specific. It should in particular NOT be dependent on any generated
# protobuf header files, because protobuf header files will transitively force
# one to link against a specific protobuf version.
# it should not depend on any library that is implementation specific or backend
# specific. It should in particular NOT be dependent on any generated protobuf
# header files, because protobuf header files will transitively force one to
# link against a specific protobuf version.
if(BUILD_LIBTORCHLESS)
find_library(C10_LIB c10 PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH)
find_library(
C10_LIB c10
PATHS $ENV{LIBTORCH_LIB_PATH}
NO_DEFAULT_PATH)
else()
set(C10_LIB c10)
endif()
set(C10_USE_GFLAGS ${USE_GFLAGS}) # also used in torch/headeronly
set(C10_USE_GLOG ${USE_GLOG}) # also used in torch/headeronly
set(C10_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) # also used in torch/headeronly
set(C10_USE_NUMA ${USE_NUMA}) # also used in torch/headeronly
set(C10_USE_MSVC_STATIC_RUNTIME ${CAFFE2_USE_MSVC_STATIC_RUNTIME}) # also used in torch/headeronly
set(C10_USE_ROCM_KERNEL_ASSERT ${USE_ROCM_KERNEL_ASSERT}) # also used in torch/headeronly
set(C10_USE_GFLAGS ${USE_GFLAGS}) # also used in torch/headeronly
set(C10_USE_GLOG ${USE_GLOG}) # also used in torch/headeronly
set(C10_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) # also used in torch/headeronly
set(C10_USE_NUMA ${USE_NUMA}) # also used in torch/headeronly
set(C10_USE_MSVC_STATIC_RUNTIME ${CAFFE2_USE_MSVC_STATIC_RUNTIME}
)# also used in torch/headeronly
set(C10_USE_ROCM_KERNEL_ASSERT ${USE_ROCM_KERNEL_ASSERT}) # also used in
# torch/headeronly
# Note: if you want to add ANY dependency to the c10 library, make sure you
# check with the core PyTorch developers as the dependency will be
# transitively passed on to all libraries dependent on PyTorch.
file(GLOB C10_SRCS
*.cpp
core/*.cpp
core/impl/*.cpp
mobile/*.cpp
macros/*.cpp
util/*.cpp
)
file(GLOB C10_HEADERS
*.h
core/*.h
core/impl/*.h
mobile/*.h
macros/*.h
util/*.h
)
# Note: if you want to add ANY dependency to the c10 library, make sure you
# check with the core PyTorch developers as the dependency will be transitively
# passed on to all libraries dependent on PyTorch.
file(
GLOB
C10_SRCS
*.cpp
core/*.cpp
core/impl/*.cpp
mobile/*.cpp
macros/*.cpp
util/*.cpp)
file(
GLOB
C10_HEADERS
*.h
core/*.h
core/impl/*.h
mobile/*.h
macros/*.h
util/*.h)
if(NOT BUILD_LIBTORCHLESS)
add_library(c10 ${C10_SRCS} ${C10_HEADERS})
torch_compile_options(c10)
if(HAVE_SOVERSION)
set_target_properties(c10 PROPERTIES
VERSION ${TORCH_VERSION} SOVERSION ${TORCH_SOVERSION})
set_target_properties(c10 PROPERTIES VERSION ${TORCH_VERSION}
SOVERSION ${TORCH_SOVERSION})
endif()
# If building shared library, set dllimport/dllexport proper.
target_compile_options(c10 PRIVATE "-DC10_BUILD_MAIN_LIB")
@ -58,7 +68,8 @@ if(NOT BUILD_LIBTORCHLESS)
target_compile_options(c10 PRIVATE "-fvisibility=hidden")
endif()
option(C10_USE_IWYU "Use include-what-you-use to clean up header inclusion" OFF)
option(C10_USE_IWYU "Use include-what-you-use to clean up header inclusion"
OFF)
if(C10_USE_IWYU)
find_program(iwyu NAMES include-what-you-use)
if(iwyu)
@ -71,8 +82,7 @@ if(NOT BUILD_LIBTORCHLESS)
"-Xiwyu"
"--prefix_header_includes=keep"
"-Xiwyu"
"--mapping_file=${CMAKE_CURRENT_LIST_DIR}/../tools/iwyu/all.imp"
)
"--mapping_file=${CMAKE_CURRENT_LIST_DIR}/../tools/iwyu/all.imp")
set_property(TARGET c10 PROPERTY CXX_INCLUDE_WHAT_YOU_USE ${iwyu_cmd})
endif()
endif()
@ -106,7 +116,8 @@ if(NOT BUILD_LIBTORCHLESS)
message(STATUS "don't use NUMA")
endif()
if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "s390x" AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64le")
if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "s390x" AND NOT CMAKE_SYSTEM_PROCESSOR
MATCHES "ppc64le")
target_link_libraries(c10 PRIVATE cpuinfo)
endif()
@ -134,10 +145,9 @@ if(NOT BUILD_LIBTORCHLESS)
endif()
target_include_directories(
c10 PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../>
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}>
$<INSTALL_INTERFACE:include>)
c10
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../>
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}> $<INSTALL_INTERFACE:include>)
endif()
add_subdirectory(test)
@ -148,8 +158,7 @@ if(USE_CUDA)
endif()
if(USE_ROCM)
# NB: This directory is generated by the HIPIFY script; it's
# not checked in
# NB: This directory is generated by the HIPIFY script; it's not checked in
add_subdirectory(hip)
endif()
@ -158,17 +167,25 @@ if(USE_XPU)
endif()
if(NOT BUILD_LIBTORCHLESS)
# ---[ Installation
# Note: for now, we will put all export path into one single Caffe2Targets group
# to deal with the cmake deployment need. Inside the Caffe2Targets set, the
# individual libraries like libc10.so and libcaffe2.so are still self-contained.
install(TARGETS c10 EXPORT Caffe2Targets DESTINATION lib)
# ---[ Installation Note: for now, we will put all export path into one single
# Caffe2Targets group to deal with the cmake deployment need. Inside the
# Caffe2Targets set, the individual libraries like libc10.so and libcaffe2.so
# are still self-contained.
install(
TARGETS c10
EXPORT Caffe2Targets
DESTINATION lib)
endif()
install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}
DESTINATION include
FILES_MATCHING PATTERN "*.h")
install(
DIRECTORY ${CMAKE_CURRENT_LIST_DIR}
DESTINATION include
FILES_MATCHING
PATTERN "*.h")
if(MSVC AND C10_BUILD_SHARED_LIBS)
install(FILES $<TARGET_PDB_FILE:c10> DESTINATION lib OPTIONAL)
install(
FILES $<TARGET_PDB_FILE:c10>
DESTINATION lib
OPTIONAL)
endif()
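Among the calls re-wrapped above, the install(DIRECTORY ... FILES_MATCHING PATTERN "*.h") form is what keeps the installed c10 tree headers-only. A small illustrative sketch of that filter (the directory name is hypothetical, not taken from the diff):

# Install only the headers under include/mylib/, preserving the directory layout.
cmake_minimum_required(VERSION 3.22)
project(header_install_demo NONE)

install(
  DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/mylib # hypothetical source tree
  DESTINATION include
  FILES_MATCHING # without this clause, every file in the tree would be installed
  PATTERN "*.h") # keep only headers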

View File

@ -8,7 +8,10 @@ if(BUILD_TEST)
add_executable(${bench_name} "${bench_src}")
target_link_libraries(${bench_name} ${C10_LIB} benchmark)
if(INSTALL_TEST)
set_target_properties(${bench_name} PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
set_target_properties(
${bench_name}
PROPERTIES INSTALL_RPATH
"${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
install(TARGETS ${bench_name} DESTINATION test)
endif()
endforeach()

View File

@ -8,23 +8,24 @@ include(../../cmake/public/cuda.cmake)
# ---[ Configure macro file.
set(C10_CUDA_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) # used in cmake_macros.h.in
# Probably have to do this :(
configure_file(
${CMAKE_CURRENT_LIST_DIR}/impl/cuda_cmake_macros.h.in
${CMAKE_BINARY_DIR}/c10/cuda/impl/cuda_cmake_macros.h)
configure_file(${CMAKE_CURRENT_LIST_DIR}/impl/cuda_cmake_macros.h.in
${CMAKE_BINARY_DIR}/c10/cuda/impl/cuda_cmake_macros.h)
if(BUILD_LIBTORCHLESS)
find_library(C10_CUDA_LIB c10_cuda PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH)
find_library(
C10_CUDA_LIB c10_cuda
PATHS $ENV{LIBTORCH_LIB_PATH}
NO_DEFAULT_PATH)
else()
set(C10_CUDA_LIB c10_cuda)
endif()
# Note: if you want to add ANY dependency to the c10 library, make sure you
# check with the core PyTorch developers as the dependency will be
# transitively passed on to all libraries dependent on PyTorch.
# check with the core PyTorch developers as the dependency will be transitively
# passed on to all libraries dependent on PyTorch.
# Note: if you add a new source file/header, you will need to update
# torch/utils/hipify/cuda_to_hip_mappings.py for new files
# and headers you add
# torch/utils/hipify/cuda_to_hip_mappings.py for new files and headers you add
set(C10_CUDA_SRCS
CUDAAllocatorConfig.cpp
CUDACachingAllocator.cpp
@ -36,8 +37,7 @@ set(C10_CUDA_SRCS
CUDAStream.cpp
impl/CUDAGuardImpl.cpp
impl/CUDATest.cpp
driver_api.cpp
)
driver_api.cpp)
set(C10_CUDA_HEADERS
CUDAAllocatorConfig.h
CUDACachingAllocator.h
@ -50,8 +50,7 @@ set(C10_CUDA_HEADERS
CUDAMiscFunctions.h
CUDAStream.h
impl/CUDAGuardImpl.h
impl/CUDATest.h
)
impl/CUDATest.h)
set(CUDA_LINK_LIBRARIES_KEYWORD PRIVATE)
if(NOT BUILD_LIBTORCHLESS)
@ -69,33 +68,39 @@ if(NOT BUILD_LIBTORCHLESS)
target_link_libraries(c10_cuda PUBLIC ${C10_LIB} torch::cudart)
if(NOT WIN32)
target_link_libraries(c10_cuda PRIVATE dl)
target_compile_options(c10_cuda PRIVATE "-DPYTORCH_C10_DRIVER_API_SUPPORTED")
target_link_libraries(c10_cuda PRIVATE dl)
target_compile_options(c10_cuda
PRIVATE "-DPYTORCH_C10_DRIVER_API_SUPPORTED")
endif()
target_include_directories(
c10_cuda PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../..>
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}>
$<INSTALL_INTERFACE:include>)
c10_cuda
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../..>
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}> $<INSTALL_INTERFACE:include>)
# ---[ Installation
# Note: for now, we will put all export path into one single Caffe2Targets group
# to deal with the cmake deployment need. Inside the Caffe2Targets set, the
# individual libraries like libc10.so and libcaffe2.so are still self-contained.
install(TARGETS c10_cuda EXPORT Caffe2Targets DESTINATION lib)
# ---[ Installation Note: for now, we will put all export path into one single
# Caffe2Targets group to deal with the cmake deployment need. Inside the
# Caffe2Targets set, the individual libraries like libc10.so and libcaffe2.so
# are still self-contained.
install(
TARGETS c10_cuda
EXPORT Caffe2Targets
DESTINATION lib)
endif()
add_subdirectory(test)
foreach(file ${C10_CUDA_HEADERS})
get_filename_component( dir ${file} DIRECTORY )
install( FILES ${file} DESTINATION include/c10/cuda/${dir} )
get_filename_component(dir ${file} DIRECTORY)
install(FILES ${file} DESTINATION include/c10/cuda/${dir})
endforeach()
install(FILES ${CMAKE_BINARY_DIR}/c10/cuda/impl/cuda_cmake_macros.h
DESTINATION include/c10/cuda/impl)
DESTINATION include/c10/cuda/impl)
if(MSVC AND C10_CUDA_BUILD_SHARED_LIBS)
install(FILES $<TARGET_PDB_FILE:c10_cuda> DESTINATION lib OPTIONAL)
install(
FILES $<TARGET_PDB_FILE:c10_cuda>
DESTINATION lib
OPTIONAL)
endif()
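The configure_file() call re-wrapped near the top of this file is what turns cuda_cmake_macros.h.in into a generated header in the build tree. A self-contained sketch of that mechanism using a hypothetical template written on the fly (illustration only):

# Generate a header from a template at configure time.
cmake_minimum_required(VERSION 3.22)
project(configure_file_demo NONE)

set(DEMO_FEATURE_ENABLED 1)

# Write a tiny template; in the real tree the template lives in the source
# directory (for example impl/cuda_cmake_macros.h.in).
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/demo_macros.h.in
     "#define DEMO_FEATURE_ENABLED @DEMO_FEATURE_ENABLED@\n")

# @VAR@ placeholders in the template are replaced with current CMake values.
configure_file(${CMAKE_CURRENT_BINARY_DIR}/demo_macros.h.in
               ${CMAKE_CURRENT_BINARY_DIR}/demo_macros.h @ONLY)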

View File

@ -8,27 +8,33 @@ set(C10_CUDA_ALL_TEST_FILES
impl/CUDAAssertionsTest_multiple_writes_from_blocks_and_threads.cu
impl/CUDAAssertionsTest_multiple_writes_from_multiple_blocks.cu
impl/CUDAAssertionsTest_multiple_writes_from_same_block.cu
impl/CUDATest.cpp
)
impl/CUDATest.cpp)
if(BUILD_TEST)
foreach(test_src ${C10_CUDA_ALL_TEST_FILES})
get_filename_component(test_file_name ${test_src} NAME_WE)
set(test_name "c10_cuda_${test_file_name}")
if(WIN32 AND test_src MATCHES "^.*\.hip$")
set_source_files_properties(${test_src} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
set_source_files_properties(${test_src}
PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
hip_add_executable(${test_name} "${test_src}")
list(JOIN PYTORCH_ROCM_ARCH " " ROCM_PROPERTY_ARCH_LIST)
set_target_properties(${test_name} PROPERTIES LINKER_LANGUAGE CXX HIP_ARCHITECTURES ${ROCM_PROPERTY_ARCH_LIST})
set_target_properties(
${test_name} PROPERTIES LINKER_LANGUAGE CXX HIP_ARCHITECTURES
${ROCM_PROPERTY_ARCH_LIST})
else()
add_executable(${test_name} "${test_src}")
endif()
if(test_src MATCHES "^.*\.hip$")
set_target_properties(${test_name} PROPERTIES LINKER_LANGUAGE CXX)
endif()
target_link_libraries(${test_name} ${C10_CUDA_LIB} ${C10_LIB} gmock gtest gtest_main)
target_link_libraries(${test_name} ${C10_CUDA_LIB} ${C10_LIB} gmock gtest
gtest_main)
add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
if(INSTALL_TEST)
set_target_properties(${test_name} PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
set_target_properties(
${test_name}
PROPERTIES INSTALL_RPATH
"${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
install(TARGETS ${test_name} DESTINATION test)
endif()
endforeach()
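The set_target_properties(... INSTALL_RPATH ...) edits above only re-wrap how each installed test binary gets an rpath pointing back at the lib directory. A minimal sketch of the idea with a hypothetical executable; $ORIGIN resolves to the directory containing the installed binary on Linux, whereas the diff uses the _rpath_portable_origin helper for portability:

# Give an installed test executable an rpath that resolves ../lib relative
# to wherever the binary itself ends up installed.
cmake_minimum_required(VERSION 3.22)
project(rpath_demo CXX)

file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/main.cpp "int main() { return 0; }\n")
add_executable(demo_test ${CMAKE_CURRENT_BINARY_DIR}/main.cpp)

set_target_properties(
  demo_test
  PROPERTIES INSTALL_RPATH
             "$ORIGIN/../lib") # hypothetical layout: test/ installed next to lib/

install(TARGETS demo_test DESTINATION test)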

View File

@ -1,33 +1,31 @@
# Build file for the C10 HIP. This directory doesn't actually contain
# any files; these files are copied over by the HIPIFY script in tools/amd_build
# Build file for the C10 HIP. This directory doesn't actually contain any
# files; these files are copied over by the HIPIFY script in tools/amd_build
#
# At the moment, this cmake is NOT standalone
include(../../cmake/public/utils.cmake)
if(BUILD_LIBTORCHLESS)
find_library(C10_HIP_LIB c10_hip PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH)
find_library(
C10_HIP_LIB c10_hip
PATHS $ENV{LIBTORCH_LIB_PATH}
NO_DEFAULT_PATH)
endif()
# ---[ Configure macro file.
set(C10_HIP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) # used in cmake_macros.h.in
configure_file(
${CMAKE_CURRENT_LIST_DIR}/impl/hip_cmake_macros.h.in
${CMAKE_BINARY_DIR}/c10/hip/impl/hip_cmake_macros.h)
configure_file(${CMAKE_CURRENT_LIST_DIR}/impl/hip_cmake_macros.h.in
${CMAKE_BINARY_DIR}/c10/hip/impl/hip_cmake_macros.h)
# NB: All previous cu files are renamed into cc files. This isn't tested at the
# moment.
file(GLOB C10_HIP_SRCS
*.cpp
*.cc
impl/*.cpp
impl/*.cc
)
file(GLOB C10_HIP_SRCS *.cpp *.cc impl/*.cpp impl/*.cc)
# Mark the cc files as HIP files, so we call the compiler. (They have to be
# suffixed with cc, because the hcc compiler won't accept them otherwise.)
file(GLOB __c10_hip_srcs_cpp *.cc impl/*.cc)
set_source_files_properties(${__c10_hip_srcs_cpp} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
set_source_files_properties(${__c10_hip_srcs_cpp}
PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
file(GLOB_RECURSE C10_HIP_HEADERS *.h)
@ -38,8 +36,9 @@ if(NOT BUILD_LIBTORCHLESS)
# Propagate HIP_CXX_FLAGS that were set from Dependencies.cmake
target_compile_options(c10_hip PRIVATE ${HIP_CXX_FLAGS})
# caffe2_hip adds a bunch of dependencies like rocsparse, but c10/hip is supposed to be
# minimal. I'm not sure if we need hip_hcc or not; for now leave it out
# caffe2_hip adds a bunch of dependencies like rocsparse, but c10/hip is
# supposed to be minimal. I'm not sure if we need hip_hcc or not; for now
# leave it out
# If building shared library, set dllimport/dllexport proper.
target_compile_options(c10_hip PRIVATE "-DC10_HIP_BUILD_MAIN_LIB")
@ -52,19 +51,23 @@ if(NOT BUILD_LIBTORCHLESS)
target_link_libraries(c10_hip PUBLIC ${C10_LIB} hip::amdhip64)
target_include_directories(
c10_hip PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../..>
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}>
$<INSTALL_INTERFACE:include>)
install(TARGETS c10_hip EXPORT Caffe2Targets DESTINATION lib)
c10_hip
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../..>
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}> $<INSTALL_INTERFACE:include>)
install(
TARGETS c10_hip
EXPORT Caffe2Targets
DESTINATION lib)
set(C10_HIP_LIB c10_hip)
endif()
add_subdirectory(test)
# ---[ Installation
install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}
DESTINATION include
FILES_MATCHING PATTERN "*.h")
install(
DIRECTORY ${CMAKE_CURRENT_LIST_DIR}
DESTINATION include
FILES_MATCHING
PATTERN "*.h")
install(FILES ${CMAKE_BINARY_DIR}/c10/hip/impl/hip_cmake_macros.h
DESTINATION include/c10/hip/impl)
DESTINATION include/c10/hip/impl)


@ -12,7 +12,10 @@ if(BUILD_TEST)
target_link_libraries(${test_name} ${C10_LIB} gmock gtest gtest_main)
add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
if(INSTALL_TEST)
set_target_properties(${test_name} PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
set_target_properties(
${test_name}
PROPERTIES INSTALL_RPATH
"${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
install(TARGETS ${test_name} DESTINATION test)
endif()
endforeach()


@ -5,21 +5,20 @@
include(../../cmake/public/xpu.cmake)
if(NOT BUILD_LIBTORCHLESS)
find_library(C10_XPU_LIB c10_xpu PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH)
find_library(
C10_XPU_LIB c10_xpu
PATHS $ENV{LIBTORCH_LIB_PATH}
NO_DEFAULT_PATH)
endif()
# ---[ Configure macro file.
set(C10_XPU_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) # used in xpu_cmake_macros.h.in
configure_file(
${CMAKE_CURRENT_LIST_DIR}/impl/xpu_cmake_macros.h.in
${CMAKE_BINARY_DIR}/c10/xpu/impl/xpu_cmake_macros.h)
set(C10_XPU_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) # used in
# xpu_cmake_macros.h.in
configure_file(${CMAKE_CURRENT_LIST_DIR}/impl/xpu_cmake_macros.h.in
${CMAKE_BINARY_DIR}/c10/xpu/impl/xpu_cmake_macros.h)
set(C10_XPU_SRCS
XPUCachingAllocator.cpp
XPUFunctions.cpp
XPUStream.cpp
impl/XPUGuardImpl.cpp
)
set(C10_XPU_SRCS XPUCachingAllocator.cpp XPUFunctions.cpp XPUStream.cpp
impl/XPUGuardImpl.cpp)
set(C10_XPU_HEADERS
XPUCachingAllocator.h
XPUDeviceProp.h
@ -27,8 +26,7 @@ set(C10_XPU_HEADERS
XPUFunctions.h
XPUMacros.h
XPUStream.h
impl/XPUGuardImpl.h
)
impl/XPUGuardImpl.h)
if(NOT BUILD_LIBTORCHLESS)
add_library(c10_xpu ${C10_XPU_SRCS} ${C10_XPU_HEADERS})
torch_compile_options(c10_xpu)
@ -41,12 +39,13 @@ if(NOT BUILD_LIBTORCHLESS)
# ---[ Dependency of c10_xpu
target_link_libraries(c10_xpu PUBLIC c10 torch::xpurt)
target_include_directories(
c10_xpu PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../..>
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}>
$<INSTALL_INTERFACE:include>
)
install(TARGETS c10_xpu EXPORT Caffe2Targets DESTINATION lib)
c10_xpu
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../..>
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}> $<INSTALL_INTERFACE:include>)
install(
TARGETS c10_xpu
EXPORT Caffe2Targets
DESTINATION lib)
set(C10_XPU_LIB c10_xpu)
add_subdirectory(test)
endif()
@ -58,8 +57,11 @@ foreach(file ${C10_XPU_HEADERS})
install(FILES ${file} DESTINATION include/c10/xpu/${dir})
endforeach()
install(FILES ${CMAKE_BINARY_DIR}/c10/xpu/impl/xpu_cmake_macros.h
DESTINATION include/c10/xpu/impl)
DESTINATION include/c10/xpu/impl)
if(MSVC AND C10_XPU_BUILD_SHARED_LIBS)
install(FILES $<TARGET_PDB_FILE:c10_xpu> DESTINATION lib OPTIONAL)
install(
FILES $<TARGET_PDB_FILE:c10_xpu>
DESTINATION lib
OPTIONAL)
endif()


@ -1,11 +1,8 @@
# ---[ Test binaries.
set(C10_XPU_ALL_TEST_FILES
impl/XPUCachingAllocatorTest.cpp
impl/XPUDeviceTest.cpp
impl/XPUGuardTest.cpp
impl/XPUStreamTest.cpp
)
impl/XPUCachingAllocatorTest.cpp impl/XPUDeviceTest.cpp
impl/XPUGuardTest.cpp impl/XPUStreamTest.cpp)
if(BUILD_TEST)
foreach(test_src ${C10_XPU_ALL_TEST_FILES})
get_filename_component(test_file_name ${test_src} NAME_WE)
@ -14,7 +11,10 @@ if(BUILD_TEST)
target_link_libraries(${test_name} ${C10_XPU_LIB} gmock gtest gtest_main)
add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
if(INSTALL_TEST)
set_target_properties(${test_name} PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
set_target_properties(
${test_name}
PROPERTIES INSTALL_RPATH
"${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
install(TARGETS ${test_name} DESTINATION test)
endif()
endforeach()

File diff suppressed because it is too large.


@ -1,4 +1,4 @@
list(APPEND Caffe2_CPU_SRCS
"${CMAKE_CURRENT_SOURCE_DIR}/common.cc"
)
set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} PARENT_SCOPE)
list(APPEND Caffe2_CPU_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/common.cc")
set(Caffe2_CPU_SRCS
${Caffe2_CPU_SRCS}
PARENT_SCOPE)
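Many of the leaf CMakeLists.txt files touched here follow the same idiom: append this directory's sources to a list owned by the parent scope, then re-publish the list with PARENT_SCOPE so the parent actually sees the additions. A self-contained sketch (example.cc is a placeholder file name):

# Leaf CMakeLists.txt: extend the parent's source list and re-export it.
list(APPEND Caffe2_CPU_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/example.cc")
# Without PARENT_SCOPE the set() would only affect this directory's scope and
# the parent would never see the appended entries.
set(Caffe2_CPU_SRCS
    ${Caffe2_CPU_SRCS}
    PARENT_SCOPE)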


@ -1,8 +1,9 @@
if(INTERN_BUILD_MOBILE)
list(APPEND Caffe2_CPU_SRCS
"${CMAKE_CURRENT_SOURCE_DIR}/embedding_lookup_idx.cc"
)
set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} PARENT_SCOPE)
"${CMAKE_CURRENT_SOURCE_DIR}/embedding_lookup_idx.cc")
set(Caffe2_CPU_SRCS
${Caffe2_CPU_SRCS}
PARENT_SCOPE)
return()
endif()
@ -28,21 +29,22 @@ if(CXX_AVX2_FOUND)
target_link_libraries(Caffe2_perfkernels_avx2 PRIVATE c10)
if(MSVC AND NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
target_compile_options(Caffe2_perfkernels_avx2
PRIVATE "/arch:AVX2"
PRIVATE "/D__FMA__"
PRIVATE "/D__F16C__")
target_compile_options(
Caffe2_perfkernels_avx2
PRIVATE "/arch:AVX2"
PRIVATE "/D__FMA__"
PRIVATE "/D__F16C__")
else()
target_compile_options(Caffe2_perfkernels_avx2
PRIVATE "-mavx2"
PRIVATE "-mfma"
PRIVATE "-mavx"
PRIVATE "-mf16c")
target_compile_options(
Caffe2_perfkernels_avx2
PRIVATE "-mavx2"
PRIVATE "-mfma"
PRIVATE "-mavx"
PRIVATE "-mf16c")
endif()
caffe2_interface_library(
Caffe2_perfkernels_avx2 Caffe2_perfkernels_avx2_interface)
list(APPEND
Caffe2_DEPENDENCY_WHOLE_LINK_LIBS
caffe2_interface_library(Caffe2_perfkernels_avx2
Caffe2_perfkernels_avx2_interface)
list(APPEND Caffe2_DEPENDENCY_WHOLE_LINK_LIBS
"Caffe2_perfkernels_avx2_interface")
endif()
@ -52,23 +54,25 @@ if(CXX_SVE_FOUND)
add_library(Caffe2_perfkernels_sve STATIC ${sve_srcs})
target_link_libraries(Caffe2_perfkernels_sve PRIVATE c10)
install(TARGETS Caffe2_perfkernels_sve
ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}")
ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}")
target_compile_options(Caffe2_perfkernels_sve PRIVATE "-march=armv8-a+sve")
caffe2_interface_library(
Caffe2_perfkernels_sve Caffe2_perfkernels_sve_interface)
list(APPEND
Caffe2_DEPENDENCY_WHOLE_LINK_LIBS "Caffe2_perfkernels_sve_interface")
caffe2_interface_library(Caffe2_perfkernels_sve
Caffe2_perfkernels_sve_interface)
list(APPEND Caffe2_DEPENDENCY_WHOLE_LINK_LIBS
"Caffe2_perfkernels_sve_interface")
endif()
# TODO(jiayq): currently, we only implement the very base files for the
# perfkernels. This is because to implement avx and avx2 files, we actually
# need to set up different compilation units and this is a bit more involving
# in terms of CMakefile changes. This is a stop-gap solution until we get a
# more proper implementation.
# perfkernels. This is because to implement avx and avx2 files, we actually need
# to set up different compilation units and this is a bit more involved in
# terms of CMakefile changes. This is a stop-gap solution until we get a more
# proper implementation.
set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} PARENT_SCOPE)
set(Caffe2_CPU_SRCS
${Caffe2_CPU_SRCS}
PARENT_SCOPE)
set(Caffe2_DEPENDENCY_WHOLE_LINK_LIBS
${Caffe2_DEPENDENCY_WHOLE_LINK_LIBS}
PARENT_SCOPE)


@ -1,7 +1,9 @@
file(GLOB tmp *_test.cc)
set(Caffe2_CPU_TEST_SRCS ${Caffe2_CPU_TEST_SRCS} ${tmp})
list(APPEND Caffe2_CPU_SRCS
list(
APPEND
Caffe2_CPU_SRCS
${PROJECT_SOURCE_DIR}/third_party/miniz-3.0.2/miniz.c
${CMAKE_CURRENT_SOURCE_DIR}/inline_container.cc
${CMAKE_CURRENT_SOURCE_DIR}/istream_adapter.cc
@ -10,6 +12,12 @@ list(APPEND Caffe2_CPU_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/read_adapter_interface.cc)
list(APPEND Caffe2_CPU_INCLUDE ${PROJECT_SOURCE_DIR}/third_party/miniz-3.0.2)
set(Caffe2_CPU_TEST_SRCS ${Caffe2_CPU_TEST_SRCS} PARENT_SCOPE)
set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} PARENT_SCOPE)
set(Caffe2_CPU_INCLUDE ${Caffe2_CPU_INCLUDE} PARENT_SCOPE)
set(Caffe2_CPU_TEST_SRCS
${Caffe2_CPU_TEST_SRCS}
PARENT_SCOPE)
set(Caffe2_CPU_SRCS
${Caffe2_CPU_SRCS}
PARENT_SCOPE)
set(Caffe2_CPU_INCLUDE
${Caffe2_CPU_INCLUDE}
PARENT_SCOPE)


@ -1,18 +1,14 @@
list(APPEND Caffe2_CPU_SRCS
utils/string_utils.cc
utils/threadpool/ThreadPool.cc
)
list(APPEND Caffe2_CPU_SRCS utils/string_utils.cc
utils/threadpool/ThreadPool.cc)
if(USE_PTHREADPOOL)
list(APPEND Caffe2_CPU_SRCS
utils/threadpool/pthreadpool-cpp.cc
utils/threadpool/thread_pool_guard.cpp
)
list(APPEND Caffe2_CPU_SRCS utils/threadpool/pthreadpool-cpp.cc
utils/threadpool/thread_pool_guard.cpp)
endif()
if(NOT INTERN_BUILD_MOBILE)
list(APPEND Caffe2_CPU_SRCS
utils/proto_wrap.cc
)
list(APPEND Caffe2_CPU_SRCS utils/proto_wrap.cc)
endif()
set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} PARENT_SCOPE)
set(Caffe2_CPU_SRCS
${Caffe2_CPU_SRCS}
PARENT_SCOPE)


@ -1,4 +1,3 @@
if(__caffe2_allowlist_included)
return()
endif()


@ -1,26 +1,28 @@
# Push host architecture when cross-compiling otherwise check would fail
# when cross-compiling for arm64 on x86_64
# Push host architecture when cross-compiling otherwise check would fail when
# cross-compiling for arm64 on x86_64
cmake_push_check_state(RESET)
if(CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_OSX_ARCHITECTURES MATCHES "^(x86_64|arm64)$")
if(CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_OSX_ARCHITECTURES MATCHES
"^(x86_64|arm64)$")
list(APPEND CMAKE_REQUIRED_FLAGS "-arch ${CMAKE_HOST_SYSTEM_PROCESSOR}")
endif()
# Set values through env variables if cross compiling
if(CMAKE_CROSSCOMPILING)
if("$ENV{PYTORCH_BLAS_F2C}" STREQUAL "ON")
SET(BLAS_F2C TRUE)
set(BLAS_F2C TRUE)
else()
SET(BLAS_F2C FALSE)
set(BLAS_F2C FALSE)
endif()
if("$ENV{PYTORCH_BLAS_USE_CBLAS_DOT}" STREQUAL "ON")
SET(BLAS_USE_CBLAS_DOT TRUE)
set(BLAS_USE_CBLAS_DOT TRUE)
else()
SET(BLAS_USE_CBLAS_DOT FALSE)
set(BLAS_USE_CBLAS_DOT FALSE)
endif()
else()
SET(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES})
CHECK_C_SOURCE_RUNS("
set(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES})
check_c_source_runs(
"
#include <stdlib.h>
#include <stdio.h>
float x[4] = { 1, 2, 3, 4 };
@ -32,8 +34,10 @@ int main() {
int i;
double r = sdot_(&four, x, &one, y, &one);
exit((float)r != (float).1234);
}" BLAS_F2C_DOUBLE_WORKS )
CHECK_C_SOURCE_RUNS("
}"
BLAS_F2C_DOUBLE_WORKS)
check_c_source_runs(
"
#include <stdlib.h>
#include <stdio.h>
float x[4] = { 1, 2, 3, 4 };
@ -45,15 +49,17 @@ int main() {
int i;
double r = sdot_(&four, x, &one, y, &one);
exit((float)r != (float).1234);
}" BLAS_F2C_FLOAT_WORKS )
}"
BLAS_F2C_FLOAT_WORKS)
if(BLAS_F2C_DOUBLE_WORKS AND NOT BLAS_F2C_FLOAT_WORKS)
MESSAGE(STATUS "This BLAS uses the F2C return conventions")
SET(BLAS_F2C TRUE)
message(STATUS "This BLAS uses the F2C return conventions")
set(BLAS_F2C TRUE)
else(BLAS_F2C_DOUBLE_WORKS AND NOT BLAS_F2C_FLOAT_WORKS)
SET(BLAS_F2C FALSE)
set(BLAS_F2C FALSE)
endif(BLAS_F2C_DOUBLE_WORKS AND NOT BLAS_F2C_FLOAT_WORKS)
CHECK_C_SOURCE_RUNS("
check_c_source_runs(
"
#include <stdlib.h>
#include <stdio.h>
float x[4] = { 1, 2, 3, 4 };
@ -63,14 +69,15 @@ int main() {
int i;
double r = cblas_sdot(4, x, 1, y, 1);
exit((float)r != (float).1234);
}" BLAS_USE_CBLAS_DOT )
}"
BLAS_USE_CBLAS_DOT)
if(BLAS_USE_CBLAS_DOT)
SET(BLAS_USE_CBLAS_DOT TRUE)
set(BLAS_USE_CBLAS_DOT TRUE)
else(BLAS_USE_CBLAS_DOT)
SET(BLAS_USE_CBLAS_DOT FALSE)
set(BLAS_USE_CBLAS_DOT FALSE)
endif(BLAS_USE_CBLAS_DOT)
SET(CMAKE_REQUIRED_LIBRARIES)
set(CMAKE_REQUIRED_LIBRARIES)
endif(CMAKE_CROSSCOMPILING)
MESSAGE(STATUS "BLAS_USE_CBLAS_DOT: ${BLAS_USE_CBLAS_DOT}")
MESSAGE(STATUS "BLAS_F2C: ${BLAS_F2C}")
message(STATUS "BLAS_USE_CBLAS_DOT: ${BLAS_USE_CBLAS_DOT}")
message(STATUS "BLAS_F2C: ${BLAS_F2C}")
cmake_pop_check_state()
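These probes use check_c_source_runs(), which compiles and links the given program against CMAKE_REQUIRED_LIBRARIES, runs it, and caches a true result in the named variable when the program exits with 0; cmake_push_check_state(RESET) and cmake_pop_check_state() keep the temporary CMAKE_REQUIRED_* settings from leaking into the rest of the configure. A stripped-down sketch of the mechanism (the probe body and the result variable name are illustrative, not the real checks above):

include(CheckCSourceRuns)
include(CMakePushCheckState)

cmake_push_check_state(RESET)
# The probe program is linked against the BLAS found earlier.
set(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES})
check_c_source_runs(
  "
  int main(void) { return 0; } /* trivial stand-in; the real probes call sdot_ */
  "
  BLAS_PROBE_RUNS) # result is cached in this variable (illustrative name)
cmake_pop_check_state()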


@ -1,14 +1,13 @@
# ---[ Declare variables that we are going to use across the Caffe2 build.
# This file defines common, Caffe2-wide variables that we use to collect
# source files and other things. Each variable is annotated with their
# intended uses.
# Note that adding and / or deleting these variables usually involves
# changing the whole build system, so make sure you send a PR early if you
# want to change them.
# ---[ Declare variables that we are going to use across the Caffe2 build. This
# file defines common, Caffe2-wide variables that we use to collect source files
# and other things. Each variable is annotated with their intended uses. Note
# that adding and / or deleting these variables usually involves changing the
# whole build system, so make sure you send a PR early if you want to change
# them.
# Caffe2_{CPU,GPU}_SRCS is the list that will have all the related source
# files for CPU and GPU respectively. They will be filled with the
# CMakeLists.txt files under each folder respectively.
# Caffe2_{CPU,GPU}_SRCS is the list that will have all the related source files
# for CPU and GPU respectively. They will be filled with the CMakeLists.txt
# files under each folder respectively.
set(Caffe2_CPU_SRCS)
set(Caffe2_GPU_SRCS)
@ -27,12 +26,12 @@ set(Caffe2_DEPENDENCY_LIBS "")
set(Caffe2_CUDA_DEPENDENCY_LIBS "")
# This variable contains dependency libraries of Caffe2 which requires whole
# symbol linkage. One example is the onnx lib where we need all its schema
# symbols. However, if the lib is whole linked in caffe2 lib, we don't want
# it to be linked in binaries that will link caffe2 lib. Because if caffe2 lib
# is built as dynamic library, it will result in two copied of symbols of
# symbols. However, if the lib is whole linked in caffe2 lib, we don't want it
# to be linked in binaries that will link caffe2 lib. Because if caffe2 lib is
# built as a dynamic library, it will result in two copies of the symbols of
# Caffe2_DEPENDENCY_WHOLE_LINK_LIBS existing in caffe2.so and the binary, which
# will cause issues. Therefore Caffe2_DEPENDENCY_WHOLE_LINK_LIBS will only
# be linked by caffe2 lib.
# will cause issues. Therefore Caffe2_DEPENDENCY_WHOLE_LINK_LIBS will only be
# linked by caffe2 lib.
set(Caffe2_DEPENDENCY_WHOLE_LINK_LIBS "")
# Lists for Caffe2 public dependency libraries. These libraries will be
@ -40,7 +39,7 @@ set(Caffe2_DEPENDENCY_WHOLE_LINK_LIBS "")
set(Caffe2_PUBLIC_DEPENDENCY_LIBS "")
set(Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS "")
# List of modules that is built as part of the main Caffe2 build. For all
# binary targets, such as Python and native binaries, they will be linked
# automatically with these modules.
# List of modules that is built as part of the main Caffe2 build. For all binary
# targets, such as Python and native binaries, they will be linked automatically
# with these modules.
set(Caffe2_MODULES "")
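The whole-link list described above exists because static registration symbols (for example the onnx schema registrations) are discarded by the linker unless the whole archive is pulled in. The snippet below is a generic illustration of whole-archive linking, not the exact helper PyTorch uses; the target and library names are placeholders:

add_library(example_consumer SHARED example.cc) # placeholder target and source
# GNU ld spelling; other linkers use different flags (e.g. -force_load on macOS).
target_link_libraries(example_consumer PRIVATE
  -Wl,--whole-archive example_schemas -Wl,--no-whole-archive)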


@ -1,60 +1,66 @@
# This ill-named file does a number of things:
# - Installs Caffe2 header files (this has nothing to do with code generation)
# - Configures caffe2/core/macros.h
# - Creates an ATen target for its generated C++ files and adds it
# as a dependency
# - Reads build lists defined in build_variables.bzl
# This ill-named file does a number of things:
# - Installs Caffe2 header files (this has nothing to do with code generation)
# - Configures caffe2/core/macros.h
# - Creates an ATen target for its generated C++ files and adds it as a
#   dependency
# - Reads build lists defined in build_variables.bzl
################################################################################
# ##############################################################################
# Helper functions
################################################################################
# ##############################################################################
function(filter_list output input)
unset(result)
foreach(filename ${${input}})
foreach(pattern ${ARGN})
if("${filename}" MATCHES "${pattern}")
list(APPEND result "${filename}")
endif()
endforeach()
unset(result)
foreach(filename ${${input}})
foreach(pattern ${ARGN})
if("${filename}" MATCHES "${pattern}")
list(APPEND result "${filename}")
endif()
endforeach()
set(${output} ${result} PARENT_SCOPE)
endforeach()
set(${output}
${result}
PARENT_SCOPE)
endfunction()
function(filter_list_exclude output input)
unset(result)
foreach(filename ${${input}})
foreach(pattern ${ARGN})
if(NOT "${filename}" MATCHES "${pattern}")
list(APPEND result "${filename}")
endif()
endforeach()
unset(result)
foreach(filename ${${input}})
foreach(pattern ${ARGN})
if(NOT "${filename}" MATCHES "${pattern}")
list(APPEND result "${filename}")
endif()
endforeach()
set(${output} ${result} PARENT_SCOPE)
endforeach()
set(${output}
${result}
PARENT_SCOPE)
endfunction()
################################################################################
# ##############################################################################
# -- [ Determine commit hash
execute_process(
COMMAND "${Python_EXECUTABLE}" -c "from tools.generate_torch_version import get_sha;print(get_sha('.'), end='')"
OUTPUT_VARIABLE COMMIT_SHA
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..
)
COMMAND
"${Python_EXECUTABLE}" -c
"from tools.generate_torch_version import get_sha;print(get_sha('.'), end='')"
OUTPUT_VARIABLE COMMIT_SHA
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..)
# ---[ Write the macros file
configure_file(
${CMAKE_CURRENT_LIST_DIR}/../caffe2/core/macros.h.in
${CMAKE_BINARY_DIR}/caffe2/core/macros.h)
configure_file(${CMAKE_CURRENT_LIST_DIR}/../caffe2/core/macros.h.in
${CMAKE_BINARY_DIR}/caffe2/core/macros.h)
# ---[ Installing the header files
install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/../caffe2
DESTINATION include
FILES_MATCHING PATTERN "*.h")
install(
DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/../caffe2
DESTINATION include
FILES_MATCHING
PATTERN "*.h")
if(NOT INTERN_BUILD_ATEN_OPS)
install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/core
DESTINATION include/ATen
FILES_MATCHING PATTERN "*.h")
install(
DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/core
DESTINATION include/ATen
FILES_MATCHING
PATTERN "*.h")
endif()
install(FILES ${CMAKE_BINARY_DIR}/caffe2/core/macros.h
DESTINATION include/caffe2/core)
@ -71,22 +77,23 @@ if(INTERN_BUILD_ATEN_OPS)
endif(MSVC)
if(NOT MSVC AND NOT "${CMAKE_C_COMPILER_ID}" MATCHES "Clang")
set_source_files_properties(${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/MapAllocator.cpp PROPERTIES COMPILE_FLAGS "-fno-openmp")
set_source_files_properties(
${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/MapAllocator.cpp
PROPERTIES COMPILE_FLAGS "-fno-openmp")
endif()
file(GLOB_RECURSE all_python "${CMAKE_CURRENT_LIST_DIR}/../torchgen/*.py")
# Handle files that may need sm89/sm90a/sm100a flags (stable/nightly
# builds are not built for these archs).
# Handle files that may need sm89/sm90a/sm100a flags (stable/nightly builds
# are not built for these archs).
if(USE_CUDA)
# The stable/nightly builds do not enable some SM architectures,
# like 89/90a/100a. Still, some files need to be built for these
# architectures specifically. This function makes it possible to
# enable building given file for a specific such architecture, in
# case if PyTorch is built for corresponding other architecture;
# for example, it will enable building for SM 90a in case PyTorch
# built for SM 90, etc. For examples of how to use the function,
# see below the function itself.
# The stable/nightly builds do not enable some SM architectures, like
# 89/90a/100a. Still, some files need to be built for these architectures
# specifically. This function makes it possible to enable building a given
# file for one of these architectures when PyTorch is built for the
# corresponding base architecture; for example, it enables building for
# SM 90a when PyTorch is built for SM 90, etc. For examples of how to use
# the function, see the calls below the function itself.
function(_BUILD_FOR_ADDITIONAL_ARCHS file archs)
torch_cuda_get_nvcc_gencode_flag(_existing_arch_flags)
@ -95,38 +102,43 @@ if(INTERN_BUILD_ATEN_OPS)
foreach(_arch ${archs})
if("${_arch}" STREQUAL "89")
if(_existing_arch_flags MATCHES ".*compute_86.*")
list(APPEND _file_compile_flags "-gencode;arch=compute_89,code=sm_89")
list(APPEND _file_compile_flags
"-gencode;arch=compute_89,code=sm_89")
endif()
endif()
if("${_arch}" STREQUAL "90a")
if(_existing_arch_flags MATCHES ".*compute_90.*")
list(APPEND _file_compile_flags "-gencode;arch=compute_90a,code=sm_90a")
list(APPEND _file_compile_flags
"-gencode;arch=compute_90a,code=sm_90a")
endif()
endif()
if("${_arch}" STREQUAL "100a")
if(_existing_arch_flags MATCHES ".*compute_100.*")
list(APPEND _file_compile_flags "-gencode;arch=compute_100a,code=sm_100a")
list(APPEND _file_compile_flags
"-gencode;arch=compute_100a,code=sm_100a")
endif()
endif()
if("${_arch}" STREQUAL "120a")
if(_existing_arch_flags MATCHES ".*compute_120.*")
list(APPEND _file_compile_flags "-gencode;arch=compute_120a,code=sm_120a")
list(APPEND _file_compile_flags
"-gencode;arch=compute_120a,code=sm_120a")
endif()
endif()
endforeach()
endif()
list(JOIN _file_compile_flags " " _file_compile_flags)
set_source_files_properties(${file} PROPERTIES COMPILE_FLAGS "${_file_compile_flags}")
set_source_files_properties(${file} PROPERTIES COMPILE_FLAGS
"${_file_compile_flags}")
endfunction()
_BUILD_FOR_ADDITIONAL_ARCHS(
_build_for_additional_archs(
"${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/cuda/RowwiseScaledMM.cu"
"89;90a;100a;120a")
_BUILD_FOR_ADDITIONAL_ARCHS(
_build_for_additional_archs(
"${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/cuda/ScaledGroupMM.cu"
"90a")
_BUILD_FOR_ADDITIONAL_ARCHS(
_build_for_additional_archs(
"${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/cuda/GroupMM.cu"
"90a;100a")
@ -150,7 +162,8 @@ if(INTERN_BUILD_ATEN_OPS)
set(CUSTOM_BUILD_FLAGS)
if(INTERN_BUILD_MOBILE)
if(USE_VULKAN)
list(APPEND CUSTOM_BUILD_FLAGS --backend_whitelist CPU QuantizedCPU Vulkan)
list(APPEND CUSTOM_BUILD_FLAGS --backend_whitelist CPU QuantizedCPU
Vulkan)
else()
list(APPEND CUSTOM_BUILD_FLAGS --backend_whitelist CPU QuantizedCPU)
endif()
@ -158,64 +171,69 @@ if(INTERN_BUILD_ATEN_OPS)
if(SELECTED_OP_LIST)
if(TRACING_BASED)
message(STATUS "Running tracing-based selective build given operator list: ${SELECTED_OP_LIST}")
list(APPEND CUSTOM_BUILD_FLAGS
--op_selection_yaml_path ${SELECTED_OP_LIST})
elseif(NOT STATIC_DISPATCH_BACKEND)
message(WARNING
"You have to run tracing-based selective build with dynamic dispatch.\n"
"Switching to STATIC_DISPATCH_BACKEND=CPU."
message(
STATUS
"Running tracing-based selective build given operator list: ${SELECTED_OP_LIST}"
)
list(APPEND CUSTOM_BUILD_FLAGS --op_selection_yaml_path
${SELECTED_OP_LIST})
elseif(NOT STATIC_DISPATCH_BACKEND)
message(
WARNING
"You have to run tracing-based selective build with dynamic dispatch.\n"
"Switching to STATIC_DISPATCH_BACKEND=CPU.")
set(STATIC_DISPATCH_BACKEND CPU)
endif()
endif()
if(STATIC_DISPATCH_BACKEND)
message(STATUS "Custom build with static dispatch backends: ${STATIC_DISPATCH_BACKEND}")
message(
STATUS
"Custom build with static dispatch backends: ${STATIC_DISPATCH_BACKEND}"
)
list(LENGTH STATIC_DISPATCH_BACKEND len)
list(APPEND CUSTOM_BUILD_FLAGS
--static_dispatch_backend ${STATIC_DISPATCH_BACKEND})
list(APPEND CUSTOM_BUILD_FLAGS --static_dispatch_backend
${STATIC_DISPATCH_BACKEND})
endif()
# Codegen unboxing
if(USE_LIGHTWEIGHT_DISPATCH)
file(GLOB_RECURSE all_unboxing_script "${CMAKE_CURRENT_LIST_DIR}/../tools/jit/*.py")
file(GLOB_RECURSE all_unboxing_script
"${CMAKE_CURRENT_LIST_DIR}/../tools/jit/*.py")
list(APPEND CUSTOM_BUILD_FLAGS --skip_dispatcher_op_registration)
set(GEN_UNBOXING_COMMAND
"${Python_EXECUTABLE}" -m tools.jit.gen_unboxing
--source-path ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen
--install_dir ${CMAKE_BINARY_DIR}/aten/src/ATen
)
"${Python_EXECUTABLE}" -m tools.jit.gen_unboxing --source-path
${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen --install_dir
${CMAKE_BINARY_DIR}/aten/src/ATen)
if(SELECTED_OP_LIST)
list(APPEND GEN_UNBOXING_COMMAND
--TEST_ONLY_op_registration_allowlist_yaml_path "${SELECTED_OP_LIST}")
--TEST_ONLY_op_registration_allowlist_yaml_path
"${SELECTED_OP_LIST}")
endif()
set("GEN_UNBOXING_COMMAND_sources"
${GEN_UNBOXING_COMMAND}
--output-dependencies ${CMAKE_BINARY_DIR}/aten/src/ATen/generated_unboxing_sources.cmake
)
${GEN_UNBOXING_COMMAND} --output-dependencies
${CMAKE_BINARY_DIR}/aten/src/ATen/generated_unboxing_sources.cmake)
message(STATUS "Generating sources for lightweight dispatch")
execute_process(
COMMAND ${GEN_UNBOXING_COMMAND_sources} --dry-run
RESULT_VARIABLE RETURN_VALUE
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..
)
COMMAND ${GEN_UNBOXING_COMMAND_sources} --dry-run
RESULT_VARIABLE RETURN_VALUE
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..)
if(NOT RETURN_VALUE EQUAL 0)
message(FATAL_ERROR "Failed to get generated_unboxing_sources list")
endif()
include("${CMAKE_BINARY_DIR}/aten/src/ATen/generated_unboxing_sources.cmake")
include(
"${CMAKE_BINARY_DIR}/aten/src/ATen/generated_unboxing_sources.cmake")
add_custom_command(
COMMENT "Generating ATen unboxing sources"
OUTPUT
${generated_unboxing_sources}
${CMAKE_BINARY_DIR}/aten/src/ATen/generated_unboxing_sources.cmake
COMMAND ${GEN_UNBOXING_COMMAND_sources}
DEPENDS ${all_unboxing_script} ${sources_templates}
COMMENT "Generating ATen unboxing sources"
OUTPUT ${generated_unboxing_sources}
${CMAKE_BINARY_DIR}/aten/src/ATen/generated_unboxing_sources.cmake
COMMAND ${GEN_UNBOXING_COMMAND_sources}
DEPENDS
${all_unboxing_script} ${sources_templates}
${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/native_functions.yaml
${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/tags.yaml
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..
)
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..)
else() # Otherwise do not generate or include sources into build.
set(generated_unboxing_sources "")
endif()
@ -226,116 +244,125 @@ if(INTERN_BUILD_ATEN_OPS)
endif()
set(GEN_COMMAND
"${Python_EXECUTABLE}" -m torchgen.gen
--source-path ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen
--install_dir ${CMAKE_BINARY_DIR}/aten/src/ATen
${GEN_PER_OPERATOR_FLAG}
${GEN_ROCM_FLAG}
${GEN_MPS_FLAG}
${GEN_XPU_FLAG}
${CUSTOM_BUILD_FLAGS}
)
"${Python_EXECUTABLE}" -m torchgen.gen --source-path
${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen --install_dir
${CMAKE_BINARY_DIR}/aten/src/ATen ${GEN_PER_OPERATOR_FLAG}
${GEN_ROCM_FLAG} ${GEN_MPS_FLAG} ${GEN_XPU_FLAG} ${CUSTOM_BUILD_FLAGS})
file(GLOB_RECURSE headers_templates "${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/templates/*\.h")
file(GLOB_RECURSE sources_templates "${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/templates/*\.cpp")
file(GLOB_RECURSE headers_templates
"${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/templates/*\.h")
file(GLOB_RECURSE sources_templates
"${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/templates/*\.cpp")
set(declarations_yaml_templates "")
foreach(gen_type "headers" "sources" "declarations_yaml")
# The codegen outputs may change dynamically as PyTorch is
# developed, but add_custom_command only supports dynamic inputs.
# The codegen outputs may change dynamically as PyTorch is developed, but
# add_custom_command only supports dynamic inputs.
#
# We work around this by generating a .cmake file which is
# included below to set the list of output files. If that file
# ever changes then cmake will be re-run automatically because it
# was included and so we get fully dynamic outputs.
# We work around this by generating a .cmake file which is included below to
# set the list of output files. If that file ever changes then cmake will be
# re-run automatically because it was included and so we get fully dynamic
# outputs.
set("GEN_COMMAND_${gen_type}"
${GEN_COMMAND}
--generate ${gen_type}
--output-dependencies ${CMAKE_BINARY_DIR}/aten/src/ATen/generated_${gen_type}.cmake
)
${GEN_COMMAND} --generate ${gen_type} --output-dependencies
${CMAKE_BINARY_DIR}/aten/src/ATen/generated_${gen_type}.cmake)
# Dry run to bootstrap the output variables
execute_process(
COMMAND ${GEN_COMMAND_${gen_type}} --dry-run
RESULT_VARIABLE RETURN_VALUE
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..
)
COMMAND ${GEN_COMMAND_${gen_type}} --dry-run
RESULT_VARIABLE RETURN_VALUE
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..)
if(NOT RETURN_VALUE EQUAL 0)
message(FATAL_ERROR "Failed to get generated_${gen_type} list")
endif()
include("${CMAKE_BINARY_DIR}/aten/src/ATen/generated_${gen_type}.cmake")
include("${CMAKE_BINARY_DIR}/aten/src/ATen/core_generated_${gen_type}.cmake")
include("${CMAKE_BINARY_DIR}/aten/src/ATen/cpu_vec_generated_${gen_type}.cmake")
include("${CMAKE_BINARY_DIR}/aten/src/ATen/cuda_generated_${gen_type}.cmake")
include(
"${CMAKE_BINARY_DIR}/aten/src/ATen/core_generated_${gen_type}.cmake")
include(
"${CMAKE_BINARY_DIR}/aten/src/ATen/cpu_vec_generated_${gen_type}.cmake")
include(
"${CMAKE_BINARY_DIR}/aten/src/ATen/cuda_generated_${gen_type}.cmake")
include("${CMAKE_BINARY_DIR}/aten/src/ATen/ops_generated_${gen_type}.cmake")
if(USE_XPU)
include("${CMAKE_BINARY_DIR}/aten/src/ATen/xpu_generated_${gen_type}.cmake")
include(
"${CMAKE_BINARY_DIR}/aten/src/ATen/xpu_generated_${gen_type}.cmake")
endif()
message(STATUS "${gen_type} outputs: ${gen_outputs}")
set(OUTPUT_LIST
${generated_${gen_type}}
${cuda_generated_${gen_type}}
${core_generated_${gen_type}}
${cpu_vec_generated_${gen_type}}
${ops_generated_${gen_type}}
${CMAKE_BINARY_DIR}/aten/src/ATen/generated_${gen_type}.cmake
${CMAKE_BINARY_DIR}/aten/src/ATen/ops_generated_${gen_type}.cmake
${CMAKE_BINARY_DIR}/aten/src/ATen/core_generated_${gen_type}.cmake
${CMAKE_BINARY_DIR}/aten/src/ATen/cpu_vec_generated_${gen_type}.cmake
${CMAKE_BINARY_DIR}/aten/src/ATen/cuda_generated_${gen_type}.cmake)
${generated_${gen_type}}
${cuda_generated_${gen_type}}
${core_generated_${gen_type}}
${cpu_vec_generated_${gen_type}}
${ops_generated_${gen_type}}
${CMAKE_BINARY_DIR}/aten/src/ATen/generated_${gen_type}.cmake
${CMAKE_BINARY_DIR}/aten/src/ATen/ops_generated_${gen_type}.cmake
${CMAKE_BINARY_DIR}/aten/src/ATen/core_generated_${gen_type}.cmake
${CMAKE_BINARY_DIR}/aten/src/ATen/cpu_vec_generated_${gen_type}.cmake
${CMAKE_BINARY_DIR}/aten/src/ATen/cuda_generated_${gen_type}.cmake)
if(USE_XPU)
list(APPEND OUTPUT_LIST
${xpu_generated_${gen_type}}
${CMAKE_BINARY_DIR}/aten/src/ATen/xpu_generated_${gen_type}.cmake
)
list(APPEND OUTPUT_LIST ${xpu_generated_${gen_type}}
${CMAKE_BINARY_DIR}/aten/src/ATen/xpu_generated_${gen_type}.cmake)
endif()
add_custom_command(
COMMENT "Generating ATen ${gen_type}"
OUTPUT ${OUTPUT_LIST}
COMMAND ${GEN_COMMAND_${gen_type}}
DEPENDS ${all_python} ${${gen_type}_templates}
DEPENDS
${all_python} ${${gen_type}_templates}
${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/native_functions.yaml
${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/tags.yaml
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..
)
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..)
endforeach()
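Restating the workaround described above in isolation: the generator is first run at configure time with --dry-run so that it only writes a .cmake fragment listing its outputs; including that fragment defines the output variables and also makes CMake re-run whenever the fragment changes, and the real add_custom_command then declares those outputs. A condensed sketch with a hypothetical generator (my_codegen, my_outputs.cmake, and my_generated_sources are not real names from this build):

set(MY_GEN_COMMAND # hypothetical generator invocation
    "${Python_EXECUTABLE}" -m my_codegen --output-dependencies
    ${CMAKE_BINARY_DIR}/my_outputs.cmake)

# 1) Dry run at configure time: assumed to write only my_outputs.cmake, which
#    contains set(my_generated_sources ...).
execute_process(COMMAND ${MY_GEN_COMMAND} --dry-run RESULT_VARIABLE _rc)
if(NOT _rc EQUAL 0)
  message(FATAL_ERROR "codegen dry run failed")
endif()

# 2) Including the fragment defines the output list and registers the fragment
#    as a configure dependency, so CMake re-runs when the list changes.
include(${CMAKE_BINARY_DIR}/my_outputs.cmake)

# 3) The build-time rule regenerates both the sources and the fragment.
add_custom_command(
  OUTPUT ${my_generated_sources} ${CMAKE_BINARY_DIR}/my_outputs.cmake
  COMMAND ${MY_GEN_COMMAND}
  COMMENT "Regenerating sources")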
# Generated headers used from a CUDA (.cu) file are
# not tracked correctly in CMake. We make the libATen.so depend explicitly
# on building the generated ATen files to workaround.
add_custom_target(ATEN_CPU_FILES_GEN_TARGET DEPENDS
${generated_headers} ${core_generated_headers} ${cpu_vec_generated_headers} ${ops_generated_headers}
${generated_sources} ${core_generated_sources} ${cpu_vec_generated_sources} ${ops_generated_sources}
${generated_declarations_yaml} ${generated_unboxing_sources})
add_custom_target(ATEN_CUDA_FILES_GEN_TARGET DEPENDS
${cuda_generated_headers} ${cuda_generated_sources})
# Generated headers used from a CUDA (.cu) file are not tracked correctly in
# CMake. We make the libATen.so depend explicitly on building the generated
# ATen files to workaround.
add_custom_target(
ATEN_CPU_FILES_GEN_TARGET
DEPENDS ${generated_headers}
${core_generated_headers}
${cpu_vec_generated_headers}
${ops_generated_headers}
${generated_sources}
${core_generated_sources}
${cpu_vec_generated_sources}
${ops_generated_sources}
${generated_declarations_yaml}
${generated_unboxing_sources})
add_custom_target(ATEN_CUDA_FILES_GEN_TARGET
DEPENDS ${cuda_generated_headers} ${cuda_generated_sources})
add_library(ATEN_CPU_FILES_GEN_LIB INTERFACE)
add_library(ATEN_CUDA_FILES_GEN_LIB INTERFACE)
add_dependencies(ATEN_CPU_FILES_GEN_LIB ATEN_CPU_FILES_GEN_TARGET)
add_dependencies(ATEN_CUDA_FILES_GEN_LIB ATEN_CUDA_FILES_GEN_TARGET)
if(USE_PER_OPERATOR_HEADERS)
target_compile_definitions(ATEN_CPU_FILES_GEN_LIB INTERFACE AT_PER_OPERATOR_HEADERS)
target_compile_definitions(ATEN_CUDA_FILES_GEN_LIB INTERFACE AT_PER_OPERATOR_HEADERS)
target_compile_definitions(ATEN_CPU_FILES_GEN_LIB
INTERFACE AT_PER_OPERATOR_HEADERS)
target_compile_definitions(ATEN_CUDA_FILES_GEN_LIB
INTERFACE AT_PER_OPERATOR_HEADERS)
endif()
if(USE_XPU)
add_custom_target(ATEN_XPU_FILES_GEN_TARGET DEPENDS
${xpu_generated_headers} ${xpu_generated_sources})
add_custom_target(ATEN_XPU_FILES_GEN_TARGET
DEPENDS ${xpu_generated_headers} ${xpu_generated_sources})
add_library(ATEN_XPU_FILES_GEN_LIB INTERFACE)
add_dependencies(ATEN_XPU_FILES_GEN_LIB ATEN_XPU_FILES_GEN_TARGET)
if(USE_PER_OPERATOR_HEADERS)
target_compile_definitions(ATEN_XPU_FILES_GEN_LIB INTERFACE AT_PER_OPERATOR_HEADERS)
target_compile_definitions(ATEN_XPU_FILES_GEN_LIB
INTERFACE AT_PER_OPERATOR_HEADERS)
endif()
endif()
# Handle source files that need to be compiled multiple times for
# different vectorization options
file(GLOB cpu_kernel_cpp_in "${PROJECT_SOURCE_DIR}/aten/src/ATen/native/cpu/*.cpp" "${PROJECT_SOURCE_DIR}/aten/src/ATen/native/quantized/cpu/kernels/*.cpp")
# Handle source files that need to be compiled multiple times for different
# vectorization options
file(GLOB cpu_kernel_cpp_in
"${PROJECT_SOURCE_DIR}/aten/src/ATen/native/cpu/*.cpp"
"${PROJECT_SOURCE_DIR}/aten/src/ATen/native/quantized/cpu/kernels/*.cpp")
list(APPEND CPU_CAPABILITY_NAMES "DEFAULT")
list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG}")
@ -346,7 +373,8 @@ if(INTERN_BUILD_ATEN_OPS)
if(MSVC)
list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG}/arch:AVX512")
else(MSVC)
list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} -mavx512f -mavx512bw -mavx512vl -mavx512dq -mfma")
list(APPEND CPU_CAPABILITY_FLAGS
"${OPT_FLAG} -mavx512f -mavx512bw -mavx512vl -mavx512dq -mfma")
endif(MSVC)
endif(CXX_AVX512_FOUND)
@ -354,12 +382,15 @@ if(INTERN_BUILD_ATEN_OPS)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_AVX2_CPU_DEFINITION")
# Some versions of GCC pessimistically split unaligned load and store
# instructions when using the default tuning. This is a bad choice on
# new Intel and AMD processors so we disable it when compiling with AVX2.
# See https://stackoverflow.com/questions/52626726/why-doesnt-gcc-resolve-mm256-loadu-pd-as-single-vmovupd#tab-top
check_cxx_compiler_flag("-mno-avx256-split-unaligned-load -mno-avx256-split-unaligned-store" COMPILER_SUPPORTS_NO_AVX256_SPLIT)
# instructions when using the default tuning. This is a bad choice on new
# Intel and AMD processors so we disable it when compiling with AVX2. See
# https://stackoverflow.com/questions/52626726/why-doesnt-gcc-resolve-mm256-loadu-pd-as-single-vmovupd#tab-top
check_cxx_compiler_flag(
"-mno-avx256-split-unaligned-load -mno-avx256-split-unaligned-store"
COMPILER_SUPPORTS_NO_AVX256_SPLIT)
if(COMPILER_SUPPORTS_NO_AVX256_SPLIT)
set(CPU_NO_AVX256_SPLIT_FLAGS "-mno-avx256-split-unaligned-load -mno-avx256-split-unaligned-store")
set(CPU_NO_AVX256_SPLIT_FLAGS
"-mno-avx256-split-unaligned-load -mno-avx256-split-unaligned-store")
endif(COMPILER_SUPPORTS_NO_AVX256_SPLIT)
list(APPEND CPU_CAPABILITY_NAMES "AVX2")
@ -370,7 +401,8 @@ if(INTERN_BUILD_ATEN_OPS)
if(MSVC)
list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG}/arch:AVX512")
else(MSVC)
list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} -march=native ${CPU_NO_AVX256_SPLIT_FLAGS}")
list(APPEND CPU_CAPABILITY_FLAGS
"${OPT_FLAG} -march=native ${CPU_NO_AVX256_SPLIT_FLAGS}")
endif(MSVC)
endif(CXX_AVX512_FOUND)
endif()
@ -378,68 +410,94 @@ if(INTERN_BUILD_ATEN_OPS)
if(MSVC)
list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG}/arch:AVX2")
else(MSVC)
list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} -mavx2 -mfma -mf16c ${CPU_NO_AVX256_SPLIT_FLAGS}")
list(APPEND CPU_CAPABILITY_FLAGS
"${OPT_FLAG} -mavx2 -mfma -mf16c ${CPU_NO_AVX256_SPLIT_FLAGS}")
endif(MSVC)
endif()
endif(CXX_AVX2_FOUND)
if(CXX_VSX_FOUND)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_VSX_CPU_DEFINITION")
LIST(APPEND CPU_CAPABILITY_NAMES "VSX")
LIST(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} ${CXX_VSX_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_VSX_CPU_DEFINITION")
list(APPEND CPU_CAPABILITY_NAMES "VSX")
list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} ${CXX_VSX_FLAGS}")
endif(CXX_VSX_FOUND)
if(CXX_ZVECTOR_FOUND)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_ZVECTOR_CPU_DEFINITION")
LIST(APPEND CPU_CAPABILITY_NAMES "ZVECTOR")
LIST(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} ${CXX_ZVECTOR_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_ZVECTOR_CPU_DEFINITION")
list(APPEND CPU_CAPABILITY_NAMES "ZVECTOR")
list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} ${CXX_ZVECTOR_FLAGS}")
endif(CXX_ZVECTOR_FOUND)
if(CXX_SVE_FOUND AND CXX_SVE256_FOUND AND CXX_ARM_BF16_FOUND)
if(CXX_SVE_FOUND
AND CXX_SVE256_FOUND
AND CXX_ARM_BF16_FOUND)
list(APPEND CPU_CAPABILITY_NAMES "SVE256")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_SVE_CPU_DEFINITION -DHAVE_SVE256_CPU_DEFINITION -DHAVE_ARM_BF16_CPU_DEFINITION")
set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -DHAVE_SVE_CPU_DEFINITION -DHAVE_SVE256_CPU_DEFINITION -DHAVE_ARM_BF16_CPU_DEFINITION"
)
if("${CMAKE_C_COMPILER_ID}" MATCHES "Clang")
list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} -O2 -march=armv8-a+sve+bf16 -D__ARM_FEATURE_BF16 -DCPU_CAPABILITY_SVE -msve-vector-bits=256")
list(
APPEND
CPU_CAPABILITY_FLAGS
"${OPT_FLAG} -O2 -march=armv8-a+sve+bf16 -D__ARM_FEATURE_BF16 -DCPU_CAPABILITY_SVE -msve-vector-bits=256"
)
else()
list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} -march=armv8-a+sve+bf16 -D__ARM_FEATURE_BF16 -DCPU_CAPABILITY_SVE -msve-vector-bits=256")
list(
APPEND
CPU_CAPABILITY_FLAGS
"${OPT_FLAG} -march=armv8-a+sve+bf16 -D__ARM_FEATURE_BF16 -DCPU_CAPABILITY_SVE -msve-vector-bits=256"
)
endif()
endif()
list(LENGTH CPU_CAPABILITY_NAMES NUM_CPU_CAPABILITY_NAMES)
math(EXPR NUM_CPU_CAPABILITY_NAMES "${NUM_CPU_CAPABILITY_NAMES}-1")
# The sources list might get reordered later based on the capabilities.
# See NOTE [ Linking AVX and non-AVX files ]
# The sources list might get reordered later based on the capabilities. See
# NOTE [ Linking AVX and non-AVX files ]
foreach(i RANGE ${NUM_CPU_CAPABILITY_NAMES})
function(process_vec NAME)
list(GET CPU_CAPABILITY_NAMES ${i} CPU_CAPABILITY)
set(NEW_IMPL ${CMAKE_BINARY_DIR}/aten/src/ATen/${NAME}.${CPU_CAPABILITY}.cpp)
configure_file("${PROJECT_SOURCE_DIR}/cmake/IncludeSource.cpp.in" ${NEW_IMPL})
set(cpu_kernel_cpp ${NEW_IMPL} ${cpu_kernel_cpp} PARENT_SCOPE) # Create list of copies
set(NEW_IMPL
${CMAKE_BINARY_DIR}/aten/src/ATen/${NAME}.${CPU_CAPABILITY}.cpp)
configure_file("${PROJECT_SOURCE_DIR}/cmake/IncludeSource.cpp.in"
${NEW_IMPL})
set(cpu_kernel_cpp
${NEW_IMPL} ${cpu_kernel_cpp}
PARENT_SCOPE) # Create list of copies
list(GET CPU_CAPABILITY_FLAGS ${i} FLAGS)
if(MSVC)
set(EXTRA_FLAGS "/DCPU_CAPABILITY=${CPU_CAPABILITY} /DCPU_CAPABILITY_${CPU_CAPABILITY}")
set(EXTRA_FLAGS
"/DCPU_CAPABILITY=${CPU_CAPABILITY} /DCPU_CAPABILITY_${CPU_CAPABILITY}"
)
else(MSVC)
set(EXTRA_FLAGS "-DCPU_CAPABILITY=${CPU_CAPABILITY} -DCPU_CAPABILITY_${CPU_CAPABILITY}")
set(EXTRA_FLAGS
"-DCPU_CAPABILITY=${CPU_CAPABILITY} -DCPU_CAPABILITY_${CPU_CAPABILITY}"
)
endif(MSVC)
# Only parallelize the SortingKernel for now to avoid side effects
if(${NAME} STREQUAL "native/cpu/SortingKernel.cpp" AND NOT MSVC AND USE_OMP)
if(${NAME} STREQUAL "native/cpu/SortingKernel.cpp"
AND NOT MSVC
AND USE_OMP)
string(APPEND EXTRA_FLAGS " -D_GLIBCXX_PARALLEL")
endif()
# Disable certain warnings for GCC-9.X
if(CMAKE_COMPILER_IS_GNUCXX)
if(("${NAME}" STREQUAL "native/cpu/GridSamplerKernel.cpp") AND ("${CPU_CAPABILITY}" STREQUAL "DEFAULT"))
if(("${NAME}" STREQUAL "native/cpu/GridSamplerKernel.cpp")
AND ("${CPU_CAPABILITY}" STREQUAL "DEFAULT"))
# See https://github.com/pytorch/pytorch/issues/38855
set(EXTRA_FLAGS "${EXTRA_FLAGS} -Wno-uninitialized")
endif()
if("${NAME}" STREQUAL "native/quantized/cpu/kernels/QuantizedOpKernels.cpp")
if("${NAME}" STREQUAL
"native/quantized/cpu/kernels/QuantizedOpKernels.cpp")
# See https://github.com/pytorch/pytorch/issues/38854
set(EXTRA_FLAGS "${EXTRA_FLAGS} -Wno-deprecated-copy")
endif()
endif()
set_source_files_properties(${NEW_IMPL} PROPERTIES COMPILE_FLAGS "${FLAGS} ${EXTRA_FLAGS}")
set_source_files_properties(
${NEW_IMPL} PROPERTIES COMPILE_FLAGS "${FLAGS} ${EXTRA_FLAGS}")
endfunction()
foreach(IMPL ${cpu_kernel_cpp_in})
file(RELATIVE_PATH NAME "${PROJECT_SOURCE_DIR}/aten/src/ATen/" "${IMPL}")
@ -456,22 +514,29 @@ endif()
function(append_filelist name outputvar)
set(_rootdir "${Torch_SOURCE_DIR}/")
# configure_file adds its input to the list of CMAKE_RERUN dependencies
configure_file(
${PROJECT_SOURCE_DIR}/build_variables.bzl
${PROJECT_BINARY_DIR}/caffe2/build_variables.bzl)
configure_file(${PROJECT_SOURCE_DIR}/build_variables.bzl
${PROJECT_BINARY_DIR}/caffe2/build_variables.bzl)
execute_process(
COMMAND "${Python_EXECUTABLE}" -c
"exec(open('${PROJECT_SOURCE_DIR}/build_variables.bzl').read());print(';'.join(['${_rootdir}' + x for x in ${name}]))"
COMMAND
"${Python_EXECUTABLE}" -c
"exec(open('${PROJECT_SOURCE_DIR}/build_variables.bzl').read());print(';'.join(['${_rootdir}' + x for x in ${name}]))"
WORKING_DIRECTORY "${_rootdir}"
RESULT_VARIABLE _retval
OUTPUT_VARIABLE _tempvar)
if(NOT _retval EQUAL 0)
message(FATAL_ERROR "Failed to fetch filelist ${name} from build_variables.bzl")
message(
FATAL_ERROR "Failed to fetch filelist ${name} from build_variables.bzl")
endif()
string(REPLACE "\n" "" _tempvar "${_tempvar}")
list(APPEND ${outputvar} ${_tempvar})
set(${outputvar} "${${outputvar}}" PARENT_SCOPE)
set(${outputvar}
"${${outputvar}}"
PARENT_SCOPE)
endfunction()
set(NUM_CPU_CAPABILITY_NAMES ${NUM_CPU_CAPABILITY_NAMES} PARENT_SCOPE)
set(CPU_CAPABILITY_FLAGS ${CPU_CAPABILITY_FLAGS} PARENT_SCOPE)
set(NUM_CPU_CAPABILITY_NAMES
${NUM_CPU_CAPABILITY_NAMES}
PARENT_SCOPE)
set(CPU_CAPABILITY_FLAGS
${CPU_CAPABILITY_FLAGS}
PARENT_SCOPE)
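For reference, append_filelist() defined above evaluates build_variables.bzl with Python and expands one of its named lists into absolute paths appended to the output variable. A hedged usage sketch; the list name shown must exist in build_variables.bzl and is only illustrative here:

set(MY_SRCS "")
# Appends every path from the named .bzl list, prefixed with the repo root.
append_filelist("libtorch_cmake_sources" MY_SRCS) # list name is illustrative
message(STATUS "collected ${MY_SRCS}")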


@ -1,26 +1,34 @@
function(print_target_properties tgt)
# Get all properties that cmake supports
execute_process(COMMAND cmake --help-property-list OUTPUT_VARIABLE CMAKE_PROPERTY_LIST)
execute_process(COMMAND cmake --help-property-list
OUTPUT_VARIABLE CMAKE_PROPERTY_LIST)
# Convert command output into a CMake list
STRING(REGEX REPLACE ";" "\\\\;" CMAKE_PROPERTY_LIST "${CMAKE_PROPERTY_LIST}")
STRING(REGEX REPLACE "\n" ";" CMAKE_PROPERTY_LIST "${CMAKE_PROPERTY_LIST}")
if(NOT TARGET ${tgt})
message("There is no target named '${tgt}'")
return()
endif()
string(REGEX REPLACE ";" "\\\\;" CMAKE_PROPERTY_LIST "${CMAKE_PROPERTY_LIST}")
string(REGEX REPLACE "\n" ";" CMAKE_PROPERTY_LIST "${CMAKE_PROPERTY_LIST}")
if(NOT TARGET ${tgt})
message("There is no target named '${tgt}'")
return()
endif()
foreach(prop ${CMAKE_PROPERTY_LIST})
string(REPLACE "<CONFIG>" "${CMAKE_BUILD_TYPE}" prop ${prop})
# Fix https://stackoverflow.com/questions/32197663/how-can-i-remove-the-the-location-property-may-not-be-read-from-target-error-i
if(prop STREQUAL "LOCATION" OR prop MATCHES "^LOCATION_" OR prop MATCHES "_LOCATION$")
continue()
foreach(prop ${CMAKE_PROPERTY_LIST})
string(REPLACE "<CONFIG>" "${CMAKE_BUILD_TYPE}" prop ${prop})
# Fix
# https://stackoverflow.com/questions/32197663/how-can-i-remove-the-the-location-property-may-not-be-read-from-target-error-i
if(prop STREQUAL "LOCATION"
OR prop MATCHES "^LOCATION_"
OR prop MATCHES "_LOCATION$")
continue()
endif()
# message ("Checking ${prop}")
get_property(propval TARGET ${tgt} PROPERTY ${prop} SET)
if(propval)
get_target_property(propval ${tgt} ${prop})
message("${tgt} ${prop} = ${propval}")
endif()
endforeach(prop)
# message ("Checking ${prop}")
get_property(
propval
TARGET ${tgt}
PROPERTY ${prop}
SET)
if(propval)
get_target_property(propval ${tgt} ${prop})
message("${tgt} ${prop} = ${propval}")
endif()
endforeach(prop)
endfunction(print_target_properties)
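A usage note for the debugging helper above: it is intended for ad-hoc use while investigating the build, for example:

# Dumps every set property of the target to the configure log; handy when
# chasing down unexpected compile flags or link libraries.
print_target_properties(c10)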

File diff suppressed because it is too large.


@ -7,47 +7,50 @@ if(NOT INTERN_BUILD_MOBILE OR NOT INTERN_USE_EIGEN_BLAS)
return()
endif()
##############################################################################
# Eigen BLAS is built together with Libtorch mobile.
# By default, it builds code from third-party/eigen/blas submodule.
##############################################################################
# ##############################################################################
# Eigen BLAS is built together with Libtorch mobile. By default, it builds code
# from third-party/eigen/blas submodule.
# ##############################################################################
set(CAFFE2_THIRD_PARTY_ROOT ${PROJECT_SOURCE_DIR}/third_party)
set(EIGEN_BLAS_SRC_DIR "${CAFFE2_THIRD_PARTY_ROOT}/eigen/blas" CACHE STRING "Eigen BLAS source directory")
set(EIGEN_BLAS_SRC_DIR
"${CAFFE2_THIRD_PARTY_ROOT}/eigen/blas"
CACHE STRING "Eigen BLAS source directory")
set(EigenBlas_SRCS
${EIGEN_BLAS_SRC_DIR}/single.cpp
${EIGEN_BLAS_SRC_DIR}/double.cpp
${EIGEN_BLAS_SRC_DIR}/complex_single.cpp
${EIGEN_BLAS_SRC_DIR}/complex_double.cpp
${EIGEN_BLAS_SRC_DIR}/xerbla.cpp
${EIGEN_BLAS_SRC_DIR}/f2c/srotm.c
${EIGEN_BLAS_SRC_DIR}/f2c/srotmg.c
${EIGEN_BLAS_SRC_DIR}/f2c/drotm.c
${EIGEN_BLAS_SRC_DIR}/f2c/drotmg.c
${EIGEN_BLAS_SRC_DIR}/f2c/lsame.c
${EIGEN_BLAS_SRC_DIR}/f2c/dspmv.c
${EIGEN_BLAS_SRC_DIR}/f2c/ssbmv.c
${EIGEN_BLAS_SRC_DIR}/f2c/chbmv.c
${EIGEN_BLAS_SRC_DIR}/f2c/sspmv.c
${EIGEN_BLAS_SRC_DIR}/f2c/zhbmv.c
${EIGEN_BLAS_SRC_DIR}/f2c/chpmv.c
${EIGEN_BLAS_SRC_DIR}/f2c/dsbmv.c
${EIGEN_BLAS_SRC_DIR}/f2c/zhpmv.c
${EIGEN_BLAS_SRC_DIR}/f2c/dtbmv.c
${EIGEN_BLAS_SRC_DIR}/f2c/stbmv.c
${EIGEN_BLAS_SRC_DIR}/f2c/ctbmv.c
${EIGEN_BLAS_SRC_DIR}/f2c/ztbmv.c
${EIGEN_BLAS_SRC_DIR}/f2c/d_cnjg.c
${EIGEN_BLAS_SRC_DIR}/f2c/r_cnjg.c
${EIGEN_BLAS_SRC_DIR}/f2c/complexdots.c
)
${EIGEN_BLAS_SRC_DIR}/single.cpp
${EIGEN_BLAS_SRC_DIR}/double.cpp
${EIGEN_BLAS_SRC_DIR}/complex_single.cpp
${EIGEN_BLAS_SRC_DIR}/complex_double.cpp
${EIGEN_BLAS_SRC_DIR}/xerbla.cpp
${EIGEN_BLAS_SRC_DIR}/f2c/srotm.c
${EIGEN_BLAS_SRC_DIR}/f2c/srotmg.c
${EIGEN_BLAS_SRC_DIR}/f2c/drotm.c
${EIGEN_BLAS_SRC_DIR}/f2c/drotmg.c
${EIGEN_BLAS_SRC_DIR}/f2c/lsame.c
${EIGEN_BLAS_SRC_DIR}/f2c/dspmv.c
${EIGEN_BLAS_SRC_DIR}/f2c/ssbmv.c
${EIGEN_BLAS_SRC_DIR}/f2c/chbmv.c
${EIGEN_BLAS_SRC_DIR}/f2c/sspmv.c
${EIGEN_BLAS_SRC_DIR}/f2c/zhbmv.c
${EIGEN_BLAS_SRC_DIR}/f2c/chpmv.c
${EIGEN_BLAS_SRC_DIR}/f2c/dsbmv.c
${EIGEN_BLAS_SRC_DIR}/f2c/zhpmv.c
${EIGEN_BLAS_SRC_DIR}/f2c/dtbmv.c
${EIGEN_BLAS_SRC_DIR}/f2c/stbmv.c
${EIGEN_BLAS_SRC_DIR}/f2c/ctbmv.c
${EIGEN_BLAS_SRC_DIR}/f2c/ztbmv.c
${EIGEN_BLAS_SRC_DIR}/f2c/d_cnjg.c
${EIGEN_BLAS_SRC_DIR}/f2c/r_cnjg.c
${EIGEN_BLAS_SRC_DIR}/f2c/complexdots.c)
add_library(eigen_blas STATIC ${EigenBlas_SRCS})
# We build static versions of eigen blas but link into a shared library, so they need PIC.
# We build static versions of eigen blas but link into a shared library, so they
# need PIC.
set_property(TARGET eigen_blas PROPERTY POSITION_INDEPENDENT_CODE ON)
install(TARGETS eigen_blas
LIBRARY DESTINATION lib
ARCHIVE DESTINATION lib)
install(
TARGETS eigen_blas
LIBRARY DESTINATION lib
ARCHIVE DESTINATION lib)


@ -5,103 +5,113 @@ if(NOT __AOTRITON_INCLUDED)
set(__AOTRITON_INSTALL_DIR "${PROJECT_SOURCE_DIR}/torch")
add_library(__caffe2_aotriton INTERFACE)
# AOTriton package information from GitHub Release Pages
# Replaces .ci/docker/aotriton_version.txt
# Note packages information may have versions skipped (due to no ABI breaks)
# But they must be listed from lower version to higher version
# AOTriton package information from GitHub Release Pages; replaces
# .ci/docker/aotriton_version.txt. Note that package information may have
# versions skipped (due to no ABI breaks), but entries must be listed from
# lower version to higher version.
set(__AOTRITON_VER "0.10b")
set(__AOTRITON_MANYLINUX_LIST
"manylinux_2_28" # rocm6.3
"manylinux_2_28" # rocm6.4
"manylinux_2_28" # rocm6.5
"manylinux_2_28" # rocm7.0
)
set(__AOTRITON_ROCM_LIST
"rocm6.3"
"rocm6.4"
"rocm6.5"
"rocm7.0"
)
"manylinux_2_28" # rocm6.3
"manylinux_2_28" # rocm6.4
"manylinux_2_28" # rocm6.5
"manylinux_2_28" # rocm7.0
)
set(__AOTRITON_ROCM_LIST "rocm6.3" "rocm6.4" "rocm6.5" "rocm7.0")
set(__AOTRITON_CI_COMMIT "6fca155f4deeb8d9529326f7b69f350aeeb93477")
set(__AOTRITON_SHA256_LIST
"861cd9f7479eec943933c27cb86920247e5b5dd139bc7c1376c81808abb7d7fe" # rocm6.3
"acea7d811a2d3bbe718b6e07fc2a9f739e49eecd60b4b6a36fcb3fe8edf85d78" # rocm6.4
"7e29c325d5bd33ba896ddb106f5d4fc7d715274dca7fe937f724fffa82017838" # rocm6.5
"1e9b3dddf0c7fc07131c6f0f5266129e83ce2331f459fa2be8c63f4ae91b0f5b" # rocm7.0
)
"861cd9f7479eec943933c27cb86920247e5b5dd139bc7c1376c81808abb7d7fe" # rocm6.3
"acea7d811a2d3bbe718b6e07fc2a9f739e49eecd60b4b6a36fcb3fe8edf85d78" # rocm6.4
"7e29c325d5bd33ba896ddb106f5d4fc7d715274dca7fe937f724fffa82017838" # rocm6.5
"1e9b3dddf0c7fc07131c6f0f5266129e83ce2331f459fa2be8c63f4ae91b0f5b" # rocm7.0
)
set(__AOTRITON_Z "gz")
# Note it is INSTALL"ED"
if(DEFINED ENV{AOTRITON_INSTALLED_PREFIX})
install(DIRECTORY
$ENV{AOTRITON_INSTALLED_PREFIX}/lib
$ENV{AOTRITON_INSTALLED_PREFIX}/include
install(DIRECTORY $ENV{AOTRITON_INSTALLED_PREFIX}/lib
$ENV{AOTRITON_INSTALLED_PREFIX}/include
DESTINATION ${__AOTRITON_INSTALL_DIR})
set(__AOTRITON_INSTALL_DIR "$ENV{AOTRITON_INSTALLED_PREFIX}")
message(STATUS "Using Preinstalled AOTriton at ${__AOTRITON_INSTALL_DIR}")
elseif(DEFINED ENV{AOTRITON_INSTALL_FROM_SOURCE})
ExternalProject_Add(aotriton_external
ExternalProject_Add(
aotriton_external
GIT_REPOSITORY https://github.com/ROCm/aotriton.git
GIT_TAG ${__AOTRITON_CI_COMMIT}
PREFIX ${__AOTRITON_EXTERN_PREFIX}
INSTALL_DIR ${__AOTRITON_INSTALL_DIR}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${__AOTRITON_INSTALL_DIR}
-DAOTRITON_TARGET_ARCH:STRING=${PYTORCH_ROCM_ARCH}
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-DAOTRITON_NO_PYTHON=ON
-DAOTRITON_NO_SHARED=OFF
-DAOTRITON_TARGET_ARCH:STRING=${PYTORCH_ROCM_ARCH}
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-DAOTRITON_NO_PYTHON=ON
-DAOTRITON_NO_SHARED=OFF
# CONFIGURE_COMMAND ""
BUILD_COMMAND "" # No build, install command will repeat the build process due to problems in the build system.
BUILD_COMMAND "" # No build, install command will repeat the build process
# due to problems in the build system.
BUILD_BYPRODUCTS "${__AOTRITON_INSTALL_DIR}/lib/libaotriton_v2.so"
USES_TERMINAL_DOWNLOAD TRUE
USES_TERMINAL_CONFIGURE TRUE
USES_TERMINAL_BUILD TRUE
USES_TERMINAL_INSTALL TRUE
# INSTALL_COMMAND ${MAKE_COMMAND} install
)
)
add_dependencies(__caffe2_aotriton aotriton_external)
message(STATUS "Using AOTriton compiled from source directory ${__AOTRITON_EXTERN_PREFIX}")
message(
STATUS
"Using AOTriton compiled from source directory ${__AOTRITON_EXTERN_PREFIX}"
)
else()
set(__AOTRITON_SYSTEM_ROCM "${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR}")
list(GET __AOTRITON_ROCM_LIST 0 __AOTRITON_ROCM_DEFAULT_STR)
# Initialize __AOTRITON_ROCM to lowest version, in case all builds > system's ROCM
# Initialize __AOTRITON_ROCM to lowest version, in case all builds >
# system's ROCM
string(SUBSTRING ${__AOTRITON_ROCM_DEFAULT_STR} 4 -1 __AOTRITON_ROCM)
foreach(AOTRITON_ROCM_BUILD_STR IN LISTS __AOTRITON_ROCM_LIST)
# len("rocm") == 4
string(SUBSTRING ${AOTRITON_ROCM_BUILD_STR} 4 -1 AOTRITON_ROCM_BUILD)
# Find the last build that <= system's ROCM
# Assume the list is from lower to higher
# Find the last build that is <= the system's ROCM. Assume the list is
# ordered from lower to higher.
if(AOTRITON_ROCM_BUILD VERSION_GREATER __AOTRITON_SYSTEM_ROCM)
break()
endif()
set(__AOTRITON_ROCM ${AOTRITON_ROCM_BUILD})
endforeach()
list(FIND __AOTRITON_ROCM_LIST "rocm${__AOTRITON_ROCM}" __AOTRITON_ROCM_INDEX)
list(FIND __AOTRITON_ROCM_LIST "rocm${__AOTRITON_ROCM}"
__AOTRITON_ROCM_INDEX)
list(GET __AOTRITON_SHA256_LIST ${__AOTRITON_ROCM_INDEX} __AOTRITON_SHA256)
list(GET __AOTRITON_MANYLINUX_LIST ${__AOTRITON_ROCM_INDEX} __AOTRITON_MANYLINUX)
list(GET __AOTRITON_MANYLINUX_LIST ${__AOTRITON_ROCM_INDEX}
__AOTRITON_MANYLINUX)
set(__AOTRITON_ARCH ${CMAKE_HOST_SYSTEM_PROCESSOR})
string(CONCAT __AOTRITON_FILE "aotriton-"
"${__AOTRITON_VER}-${__AOTRITON_MANYLINUX}"
"_${__AOTRITON_ARCH}-rocm${__AOTRITON_ROCM}"
"-shared.tar.${__AOTRITON_Z}")
string(CONCAT __AOTRITON_URL "https://github.com/ROCm/aotriton/releases/download/" # @lint-ignore
"${__AOTRITON_VER}/${__AOTRITON_FILE}")
ExternalProject_Add(aotriton_external
string(
CONCAT __AOTRITON_FILE
"aotriton-"
"${__AOTRITON_VER}-${__AOTRITON_MANYLINUX}"
"_${__AOTRITON_ARCH}-rocm${__AOTRITON_ROCM}"
"-shared.tar.${__AOTRITON_Z}")
string(
CONCAT
__AOTRITON_URL
"https://github.com/ROCm/aotriton/releases/download/" # @lint-ignore
"${__AOTRITON_VER}/${__AOTRITON_FILE}")
ExternalProject_Add(
aotriton_external
URL "${__AOTRITON_URL}"
URL_HASH SHA256=${__AOTRITON_SHA256}
SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/aotriton_tarball
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ${CMAKE_COMMAND} -E copy_directory
"${CMAKE_CURRENT_BINARY_DIR}/aotriton_tarball"
"${__AOTRITON_INSTALL_DIR}"
BUILD_BYPRODUCTS "${__AOTRITON_INSTALL_DIR}/lib/libaotriton_v2.so"
)
INSTALL_COMMAND
${CMAKE_COMMAND} -E copy_directory
"${CMAKE_CURRENT_BINARY_DIR}/aotriton_tarball"
"${__AOTRITON_INSTALL_DIR}"
BUILD_BYPRODUCTS "${__AOTRITON_INSTALL_DIR}/lib/libaotriton_v2.so")
add_dependencies(__caffe2_aotriton aotriton_external)
message(STATUS "Using AOTriton from pre-compiled binary ${__AOTRITON_URL}.\
Set env variables AOTRITON_INSTALL_FROM_SOURCE=1 to build from source.")
endif()
target_link_libraries(__caffe2_aotriton INTERFACE ${__AOTRITON_INSTALL_DIR}/lib/libaotriton_v2.so)
target_include_directories(__caffe2_aotriton INTERFACE ${__AOTRITON_INSTALL_DIR}/include)
target_link_libraries(
__caffe2_aotriton INTERFACE ${__AOTRITON_INSTALL_DIR}/lib/libaotriton_v2.so)
target_include_directories(__caffe2_aotriton
INTERFACE ${__AOTRITON_INSTALL_DIR}/include)
set(AOTRITON_FOUND TRUE)
endif() # __AOTRITON_INCLUDED
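To summarize the three paths above: AOTRITON_INSTALLED_PREFIX points the build at a preinstalled copy, AOTRITON_INSTALL_FROM_SOURCE builds the pinned commit via ExternalProject, and the default path downloads a prebuilt tarball whose variant is chosen by matching the system ROCm against __AOTRITON_ROCM_LIST. The selection loop in isolation, with illustrative values standing in for the real lists above:

# Pick the newest packaged ROCm that does not exceed the ROCm the system
# reports (values here are illustrative).
set(_rocm_builds "6.3" "6.4" "6.5" "7.0")
set(_system_rocm "6.4")
list(GET _rocm_builds 0 _chosen) # fall back to the oldest build
foreach(_build IN LISTS _rocm_builds)
  if(_build VERSION_GREATER _system_rocm)
    break()
  endif()
  set(_chosen ${_build})
endforeach()
message(STATUS "would use the rocm${_chosen} AOTriton package")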


@ -2,7 +2,8 @@ if(NOT __NCCL_INCLUDED)
set(__NCCL_INCLUDED TRUE)
if(USE_SYSTEM_NCCL)
# NCCL_ROOT, NCCL_LIB_DIR, NCCL_INCLUDE_DIR will be accounted in the following line.
# NCCL_ROOT, NCCL_LIB_DIR, NCCL_INCLUDE_DIR will be accounted for in the
# following line.
find_package(NCCL REQUIRED)
if(NCCL_FOUND)
add_library(__caffe2_nccl INTERFACE)
@ -30,7 +31,8 @@ if(NOT __NCCL_INCLUDED)
if("${CMAKE_GENERATOR}" MATCHES "Make")
# Recursive make with jobserver for parallelism, and also put a load limit
# here to avoid flaky OOM, https://www.gnu.org/software/make/manual/html_node/Parallel.html
# here to avoid flaky OOM,
# https://www.gnu.org/software/make/manual/html_node/Parallel.html
set(MAKE_COMMAND "$(MAKE)" "-l${MAX_JOBS}")
else()
# Parallel build with CPU load limit to avoid oversubscription
@ -38,35 +40,32 @@ if(NOT __NCCL_INCLUDED)
endif()
set(__NCCL_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/nccl")
ExternalProject_Add(nccl_external
ExternalProject_Add(
nccl_external
SOURCE_DIR ${PROJECT_SOURCE_DIR}/third_party/nccl
BUILD_IN_SOURCE 1
CONFIGURE_COMMAND ""
BUILD_COMMAND
${MAKE_COMMAND}
"CXX=${CMAKE_CXX_COMPILER}"
"CUDA_HOME=${CUDA_TOOLKIT_ROOT_DIR}"
"NVCC=${CUDA_NVCC_EXECUTABLE}"
"NVCC_GENCODE=${NVCC_GENCODE}"
"BUILDDIR=${__NCCL_BUILD_DIR}"
"VERBOSE=0"
"DEBUG=0"
${MAKE_COMMAND} "CXX=${CMAKE_CXX_COMPILER}"
"CUDA_HOME=${CUDA_TOOLKIT_ROOT_DIR}" "NVCC=${CUDA_NVCC_EXECUTABLE}"
"NVCC_GENCODE=${NVCC_GENCODE}" "BUILDDIR=${__NCCL_BUILD_DIR}"
"VERBOSE=0" "DEBUG=0"
BUILD_BYPRODUCTS "${__NCCL_BUILD_DIR}/lib/libnccl_static.a"
INSTALL_COMMAND ""
)
INSTALL_COMMAND "")
set(__NCCL_LIBRARY_DEP nccl_external)
set(NCCL_LIBRARIES ${__NCCL_BUILD_DIR}/lib/libnccl_static.a)
set(NCCL_FOUND TRUE)
add_library(__caffe2_nccl INTERFACE)
# The following old-style variables are set so that other libs, such as Gloo,
# can still use it.
# The following old-style variables are set so that other libs, such as
# Gloo, can still use it.
set(NCCL_INCLUDE_DIRS ${__NCCL_BUILD_DIR}/include)
add_dependencies(__caffe2_nccl ${__NCCL_LIBRARY_DEP})
target_link_libraries(__caffe2_nccl INTERFACE ${NCCL_LIBRARIES})
target_include_directories(__caffe2_nccl INTERFACE ${NCCL_INCLUDE_DIRS})
# nccl includes calls to shm_open/shm_close and therefore must depend on librt on Linux
# nccl includes calls to shm_open/shm_close and therefore must depend on
# librt on Linux
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
target_link_libraries(__caffe2_nccl INTERFACE rt)
endif()
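# A hedged alternative to the in-tree build above: pre-seed the cache so the
# USE_SYSTEM_NCCL branch at the top of this file finds a local install instead
# (the prefix path is an assumption).
set(USE_SYSTEM_NCCL ON CACHE BOOL "Use system-wide NCCL")
set(NCCL_ROOT "/opt/nccl" CACHE PATH "NCCL install prefix (illustrative)")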


@ -7,73 +7,107 @@ if(NOT USE_NNPACK)
return()
endif()
##############################################################################
# NNPACK is built together with Caffe2
# By default, it builds code from third-party/NNPACK submodule.
# Define NNPACK_SOURCE_DIR to build with a different version.
##############################################################################
# ##############################################################################
# NNPACK is built together with Caffe2 By default, it builds code from
# third-party/NNPACK submodule. Define NNPACK_SOURCE_DIR to build with a
# different version.
# ##############################################################################
##############################################################################
# ##############################################################################
# (1) MSVC - unsupported
##############################################################################
# ##############################################################################
if(MSVC)
message(WARNING "NNPACK not supported on MSVC yet. Turn this warning off by USE_NNPACK=OFF.")
message(
WARNING
"NNPACK not supported on MSVC yet. Turn this warning off by USE_NNPACK=OFF."
)
set(USE_NNPACK OFF)
return()
endif()
##############################################################################
# ##############################################################################
# (2) Anything but x86, x86-64, ARM, ARM64 - unsupported
##############################################################################
# ##############################################################################
if(CMAKE_SYSTEM_PROCESSOR)
if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "^(i686|x86_64|armv5te|armv7-a|armv7l|arm64|aarch64)$")
message(WARNING "NNPACK is not supported on ${CMAKE_SYSTEM_PROCESSOR} processors. "
"The only supported architectures are x86, x86-64, ARM, and ARM64. "
"Turn this warning off by USE_NNPACK=OFF.")
if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES
"^(i686|x86_64|armv5te|armv7-a|armv7l|arm64|aarch64)$")
message(
WARNING
"NNPACK is not supported on ${CMAKE_SYSTEM_PROCESSOR} processors. "
"The only supported architectures are x86, x86-64, ARM, and ARM64. "
"Turn this warning off by USE_NNPACK=OFF.")
set(USE_NNPACK OFF)
return()
endif()
endif()
##############################################################################
# ##############################################################################
# (3) Android, iOS, Linux, macOS - supported
##############################################################################
# ##############################################################################
if(ANDROID OR IOS OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux" OR ${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
if(ANDROID
OR IOS
OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux"
OR ${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
message(STATUS "Brace yourself, we are building NNPACK")
set(CAFFE2_THIRD_PARTY_ROOT ${PROJECT_SOURCE_DIR}/third_party)
# Directories for NNPACK dependencies submoduled in Caffe2
set(PYTHON_PEACHPY_SOURCE_DIR "${CAFFE2_THIRD_PARTY_ROOT}/python-peachpy" CACHE STRING "PeachPy (Python package) source directory")
set(PYTHON_PEACHPY_SOURCE_DIR
"${CAFFE2_THIRD_PARTY_ROOT}/python-peachpy"
CACHE STRING "PeachPy (Python package) source directory")
if(NOT DEFINED CPUINFO_SOURCE_DIR)
set(CPUINFO_SOURCE_DIR "${CAFFE2_THIRD_PARTY_ROOT}/cpuinfo" CACHE STRING "cpuinfo source directory")
set(CPUINFO_SOURCE_DIR
"${CAFFE2_THIRD_PARTY_ROOT}/cpuinfo"
CACHE STRING "cpuinfo source directory")
endif()
set(NNPACK_SOURCE_DIR "${CAFFE2_THIRD_PARTY_ROOT}/NNPACK" CACHE STRING "NNPACK source directory")
set(FP16_SOURCE_DIR "${CAFFE2_THIRD_PARTY_ROOT}/FP16" CACHE STRING "FP16 source directory")
set(FXDIV_SOURCE_DIR "${CAFFE2_THIRD_PARTY_ROOT}/FXdiv" CACHE STRING "FXdiv source directory")
set(PSIMD_SOURCE_DIR "${CAFFE2_THIRD_PARTY_ROOT}/psimd" CACHE STRING "PSimd source directory")
set(PTHREADPOOL_SOURCE_DIR "${CAFFE2_THIRD_PARTY_ROOT}/pthreadpool" CACHE STRING "pthreadpool source directory")
set(GOOGLETEST_SOURCE_DIR "${CAFFE2_THIRD_PARTY_ROOT}/googletest" CACHE STRING "Google Test source directory")
set(NNPACK_SOURCE_DIR
"${CAFFE2_THIRD_PARTY_ROOT}/NNPACK"
CACHE STRING "NNPACK source directory")
set(FP16_SOURCE_DIR
"${CAFFE2_THIRD_PARTY_ROOT}/FP16"
CACHE STRING "FP16 source directory")
set(FXDIV_SOURCE_DIR
"${CAFFE2_THIRD_PARTY_ROOT}/FXdiv"
CACHE STRING "FXdiv source directory")
set(PSIMD_SOURCE_DIR
"${CAFFE2_THIRD_PARTY_ROOT}/psimd"
CACHE STRING "PSimd source directory")
set(PTHREADPOOL_SOURCE_DIR
"${CAFFE2_THIRD_PARTY_ROOT}/pthreadpool"
CACHE STRING "pthreadpool source directory")
set(GOOGLETEST_SOURCE_DIR
"${CAFFE2_THIRD_PARTY_ROOT}/googletest"
CACHE STRING "Google Test source directory")
if(NOT TARGET nnpack)
set(NNPACK_BUILD_TESTS OFF CACHE BOOL "")
set(NNPACK_BUILD_BENCHMARKS OFF CACHE BOOL "")
set(NNPACK_LIBRARY_TYPE "static" CACHE STRING "")
set(PTHREADPOOL_LIBRARY_TYPE "static" CACHE STRING "")
set(CPUINFO_LIBRARY_TYPE "static" CACHE STRING "")
set(NNPACK_BUILD_TESTS
OFF
CACHE BOOL "")
set(NNPACK_BUILD_BENCHMARKS
OFF
CACHE BOOL "")
set(NNPACK_LIBRARY_TYPE
"static"
CACHE STRING "")
set(PTHREADPOOL_LIBRARY_TYPE
"static"
CACHE STRING "")
set(CPUINFO_LIBRARY_TYPE
"static"
CACHE STRING "")
if(CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
message(WARNING "Ancient nnpack forces CMake compatibility")
set(CMAKE_POLICY_VERSION_MINIMUM 3.5)
endif()
add_subdirectory(
"${NNPACK_SOURCE_DIR}"
"${CONFU_DEPENDENCIES_BINARY_DIR}/NNPACK")
add_subdirectory("${NNPACK_SOURCE_DIR}"
"${CONFU_DEPENDENCIES_BINARY_DIR}/NNPACK")
if(CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
unset(CMAKE_POLICY_VERSION_MINIMUM)
endif()
# We build static versions of nnpack and pthreadpool but link
# them into a shared library for Caffe2, so they need PIC.
# We build static versions of nnpack and pthreadpool but link them into a
# shared library for Caffe2, so they need PIC.
set_property(TARGET nnpack PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET pthreadpool PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET cpuinfo PROPERTY POSITION_INDEPENDENT_CODE ON)
@ -82,17 +116,16 @@ if(ANDROID OR IOS OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux" OR ${CMAKE_SYSTEM_NAM
set(NNPACK_FOUND TRUE)
if(TARGET nnpack)
set(NNPACK_INCLUDE_DIRS
$<TARGET_PROPERTY:nnpack,INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:pthreadpool,INCLUDE_DIRECTORIES>)
set(NNPACK_INCLUDE_DIRS $<TARGET_PROPERTY:nnpack,INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:pthreadpool,INCLUDE_DIRECTORIES>)
set(NNPACK_LIBRARIES $<TARGET_OBJECTS:nnpack> $<TARGET_OBJECTS:cpuinfo>)
endif()
return()
endif()
##############################################################################
# ##############################################################################
# (4) Catch-all: not supported.
##############################################################################
# ##############################################################################
message(WARNING "Unknown platform - I don't know how to build NNPACK. "
"See cmake/External/nnpack.cmake for details.")


@ -2,7 +2,8 @@ if(NOT __NCCL_INCLUDED)
set(__NCCL_INCLUDED TRUE)
if(USE_SYSTEM_NCCL)
# NCCL_ROOT, NCCL_LIB_DIR, NCCL_INCLUDE_DIR will be accounted in the following line.
# NCCL_ROOT, NCCL_LIB_DIR, NCCL_INCLUDE_DIR will be accounted in the
# following line.
find_package(rccl REQUIRED)
if(rccl_FOUND)
message(STATUS "RCCL Found!")


@ -10,6 +10,7 @@ if(NOT __UCC_INCLUDED)
target_include_directories(__caffe2_ucc INTERFACE ${UCC_INCLUDE_DIRS})
endif()
else()
message(FATAL_ERROR "USE_SYSTEM_UCC=OFF is not supported yet when using UCC")
message(
FATAL_ERROR "USE_SYSTEM_UCC=OFF is not supported yet when using UCC")
endif()
endif()


@ -1,10 +1,5 @@
set(FlatBuffers_Include ${PROJECT_SOURCE_DIR}/third_party/flatbuffers/include)
file(GLOB FlatBuffers_Library_SRCS
${FlatBuffers_Include}/flatbuffers/*.h
)
file(GLOB FlatBuffers_Library_SRCS ${FlatBuffers_Include}/flatbuffers/*.h)
add_library(flatbuffers INTERFACE)
target_sources(
flatbuffers
INTERFACE ${FlatBuffers_Library_SRCS}
)
target_sources(flatbuffers INTERFACE ${FlatBuffers_Library_SRCS})
target_include_directories(flatbuffers INTERFACE ${FlatBuffers_Include})


@ -1,107 +1,135 @@
if(NOT APPLE)
return()
return()
endif()
set(METAL_CFLAGS -Wall -Wextra -fno-fast-math)
if(WERROR)
string(APPEND METAL_CFLAGS -Werror)
string(APPEND METAL_CFLAGS -Werror)
endif()
function(metal_to_air SRC TARGET FLAGS)
add_custom_command(COMMAND xcrun metal -c ${SRC} -I ${CMAKE_SOURCE_DIR} -I ${CMAKE_SOURCE_DIR}/aten/src -o ${TARGET} ${FLAGS} ${METAL_CFLAGS}
DEPENDS ${SRC}
OUTPUT ${TARGET}
COMMENT "Compiling ${SRC} to ${TARGET}"
VERBATIM)
add_custom_command(
COMMAND xcrun metal -c ${SRC} -I ${CMAKE_SOURCE_DIR} -I
${CMAKE_SOURCE_DIR}/aten/src -o ${TARGET} ${FLAGS} ${METAL_CFLAGS}
DEPENDS ${SRC}
OUTPUT ${TARGET}
COMMENT "Compiling ${SRC} to ${TARGET}"
VERBATIM)
endfunction()
function(air_to_metallib TARGET OBJECTS)
set(_OBJECTS ${OBJECTS} ${ARGN})
add_custom_command(COMMAND xcrun metallib -o ${TARGET} ${_OBJECTS}
DEPENDS ${_OBJECTS}
OUTPUT ${TARGET}
COMMENT "Linking ${TARGET}"
VERBATIM)
set(_OBJECTS ${OBJECTS} ${ARGN})
add_custom_command(
COMMAND xcrun metallib -o ${TARGET} ${_OBJECTS}
DEPENDS ${_OBJECTS}
OUTPUT ${TARGET}
COMMENT "Linking ${TARGET}"
VERBATIM)
endfunction()
function(metal_to_metallib_h SRC TGT)
execute_process(COMMAND ${Python_EXECUTABLE} torch/utils/_cpp_embed_headers.py ${SRC}
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
OUTPUT_VARIABLE SHADER_CONTENT
RESULT_VARIABLE _exitcode)
if(NOT _exitcode EQUAL 0)
message(FATAL_ERROR "Failed to preprocess Metal shader ${SRC}")
return()
endif()
file(WRITE ${TGT} "#include <ATen/native/mps/OperationUtils.h>\n")
file(APPEND ${TGT} "static ::at::native::mps::MetalShaderLibrary lib(R\"SHDR(\n")
file(APPEND ${TGT} "${SHADER_CONTENT}")
file(APPEND ${TGT} ")SHDR\");\n")
execute_process(
COMMAND ${Python_EXECUTABLE} torch/utils/_cpp_embed_headers.py ${SRC}
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
OUTPUT_VARIABLE SHADER_CONTENT
RESULT_VARIABLE _exitcode)
if(NOT _exitcode EQUAL 0)
message(FATAL_ERROR "Failed to preprocess Metal shader ${SRC}")
return()
endif()
file(WRITE ${TGT} "#include <ATen/native/mps/OperationUtils.h>\n")
file(APPEND ${TGT}
"static ::at::native::mps::MetalShaderLibrary lib(R\"SHDR(\n")
file(APPEND ${TGT} "${SHADER_CONTENT}")
file(APPEND ${TGT} ")SHDR\");\n")
endfunction()
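# Minimal usage sketch for the helpers above (the shader and target names are
# hypothetical): compile one .metal file to AIR, link it into a .metallib, and
# hang the result off a custom target so the commands actually run.
metal_to_air(${CMAKE_SOURCE_DIR}/kernels/add.metal
             ${CMAKE_CURRENT_BINARY_DIR}/add.air "")
air_to_metallib(${CMAKE_CURRENT_BINARY_DIR}/kernels.metallib
                ${CMAKE_CURRENT_BINARY_DIR}/add.air)
add_custom_target(mps_shaders ALL
                  DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/kernels.metallib)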
set(BFLOAT_METAL_CODE "
set(BFLOAT_METAL_CODE
"
kernel void inc(device bfloat* ptr,
uint idx [[thread_position_in_grid]]) {
ptr[idx] += 1;
}
")
if(NOT CAN_COMPILE_METAL_FOUND)
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/bfloat_inc.metal" "${BFLOAT_METAL_CODE}")
execute_process(COMMAND xcrun metal -std=metal3.1 bfloat_inc.metal
WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
OUTPUT_VARIABLE XCRUN_OUTPUT
ERROR_VARIABLE XCRUN_OUTPUT
RESULT_VARIABLE XCRUN_RC)
if(${XCRUN_RC} EQUAL 0)
message(STATUS "Machine can compile metal shaders")
set(CAN_COMPILE_METAL YES CACHE BOOL "Host can compile metal shaders")
else()
message(WARNING "Machine can not compile metal shaders, fails with ${XCRUN_OUTPUT}")
set(CAN_COMPILE_METAL NO CACHE BOOL "Host can compile metal shaders")
endif()
set(CAN_COMPILE_METAL_FOUND YES CACHE INTERNAL "Run check for shader compiler")
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/bfloat_inc.metal"
"${BFLOAT_METAL_CODE}")
execute_process(
COMMAND xcrun metal -std=metal3.1 bfloat_inc.metal
WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
OUTPUT_VARIABLE XCRUN_OUTPUT
ERROR_VARIABLE XCRUN_OUTPUT
RESULT_VARIABLE XCRUN_RC)
if(${XCRUN_RC} EQUAL 0)
message(STATUS "Machine can compile metal shaders")
set(CAN_COMPILE_METAL
YES
CACHE BOOL "Host can compile metal shaders")
else()
message(
WARNING
"Machine can not compile metal shaders, fails with ${XCRUN_OUTPUT}")
set(CAN_COMPILE_METAL
NO
CACHE BOOL "Host can compile metal shaders")
endif()
set(CAN_COMPILE_METAL_FOUND
YES
CACHE INTERNAL "Run check for shader compiler")
endif()
if(NOT USE_PYTORCH_METAL)
return()
return()
endif()
if(IOS OR INTERN_BUILD_MOBILE)
return()
return()
endif()
set(OSX_PLATFORM "MacOSX.platform")
exec_program(/usr/bin/xcode-select ARGS -print-path OUTPUT_VARIABLE CMAKE_XCODE_DEVELOPER_DIR)
set(XCODE_POST_43_ROOT "${CMAKE_XCODE_DEVELOPER_DIR}/Platforms/${OSX_PLATFORM}/Developer")
exec_program(
/usr/bin/xcode-select ARGS
-print-path
OUTPUT_VARIABLE CMAKE_XCODE_DEVELOPER_DIR)
set(XCODE_POST_43_ROOT
"${CMAKE_XCODE_DEVELOPER_DIR}/Platforms/${OSX_PLATFORM}/Developer")
set(XCODE_PRE_43_ROOT "/Developer/Platforms/${OSX_PLATFORM}/Developer")
if(NOT DEFINED CMAKE_OSX_DEVELOPER_ROOT)
if(EXISTS ${XCODE_POST_43_ROOT})
set(CMAKE_OSX_DEVELOPER_ROOT ${XCODE_POST_43_ROOT})
elseif(EXISTS ${XCODE_PRE_43_ROOT})
set(CMAKE_OSX_DEVELOPER_ROOT ${XCODE_PRE_43_ROOT})
elseif(EXISTS ${CMAKE_XCODE_DEVELOPER_DIR} AND ${CMAKE_XCODE_DEVELOPER_DIR} STREQUAL "/Library/Developer/CommandLineTools")
set(CMAKE_OSX_DEVELOPER_ROOT ${CMAKE_XCODE_DEVELOPER_DIR})
endif()
if(EXISTS ${XCODE_POST_43_ROOT})
set(CMAKE_OSX_DEVELOPER_ROOT ${XCODE_POST_43_ROOT})
elseif(EXISTS ${XCODE_PRE_43_ROOT})
set(CMAKE_OSX_DEVELOPER_ROOT ${XCODE_PRE_43_ROOT})
elseif(EXISTS ${CMAKE_XCODE_DEVELOPER_DIR}
AND ${CMAKE_XCODE_DEVELOPER_DIR} STREQUAL
"/Library/Developer/CommandLineTools")
set(CMAKE_OSX_DEVELOPER_ROOT ${CMAKE_XCODE_DEVELOPER_DIR})
endif()
endif(NOT DEFINED CMAKE_OSX_DEVELOPER_ROOT)
set(CMAKE_OSX_DEVELOPER_ROOT ${CMAKE_OSX_DEVELOPER_ROOT} CACHE PATH "Location of OSX SDKs root directory")
set(CMAKE_OSX_DEVELOPER_ROOT
${CMAKE_OSX_DEVELOPER_ROOT}
CACHE PATH "Location of OSX SDKs root directory")
if(NOT DEFINED CMAKE_OSX_SDK_ROOT)
file(GLOB _CMAKE_OSX_SDKS "${CMAKE_OSX_DEVELOPER_ROOT}/SDKs/*")
if(_CMAKE_OSX_SDKS)
list(SORT _CMAKE_OSX_SDKS)
list(REVERSE _CMAKE_OSX_SDKS)
list(GET _CMAKE_OSX_SDKS 0 CMAKE_OSX_SDK_ROOT)
message(STATUS "_CMAKE_OSX_SDKS: ${_CMAKE_OSX_SDKS}")
else(_CMAKE_OSX_SDKS)
message(FATAL_ERROR "No OSX SDK's found in default search path ${CMAKE_OSX_DEVELOPER_ROOT}.")
endif(_CMAKE_OSX_SDKS)
message(STATUS "Toolchain using default OSX SDK: ${CMAKE_OSX_SDK_ROOT}")
file(GLOB _CMAKE_OSX_SDKS "${CMAKE_OSX_DEVELOPER_ROOT}/SDKs/*")
if(_CMAKE_OSX_SDKS)
list(SORT _CMAKE_OSX_SDKS)
list(REVERSE _CMAKE_OSX_SDKS)
list(GET _CMAKE_OSX_SDKS 0 CMAKE_OSX_SDK_ROOT)
message(STATUS "_CMAKE_OSX_SDKS: ${_CMAKE_OSX_SDKS}")
else(_CMAKE_OSX_SDKS)
message(
FATAL_ERROR
"No OSX SDK's found in default search path ${CMAKE_OSX_DEVELOPER_ROOT}."
)
endif(_CMAKE_OSX_SDKS)
message(STATUS "Toolchain using default OSX SDK: ${CMAKE_OSX_SDK_ROOT}")
endif(NOT DEFINED CMAKE_OSX_SDK_ROOT)
set(CMAKE_OSX_SDK_ROOT ${CMAKE_OSX_SDK_ROOT} CACHE PATH "Location of the selected OSX SDK")
set(CMAKE_OSX_SDK_ROOT
${CMAKE_OSX_SDK_ROOT}
CACHE PATH "Location of the selected OSX SDK")
set(CMAKE_FRAMEWORK_PATH
${CMAKE_OSX_SDK_ROOT}/System/Library/Frameworks
${CMAKE_OSX_SDK_ROOT}/System/Library/PrivateFrameworks
${CMAKE_OSX_SDK_ROOT}/Developer/Library/Frameworks
)
${CMAKE_OSX_SDK_ROOT}/Developer/Library/Frameworks)
message(STATUS "CMAKE_FRAMEWORK_PATH: ${CMAKE_FRAMEWORK_PATH}")
set(CMAKE_FIND_FRAMEWORK FIRST)


@ -6,12 +6,14 @@ include(CMakePushCheckState)
if(USE_GLOG)
cmake_push_check_state(RESET)
set(CMAKE_REQUIRED_FLAGS "-std=c++17")
CHECK_CXX_SOURCE_COMPILES(
"#include <glog/stl_logging.h>
check_cxx_source_compiles(
"#include <glog/stl_logging.h>
int main(int argc, char** argv) {
return 0;
}" CAFFE2_NEED_TO_TURN_OFF_DEPRECATION_WARNING
FAIL_REGEX ".*-Wno-deprecated.*")
}"
CAFFE2_NEED_TO_TURN_OFF_DEPRECATION_WARNING
FAIL_REGEX
".*-Wno-deprecated.*")
if(NOT CAFFE2_NEED_TO_TURN_OFF_DEPRECATION_WARNING AND NOT MSVC)
message(STATUS "Turning off deprecation warning due to glog.")
@ -24,7 +26,9 @@ endif()
if(NOT INTERN_BUILD_MOBILE)
find_package(AVX) # checks AVX and AVX2
if(CXX_AVX2_FOUND)
message(STATUS "Current compiler supports avx2 extension. Will build perfkernels.")
message(
STATUS "Current compiler supports avx2 extension. Will build perfkernels."
)
# Also see CMakeLists.txt under caffe2/perfkernels.
set(CAFFE2_PERF_WITH_AVX 1)
set(CAFFE2_PERF_WITH_AVX2 1)
@ -39,14 +43,13 @@ if(MSVC AND NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
set(CMAKE_REQUIRED_FLAGS "/D__AVX512F__ /D__AVX512DQ__ /D__AVX512VL__")
else()
# We only consider the case where all of avx512f, avx512dq, and avx512vl are
# supported.
# Platforms where avx512f is supported but not avx512dq and avx512vl as of
# Jan 15 2019 : linux_manywheel_2.7mu_cpu_build and
# supported. Platforms where avx512f is supported but not avx512dq and avx512vl
# as of Jan 15 2019 : linux_manywheel_2.7mu_cpu_build and
# linux_conda_3.7_cu100_build
set(CMAKE_REQUIRED_FLAGS "-mavx512f -mavx512dq -mavx512vl")
endif()
CHECK_CXX_SOURCE_COMPILES(
"#if defined(_MSC_VER)
check_cxx_source_compiles(
"#if defined(_MSC_VER)
#include <intrin.h>
#else
#include <immintrin.h>
@ -65,24 +68,27 @@ CHECK_CXX_SOURCE_COMPILES(
ymm = _mm256_abs_epi64(ymm); // check avx512vl
__mmask16 m = _mm512_cmp_epi32_mask(a, a, _MM_CMPINT_EQ);
__m512i r = _mm512_andnot_si512(a, a);
}" CAFFE2_COMPILER_SUPPORTS_AVX512_EXTENSIONS)
}"
CAFFE2_COMPILER_SUPPORTS_AVX512_EXTENSIONS)
if(CAFFE2_COMPILER_SUPPORTS_AVX512_EXTENSIONS)
message(STATUS "Current compiler supports avx512f extension. Will build fbgemm.")
message(
STATUS "Current compiler supports avx512f extension. Will build fbgemm.")
endif()
cmake_pop_check_state()
# ---[ Checks if compiler supports -fvisibility=hidden
check_cxx_compiler_flag("-fvisibility=hidden" COMPILER_SUPPORTS_HIDDEN_VISIBILITY)
check_cxx_compiler_flag("-fvisibility-inlines-hidden" COMPILER_SUPPORTS_HIDDEN_INLINE_VISIBILITY)
check_cxx_compiler_flag("-fvisibility=hidden"
COMPILER_SUPPORTS_HIDDEN_VISIBILITY)
check_cxx_compiler_flag("-fvisibility-inlines-hidden"
COMPILER_SUPPORTS_HIDDEN_INLINE_VISIBILITY)
if(${COMPILER_SUPPORTS_HIDDEN_INLINE_VISIBILITY})
set(CAFFE2_VISIBILITY_FLAG "-fvisibility-inlines-hidden")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CAFFE2_VISIBILITY_FLAG}")
endif()
# ---[ Checks if linker supports -rdynamic. `-rdynamic` tells linker
# -to add all (including unused) symbols into the dynamic symbol
# -table. We need this to get symbols when generating backtrace at
# -runtime.
# ---[ Checks if linker supports -rdynamic. `-rdynamic` tells linker -to add all
# (including unused) symbols into the dynamic symbol -table. We need this to get
# symbols when generating backtrace at -runtime.
if(NOT MSVC)
check_cxx_compiler_flag("-rdynamic" COMPILER_SUPPORTS_RDYNAMIC)
if(${COMPILER_SUPPORTS_RDYNAMIC})
@ -92,12 +98,10 @@ if(NOT MSVC)
endif()
# ---[ If we are building on ios, or building with opengl support, we will
# enable -mfpu=neon-fp16 for iOS Metal build. For Android, this fpu setting
# is going to be done with android-cmake by setting
# -DANDROID_ABI="armeabi-v7a with NEON FP16"
# in the build command.
# Also, we will turn off deprecated-declarations
# due to protobuf.
# enable -mfpu=neon-fp16 for iOS Metal build. For Android, this fpu setting is
# going to be done with android-cmake by setting -DANDROID_ABI="armeabi-v7a with
# NEON FP16" in the build command. Also, we will turn off
# deprecated-declarations due to protobuf.
# ---[ Check if the compiler has SVE support.
find_package(ARM) # checks SVE
@ -106,7 +110,9 @@ if(CXX_SVE_FOUND)
# Also see CMakeLists.txt under caffe2/perfkernels.
add_compile_definitions(CAFFE2_PERF_WITH_SVE=1)
else()
message(STATUS "Compiler does not support SVE extension. Will not build perfkernels.")
message(
STATUS
"Compiler does not support SVE extension. Will not build perfkernels.")
endif()
if(IOS AND (${IOS_ARCH} MATCHES "armv7*"))
@ -124,7 +130,7 @@ if(USE_NATIVE_ARCH AND NOT MSVC)
add_definitions("-march=native")
else()
message(
WARNING
WARNING
"Your compiler does not support -march=native. Turn off this warning "
"by setting -DUSE_NATIVE_ARCH=OFF.")
endif()
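# Generic shape of the probes used throughout this file, shown with a flag that
# is an assumption rather than one PyTorch actually tests: check once, then
# append only on success so unsupported toolchains are left alone.
include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("-fstack-protector-strong" COMPILER_SUPPORTS_STACK_PROTECTOR)
if(COMPILER_SUPPORTS_STACK_PROTECTOR)
  string(APPEND CMAKE_CXX_FLAGS " -fstack-protector-strong")
endif()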


@ -1,13 +1,13 @@
# Finds Google Protocol Buffers library and compilers and extends
# the standard cmake script with version and python generation support
# Finds Google Protocol Buffers library and compilers and extends the standard
# cmake script with version and python generation support
macro(custom_protobuf_find)
message(STATUS "Use custom protobuf build.")
option(protobuf_BUILD_TESTS "" OFF)
option(protobuf_BUILD_EXAMPLES "" OFF)
option(protobuf_WITH_ZLIB "" OFF)
if(${CAFFE2_LINK_LOCAL_PROTOBUF})
# If we are going to link protobuf locally, we will need to turn off
# shared libs build for protobuf.
# If we are going to link protobuf locally, we will need to turn off shared
# libs build for protobuf.
option(protobuf_BUILD_SHARED_LIBS "" OFF)
else()
# If we are building Caffe2 as shared libs, we will also build protobuf as
@ -18,7 +18,8 @@ macro(custom_protobuf_find)
option(protobuf_MSVC_STATIC_RUNTIME "" ${CAFFE2_USE_MSVC_STATIC_RUNTIME})
if(${CAFFE2_LINK_LOCAL_PROTOBUF})
set(__caffe2_CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ${CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS})
set(__caffe2_CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS
${CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS})
set(__caffe2_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS OFF)
set(BUILD_SHARED_LIBS OFF)
@ -30,7 +31,8 @@ macro(custom_protobuf_find)
endif()
endif()
set(__caffe2_CMAKE_POSITION_INDEPENDENT_CODE ${CMAKE_POSITION_INDEPENDENT_CODE})
set(__caffe2_CMAKE_POSITION_INDEPENDENT_CODE
${CMAKE_POSITION_INDEPENDENT_CODE})
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
if(CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
@ -42,22 +44,24 @@ macro(custom_protobuf_find)
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/protobuf/cmake)
endif()
set(CMAKE_POSITION_INDEPENDENT_CODE ${__caffe2_CMAKE_POSITION_INDEPENDENT_CODE})
set(CMAKE_POSITION_INDEPENDENT_CODE
${__caffe2_CMAKE_POSITION_INDEPENDENT_CODE})
if(${CAFFE2_LINK_LOCAL_PROTOBUF})
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ${__caffe2_CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS})
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS
${__caffe2_CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS})
set(BUILD_SHARED_LIBS ON)
set(CMAKE_CXX_FLAGS ${__caffe2_CMAKE_CXX_FLAGS})
endif()
# Protobuf "namespaced" target is only added post protobuf 3.5.1. As a
# result, for older versions, we will manually add alias.
# Protobuf "namespaced" target is only added post protobuf 3.5.1. As a result,
# for older versions, we will manually add alias.
if(NOT TARGET protobuf::libprotobuf)
add_library(protobuf::libprotobuf ALIAS libprotobuf)
add_library(protobuf::libprotobuf-lite ALIAS libprotobuf-lite)
# There is link error when cross compiling protoc on mobile:
# https://github.com/protocolbuffers/protobuf/issues/2719
# And protoc is very unlikely needed for mobile builds.
# https://github.com/protocolbuffers/protobuf/issues/2719 And protoc is very
# unlikely needed for mobile builds.
if(NOT (ANDROID OR IOS))
add_executable(protobuf::protoc ALIAS protoc)
endif()
@ -65,23 +69,27 @@ macro(custom_protobuf_find)
endmacro()
# Main entry for protobuf. If we are building on Android, iOS or we have hard
# coded BUILD_CUSTOM_PROTOBUF, we will hard code the use of custom protobuf
# in the submodule.
# coded BUILD_CUSTOM_PROTOBUF, we will hard code the use of custom protobuf in
# the submodule.
if(ANDROID OR IOS)
if(NOT BUILD_CUSTOM_PROTOBUF)
message(WARNING
"For Android and iOS cross compilation, I am automatically using "
"custom protobuf under third party. Note that this behavior may "
"change in the future, and you will need to specify "
"-DBUILD_CUSTOM_PROTOBUF=ON explicitly.")
message(
WARNING "For Android and iOS cross compilation, I am automatically using "
"custom protobuf under third party. Note that this behavior may "
"change in the future, and you will need to specify "
"-DBUILD_CUSTOM_PROTOBUF=ON explicitly.")
endif()
# There is link error when cross compiling protoc on mobile:
# https://github.com/protocolbuffers/protobuf/issues/2719
# And protoc is very unlikely needed for mobile builds.
# https://github.com/protocolbuffers/protobuf/issues/2719 And protoc is very
# unlikely needed for mobile builds.
set(__caffe2_protobuf_BUILD_PROTOC_BINARIES ${protobuf_BUILD_PROTOC_BINARIES})
set(protobuf_BUILD_PROTOC_BINARIES OFF CACHE BOOL "" FORCE)
set(protobuf_BUILD_PROTOC_BINARIES
OFF
CACHE BOOL "" FORCE)
custom_protobuf_find()
set(protobuf_BUILD_PROTOC_BINARIES ${__caffe2_protobuf_BUILD_PROTOC_BINARIES} CACHE BOOL "" FORCE)
set(protobuf_BUILD_PROTOC_BINARIES
${__caffe2_protobuf_BUILD_PROTOC_BINARIES}
CACHE BOOL "" FORCE)
elseif(BUILD_CUSTOM_PROTOBUF)
message(STATUS "Building using own protobuf under third_party per request.")
custom_protobuf_find()
@ -89,20 +97,22 @@ else()
include(cmake/public/protobuf.cmake)
endif()
if((NOT TARGET protobuf::libprotobuf) AND (NOT TARGET protobuf::libprotobuf-lite))
message(WARNING
if((NOT TARGET protobuf::libprotobuf) AND (NOT TARGET protobuf::libprotobuf-lite
))
message(
WARNING
"Protobuf cannot be found. Caffe2 will automatically switch to use "
"own protobuf under third_party. Note that this behavior may change in "
"the future, and you will need to specify -DBUILD_CUSTOM_PROTOBUF=ON "
"explicitly.")
custom_protobuf_find()
# TODO(jiayq): enable this in the future, when Jenkins Mac support is
# properly set up with protobuf installs.
# TODO(jiayq): enable this in the future, when Jenkins Mac support is properly
# set up with protobuf installs.
# message(FATAL_ERROR
# "Protobuf cannot be found. Caffe2 will have to build with libprotobuf. "
# "Please set the proper paths so that I can find protobuf correctly.")
# message(FATAL_ERROR "Protobuf cannot be found. Caffe2 will have to build
# with libprotobuf. " "Please set the proper paths so that I can find protobuf
# correctly.")
endif()
get_target_property(__tmp protobuf::libprotobuf INTERFACE_INCLUDE_DIRECTORIES)
@ -119,28 +129,28 @@ else()
set(Protobuf_VERSION "Protobuf_VERSION_NOTFOUND")
endif()
# Figure out which protoc to use.
# If CAFFE2_CUSTOM_PROTOC_EXECUTABLE is set, we assume the user knows
# what they're doing and we blindly use the specified protoc. This
# is typically the case when cross-compiling where protoc must be
# compiled for the host architecture and libprotobuf must be
# compiled for the target architecture.
# If CAFFE2_CUSTOM_PROTOC_EXECUTABLE is NOT set, we use the protoc
# target that is built as part of including the protobuf project.
# Figure out which protoc to use. If CAFFE2_CUSTOM_PROTOC_EXECUTABLE is set, we
# assume the user knows what they're doing and we blindly use the specified
# protoc. This is typically the case when cross-compiling where protoc must be
# compiled for the host architecture and libprotobuf must be compiled for the
# target architecture. If CAFFE2_CUSTOM_PROTOC_EXECUTABLE is NOT set, we use the
# protoc target that is built as part of including the protobuf project.
if(EXISTS "${CAFFE2_CUSTOM_PROTOC_EXECUTABLE}")
set(CAFFE2_PROTOC_EXECUTABLE ${CAFFE2_CUSTOM_PROTOC_EXECUTABLE})
else()
set(CAFFE2_PROTOC_EXECUTABLE protobuf::protoc)
endif()
################################################################################################
# Modification of standard 'protobuf_generate_cpp()' with output dir parameter and python support
# Usage:
# caffe2_protobuf_generate_cpp_py(<srcs_var> <hdrs_var> <python_var> <proto_files>)
# ##############################################################################
# Modification of standard 'protobuf_generate_cpp()' with output dir parameter
# and python support Usage: caffe2_protobuf_generate_cpp_py(<srcs_var>
# <hdrs_var> <python_var> <proto_files>)
function(caffe2_protobuf_generate_cpp_py srcs_var hdrs_var python_var)
if(NOT ARGN)
message(SEND_ERROR "Error: caffe_protobuf_generate_cpp_py() called without any proto files")
message(
SEND_ERROR
"Error: caffe_protobuf_generate_cpp_py() called without any proto files"
)
return()
endif()
@ -158,31 +168,34 @@ function(caffe2_protobuf_generate_cpp_py srcs_var hdrs_var python_var)
# Add TORCH_API prefix to protobuf classes and methods in all cases
set(DLLEXPORT_STR "dllexport_decl=TORCH_API:")
# Note: the following depends on PROTOBUF_PROTOC_EXECUTABLE. This
# is done to make sure protoc is built before attempting to
# generate sources if we're using protoc from the third_party
# directory and are building it as part of the Caffe2 build. If
# it points to an existing path, it is a no-op.
# Note: the following depends on PROTOBUF_PROTOC_EXECUTABLE. This is done to
# make sure protoc is built before attempting to generate sources if we're
# using protoc from the third_party directory and are building it as part of
# the Caffe2 build. If it points to an existing path, it is a no-op.
if(${CAFFE2_LINK_LOCAL_PROTOBUF})
# We need to rewrite the pb.h files to route GetEmptyStringAlreadyInited
# through our wrapper in proto_utils so the memory location test
# is correct.
# through our wrapper in proto_utils so the memory location test is
# correct.
add_custom_command(
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${fil_we}.pb.cc"
"${CMAKE_CURRENT_BINARY_DIR}/${fil_we}.pb.h"
"${CMAKE_CURRENT_BINARY_DIR}/${fil_we}_pb2.py"
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_CURRENT_BINARY_DIR}"
COMMAND ${CAFFE2_PROTOC_EXECUTABLE} -I${PROJECT_SOURCE_DIR} --cpp_out=${DLLEXPORT_STR}${PROJECT_BINARY_DIR} ${abs_fil}
COMMAND ${CAFFE2_PROTOC_EXECUTABLE} -I${PROJECT_SOURCE_DIR} --python_out "${PROJECT_BINARY_DIR}" ${abs_fil}
# If we remove all reference to these pb.h files from external
# libraries and binaries this rewrite can be removed.
COMMAND ${CMAKE_COMMAND} -DFILENAME=${CMAKE_CURRENT_BINARY_DIR}/${fil_we}.pb.h -DNAMESPACES=caffe\;caffe2\;onnx\;torch -P ${PROJECT_SOURCE_DIR}/cmake/ProtoBufPatch.cmake
COMMAND ${CAFFE2_PROTOC_EXECUTABLE} -I${PROJECT_SOURCE_DIR}
--cpp_out=${DLLEXPORT_STR}${PROJECT_BINARY_DIR} ${abs_fil}
COMMAND ${CAFFE2_PROTOC_EXECUTABLE} -I${PROJECT_SOURCE_DIR} --python_out
"${PROJECT_BINARY_DIR}" ${abs_fil}
# If we remove all reference to these pb.h files from external libraries
# and binaries this rewrite can be removed.
COMMAND
${CMAKE_COMMAND} -DFILENAME=${CMAKE_CURRENT_BINARY_DIR}/${fil_we}.pb.h
-DNAMESPACES=caffe\;caffe2\;onnx\;torch -P
${PROJECT_SOURCE_DIR}/cmake/ProtoBufPatch.cmake
DEPENDS ${CAFFE2_PROTOC_EXECUTABLE} ${abs_fil}
COMMENT "Running C++/Python protocol buffer compiler on ${fil}" VERBATIM )
COMMENT "Running C++/Python protocol buffer compiler on ${fil}"
VERBATIM)
else()
add_custom_command(
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${fil_we}.pb.cc"
@ -190,16 +203,29 @@ function(caffe2_protobuf_generate_cpp_py srcs_var hdrs_var python_var)
"${CMAKE_CURRENT_BINARY_DIR}/${fil_we}_pb2.py"
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_CURRENT_BINARY_DIR}"
COMMAND ${CAFFE2_PROTOC_EXECUTABLE} -I${PROJECT_SOURCE_DIR} --cpp_out=${DLLEXPORT_STR}${PROJECT_BINARY_DIR} ${abs_fil}
COMMAND ${CAFFE2_PROTOC_EXECUTABLE} -I${PROJECT_SOURCE_DIR} --python_out "${PROJECT_BINARY_DIR}" ${abs_fil}
COMMAND ${CMAKE_COMMAND} -DFILENAME=${CMAKE_CURRENT_BINARY_DIR}/${fil_we}.pb.h -DNAMESPACES=caffe\;caffe2\;onnx\;torch -DSYSTEM_PROTOBUF=YES -P ${PROJECT_SOURCE_DIR}/cmake/ProtoBufPatch.cmake
COMMAND ${CAFFE2_PROTOC_EXECUTABLE} -I${PROJECT_SOURCE_DIR}
--cpp_out=${DLLEXPORT_STR}${PROJECT_BINARY_DIR} ${abs_fil}
COMMAND ${CAFFE2_PROTOC_EXECUTABLE} -I${PROJECT_SOURCE_DIR} --python_out
"${PROJECT_BINARY_DIR}" ${abs_fil}
COMMAND
${CMAKE_COMMAND} -DFILENAME=${CMAKE_CURRENT_BINARY_DIR}/${fil_we}.pb.h
-DNAMESPACES=caffe\;caffe2\;onnx\;torch -DSYSTEM_PROTOBUF=YES -P
${PROJECT_SOURCE_DIR}/cmake/ProtoBufPatch.cmake
DEPENDS ${CAFFE2_PROTOC_EXECUTABLE} ${abs_fil}
COMMENT "Running C++/Python protocol buffer compiler on ${fil}" VERBATIM )
COMMENT "Running C++/Python protocol buffer compiler on ${fil}"
VERBATIM)
endif()
endforeach()
set_source_files_properties(${${srcs_var}} ${${hdrs_var}} ${${python_var}} PROPERTIES GENERATED TRUE)
set(${srcs_var} ${${srcs_var}} PARENT_SCOPE)
set(${hdrs_var} ${${hdrs_var}} PARENT_SCOPE)
set(${python_var} ${${python_var}} PARENT_SCOPE)
set_source_files_properties(${${srcs_var}} ${${hdrs_var}} ${${python_var}}
PROPERTIES GENERATED TRUE)
set(${srcs_var}
${${srcs_var}}
PARENT_SCOPE)
set(${hdrs_var}
${${hdrs_var}}
PARENT_SCOPE)
set(${python_var}
${${python_var}}
PARENT_SCOPE)
endfunction()
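# Hedged usage sketch (the proto path and library name are illustrative): the
# three leading arguments are output variables, and everything after them is
# treated as a list of .proto files.
caffe2_protobuf_generate_cpp_py(PROTO_SRCS PROTO_HDRS PROTO_PY
                                ${PROJECT_SOURCE_DIR}/caffe2/proto/caffe2.proto)
add_library(caffe2_protos_example OBJECT ${PROTO_SRCS} ${PROTO_HDRS})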


@ -1,48 +1,31 @@
# CMake file to replace the string contents in ONNX, Caffe, and Caffe2 proto.
# Usage example:
# cmake -DFILENAME=caffe2.pb.h -DLOCAL_PROTOBUF=ON -P ProtoBufPatch.cmake
# Usage example: cmake -DFILENAME=caffe2.pb.h -DLOCAL_PROTOBUF=ON -P
# ProtoBufPatch.cmake
file(READ ${FILENAME} content)
if(NOT SYSTEM_PROTOBUF)
# protobuf-3.6.0 pattern
string(
REPLACE
"::google::protobuf::internal::GetEmptyStringAlreadyInited"
"GetEmptyStringAlreadyInited"
content
"${content}")
string(REPLACE "::google::protobuf::internal::GetEmptyStringAlreadyInited"
"GetEmptyStringAlreadyInited" content "${content}")
# protobuf-3.8.0+ pattern
string(
REPLACE
"::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited"
"GetEmptyStringAlreadyInited"
content
"${content}")
REPLACE "::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited"
"GetEmptyStringAlreadyInited" content "${content}")
string(
REPLACE
"PROTOBUF_CONSTEXPR"
""
content
"${content}")
string(REPLACE "PROTOBUF_CONSTEXPR" "" content "${content}")
# https://github.com/protocolbuffers/protobuf/commit/0400cca3236de1ca303af38bf81eab332d042b7c
# changes PROTOBUF_CONSTEXPR to constexpr, which breaks windows
# build.
# changes PROTOBUF_CONSTEXPR to constexpr, which breaks windows build.
if(MSVC)
string(
REGEX REPLACE
"static constexpr ([^ ]+) ([^ ]+) ="
"static \\1 const \\2 ="
content
"${content}")
string(REGEX REPLACE "static constexpr ([^ ]+) ([^ ]+) ="
"static \\1 const \\2 =" content "${content}")
endif()
foreach(ns ${NAMESPACES})
# Insert "const ::std::string& GetEmptyStringAlreadyInited();" within
# the namespace and make sure we only do it once in the file. Unfortunately
# Insert "const ::std::string& GetEmptyStringAlreadyInited();" within the
# namespace and make sure we only do it once in the file. Unfortunately
# using string(REPLACE ...) doesn't work because it will replace at all
# locations and there might be multiple declarations of the namespace
# depending on how the proto is structured.
@ -53,48 +36,47 @@ if(NOT SYSTEM_PROTOBUF)
math(EXPR pos "${pos}+${search_len}")
string(SUBSTRING "${content}" 0 ${pos} content_pre)
string(SUBSTRING "${content}" ${pos} -1 content_post)
string(
CONCAT
content
"${content_pre}"
" const ::std::string& GetEmptyStringAlreadyInited(); "
"${content_post}")
string(CONCAT content "${content_pre}"
" const ::std::string& GetEmptyStringAlreadyInited(); "
"${content_post}")
endif()
endforeach()
# The move constructor is defined in the header file, which will cause
# a link error that claims that the vftable is not found. Luckily, we
# could move the definition into the source file to solve the problem.
# The move constructor is defined in the header file, which will cause a
# link error that claims that the vftable is not found. Luckily, we could move
# the definition into the source file to solve the problem.
list(LENGTH NAMESPACES ns_count)
if("${FILENAME}" MATCHES ".pb.h" AND ns_count EQUAL 1)
string(REPLACE ".pb.h" ".pb.cc" SOURCE_FILENAME ${FILENAME})
file(READ ${SOURCE_FILENAME} content_cc_origin)
string(REGEX MATCHALL "([a-zA-Z_]+)\\([a-zA-Z_]+&& from\\) noexcept[^}]*}" content_cc "${content}")
string(REGEX MATCHALL "([a-zA-Z_]+)\\([a-zA-Z_]+&& from\\) noexcept[^}]*}"
content_cc "${content}")
string(REGEX REPLACE "};" "}\n" content_cc "${content_cc}")
string(REGEX REPLACE "([a-zA-Z_]+)\\([a-zA-Z_]+&& from\\) noexcept" " \\1::\\1(\\1&& from) noexcept" content_cc "${content_cc}")
set(content_cc "${content_cc_origin}\nnamespace ${NAMESPACES} {\n#if LANG_CXX11\n${content_cc}\n#endif\n}")
string(REGEX
REPLACE "([a-zA-Z_]+)\\([a-zA-Z_]+&& from\\) noexcept"
" \\1::\\1(\\1&& from) noexcept" content_cc "${content_cc}")
set(content_cc
"${content_cc_origin}\nnamespace ${NAMESPACES} {\n#if LANG_CXX11\n${content_cc}\n#endif\n}"
)
string(REGEX REPLACE "([a-zA-Z_]+)\\([a-zA-Z_]+&& from\\) noexcept([^}]*)}" "\\1(\\1&& from) noexcept;" content "${content}")
string(REGEX REPLACE "([a-zA-Z_]+)\\([a-zA-Z_]+&& from\\) noexcept([^}]*)}"
"\\1(\\1&& from) noexcept;" content "${content}")
file(WRITE ${SOURCE_FILENAME} "${content_cc}")
endif()
endif(NOT SYSTEM_PROTOBUF)
# constexpr int TensorBoundShape_DimType_DimType_ARRAYSIZE = TensorBoundShape_DimType_DimType_MAX + 1;
# throws
# error: more than one operator "+" matches these operands:
# built-in operator "arithmetic + arithmetic"
# function "c10::operator+(int, c10::BFloat16)"
# function "c10::operator+(c10::BFloat16, int)"
# function "c10::operator+(int, c10::Half)"
# function "c10::operator+(c10::Half, int)"
# operand types are: const caffe2::ExternalDataProto_SourceType + int
# constexpr int TensorBoundShape_DimType_DimType_ARRAYSIZE =
# TensorBoundShape_DimType_DimType_MAX + 1; throws error: more than one operator
# "+" matches these operands: built-in operator "arithmetic + arithmetic"
# function "c10::operator+(int, c10::BFloat16)" function
# "c10::operator+(c10::BFloat16, int)" function "c10::operator+(int, c10::Half)"
# function "c10::operator+(c10::Half, int)" operand types are: const
# caffe2::ExternalDataProto_SourceType + int
string(
REGEX REPLACE
"constexpr ([^ ]+) ([^ ]+_ARRAYSIZE) = ([^ ]+_MAX) \\+ 1;"
"constexpr \\1 \\2 = static_cast<\\1>(\\3) + 1;"
content
"${content}")
REGEX
REPLACE "constexpr ([^ ]+) ([^ ]+_ARRAYSIZE) = ([^ ]+_MAX) \\+ 1;"
"constexpr \\1 \\2 = static_cast<\\1>(\\3) + 1;" content "${content}")
file(WRITE ${FILENAME} "${content}")


@ -18,7 +18,8 @@ function(caffe2_print_configuration_summary)
message(STATUS " Static LD flags : ${CMAKE_STATIC_LINKER_FLAGS}")
message(STATUS " Module LD flags : ${CMAKE_MODULE_LINKER_FLAGS}")
message(STATUS " Build type : ${CMAKE_BUILD_TYPE}")
get_directory_property(tmp DIRECTORY ${PROJECT_SOURCE_DIR} COMPILE_DEFINITIONS)
get_directory_property(tmp DIRECTORY ${PROJECT_SOURCE_DIR}
COMPILE_DEFINITIONS)
message(STATUS " Compile definitions : ${tmp}")
message(STATUS " CMAKE_PREFIX_PATH : ${CMAKE_PREFIX_PATH}")
message(STATUS " CMAKE_INSTALL_PREFIX : ${CMAKE_INSTALL_PREFIX}")
@ -26,7 +27,9 @@ function(caffe2_print_configuration_summary)
message(STATUS "")
message(STATUS " TORCH_VERSION : ${TORCH_VERSION}")
message(STATUS " BUILD_STATIC_RUNTIME_BENCHMARK: ${BUILD_STATIC_RUNTIME_BENCHMARK}")
message(
STATUS " BUILD_STATIC_RUNTIME_BENCHMARK: ${BUILD_STATIC_RUNTIME_BENCHMARK}"
)
message(STATUS " BUILD_BINARY : ${BUILD_BINARY}")
message(STATUS " BUILD_CUSTOM_PROTOBUF : ${BUILD_CUSTOM_PROTOBUF}")
if(${CAFFE2_LINK_LOCAL_PROTOBUF})
@ -45,7 +48,10 @@ function(caffe2_print_configuration_summary)
message(STATUS " Python site-package : ${Python_SITELIB}")
endif()
message(STATUS " BUILD_SHARED_LIBS : ${BUILD_SHARED_LIBS}")
message(STATUS " CAFFE2_USE_MSVC_STATIC_RUNTIME : ${CAFFE2_USE_MSVC_STATIC_RUNTIME}")
message(
STATUS
" CAFFE2_USE_MSVC_STATIC_RUNTIME : ${CAFFE2_USE_MSVC_STATIC_RUNTIME}"
)
message(STATUS " BUILD_TEST : ${BUILD_TEST}")
message(STATUS " BUILD_JNI : ${BUILD_JNI}")
message(STATUS " BUILD_MOBILE_AUTOGRAD : ${BUILD_MOBILE_AUTOGRAD}")
@ -183,8 +189,11 @@ function(caffe2_print_configuration_summary)
endif()
message(STATUS " USE_VULKAN : ${USE_VULKAN}")
if(${USE_VULKAN})
message(STATUS " USE_VULKAN_FP16_INFERENCE : ${USE_VULKAN_FP16_INFERENCE}")
message(STATUS " USE_VULKAN_RELAXED_PRECISION : ${USE_VULKAN_RELAXED_PRECISION}")
message(
STATUS " USE_VULKAN_FP16_INFERENCE : ${USE_VULKAN_FP16_INFERENCE}")
message(
STATUS
" USE_VULKAN_RELAXED_PRECISION : ${USE_VULKAN_RELAXED_PRECISION}")
endif()
message(STATUS " USE_PROF : ${USE_PROF}")
message(STATUS " USE_PYTORCH_QNNPACK : ${USE_PYTORCH_QNNPACK}")
@ -202,7 +211,8 @@ function(caffe2_print_configuration_summary)
endif()
message(STATUS " Public Dependencies : ${Caffe2_PUBLIC_DEPENDENCY_LIBS}")
message(STATUS " Private Dependencies : ${Caffe2_DEPENDENCY_LIBS}")
message(STATUS " Public CUDA Deps. : ${Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS}")
message(
STATUS " Public CUDA Deps. : ${Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS}")
message(STATUS " Private CUDA Deps. : ${Caffe2_CUDA_DEPENDENCY_LIBS}")
# coreml
message(STATUS " USE_COREML_DELEGATE : ${USE_COREML_DELEGATE}")


@ -21,15 +21,14 @@ if(ANDROID)
message(FATAL_ERROR "ANDROID_NDK not set")
endif()
set(GLSLC_PATH "${ANDROID_NDK}/shader-tools/${ANDROID_NDK_HOST_SYSTEM_NAME}/glslc")
set(GLSLC_PATH
"${ANDROID_NDK}/shader-tools/${ANDROID_NDK_HOST_SYSTEM_NAME}/glslc")
else()
find_program(
GLSLC_PATH glslc
PATHS
ENV VULKAN_SDK
PATHS ENV VULKAN_SDK
PATHS "$ENV{VULKAN_SDK}/${CMAKE_HOST_SYSTEM_PROCESSOR}/bin"
PATHS "$ENV{VULKAN_SDK}/bin"
)
PATHS "$ENV{VULKAN_SDK}/bin")
if(NOT GLSLC_PATH)
message(FATAL_ERROR "USE_VULKAN glslc not found")
@ -42,18 +41,18 @@ list(APPEND NEW_PYTHONPATH "${CMAKE_CURRENT_LIST_DIR}/..")
set(ENV{PYTHONPATH} ${NEW_PYTHONPATH})
execute_process(
COMMAND
"${Python_EXECUTABLE}"
${CMAKE_CURRENT_LIST_DIR}/../tools/gen_vulkan_spv.py
--glsl-path ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/vulkan/glsl
--output-path ${VULKAN_GEN_OUTPUT_PATH}
--glslc-path=${GLSLC_PATH}
--tmp-dir-path=${CMAKE_BINARY_DIR}/vulkan/spv
--env ${VULKAN_GEN_ARG_ENV}
"${Python_EXECUTABLE}" ${CMAKE_CURRENT_LIST_DIR}/../tools/gen_vulkan_spv.py
--glsl-path ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/vulkan/glsl
--output-path ${VULKAN_GEN_OUTPUT_PATH} --glslc-path=${GLSLC_PATH}
--tmp-dir-path=${CMAKE_BINARY_DIR}/vulkan/spv --env ${VULKAN_GEN_ARG_ENV}
RESULT_VARIABLE error_code)
set(ENV{PYTHONPATH} ${PYTHONPATH})
if(error_code)
message(FATAL_ERROR "Failed to gen spv.h and spv.cpp with precompiled shaders for Vulkan backend")
endif()
if(error_code)
message(
FATAL_ERROR
"Failed to gen spv.h and spv.cpp with precompiled shaders for Vulkan backend"
)
endif()
set(vulkan_generated_cpp ${VULKAN_GEN_OUTPUT_PATH}/spv.cpp)
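# Hypothetical follow-up (target name is an assumption): the variable set above
# is meant to be compiled into whichever library hosts the Vulkan backend.
add_library(vulkan_backend_example OBJECT ${vulkan_generated_cpp})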


@ -8,18 +8,18 @@ if(ANDROID)
endif()
# Vulkan from ANDROID_NDK
set(VULKAN_INCLUDE_DIR "${ANDROID_NDK}/sources/third_party/vulkan/src/include")
set(VULKAN_INCLUDE_DIR
"${ANDROID_NDK}/sources/third_party/vulkan/src/include")
message(STATUS "VULKAN_INCLUDE_DIR:${VULKAN_INCLUDE_DIR}")
set(VULKAN_ANDROID_NDK_WRAPPER_DIR "${ANDROID_NDK}/sources/third_party/vulkan/src/common")
message(STATUS "Vulkan_ANDROID_NDK_WRAPPER_DIR:${VULKAN_ANDROID_NDK_WRAPPER_DIR}")
set(VULKAN_ANDROID_NDK_WRAPPER_DIR
"${ANDROID_NDK}/sources/third_party/vulkan/src/common")
message(
STATUS "Vulkan_ANDROID_NDK_WRAPPER_DIR:${VULKAN_ANDROID_NDK_WRAPPER_DIR}")
set(VULKAN_WRAPPER_DIR "${VULKAN_ANDROID_NDK_WRAPPER_DIR}")
add_library(
VulkanWrapper
STATIC
${VULKAN_WRAPPER_DIR}/vulkan_wrapper.h
${VULKAN_WRAPPER_DIR}/vulkan_wrapper.cpp)
add_library(VulkanWrapper STATIC ${VULKAN_WRAPPER_DIR}/vulkan_wrapper.h
${VULKAN_WRAPPER_DIR}/vulkan_wrapper.cpp)
target_include_directories(VulkanWrapper PUBLIC .)
target_include_directories(VulkanWrapper PUBLIC "${VULKAN_INCLUDE_DIR}")
@ -33,7 +33,10 @@ else()
find_package(Vulkan)
if(NOT Vulkan_FOUND)
message(FATAL_ERROR "USE_VULKAN requires either Vulkan installed on system path or environment var VULKAN_SDK set.")
message(
FATAL_ERROR
"USE_VULKAN requires either Vulkan installed on system path or environment var VULKAN_SDK set."
)
endif()
list(APPEND Vulkan_INCLUDES ${Vulkan_INCLUDE_DIRS})
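# Hypothetical consumer on the desktop path (target and source names are
# illustrative); Vulkan_LIBRARIES comes from CMake's stock FindVulkan module.
add_library(vulkan_ops_example OBJECT vulkan_ops.cpp)
target_include_directories(vulkan_ops_example PRIVATE ${Vulkan_INCLUDES})
target_link_libraries(vulkan_ops_example PRIVATE ${Vulkan_LIBRARIES})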


@ -1,11 +1,14 @@
if(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
message(FATAL_ERROR "Cannot find install manifest: @CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
message(
FATAL_ERROR
"Cannot find install manifest: @CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt"
)
endif(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
if(NOT DEFINED CMAKE_INSTALL_PREFIX)
set (CMAKE_INSTALL_PREFIX "@CMAKE_INSTALL_PREFIX@")
set(CMAKE_INSTALL_PREFIX "@CMAKE_INSTALL_PREFIX@")
endif()
message(${CMAKE_INSTALL_PREFIX})
message(${CMAKE_INSTALL_PREFIX})
file(READ "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt" files)
string(REGEX REPLACE "\n" ";" files "${files}")
@ -13,10 +16,10 @@ foreach(file ${files})
message(STATUS "Uninstalling $ENV{DESTDIR}${file}")
if(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}")
exec_program(
"@CMAKE_COMMAND@" ARGS "-E remove \"$ENV{DESTDIR}${file}\""
"@CMAKE_COMMAND@" ARGS
"-E remove \"$ENV{DESTDIR}${file}\""
OUTPUT_VARIABLE rm_out
RETURN_VALUE rm_retval
)
RETURN_VALUE rm_retval)
if(NOT "${rm_retval}" STREQUAL 0)
message(FATAL_ERROR "Problem when removing $ENV{DESTDIR}${file}")
endif(NOT "${rm_retval}" STREQUAL 0)
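# For context, this template is normally configured into the build tree and
# driven by a custom "uninstall" target; a typical wiring (paths and names are
# assumptions) looks like:
configure_file(cmake/cmake_uninstall.cmake.in cmake_uninstall.cmake @ONLY)
add_custom_target(uninstall
                  COMMAND ${CMAKE_COMMAND} -P
                          ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake)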


@ -1,32 +1,36 @@
# This file is based off of the Platform/Darwin.cmake and Platform/UnixPaths.cmake
# files which are included with CMake 2.8.4
# It has been altered for iOS development
# This file is based off of the Platform/Darwin.cmake and
# Platform/UnixPaths.cmake files which are included with CMake 2.8.4 It has been
# altered for iOS development
# Options:
#
# IOS_PLATFORM = OS (default) or SIMULATOR
# This decides if SDKS will be selected from the iPhoneOS.platform or iPhoneSimulator.platform folders
# OS - the default, used to build for iPhone and iPad physical devices, which have an arm arch.
# SIMULATOR - used to build for the Simulator platforms, which now uses arm64 arch.
# IOS_PLATFORM = OS (default) or SIMULATOR This decides if SDKS will be selected
# from the iPhoneOS.platform or iPhoneSimulator.platform folders OS - the
# default, used to build for iPhone and iPad physical devices, which have an arm
# arch. SIMULATOR - used to build for the Simulator platforms, which now uses
# arm64 arch.
#
# CMAKE_IOS_DEVELOPER_ROOT = automatic(default) or /path/to/platform/Developer folder
# By default this location is automatically chosen based on the IOS_PLATFORM value above.
# If set manually, it will override the default location and force the use of a particular Developer Platform
# CMAKE_IOS_DEVELOPER_ROOT = automatic(default) or /path/to/platform/Developer
# folder By default this location is automatically chosen based on the
# IOS_PLATFORM value above. If set manually, it will override the default
# location and force the use of a particular Developer Platform
#
# CMAKE_IOS_SDK_ROOT = automatic(default) or /path/to/platform/Developer/SDKs/SDK folder
# By default this location is automatically chosen based on the CMAKE_IOS_DEVELOPER_ROOT value.
# In this case it will always be the most up-to-date SDK found in the CMAKE_IOS_DEVELOPER_ROOT path.
# If set manually, this will force the use of a specific SDK version
# CMAKE_IOS_SDK_ROOT = automatic(default) or
# /path/to/platform/Developer/SDKs/SDK folder By default this location is
# automatically chosen based on the CMAKE_IOS_DEVELOPER_ROOT value. In this case
# it will always be the most up-to-date SDK found in the
# CMAKE_IOS_DEVELOPER_ROOT path. If set manually, this will force the use of a
# specific SDK version
# Macros:
#
# set_xcode_property (TARGET XCODE_PROPERTY XCODE_VALUE)
# A convenience macro for setting xcode specific properties on targets
# example: set_xcode_property (myioslib IPHONEOS_DEPLOYMENT_TARGET "3.1")
# set_xcode_property (TARGET XCODE_PROPERTY XCODE_VALUE) A convenience macro for
# setting xcode specific properties on targets example: set_xcode_property
# (myioslib IPHONEOS_DEPLOYMENT_TARGET "3.1")
#
# find_host_package (PROGRAM ARGS)
# A macro used to find executable programs on the host system, not within the iOS environment.
# Thanks to the android-cmake project for providing the command
# find_host_package (PROGRAM ARGS) A macro used to find executable programs on
# the host system, not within the iOS environment. Thanks to the android-cmake
# project for providing the command
# Standard settings
set(CMAKE_SYSTEM_NAME Darwin)
@ -36,51 +40,68 @@ set(APPLE True)
set(IOS True)
# Required as of cmake 2.8.10
set(CMAKE_OSX_DEPLOYMENT_TARGET "" CACHE STRING "Force unset of the deployment target for iOS" FORCE)
set(CMAKE_OSX_DEPLOYMENT_TARGET
""
CACHE STRING "Force unset of the deployment target for iOS" FORCE)
# Determine the cmake host system version so we know where to find the iOS SDKs
find_program(CMAKE_UNAME uname /bin /usr/bin /usr/local/bin)
if(CMAKE_UNAME)
execute_process(COMMAND uname -r OUTPUT_VARIABLE CMAKE_HOST_SYSTEM_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE)
string(REGEX REPLACE "^([0-9]+)\\.([0-9]+).*$" "\\1" DARWIN_MAJOR_VERSION "${CMAKE_HOST_SYSTEM_VERSION}")
execute_process(
COMMAND uname -r
OUTPUT_VARIABLE CMAKE_HOST_SYSTEM_VERSION
OUTPUT_STRIP_TRAILING_WHITESPACE)
string(REGEX REPLACE "^([0-9]+)\\.([0-9]+).*$" "\\1" DARWIN_MAJOR_VERSION
"${CMAKE_HOST_SYSTEM_VERSION}")
endif(CMAKE_UNAME)
# Force the compilers to gcc for iOS
set(CMAKE_C_COMPILER /usr/bin/gcc CACHE STRING "")
set(CMAKE_CXX_COMPILER /usr/bin/g++ CACHE STRING "")
set(CMAKE_AR ar CACHE FILEPATH "" FORCE)
set(CMAKE_RANLIB ranlib CACHE FILEPATH "" FORCE)
set(PKG_CONFIG_EXECUTABLE pkg-config CACHE FILEPATH "" FORCE)
set(CMAKE_C_COMPILER
/usr/bin/gcc
CACHE STRING "")
set(CMAKE_CXX_COMPILER
/usr/bin/g++
CACHE STRING "")
set(CMAKE_AR
ar
CACHE FILEPATH "" FORCE)
set(CMAKE_RANLIB
ranlib
CACHE FILEPATH "" FORCE)
set(PKG_CONFIG_EXECUTABLE
pkg-config
CACHE FILEPATH "" FORCE)
# Setup iOS platform unless specified manually with IOS_PLATFORM
if(NOT IOS_PLATFORM)
set(IOS_PLATFORM "OS")
set(IOS_PLATFORM "OS")
endif(NOT IOS_PLATFORM)
set(IOS_PLATFORM ${IOS_PLATFORM} CACHE STRING "Type of iOS Platform")
set(IOS_PLATFORM
${IOS_PLATFORM}
CACHE STRING "Type of iOS Platform")
# Check the platform selection and setup for developer root
if(${IOS_PLATFORM} STREQUAL "OS")
set(IOS_PLATFORM_LOCATION "iPhoneOS.platform")
set(XCODE_IOS_PLATFORM iphoneos)
set(IOS_PLATFORM_LOCATION "iPhoneOS.platform")
set(XCODE_IOS_PLATFORM iphoneos)
# This causes the installers to properly locate the output libraries
set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-iphoneos")
# This causes the installers to properly locate the output libraries
set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-iphoneos")
elseif(${IOS_PLATFORM} STREQUAL "SIMULATOR")
set(IOS_PLATFORM_LOCATION "iPhoneSimulator.platform")
set(XCODE_IOS_PLATFORM iphonesimulator)
set(IOS_PLATFORM_LOCATION "iPhoneSimulator.platform")
set(XCODE_IOS_PLATFORM iphonesimulator)
# This causes the installers to properly locate the output libraries
set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-iphonesimulator")
# This causes the installers to properly locate the output libraries
set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-iphonesimulator")
elseif(${IOS_PLATFORM} STREQUAL "WATCHOS")
set(IOS_PLATFORM_LOCATION "WatchOS.platform")
set(XCODE_IOS_PLATFORM watchos)
set(IOS_PLATFORM_LOCATION "WatchOS.platform")
set(XCODE_IOS_PLATFORM watchos)
# This causes the installers to properly locate the output libraries
set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-watchos")
# This causes the installers to properly locate the output libraries
set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-watchos")
else(${IOS_PLATFORM} STREQUAL "OS")
message(FATAL_ERROR
"Unsupported IOS_PLATFORM value selected. "
"Please choose OS, SIMULATOR, or WATCHOS.")
message(FATAL_ERROR "Unsupported IOS_PLATFORM value selected. "
"Please choose OS, SIMULATOR, or WATCHOS.")
endif()
# All iOS/Darwin specific settings - some may be redundant
@ -93,87 +114,115 @@ set(CMAKE_DL_LIBS "")
set(CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG "-compatibility_version ")
set(CMAKE_C_OSX_CURRENT_VERSION_FLAG "-current_version ")
set(CMAKE_CXX_OSX_COMPATIBILITY_VERSION_FLAG "${CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG}")
set(CMAKE_CXX_OSX_COMPATIBILITY_VERSION_FLAG
"${CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG}")
set(CMAKE_CXX_OSX_CURRENT_VERSION_FLAG "${CMAKE_C_OSX_CURRENT_VERSION_FLAG}")
if(IOS_DEPLOYMENT_TARGET)
set(XCODE_IOS_PLATFORM_VERSION_FLAGS "-m${XCODE_IOS_PLATFORM}-version-min=${IOS_DEPLOYMENT_TARGET}")
set(XCODE_IOS_PLATFORM_VERSION_FLAGS
"-m${XCODE_IOS_PLATFORM}-version-min=${IOS_DEPLOYMENT_TARGET}")
endif()
# Hidden visibility is required for cxx on iOS
set(CMAKE_C_FLAGS_INIT "${XCODE_IOS_PLATFORM_VERSION_FLAGS}")
set(CMAKE_CXX_FLAGS_INIT "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -fvisibility-inlines-hidden")
set(CMAKE_CXX_FLAGS_INIT
"${XCODE_IOS_PLATFORM_VERSION_FLAGS} -fvisibility-inlines-hidden")
set(CMAKE_C_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_C_LINK_FLAGS}")
set(CMAKE_CXX_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_CXX_LINK_FLAGS}")
set(CMAKE_C_LINK_FLAGS
"${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_C_LINK_FLAGS}"
)
set(CMAKE_CXX_LINK_FLAGS
"${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_CXX_LINK_FLAGS}"
)
set(CMAKE_PLATFORM_HAS_INSTALLNAME 1)
set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-dynamiclib -headerpad_max_install_names")
set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS
"-dynamiclib -headerpad_max_install_names")
set(CMAKE_SHARED_MODULE_CREATE_C_FLAGS "-bundle -headerpad_max_install_names")
set(CMAKE_SHARED_MODULE_LOADER_C_FLAG "-Wl,-bundle_loader,")
set(CMAKE_SHARED_MODULE_LOADER_CXX_FLAG "-Wl,-bundle_loader,")
set(CMAKE_FIND_LIBRARY_SUFFIXES ".dylib" ".so" ".a")
# hack: if a new cmake (which uses CMAKE_INSTALL_NAME_TOOL) runs on an old build tree
# (where install_name_tool was hardcoded) and where CMAKE_INSTALL_NAME_TOOL isn't in the cache
# and still cmake didn't fail in CMakeFindBinUtils.cmake (because it isn't rerun)
# hardcode CMAKE_INSTALL_NAME_TOOL here to install_name_tool, so it behaves as it did before, Alex
# hack: if a new cmake (which uses CMAKE_INSTALL_NAME_TOOL) runs on an old build
# tree (where install_name_tool was hardcoded) and where CMAKE_INSTALL_NAME_TOOL
# isn't in the cache and still cmake didn't fail in CMakeFindBinUtils.cmake
# (because it isn't rerun) hardcode CMAKE_INSTALL_NAME_TOOL here to
# install_name_tool, so it behaves as it did before, Alex
if(NOT CMAKE_INSTALL_NAME_TOOL)
find_program(CMAKE_INSTALL_NAME_TOOL install_name_tool)
find_program(CMAKE_INSTALL_NAME_TOOL install_name_tool)
endif(NOT CMAKE_INSTALL_NAME_TOOL)
# Setup iOS deployment target
set(IOS_DEPLOYMENT_TARGET ${IOS_DEPLOYMENT_TARGET} CACHE STRING "Minimum iOS version")
set(IOS_DEPLOYMENT_TARGET
${IOS_DEPLOYMENT_TARGET}
CACHE STRING "Minimum iOS version")
# Setup iOS developer location unless specified manually with CMAKE_IOS_DEVELOPER_ROOT
# Note Xcode 4.3 changed the installation location, choose the most recent one available
# Setup iOS developer location unless specified manually with
# CMAKE_IOS_DEVELOPER_ROOT Note Xcode 4.3 changed the installation location,
# choose the most recent one available
execute_process(
COMMAND /usr/bin/xcode-select -print-path
OUTPUT_VARIABLE CMAKE_XCODE_DEVELOPER_DIR
OUTPUT_STRIP_TRAILING_WHITESPACE
)
set(XCODE_POST_43_ROOT "${CMAKE_XCODE_DEVELOPER_DIR}/Platforms/${IOS_PLATFORM_LOCATION}/Developer")
COMMAND /usr/bin/xcode-select -print-path
OUTPUT_VARIABLE CMAKE_XCODE_DEVELOPER_DIR
OUTPUT_STRIP_TRAILING_WHITESPACE)
set(XCODE_POST_43_ROOT
"${CMAKE_XCODE_DEVELOPER_DIR}/Platforms/${IOS_PLATFORM_LOCATION}/Developer")
set(XCODE_PRE_43_ROOT "/Developer/Platforms/${IOS_PLATFORM_LOCATION}/Developer")
if(NOT CMAKE_IOS_DEVELOPER_ROOT)
if(EXISTS ${XCODE_POST_43_ROOT})
set(CMAKE_IOS_DEVELOPER_ROOT ${XCODE_POST_43_ROOT})
elseif(EXISTS ${XCODE_PRE_43_ROOT})
set(CMAKE_IOS_DEVELOPER_ROOT ${XCODE_PRE_43_ROOT})
endif(EXISTS ${XCODE_POST_43_ROOT})
if(EXISTS ${XCODE_POST_43_ROOT})
set(CMAKE_IOS_DEVELOPER_ROOT ${XCODE_POST_43_ROOT})
elseif(EXISTS ${XCODE_PRE_43_ROOT})
set(CMAKE_IOS_DEVELOPER_ROOT ${XCODE_PRE_43_ROOT})
endif(EXISTS ${XCODE_POST_43_ROOT})
endif(NOT CMAKE_IOS_DEVELOPER_ROOT)
set(CMAKE_IOS_DEVELOPER_ROOT ${CMAKE_IOS_DEVELOPER_ROOT} CACHE PATH "Location of iOS Platform")
set(CMAKE_IOS_DEVELOPER_ROOT
${CMAKE_IOS_DEVELOPER_ROOT}
CACHE PATH "Location of iOS Platform")
# Find and use the most recent iOS sdk unless specified manually with CMAKE_IOS_SDK_ROOT
# Find and use the most recent iOS sdk unless specified manually with
# CMAKE_IOS_SDK_ROOT
if(NOT CMAKE_IOS_SDK_ROOT)
file(GLOB _CMAKE_IOS_SDKS "${CMAKE_IOS_DEVELOPER_ROOT}/SDKs/*")
if(_CMAKE_IOS_SDKS)
list(SORT _CMAKE_IOS_SDKS)
list(REVERSE _CMAKE_IOS_SDKS)
list(GET _CMAKE_IOS_SDKS 0 CMAKE_IOS_SDK_ROOT)
else(_CMAKE_IOS_SDKS)
message(FATAL_ERROR "No iOS SDK's found in default search path ${CMAKE_IOS_DEVELOPER_ROOT}. Manually set CMAKE_IOS_SDK_ROOT or install the iOS SDK.")
endif(_CMAKE_IOS_SDKS)
message(STATUS "Toolchain using default iOS SDK: ${CMAKE_IOS_SDK_ROOT}")
file(GLOB _CMAKE_IOS_SDKS "${CMAKE_IOS_DEVELOPER_ROOT}/SDKs/*")
if(_CMAKE_IOS_SDKS)
list(SORT _CMAKE_IOS_SDKS)
list(REVERSE _CMAKE_IOS_SDKS)
list(GET _CMAKE_IOS_SDKS 0 CMAKE_IOS_SDK_ROOT)
else(_CMAKE_IOS_SDKS)
message(
FATAL_ERROR
"No iOS SDK's found in default search path ${CMAKE_IOS_DEVELOPER_ROOT}. Manually set CMAKE_IOS_SDK_ROOT or install the iOS SDK."
)
endif(_CMAKE_IOS_SDKS)
message(STATUS "Toolchain using default iOS SDK: ${CMAKE_IOS_SDK_ROOT}")
endif(NOT CMAKE_IOS_SDK_ROOT)
set(CMAKE_IOS_SDK_ROOT ${CMAKE_IOS_SDK_ROOT} CACHE PATH "Location of the selected iOS SDK")
set(CMAKE_IOS_SDK_ROOT
${CMAKE_IOS_SDK_ROOT}
CACHE PATH "Location of the selected iOS SDK")
# Set the sysroot default to the most recent SDK
set(CMAKE_OSX_SYSROOT ${CMAKE_IOS_SDK_ROOT} CACHE PATH "Sysroot used for iOS support")
set(CMAKE_OSX_SYSROOT
${CMAKE_IOS_SDK_ROOT}
CACHE PATH "Sysroot used for iOS support")
# set the architecture for iOS
if(IOS_PLATFORM STREQUAL "OS")
set(DEFAULT_IOS_ARCH "arm64")
set(DEFAULT_IOS_ARCH "arm64")
elseif(IOS_PLATFORM STREQUAL "SIMULATOR")
set(DEFAULT_IOS_ARCH "arm64")
set(DEFAULT_IOS_ARCH "arm64")
elseif(IOS_PLATFORM STREQUAL "WATCHOS")
set(DEFAULT_IOS_ARCH "armv7k;arm64_32")
set(DEFAULT_IOS_ARCH "armv7k;arm64_32")
endif()
set(IOS_ARCH ${DEFAULT_IOS_ARCH} CACHE STRING "Build architecture for iOS")
set(CMAKE_OSX_ARCHITECTURES ${IOS_ARCH} CACHE STRING "Build architecture for iOS")
set(IOS_ARCH
${DEFAULT_IOS_ARCH}
CACHE STRING "Build architecture for iOS")
set(CMAKE_OSX_ARCHITECTURES
${IOS_ARCH}
CACHE STRING "Build architecture for iOS")
# Set the find root to the iOS developer roots and to user defined paths
set(CMAKE_FIND_ROOT_PATH ${CMAKE_IOS_DEVELOPER_ROOT} ${CMAKE_IOS_SDK_ROOT} ${CMAKE_PREFIX_PATH} CACHE STRING "iOS find search path root")
set(CMAKE_FIND_ROOT_PATH
${CMAKE_IOS_DEVELOPER_ROOT} ${CMAKE_IOS_SDK_ROOT} ${CMAKE_PREFIX_PATH}
CACHE STRING "iOS find search path root")
# default to searching for frameworks first
set(CMAKE_FIND_FRAMEWORK FIRST)
@ -182,8 +231,7 @@ set(CMAKE_FIND_FRAMEWORK FIRST)
set(CMAKE_SYSTEM_FRAMEWORK_PATH
${CMAKE_IOS_SDK_ROOT}/System/Library/Frameworks
${CMAKE_IOS_SDK_ROOT}/System/Library/PrivateFrameworks
${CMAKE_IOS_SDK_ROOT}/Developer/Library/Frameworks
)
${CMAKE_IOS_SDK_ROOT}/Developer/Library/Frameworks)
# only search the iOS sdks, not the remainder of the host filesystem
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY)
@ -192,20 +240,21 @@ set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
# This little macro lets you set any Xcode-specific property
macro(set_xcode_property TARGET XCODE_PROPERTY XCODE_VALUE)
set_property(TARGET ${TARGET} PROPERTY XCODE_ATTRIBUTE_${XCODE_PROPERTY} ${XCODE_VALUE})
set_property(TARGET ${TARGET} PROPERTY XCODE_ATTRIBUTE_${XCODE_PROPERTY}
${XCODE_VALUE})
endmacro(set_xcode_property)
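# Usage sketch (the target name `my_lib` is hypothetical): any Xcode build
# setting can be forced through this macro, e.g.
#   set_xcode_property(my_lib IPHONEOS_DEPLOYMENT_TARGET "12.0")
# which expands to
#   set_property(TARGET my_lib PROPERTY
#                XCODE_ATTRIBUTE_IPHONEOS_DEPLOYMENT_TARGET "12.0")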
# This macro lets you find executable programs on the host system
macro(find_host_package)
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER)
set(IOS FALSE)
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER)
set(IOS FALSE)
find_package(${ARGN})
find_package(${ARGN})
set(IOS TRUE)
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(IOS TRUE)
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
endmacro(find_host_package)
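# Usage sketch (the package below is only an example): find_host_package
# temporarily relaxes the CMAKE_FIND_ROOT_PATH_MODE_* settings so a host tool
# can be located during an iOS cross build, e.g.
#   find_host_package(Python3 COMPONENTS Interpreter)
# and then restores the ONLY modes so later find_package calls stay confined
# to the iOS SDK.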

View File

@ -1,34 +1,47 @@
# Build with Compute Library backend for the Arm architecture
# Note: Compute Library is available from: https://github.com/ARM-software/ComputeLibrary
# and must be built separately. The location of the Compute Library build
# must be set with the env var ACL_ROOT_DIR. This path will be checked later
# as part of FindACL.cmake in oneDNN.
# Build with Compute Library backend for the Arm architecture. Note: Compute
# Library is available from: https://github.com/ARM-software/ComputeLibrary and
# must be built separately. The location of the Compute Library build must be
# set with the env var ACL_ROOT_DIR. This path will be checked later as part of
# FindACL.cmake in oneDNN.
if(NOT USE_MKLDNN_ACL)
RETURN()
return()
endif()
set(DNNL_AARCH64_USE_ACL ON CACHE BOOL "" FORCE)
set(DNNL_AARCH64_USE_ACL
ON
CACHE BOOL "" FORCE)
# Check the Compute Library version number.
# Note: oneDNN / MKL-DNN v2.2 onwards will check the Compute Library version
# itself; the version check here can be removed once PyTorch transitions to v2.2.
# Check the Compute Library version number. Note: oneDNN / MKL-DNN v2.2 onwards
# will check the Compute Library version itself; the version check here can be
# removed once PyTorch transitions to v2.2.
set(ACL_MINIMUM_VERSION "21.02")
file(GLOB_RECURSE ACL_VERSION_FILE $ENV{ACL_ROOT_DIR}/*/arm_compute_version.embed)
file(GLOB_RECURSE ACL_VERSION_FILE
$ENV{ACL_ROOT_DIR}/*/arm_compute_version.embed)
if("${ACL_VERSION_FILE}" STREQUAL "")
message(WARNING "Build may fail: Could not determine ACL version (minimum required is ${ACL_MINIMUM_VERSION})")
message(
WARNING
"Build may fail: Could not determine ACL version (minimum required is ${ACL_MINIMUM_VERSION})"
)
else()
file(READ ${ACL_VERSION_FILE} ACL_VERSION_STRING)
string(REGEX MATCH "v([0-9]+\\.[0-9]+)" ACL_VERSION "${ACL_VERSION_STRING}")
set(ACL_VERSION "${CMAKE_MATCH_1}")
if("${ACL_VERSION}" VERSION_EQUAL "0.0")
# Unreleased ACL versions come with version string "v0.0-unreleased", and may not be compatible with oneDNN.
# It is recommended to use the latest release of ACL.
message(WARNING "Build may fail: Using unreleased ACL version (minimum required is ${ACL_MINIMUM_VERSION})")
# Unreleased ACL versions come with version string "v0.0-unreleased", and
# may not be compatible with oneDNN. It is recommended to use the latest
# release of ACL.
message(
WARNING
"Build may fail: Using unreleased ACL version (minimum required is ${ACL_MINIMUM_VERSION})"
)
elseif(${ACL_VERSION} VERSION_LESS ${ACL_MINIMUM_VERSION})
message(FATAL_ERROR "Detected ACL version ${ACL_VERSION}, but minimum required is ${ACL_MINIMUM_VERSION}")
message(
FATAL_ERROR
"Detected ACL version ${ACL_VERSION}, but minimum required is ${ACL_MINIMUM_VERSION}"
)
endif()
endif()
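# Minimal self-contained sketch of the parse above (the literal version string
# is made up for illustration): the embedded file carries text such as
# "arm_compute_version=v23.08 ...", and the REGEX MATCH captures the numeric
# part used in the VERSION_LESS comparison.
#   string(REGEX MATCH "v([0-9]+\\.[0-9]+)" _m "arm_compute_version=v23.08")
#   message(STATUS "Parsed ACL version: ${CMAKE_MATCH_1}") # prints 23.08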

View File

@ -1,16 +1,16 @@
set(PYTORCH_FOUND_HIP FALSE)
# If ROCM_PATH is set, assume intention is to compile with
# ROCm support and error out if the ROCM_PATH does not exist.
# Else ROCM_PATH does not exist, assume a default of /opt/rocm
# In the latter case, if /opt/rocm does not exist emit status
# message and return.
# If ROCM_PATH is set, assume the intention is to compile with ROCm support and
# error out if ROCM_PATH does not exist. Otherwise, assume a default of
# /opt/rocm; in that case, if /opt/rocm does not exist, emit a status message
# and return.
if(DEFINED ENV{ROCM_PATH})
set(ROCM_PATH $ENV{ROCM_PATH})
if(NOT EXISTS ${ROCM_PATH})
message(FATAL_ERROR
"ROCM_PATH environment variable is set to ${ROCM_PATH} but does not exist.\n"
"Set a valid ROCM_PATH or unset ROCM_PATH environment variable to fix.")
message(
FATAL_ERROR
"ROCM_PATH environment variable is set to ${ROCM_PATH} but does not exist.\n"
"Set a valid ROCM_PATH or unset ROCM_PATH environment variable to fix.")
endif()
else()
if(UNIX)
@ -19,7 +19,8 @@ else()
set(ROCM_PATH C:/opt/rocm)
endif()
if(NOT EXISTS ${ROCM_PATH})
message(STATUS
message(
STATUS
"ROCM_PATH environment variable is not set and ${ROCM_PATH} does not exist.\n"
"Building without ROCm support.")
return()
@ -46,12 +47,15 @@ endif()
torch_hip_get_arch_list(PYTORCH_ROCM_ARCH)
if(PYTORCH_ROCM_ARCH STREQUAL "")
message(FATAL_ERROR "No GPU arch specified for ROCm build. Please use PYTORCH_ROCM_ARCH environment variable to specify GPU archs to build for.")
message(
FATAL_ERROR
"No GPU arch specified for ROCm build. Please use PYTORCH_ROCM_ARCH environment variable to specify GPU archs to build for."
)
endif()
message("Building PyTorch for GPU arch: ${PYTORCH_ROCM_ARCH}")
# Add HIP to the CMAKE Module Path
# needed because the find_package call to this module uses the Module mode search
# Add HIP to the CMAKE Module Path; needed because the find_package call to
# this module uses the Module mode search
# https://cmake.org/cmake/help/latest/command/find_package.html#search-modes
if(UNIX)
set(CMAKE_MODULE_PATH ${ROCM_PATH}/lib/cmake/hip ${CMAKE_MODULE_PATH})
@ -59,8 +63,8 @@ else() # Win32
set(CMAKE_MODULE_PATH ${ROCM_PATH}/cmake/ ${CMAKE_MODULE_PATH})
endif()
# Add ROCM_PATH to CMAKE_PREFIX_PATH, needed because the find_package
# call to individual ROCM components uses the Config mode search
# Add ROCM_PATH to CMAKE_PREFIX_PATH, needed because the find_package call to
# individual ROCM components uses the Config mode search
list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH})
macro(find_package_and_print_version PACKAGE_NAME)
@ -75,28 +79,28 @@ macro(find_package_and_print_version PACKAGE_NAME)
endif()
endmacro()
# Find the HIP Package
# MODULE argument is added for clarity that CMake is searching
# for FindHIP.cmake in Module mode
# Find the HIP Package. The MODULE argument is added for clarity that CMake is
# searching for FindHIP.cmake in Module mode
find_package_and_print_version(HIP 1.0 MODULE)
if(HIP_FOUND)
set(PYTORCH_FOUND_HIP TRUE)
find_package_and_print_version(hip REQUIRED CONFIG)
# The rocm-core package was only introduced in ROCm 6.4, so we make it optional.
# The rocm-core package was only introduced in ROCm 6.4, so we make it
# optional.
find_package(rocm-core CONFIG)
# Some old consumer HIP SDKs do not distribute rocm_version.h, so we allow
# falling back to the hip version, which everyone should have.
# rocm_version.h lives in the rocm-core package and hip_version.h lives in the
# hip (lower-case) package. Both are probed above and will be in
# ROCM_INCLUDE_DIRS if available.
find_file(ROCM_VERSION_HEADER_PATH
# falling back to the hip version, which everyone should have. rocm_version.h
# lives in the rocm-core package and hip_version.h lives in the hip
# (lower-case) package. Both are probed above and will be in ROCM_INCLUDE_DIRS
# if available.
find_file(
ROCM_VERSION_HEADER_PATH
NAMES rocm-core/rocm_version.h hip/hip_version.h
NO_DEFAULT_PATH
PATHS ${ROCM_INCLUDE_DIRS}
)
PATHS ${ROCM_INCLUDE_DIRS})
if(ROCM_VERSION_HEADER_PATH MATCHES "rocm-core/rocm_version.h$")
set(ROCM_LIB_NAME "ROCM")
else()
@ -104,14 +108,20 @@ if(HIP_FOUND)
endif()
if(NOT ROCM_VERSION_HEADER_PATH)
message(FATAL_ERROR "Could not find hip/hip_version.h or rocm-core/rocm_version.h in ${ROCM_INCLUDE_DIRS}")
message(
FATAL_ERROR
"Could not find hip/hip_version.h or rocm-core/rocm_version.h in ${ROCM_INCLUDE_DIRS}"
)
endif()
get_filename_component(ROCM_HEADER_NAME ${ROCM_VERSION_HEADER_PATH} NAME)
if(EXISTS ${ROCM_VERSION_HEADER_PATH})
set(ROCM_HEADER_FILE ${ROCM_VERSION_HEADER_PATH})
else()
message(FATAL_ERROR "********************* ${ROCM_HEADER_NAME} could not be found ******************\n")
message(
FATAL_ERROR
"********************* ${ROCM_HEADER_NAME} could not be found ******************\n"
)
endif()
# Read the ROCM headerfile into a variable
@ -119,28 +129,34 @@ if(HIP_FOUND)
message(STATUS "Content: ${ROCM_HEADER_CONTENT}")
file(READ "${ROCM_HEADER_FILE}" ROCM_HEADER_CONTENT)
# Below we use a RegEx to find ROCM version numbers.
# Note that CMake does not support \s for blank space. That is
# why in the regular expressions below we have a blank space in
# the square brackets.
# There are three steps:
# 1. Match regular expression
# 2. Strip the non-numerical part of the string
# 3. Strip leading and trailing spaces
# Below we use a RegEx to find ROCM version numbers. Note that CMake does not
# support \s for blank space. That is why in the regular expressions below we
# have a blank space in the square brackets. There are three steps:
# 1. Match regular expression
# 2. Strip the non-numerical part of the string
# 3. Strip leading and trailing spaces
string(REGEX MATCH "${ROCM_LIB_NAME}_VERSION_MAJOR[ ]+[0-9]+" TEMP1 ${ROCM_HEADER_CONTENT})
string(REGEX MATCH "${ROCM_LIB_NAME}_VERSION_MAJOR[ ]+[0-9]+" TEMP1
${ROCM_HEADER_CONTENT})
string(REPLACE "${ROCM_LIB_NAME}_VERSION_MAJOR" "" TEMP2 ${TEMP1})
string(STRIP ${TEMP2} ROCM_VERSION_DEV_MAJOR)
string(REGEX MATCH "${ROCM_LIB_NAME}_VERSION_MINOR[ ]+[0-9]+" TEMP1 ${ROCM_HEADER_CONTENT})
string(REGEX MATCH "${ROCM_LIB_NAME}_VERSION_MINOR[ ]+[0-9]+" TEMP1
${ROCM_HEADER_CONTENT})
string(REPLACE "${ROCM_LIB_NAME}_VERSION_MINOR" "" TEMP2 ${TEMP1})
string(STRIP ${TEMP2} ROCM_VERSION_DEV_MINOR)
string(REGEX MATCH "${ROCM_LIB_NAME}_VERSION_PATCH[ ]+[0-9]+" TEMP1 ${ROCM_HEADER_CONTENT})
string(REGEX MATCH "${ROCM_LIB_NAME}_VERSION_PATCH[ ]+[0-9]+" TEMP1
${ROCM_HEADER_CONTENT})
string(REPLACE "${ROCM_LIB_NAME}_VERSION_PATCH" "" TEMP2 ${TEMP1})
string(STRIP ${TEMP2} ROCM_VERSION_DEV_PATCH)
# Create ROCM_VERSION_DEV_INT which is later used as a preprocessor macros
set(ROCM_VERSION_DEV "${ROCM_VERSION_DEV_MAJOR}.${ROCM_VERSION_DEV_MINOR}.${ROCM_VERSION_DEV_PATCH}")
math(EXPR ROCM_VERSION_DEV_INT "(${ROCM_VERSION_DEV_MAJOR}*10000) + (${ROCM_VERSION_DEV_MINOR}*100) + ${ROCM_VERSION_DEV_PATCH}")
set(ROCM_VERSION_DEV
"${ROCM_VERSION_DEV_MAJOR}.${ROCM_VERSION_DEV_MINOR}.${ROCM_VERSION_DEV_PATCH}"
)
math(
EXPR
ROCM_VERSION_DEV_INT
"(${ROCM_VERSION_DEV_MAJOR}*10000) + (${ROCM_VERSION_DEV_MINOR}*100) + ${ROCM_VERSION_DEV_PATCH}"
)
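# Worked example (values are hypothetical): a ROCm 6.2.4 header yields
# ROCM_VERSION_DEV "6.2.4" and ROCM_VERSION_DEV_INT 60204, i.e.
#   math(EXPR _example_int "(6 * 10000) + (2 * 100) + 4") # -> 60204
# which is the integer form consumed as a preprocessor macro later on.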
message("\n***** ROCm version from ${ROCM_HEADER_NAME} ****\n")
message("ROCM_VERSION_DEV: ${ROCM_VERSION_DEV}")
@ -149,13 +165,14 @@ if(HIP_FOUND)
message("ROCM_VERSION_DEV_PATCH: ${ROCM_VERSION_DEV_PATCH}")
message("ROCM_VERSION_DEV_INT: ${ROCM_VERSION_DEV_INT}")
math(EXPR TORCH_HIP_VERSION "(${HIP_VERSION_MAJOR} * 100) + ${HIP_VERSION_MINOR}")
math(EXPR TORCH_HIP_VERSION
"(${HIP_VERSION_MAJOR} * 100) + ${HIP_VERSION_MINOR}")
message("HIP_VERSION_MAJOR: ${HIP_VERSION_MAJOR}")
message("HIP_VERSION_MINOR: ${HIP_VERSION_MINOR}")
message("TORCH_HIP_VERSION: ${TORCH_HIP_VERSION}")
# Find ROCM components using Config mode
# These components will be searched for recursively in ${ROCM_PATH}
# Find ROCM components using Config mode. These components will be searched
# for recursively in ${ROCM_PATH}
message("\n***** Library versions from cmake find_package *****\n")
find_package_and_print_version(amd_comgr REQUIRED)
find_package_and_print_version(rocrand REQUIRED)
@ -187,7 +204,7 @@ if(HIP_FOUND)
endif()
# Optional components.
find_package_and_print_version(hipsparselt) # Will be required when ready.
find_package_and_print_version(hipsparselt) # Will be required when ready.
list(REMOVE_DUPLICATES ROCM_INCLUDE_DIRS)
@ -198,32 +215,40 @@ if(HIP_FOUND)
set(PROJECT_RANDOM_BINARY_DIR "${PROJECT_BINARY_DIR}")
if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "5.7.0")
# check whether hipblaslt provides HIPBLASLT_MATMUL_MATRIX_SCALE_OUTER_VEC_32F
# check whether hipblaslt provides
# HIPBLASLT_MATMUL_MATRIX_SCALE_OUTER_VEC_32F
set(file "${PROJECT_BINARY_DIR}/hipblaslt_test_outer_vec.cc")
file(WRITE ${file} ""
file(
WRITE ${file}
""
"#define LEGACY_HIPBLAS_DIRECT\n"
"#include <hipblaslt/hipblaslt.h>\n"
"int main() {\n"
" hipblasLtMatmulMatrixScale_t attr = HIPBLASLT_MATMUL_MATRIX_SCALE_OUTER_VEC_32F;\n"
" return 0;\n"
"}\n"
)
try_compile(hipblaslt_compile_result_outer_vec ${PROJECT_RANDOM_BINARY_DIR} ${file}
"}\n")
try_compile(
hipblaslt_compile_result_outer_vec ${PROJECT_RANDOM_BINARY_DIR}
${file}
CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${ROCM_INCLUDE_DIRS}"
COMPILE_DEFINITIONS -D__HIP_PLATFORM_AMD__ -D__HIP_PLATFORM_HCC__
OUTPUT_VARIABLE hipblaslt_compile_output_outer_vec)
# check whether hipblaslt provides HIPBLASLT_MATMUL_DESC_A_SCALE_POINTER_VEC_EXT
# check whether hipblaslt provides
# HIPBLASLT_MATMUL_DESC_A_SCALE_POINTER_VEC_EXT
set(file "${PROJECT_BINARY_DIR}/hipblaslt_test_vec_ext.cc")
file(WRITE ${file} ""
file(
WRITE ${file}
""
"#define LEGACY_HIPBLAS_DIRECT\n"
"#include <hipblaslt/hipblaslt.h>\n"
"int main() {\n"
" hipblasLtMatmulDescAttributes_t attr = HIPBLASLT_MATMUL_DESC_A_SCALE_POINTER_VEC_EXT;\n"
" return 0;\n"
"}\n"
)
try_compile(hipblaslt_compile_result_vec_ext ${PROJECT_RANDOM_BINARY_DIR} ${file}
"}\n")
try_compile(
hipblaslt_compile_result_vec_ext ${PROJECT_RANDOM_BINARY_DIR}
${file}
CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${ROCM_INCLUDE_DIRS}"
COMPILE_DEFINITIONS -D__HIP_PLATFORM_AMD__ -D__HIP_PLATFORM_HCC__
OUTPUT_VARIABLE hipblaslt_compile_output_vec_ext)
@ -239,8 +264,12 @@ if(HIP_FOUND)
else()
set(HIPBLASLT_OUTER_VEC OFF)
set(HIPBLASLT_VEC_EXT OFF)
message("hipblaslt is NOT using scale pointer outer vec: ${hipblaslt_compile_output_outer_vec}")
message("hipblaslt is NOT using scale pointer vec ext: ${hipblaslt_compile_output_vec_ext}")
message(
"hipblaslt is NOT using scale pointer outer vec: ${hipblaslt_compile_output_outer_vec}"
)
message(
"hipblaslt is NOT using scale pointer vec ext: ${hipblaslt_compile_output_vec_ext}"
)
endif()
endif()
endif()

View File

@ -6,46 +6,47 @@ if(TARGET torch::cudart)
endif()
# sccache is only supported in CMake master and not in the newest official
# release (3.11.3) yet. Hence we need our own Modules_CUDA_fix to enable sccache.
# release (3.11.3) yet. Hence we need our own Modules_CUDA_fix to enable
# sccache.
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/../Modules_CUDA_fix)
# We don't want to statically link cudart, because we rely on it's dynamic linkage in
# python (follow along torch/cuda/__init__.py and usage of cudaGetErrorName).
# Technically, we can link cudart here statically, and link libtorch_python.so
# to a dynamic libcudart.so, but that's just wasteful.
# However, on Windows, if this one gets switched off, the error "cuda: unknown error"
# will be raised when running the following code:
# >>> import torch
# >>> torch.cuda.is_available()
# >>> torch.cuda.current_device()
# More details can be found in the following links.
# https://github.com/pytorch/pytorch/issues/20635
# We don't want to statically link cudart, because we rely on its dynamic
# linkage in python (follow along torch/cuda/__init__.py and usage of
# cudaGetErrorName). Technically, we can link cudart here statically, and link
# libtorch_python.so to a dynamic libcudart.so, but that's just wasteful.
# However, on Windows, if this one gets switched off, the error "cuda: unknown
# error" will be raised when running the following code:
#   >>> import torch
#   >>> torch.cuda.is_available()
#   >>> torch.cuda.current_device()
# More details can be found in the following links.
# https://github.com/pytorch/pytorch/issues/20635
# https://github.com/pytorch/pytorch/issues/17108
if(NOT MSVC)
set(CUDA_USE_STATIC_CUDA_RUNTIME OFF CACHE INTERNAL "")
set(CUDA_USE_STATIC_CUDA_RUNTIME
OFF
CACHE INTERNAL "")
endif()
# Find CUDA.
find_package(CUDA)
if(NOT CUDA_FOUND)
message(WARNING
"PyTorch: CUDA cannot be found. Depending on whether you are building "
"PyTorch or a PyTorch dependent library, the next warning / error will "
"give you more info.")
message(
WARNING
"PyTorch: CUDA cannot be found. Depending on whether you are building "
"PyTorch or a PyTorch dependent library, the next warning / error will "
"give you more info.")
set(CAFFE2_USE_CUDA OFF)
return()
endif()
# Enable CUDA language support
set(CUDAToolkit_ROOT "${CUDA_TOOLKIT_ROOT_DIR}")
# Pass clang as host compiler, which according to the docs
# Must be done before CUDA language is enabled, see
# Pass clang as host compiler, which according to the docs must be done before
# the CUDA language is enabled, see
# https://cmake.org/cmake/help/v3.15/variable/CMAKE_CUDA_HOST_COMPILER.html
if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}")
endif()
enable_language(CUDA)
if("X${CMAKE_CUDA_STANDARD}" STREQUAL "X" )
if("X${CMAKE_CUDA_STANDARD}" STREQUAL "X")
set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD})
endif()
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
@ -61,9 +62,11 @@ find_package(CUDAToolkit REQUIRED)
cmake_policy(POP)
if(NOT CMAKE_CUDA_COMPILER_VERSION VERSION_EQUAL CUDAToolkit_VERSION)
message(FATAL_ERROR "Found two conflicting CUDA versions:\n"
"V${CMAKE_CUDA_COMPILER_VERSION} in '${CUDA_INCLUDE_DIRS}' and\n"
"V${CUDAToolkit_VERSION} in '${CUDAToolkit_INCLUDE_DIRS}'")
message(
FATAL_ERROR
"Found two conflicting CUDA versions:\n"
"V${CMAKE_CUDA_COMPILER_VERSION} in '${CUDA_INCLUDE_DIRS}' and\n"
"V${CUDAToolkit_VERSION} in '${CUDAToolkit_INCLUDE_DIRS}'")
endif()
message(STATUS "PyTorch: CUDA detected: " ${CUDA_VERSION})
@ -74,10 +77,10 @@ if(CUDA_VERSION VERSION_LESS 12.0)
endif()
if(CUDA_FOUND)
# Sometimes, we may mismatch nvcc with the CUDA headers we are
# compiling with, e.g., if a ccache nvcc is fed to us by CUDA_NVCC_EXECUTABLE
# but the PATH is not consistent with CUDA_HOME. It's better safe
# than sorry: make sure everything is consistent.
# Sometimes, we may mismatch nvcc with the CUDA headers we are compiling with,
# e.g., if a ccache nvcc is fed to us by CUDA_NVCC_EXECUTABLE but the PATH is
# not consistent with CUDA_HOME. It's better safe than sorry: make sure
# everything is consistent.
if(MSVC AND CMAKE_GENERATOR MATCHES "Visual Studio")
# When using Visual Studio, it attempts to lock the whole binary dir when
# `try_run` is called, which will cause the build to fail.
@ -87,40 +90,44 @@ if(CUDA_FOUND)
set(PROJECT_RANDOM_BINARY_DIR "${PROJECT_BINARY_DIR}")
endif()
set(file "${PROJECT_BINARY_DIR}/detect_cuda_version.cc")
file(WRITE ${file} ""
file(
WRITE ${file}
""
"#include <cuda.h>\n"
"#include <cstdio>\n"
"int main() {\n"
" printf(\"%d.%d\", CUDA_VERSION / 1000, (CUDA_VERSION / 10) % 100);\n"
" return 0;\n"
"}\n"
)
"}\n")
if(NOT CMAKE_CROSSCOMPILING)
try_run(run_result compile_result ${PROJECT_RANDOM_BINARY_DIR} ${file}
CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${CUDA_INCLUDE_DIRS}"
LINK_LIBRARIES ${CUDA_LIBRARIES}
try_run(
run_result compile_result ${PROJECT_RANDOM_BINARY_DIR} ${file}
CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${CUDA_INCLUDE_DIRS}" LINK_LIBRARIES
${CUDA_LIBRARIES}
RUN_OUTPUT_VARIABLE cuda_version_from_header
COMPILE_OUTPUT_VARIABLE output_var
)
COMPILE_OUTPUT_VARIABLE output_var)
if(NOT compile_result)
message(FATAL_ERROR "PyTorch: Couldn't determine version from header: " ${output_var})
message(FATAL_ERROR "PyTorch: Couldn't determine version from header: "
${output_var})
endif()
message(STATUS "PyTorch: Header version is: " ${cuda_version_from_header})
if(NOT cuda_version_from_header STREQUAL ${CUDA_VERSION_STRING})
# Force CUDA to be processed for again next time
# TODO: I'm not sure if this counts as an implementation detail of
# FindCUDA
# Force CUDA to be processed again next time. TODO: I'm not sure if this
# counts as an implementation detail of FindCUDA
set(cuda_version_from_findcuda ${CUDA_VERSION_STRING})
unset(CUDA_TOOLKIT_ROOT_DIR_INTERNAL CACHE)
# Not strictly necessary, but for good luck.
unset(CUDA_VERSION CACHE)
# Error out
message(FATAL_ERROR "FindCUDA says CUDA version is ${cuda_version_from_findcuda} (usually determined by nvcc), "
"but the CUDA headers say the version is ${cuda_version_from_header}. This often occurs "
"when you set both CUDA_HOME and CUDA_NVCC_EXECUTABLE to "
"non-standard locations, without also setting PATH to point to the correct nvcc. "
"Perhaps, try re-running this command again with PATH=${CUDA_TOOLKIT_ROOT_DIR}/bin:$PATH. "
"See above log messages for more diagnostics, and see https://github.com/pytorch/pytorch/issues/8092 for more details.")
message(
FATAL_ERROR
"FindCUDA says CUDA version is ${cuda_version_from_findcuda} (usually determined by nvcc), "
"but the CUDA headers say the version is ${cuda_version_from_header}. This often occurs "
"when you set both CUDA_HOME and CUDA_NVCC_EXECUTABLE to "
"non-standard locations, without also setting PATH to point to the correct nvcc. "
"Perhaps, try re-running this command again with PATH=${CUDA_TOOLKIT_ROOT_DIR}/bin:$PATH. "
"See above log messages for more diagnostics, and see https://github.com/pytorch/pytorch/issues/8092 for more details."
)
endif()
endif()
endif()
@ -128,12 +135,15 @@ endif()
# ---[ CUDA libraries wrapper
# find libnvrtc.so
set(CUDA_NVRTC_LIB "${CUDA_nvrtc_LIBRARY}" CACHE FILEPATH "")
set(CUDA_NVRTC_LIB
"${CUDA_nvrtc_LIBRARY}"
CACHE FILEPATH "")
if(CUDA_NVRTC_LIB AND NOT CUDA_NVRTC_SHORTHASH)
find_package(Python COMPONENTS Interpreter)
execute_process(
COMMAND Python::Interpreter -c
"import hashlib;hash=hashlib.sha256();hash.update(open('${CUDA_NVRTC_LIB}','rb').read());print(hash.hexdigest()[:8])"
COMMAND
Python::Interpreter -c
"import hashlib;hash=hashlib.sha256();hash.update(open('${CUDA_NVRTC_LIB}','rb').read());print(hash.hexdigest()[:8])"
RESULT_VARIABLE _retval
OUTPUT_VARIABLE CUDA_NVRTC_SHORTHASH)
if(NOT _retval EQUAL 0)
@ -145,61 +155,60 @@ if(CUDA_NVRTC_LIB AND NOT CUDA_NVRTC_SHORTHASH)
endif()
endif()
# Create new style imported libraries.
# Several of these libraries have a hardcoded path if CAFFE2_STATIC_LINK_CUDA
# is set. This path is where sane CUDA installations have their static
# libraries installed. This flag should only be used for binary builds, so
# end-users should never have this flag set.
# Create new style imported libraries. Several of these libraries have a
# hardcoded path if CAFFE2_STATIC_LINK_CUDA is set. This path is where sane CUDA
# installations have their static libraries installed. This flag should only be
# used for binary builds, so end-users should never have this flag set.
# cuda
add_library(caffe2::cuda INTERFACE IMPORTED)
set_property(
TARGET caffe2::cuda PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::cuda_driver)
set_property(TARGET caffe2::cuda PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::cuda_driver)
# cudart
add_library(torch::cudart INTERFACE IMPORTED)
if(CAFFE2_STATIC_LINK_CUDA)
set_property(
TARGET torch::cudart PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::cudart_static)
set_property(TARGET torch::cudart PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::cudart_static)
else()
set_property(
TARGET torch::cudart PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::cudart)
set_property(TARGET torch::cudart PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::cudart)
endif()
# cublas
add_library(caffe2::cublas INTERFACE IMPORTED)
if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
set_property(
TARGET caffe2::cublas PROPERTY INTERFACE_LINK_LIBRARIES
# NOTE: cublas is always linked dynamically
CUDA::cublas CUDA::cublasLt)
set_property(
TARGET caffe2::cublas APPEND PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::cudart_static rt)
set_property(
TARGET caffe2::cublas
PROPERTY INTERFACE_LINK_LIBRARIES
# NOTE: cublas is always linked dynamically
CUDA::cublas CUDA::cublasLt)
set_property(
TARGET caffe2::cublas
APPEND
PROPERTY INTERFACE_LINK_LIBRARIES CUDA::cudart_static rt)
else()
set_property(
TARGET caffe2::cublas PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::cublas CUDA::cublasLt)
set_property(TARGET caffe2::cublas PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::cublas CUDA::cublasLt)
endif()
# cudnn interface
# static linking is handled by USE_STATIC_CUDNN environment variable
# cudnn interface; static linking is handled by the USE_STATIC_CUDNN
# environment variable
if(CAFFE2_USE_CUDNN)
if(USE_STATIC_CUDNN)
set(CUDNN_STATIC ON CACHE BOOL "")
set(CUDNN_STATIC
ON
CACHE BOOL "")
else()
set(CUDNN_STATIC OFF CACHE BOOL "")
set(CUDNN_STATIC
OFF
CACHE BOOL "")
endif()
find_package(CUDNN)
if(NOT CUDNN_FOUND)
message(WARNING
"Cannot find cuDNN library. Turning the option off")
message(WARNING "Cannot find cuDNN library. Turning the option off")
set(CAFFE2_USE_CUDNN OFF)
else()
if(CUDNN_VERSION VERSION_LESS "8.1.0")
@ -211,7 +220,7 @@ if(CAFFE2_USE_CUDNN)
target_include_directories(torch::cudnn INTERFACE ${CUDNN_INCLUDE_PATH})
if(CUDNN_STATIC AND NOT WIN32)
target_link_options(torch::cudnn INTERFACE
"-Wl,--exclude-libs,libcudnn_static.a")
"-Wl,--exclude-libs,libcudnn_static.a")
else()
target_link_libraries(torch::cudnn INTERFACE ${CUDNN_LIBRARY_PATH})
endif()
@ -223,24 +232,25 @@ if(CAFFE2_USE_CUSPARSELT)
find_package(CUSPARSELT)
if(NOT CUSPARSELT_FOUND)
message(WARNING
"Cannot find cuSPARSELt library. Turning the option off")
message(WARNING "Cannot find cuSPARSELt library. Turning the option off")
set(CAFFE2_USE_CUSPARSELT OFF)
else()
add_library(torch::cusparselt INTERFACE IMPORTED)
target_include_directories(torch::cusparselt INTERFACE ${CUSPARSELT_INCLUDE_PATH})
target_link_libraries(torch::cusparselt INTERFACE ${CUSPARSELT_LIBRARY_PATH})
target_include_directories(torch::cusparselt
INTERFACE ${CUSPARSELT_INCLUDE_PATH})
target_link_libraries(torch::cusparselt
INTERFACE ${CUSPARSELT_LIBRARY_PATH})
endif()
else()
message(STATUS "USE_CUSPARSELT is set to 0. Compiling without cuSPARSELt support")
message(
STATUS "USE_CUSPARSELT is set to 0. Compiling without cuSPARSELt support")
endif()
if(USE_CUDSS)
find_package(CUDSS)
if(NOT CUDSS_FOUND)
message(WARNING
"Cannot find CUDSS library. Turning the option off")
message(WARNING "Cannot find CUDSS library. Turning the option off")
set(USE_CUDSS OFF)
else()
add_library(torch::cudss INTERFACE IMPORTED)
@ -255,13 +265,11 @@ endif()
if(CAFFE2_USE_CUFILE)
add_library(torch::cufile INTERFACE IMPORTED)
if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
set_property(
TARGET torch::cufile PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::cuFile_static)
set_property(TARGET torch::cufile PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::cuFile_static)
else()
set_property(
TARGET torch::cufile PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::cuFile)
set_property(TARGET torch::cufile PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::cuFile)
endif()
else()
message(STATUS "USE_CUFILE is set to 0. Compiling without cuFile support")
@ -270,32 +278,27 @@ endif()
# curand
add_library(caffe2::curand INTERFACE IMPORTED)
if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
set_property(
TARGET caffe2::curand PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::curand_static)
set_property(TARGET caffe2::curand PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::curand_static)
else()
set_property(
TARGET caffe2::curand PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::curand)
set_property(TARGET caffe2::curand PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::curand)
endif()
# cufft
add_library(caffe2::cufft INTERFACE IMPORTED)
if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
set_property(
TARGET caffe2::cufft PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::cufft_static_nocallback)
set_property(TARGET caffe2::cufft PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::cufft_static_nocallback)
else()
set_property(
TARGET caffe2::cufft PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::cufft)
set_property(TARGET caffe2::cufft PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::cufft)
endif()
# nvrtc
add_library(caffe2::nvrtc INTERFACE IMPORTED)
set_property(
TARGET caffe2::nvrtc PROPERTY INTERFACE_LINK_LIBRARIES
CUDA::nvrtc caffe2::cuda)
set_property(TARGET caffe2::nvrtc PROPERTY INTERFACE_LINK_LIBRARIES CUDA::nvrtc
caffe2::cuda)
# Add onnx namespace definition to nvcc
if(ONNX_NAMESPACE)
@ -304,19 +307,23 @@ else()
list(APPEND CUDA_NVCC_FLAGS "-DONNX_NAMESPACE=onnx_c2")
endif()
# Don't activate VC env again for Ninja generators with MSVC on Windows if CUDAHOSTCXX is not defined
# by adding --use-local-env.
if(MSVC AND CMAKE_GENERATOR STREQUAL "Ninja" AND NOT DEFINED ENV{CUDAHOSTCXX})
# Don't activate VC env again for Ninja generators with MSVC on Windows if
# CUDAHOSTCXX is not defined by adding --use-local-env.
if(MSVC
AND CMAKE_GENERATOR STREQUAL "Ninja"
AND NOT DEFINED ENV{CUDAHOSTCXX})
list(APPEND CUDA_NVCC_FLAGS "--use-local-env")
endif()
# setting nvcc arch flags
torch_cuda_get_nvcc_gencode_flag(NVCC_FLAGS_EXTRA)
# CMake 3.18 adds integrated support for architecture selection, but we can't rely on it
# CMake 3.18 adds integrated support for architecture selection, but we can't
# rely on it
if(DEFINED CMAKE_CUDA_ARCHITECTURES)
message(WARNING
"pytorch is not compatible with `CMAKE_CUDA_ARCHITECTURES` and will ignore its value. "
"Please configure `TORCH_CUDA_ARCH_LIST` instead.")
message(
WARNING
"pytorch is not compatible with `CMAKE_CUDA_ARCHITECTURES` and will ignore its value. "
"Please configure `TORCH_CUDA_ARCH_LIST` instead.")
set(CMAKE_CUDA_ARCHITECTURES OFF)
endif()
@ -324,12 +331,11 @@ list(APPEND CUDA_NVCC_FLAGS ${NVCC_FLAGS_EXTRA})
message(STATUS "Added CUDA NVCC flags for: ${NVCC_FLAGS_EXTRA}")
# disable some nvcc diagnostics that appear in boost, glog, gflags, opencv, etc.
foreach(diag cc_clobber_ignored
field_without_dll_interface
base_class_has_different_dll_interface
dll_interface_conflict_none_assumed
dll_interface_conflict_dllexport_assumed
bad_friend_decl)
foreach(
diag
cc_clobber_ignored field_without_dll_interface
base_class_has_different_dll_interface dll_interface_conflict_none_assumed
dll_interface_conflict_dllexport_assumed bad_friend_decl)
list(APPEND SUPPRESS_WARNING_FLAGS --diag_suppress=${diag})
endforeach()
string(REPLACE ";" "," SUPPRESS_WARNING_FLAGS "${SUPPRESS_WARNING_FLAGS}")
@ -358,7 +364,8 @@ if(MSVC)
list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "-FS")
endif()
elseif(CUDA_DEVICE_DEBUG)
list(APPEND CUDA_NVCC_FLAGS "-g" "-G") # -G enables device code debugging symbols
list(APPEND CUDA_NVCC_FLAGS "-g" "-G") # -G enables device code debugging
# symbols
endif()
# Set expt-relaxed-constexpr to suppress Eigen warnings

View File

@ -11,72 +11,67 @@ if(TARGET gflags)
elseif(GFLAGS_FOUND)
message(STATUS "Caffe2: Found gflags with old-style gflag starget.")
add_library(gflags UNKNOWN IMPORTED)
set_property(
TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARY})
set_property(
TARGET gflags PROPERTY INTERFACE_INCLUDE_DIRECTORIES
${GFLAGS_INCLUDE_DIR})
set_property(TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARY})
set_property(TARGET gflags PROPERTY INTERFACE_INCLUDE_DIRECTORIES
${GFLAGS_INCLUDE_DIR})
else()
message(STATUS
"Caffe2: Cannot find gflags automatically. Using legacy find.")
message(STATUS "Caffe2: Cannot find gflags automatically. Using legacy find.")
# - Try to find GFLAGS in the legacy way.
# * Try to find GFLAGS in the legacy way.
#
# The following variables are optionally searched for defaults
# GFLAGS_ROOT_DIR: Base directory where all GFLAGS components are found
# GFLAGS_ROOT_DIR: Base directory where all GFLAGS components are found
#
# The following are set after configuration is done:
# GFLAGS_FOUND
# GFLAGS_INCLUDE_DIRS
# GFLAGS_LIBRARIES
# GFLAGS_LIBRARY_DIRS
# The following are set after configuration is done: GFLAGS_FOUND
# GFLAGS_INCLUDE_DIRS GFLAGS_LIBRARIES GFLAGS_LIBRARY_DIRS
include(FindPackageHandleStandardArgs)
set(GFLAGS_ROOT_DIR "" CACHE PATH "Folder contains Gflags")
set(GFLAGS_ROOT_DIR
""
CACHE PATH "Folder contains Gflags")
# We are testing only a couple of files in the include directories
if(WIN32)
find_path(GFLAGS_INCLUDE_DIR gflags/gflags.h
PATHS ${GFLAGS_ROOT_DIR}/src/windows)
PATHS ${GFLAGS_ROOT_DIR}/src/windows)
else()
find_path(GFLAGS_INCLUDE_DIR gflags/gflags.h
PATHS ${GFLAGS_ROOT_DIR})
find_path(GFLAGS_INCLUDE_DIR gflags/gflags.h PATHS ${GFLAGS_ROOT_DIR})
endif()
if(WIN32)
find_library(GFLAGS_LIBRARY_RELEASE
NAMES libgflags
PATHS ${GFLAGS_ROOT_DIR}
PATH_SUFFIXES Release)
find_library(
GFLAGS_LIBRARY_RELEASE
NAMES libgflags
PATHS ${GFLAGS_ROOT_DIR}
PATH_SUFFIXES Release)
find_library(GFLAGS_LIBRARY_DEBUG
NAMES libgflags-debug
PATHS ${GFLAGS_ROOT_DIR}
PATH_SUFFIXES Debug)
set(GFLAGS_LIBRARY optimized ${GFLAGS_LIBRARY_RELEASE} debug ${GFLAGS_LIBRARY_DEBUG})
find_library(
GFLAGS_LIBRARY_DEBUG
NAMES libgflags-debug
PATHS ${GFLAGS_ROOT_DIR}
PATH_SUFFIXES Debug)
set(GFLAGS_LIBRARY optimized ${GFLAGS_LIBRARY_RELEASE} debug
${GFLAGS_LIBRARY_DEBUG})
else()
find_library(GFLAGS_LIBRARY gflags)
endif()
find_package_handle_standard_args(
gflags DEFAULT_MSG GFLAGS_INCLUDE_DIR GFLAGS_LIBRARY)
find_package_handle_standard_args(gflags DEFAULT_MSG GFLAGS_INCLUDE_DIR
GFLAGS_LIBRARY)
if(GFLAGS_FOUND)
message(
STATUS
"Caffe2: Found gflags (include: ${GFLAGS_INCLUDE_DIR}, "
"library: ${GFLAGS_LIBRARY})")
message(STATUS "Caffe2: Found gflags (include: ${GFLAGS_INCLUDE_DIR}, "
"library: ${GFLAGS_LIBRARY})")
add_library(gflags UNKNOWN IMPORTED)
set_property(
TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARY})
set_property(
TARGET gflags PROPERTY INTERFACE_INCLUDE_DIRECTORIES
${GFLAGS_INCLUDE_DIR})
set_property(TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARY})
set_property(TARGET gflags PROPERTY INTERFACE_INCLUDE_DIRECTORIES
${GFLAGS_INCLUDE_DIR})
endif()
endif()
# After above, we should have the gflags target now.
if(NOT TARGET gflags)
message(WARNING
message(
WARNING
"Caffe2: gflags cannot be found. Depending on whether you are building "
"Caffe2 or a Caffe2 dependent library, the next warning / error will "
"give you more info.")

View File

@ -10,60 +10,56 @@ if(TARGET glog::glog)
message(STATUS "Caffe2: Found glog with new-style glog target.")
elseif(GLOG_FOUND)
message(
STATUS
STATUS
"Caffe2: Found glog with old-style glog starget. Glog never shipped "
"old style glog targets, so somewhere in your cmake path there might "
"be a custom Findglog.cmake file that got triggered. We will make a "
"best effort to create the new style glog target for you.")
add_library(glog::glog UNKNOWN IMPORTED)
set_property(
TARGET glog::glog PROPERTY IMPORTED_LOCATION ${GLOG_LIBRARY})
set_property(
TARGET glog::glog PROPERTY INTERFACE_INCLUDE_DIRECTORIES
${GLOG_INCLUDE_DIR})
set_property(TARGET glog::glog PROPERTY IMPORTED_LOCATION ${GLOG_LIBRARY})
set_property(TARGET glog::glog PROPERTY INTERFACE_INCLUDE_DIRECTORIES
${GLOG_INCLUDE_DIR})
else()
message(STATUS "Caffe2: Cannot find glog automatically. Using legacy find.")
# - Try to find Glog
# * Try to find Glog
#
# The following variables are optionally searched for defaults
# GLOG_ROOT_DIR: Base directory where all GLOG components are found
# The following variables are optionally searched for defaults:
# GLOG_ROOT_DIR: Base directory where all GLOG components are found
#
# The following are set after configuration is done:
# GLOG_FOUND
# GLOG_INCLUDE_DIRS
# GLOG_LIBRARIES
# GLOG_LIBRARY_DIRS
# The following are set after configuration is done: GLOG_FOUND
# GLOG_INCLUDE_DIRS GLOG_LIBRARIES GLOG_LIBRARY_DIRS
include(FindPackageHandleStandardArgs)
set(GLOG_ROOT_DIR "" CACHE PATH "Folder contains Google glog")
set(GLOG_ROOT_DIR
""
CACHE PATH "Folder contains Google glog")
if(NOT WIN32)
find_path(GLOG_INCLUDE_DIR glog/logging.h
PATHS ${GLOG_ROOT_DIR})
find_path(GLOG_INCLUDE_DIR glog/logging.h PATHS ${GLOG_ROOT_DIR})
endif()
find_library(GLOG_LIBRARY glog
PATHS ${GLOG_ROOT_DIR}
PATH_SUFFIXES lib lib64)
find_library(
GLOG_LIBRARY glog
PATHS ${GLOG_ROOT_DIR}
PATH_SUFFIXES lib lib64)
find_package_handle_standard_args(glog DEFAULT_MSG GLOG_INCLUDE_DIR GLOG_LIBRARY)
find_package_handle_standard_args(glog DEFAULT_MSG GLOG_INCLUDE_DIR
GLOG_LIBRARY)
if(GLOG_FOUND)
message(STATUS
"Caffe2: Found glog (include: ${GLOG_INCLUDE_DIR}, "
"library: ${GLOG_LIBRARY})")
message(STATUS "Caffe2: Found glog (include: ${GLOG_INCLUDE_DIR}, "
"library: ${GLOG_LIBRARY})")
add_library(glog::glog UNKNOWN IMPORTED)
set_property(
TARGET glog::glog PROPERTY IMPORTED_LOCATION ${GLOG_LIBRARY})
set_property(
TARGET glog::glog PROPERTY INTERFACE_INCLUDE_DIRECTORIES
${GLOG_INCLUDE_DIR})
set_property(TARGET glog::glog PROPERTY IMPORTED_LOCATION ${GLOG_LIBRARY})
set_property(TARGET glog::glog PROPERTY INTERFACE_INCLUDE_DIRECTORIES
${GLOG_INCLUDE_DIR})
endif()
endif()
# After above, we should have the glog::glog target now.
if(NOT TARGET glog::glog)
message(WARNING
message(
WARNING
"Caffe2: glog cannot be found. Depending on whether you are building "
"Caffe2 or a Caffe2 dependent library, the next warning / error will "
"give you more info.")

View File

@ -16,11 +16,12 @@ foreach(MKL_LIB IN LISTS MKL_LIBRARIES)
endif()
endforeach()
# TODO: This is a hack, it will not pick up architecture dependent
# MKL libraries correctly; see https://github.com/pytorch/pytorch/issues/73008
# TODO: This is a hack, it will not pick up architecture dependent MKL libraries
# correctly; see https://github.com/pytorch/pytorch/issues/73008
set_property(
TARGET caffe2::mkl PROPERTY INTERFACE_LINK_DIRECTORIES
${MKL_ROOT}/lib ${MKL_ROOT}/lib/intel64 ${MKL_ROOT}/lib/intel64_win ${MKL_ROOT}/lib/win-x64)
TARGET caffe2::mkl
PROPERTY INTERFACE_LINK_DIRECTORIES ${MKL_ROOT}/lib ${MKL_ROOT}/lib/intel64
${MKL_ROOT}/lib/intel64_win ${MKL_ROOT}/lib/win-x64)
if(UNIX)
if(USE_STATIC_MKL)
@ -33,7 +34,8 @@ if(UNIX)
# Match archive libraries starting with "libmkl_"
if(MKL_LIB_NAME MATCHES "^libmkl_" AND MKL_LIB_NAME MATCHES ".a$")
target_link_options(caffe2::mkl INTERFACE "-Wl,--exclude-libs,${MKL_LIB_NAME}")
target_link_options(caffe2::mkl INTERFACE
"-Wl,--exclude-libs,${MKL_LIB_NAME}")
endif()
endforeach()
endif()

View File

@ -10,9 +10,7 @@ if(NOT TARGET caffe2::mkldnn)
add_library(caffe2::mkldnn INTERFACE IMPORTED)
endif()
set_property(
TARGET caffe2::mkldnn PROPERTY INTERFACE_INCLUDE_DIRECTORIES
${MKLDNN_INCLUDE_DIR})
set_property(
TARGET caffe2::mkldnn PROPERTY INTERFACE_LINK_LIBRARIES
${MKLDNN_LIBRARIES})
set_property(TARGET caffe2::mkldnn PROPERTY INTERFACE_INCLUDE_DIRECTORIES
${MKLDNN_INCLUDE_DIR})
set_property(TARGET caffe2::mkldnn PROPERTY INTERFACE_LINK_LIBRARIES
${MKLDNN_LIBRARIES})

View File

@ -6,71 +6,92 @@ if(NOT Protobuf_FOUND)
find_package(Protobuf MODULE QUIET)
endif()
if((TARGET protobuf::libprotobuf OR TARGET protobuf::libprotobuf-lite) AND TARGET protobuf::protoc)
if((TARGET protobuf::libprotobuf OR TARGET protobuf::libprotobuf-lite)
AND TARGET protobuf::protoc)
# Hooray. This is the most ideal situation, meaning that you either have a
# Protobuf config file installed (like on Windows), or you are using a
# modern CMake that ships with a FindProtobuf.cmake file that produces
# modern targets.
# Protobuf config file installed (like on Windows), or you are using a modern
# CMake that ships with a FindProtobuf.cmake file that produces modern
# targets.
message(STATUS "Caffe2: Found protobuf with new-style protobuf targets.")
elseif(Protobuf_FOUND OR PROTOBUF_FOUND)
# If the modern targets are not present, we will generate them for you for
# backward compatibility. This is backported from CMake's new FindProtobuf.cmake
# content.
# backward compatibility. This is backported from CMake's new
# FindProtobuf.cmake content.
if((NOT PROTOBUF_LIBRARY) AND (NOT PROTOBUF_LITE_LIBRARY))
message(FATAL_ERROR
message(
FATAL_ERROR
"Caffe2: Found protobuf with old style targets, but could not find targets."
" PROTOBUF_LIBRARY: " ${PROTOBUF_LIBRARY}
" PROTOBUF_LITE_LIBRARY: " ${PROTOBUF_LITE_LIBRARY}
" Protobuf_LIBRARY: " ${Protobuf_LIBRARY}
" Protobuf_LITE_LIBRARY: " ${Protobuf_LITE_LIBRARY})
" PROTOBUF_LIBRARY: "
${PROTOBUF_LIBRARY}
" PROTOBUF_LITE_LIBRARY: "
${PROTOBUF_LITE_LIBRARY}
" Protobuf_LIBRARY: "
${Protobuf_LIBRARY}
" Protobuf_LITE_LIBRARY: "
${Protobuf_LITE_LIBRARY})
endif()
message(STATUS "Caffe2: Found protobuf with old-style protobuf targets.")
if(PROTOBUF_LIBRARY)
if(NOT TARGET protobuf::libprotobuf)
add_library(protobuf::libprotobuf UNKNOWN IMPORTED)
set_target_properties(protobuf::libprotobuf PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${PROTOBUF_INCLUDE_DIRS}")
set_target_properties(
protobuf::libprotobuf PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
"${PROTOBUF_INCLUDE_DIRS}")
endif()
if(EXISTS "${PROTOBUF_LIBRARY}")
set_target_properties(protobuf::libprotobuf PROPERTIES
IMPORTED_LOCATION "${PROTOBUF_LIBRARY}")
set_target_properties(protobuf::libprotobuf
PROPERTIES IMPORTED_LOCATION "${PROTOBUF_LIBRARY}")
endif()
if(EXISTS "${PROTOBUF_LIBRARY_RELEASE}")
set_property(TARGET protobuf::libprotobuf APPEND PROPERTY
IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(protobuf::libprotobuf PROPERTIES
IMPORTED_LOCATION_RELEASE "${PROTOBUF_LIBRARY_RELEASE}")
set_property(
TARGET protobuf::libprotobuf
APPEND
PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(
protobuf::libprotobuf PROPERTIES IMPORTED_LOCATION_RELEASE
"${PROTOBUF_LIBRARY_RELEASE}")
endif()
if(EXISTS "${PROTOBUF_LIBRARY_DEBUG}")
set_property(TARGET protobuf::libprotobuf APPEND PROPERTY
IMPORTED_CONFIGURATIONS DEBUG)
set_target_properties(protobuf::libprotobuf PROPERTIES
IMPORTED_LOCATION_DEBUG "${PROTOBUF_LIBRARY_DEBUG}")
set_property(
TARGET protobuf::libprotobuf
APPEND
PROPERTY IMPORTED_CONFIGURATIONS DEBUG)
set_target_properties(
protobuf::libprotobuf PROPERTIES IMPORTED_LOCATION_DEBUG
"${PROTOBUF_LIBRARY_DEBUG}")
endif()
endif()
if(PROTOBUF_LITE_LIBRARY)
if(NOT TARGET protobuf::libprotobuf-lite)
add_library(protobuf::libprotobuf-lite UNKNOWN IMPORTED)
set_target_properties(protobuf::libprotobuf-lite PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${PROTOBUF_INCLUDE_DIRS}")
set_target_properties(
protobuf::libprotobuf-lite PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
"${PROTOBUF_INCLUDE_DIRS}")
endif()
if(EXISTS "${PROTOBUF_LITE_LIBRARY}")
set_target_properties(protobuf::libprotobuf-lite PROPERTIES
IMPORTED_LOCATION "${PROTOBUF_LITE_LIBRARY}")
set_target_properties(
protobuf::libprotobuf-lite PROPERTIES IMPORTED_LOCATION
"${PROTOBUF_LITE_LIBRARY}")
endif()
if(EXISTS "${PROTOBUF_LITE_LIBRARY_RELEASE}")
set_property(TARGET protobuf::libprotobuf-lite APPEND PROPERTY
IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(protobuf::libprotobuf-lite PROPERTIES
IMPORTED_LOCATION_RELEASE "${PROTOBUF_LITE_LIBRARY_RELEASE}")
set_property(
TARGET protobuf::libprotobuf-lite
APPEND
PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(
protobuf::libprotobuf-lite
PROPERTIES IMPORTED_LOCATION_RELEASE "${PROTOBUF_LITE_LIBRARY_RELEASE}")
endif()
if(EXISTS "${PROTOBUF_LITE_LIBRARY_DEBUG}")
set_property(TARGET protobuf::libprotobuf-lite APPEND PROPERTY
IMPORTED_CONFIGURATIONS DEBUG)
set_target_properties(protobuf::libprotobuf-lite PROPERTIES
IMPORTED_LOCATION_DEBUG "${PROTOBUF_LITE_LIBRARY_DEBUG}")
set_property(
TARGET protobuf::libprotobuf-lite
APPEND
PROPERTY IMPORTED_CONFIGURATIONS DEBUG)
set_target_properties(
protobuf::libprotobuf-lite PROPERTIES IMPORTED_LOCATION_DEBUG
"${PROTOBUF_LITE_LIBRARY_DEBUG}")
endif()
endif()
@ -78,14 +99,16 @@ elseif(Protobuf_FOUND OR PROTOBUF_FOUND)
if(NOT TARGET protobuf::protoc)
add_executable(protobuf::protoc IMPORTED)
endif()
set_property(TARGET protobuf::protoc PROPERTY
IMPORTED_LOCATION ${PROTOBUF_PROTOC_EXECUTABLE})
set_property(TARGET protobuf::protoc PROPERTY IMPORTED_LOCATION
${PROTOBUF_PROTOC_EXECUTABLE})
endif()
endif()
# After above, we should have the protobuf related target now.
if((NOT TARGET protobuf::libprotobuf) AND (NOT TARGET protobuf::libprotobuf-lite))
message(WARNING
if((NOT TARGET protobuf::libprotobuf) AND (NOT TARGET protobuf::libprotobuf-lite
))
message(
WARNING
"Protobuf cannot be found. Depending on whether you are building Caffe2 "
"or a Caffe2 dependent library, the next warning / error will give you "
"more info.")

View File

@ -1,37 +1,51 @@
################################################################################################
# ##############################################################################
# Exclude and prepend functionalities
function(exclude OUTPUT INPUT)
set(EXCLUDES ${ARGN})
foreach(EXCLUDE ${EXCLUDES})
list(REMOVE_ITEM INPUT "${EXCLUDE}")
endforeach()
set(${OUTPUT} ${INPUT} PARENT_SCOPE)
set(EXCLUDES ${ARGN})
foreach(EXCLUDE ${EXCLUDES})
list(REMOVE_ITEM INPUT "${EXCLUDE}")
endforeach()
set(${OUTPUT}
${INPUT}
PARENT_SCOPE)
endfunction(exclude)
function(prepend OUTPUT PREPEND)
set(OUT "")
foreach(ITEM ${ARGN})
list(APPEND OUT "${PREPEND}${ITEM}")
endforeach()
set(${OUTPUT} ${OUT} PARENT_SCOPE)
set(OUT "")
foreach(ITEM ${ARGN})
list(APPEND OUT "${PREPEND}${ITEM}")
endforeach()
set(${OUTPUT}
${OUT}
PARENT_SCOPE)
endfunction(prepend)
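# Usage sketch for the two helpers above (the file names are illustrative):
#   set(_srcs a.cc b.cc c.cc)
#   exclude(_srcs "${_srcs}" b.cc)   # _srcs is now "a.cc;c.cc"
#   prepend(_abs_srcs "${CMAKE_CURRENT_SOURCE_DIR}/" ${_srcs})
#   # _abs_srcs is the same list with the source dir prefixed to each entry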
################################################################################################
# ##############################################################################
# Parses a version string that might have values beyond major, minor, and patch
# and sets version variables for the library.
# Usage:
# caffe2_parse_version_str(<library_name> <version_string>)
# and sets version variables for the library. Usage:
# caffe2_parse_version_str(<library_name> <version_string>)
function(caffe2_parse_version_str LIBNAME VERSIONSTR)
string(REGEX REPLACE "^([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_MAJOR "${VERSIONSTR}")
string(REGEX REPLACE "^[0-9]+\\.([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_MINOR "${VERSIONSTR}")
string(REGEX REPLACE "[0-9]+\\.[0-9]+\\.([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_PATCH "${VERSIONSTR}")
set(${LIBNAME}_VERSION_MAJOR ${${LIBNAME}_VERSION_MAJOR} ${ARGN} PARENT_SCOPE)
set(${LIBNAME}_VERSION_MINOR ${${LIBNAME}_VERSION_MINOR} ${ARGN} PARENT_SCOPE)
set(${LIBNAME}_VERSION_PATCH ${${LIBNAME}_VERSION_PATCH} ${ARGN} PARENT_SCOPE)
set(${LIBNAME}_VERSION "${${LIBNAME}_VERSION_MAJOR}.${${LIBNAME}_VERSION_MINOR}.${${LIBNAME}_VERSION_PATCH}" PARENT_SCOPE)
string(REGEX REPLACE "^([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_MAJOR
"${VERSIONSTR}")
string(REGEX REPLACE "^[0-9]+\\.([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_MINOR
"${VERSIONSTR}")
string(REGEX REPLACE "[0-9]+\\.[0-9]+\\.([0-9]+).*$" "\\1"
${LIBNAME}_VERSION_PATCH "${VERSIONSTR}")
set(${LIBNAME}_VERSION_MAJOR
${${LIBNAME}_VERSION_MAJOR} ${ARGN}
PARENT_SCOPE)
set(${LIBNAME}_VERSION_MINOR
${${LIBNAME}_VERSION_MINOR} ${ARGN}
PARENT_SCOPE)
set(${LIBNAME}_VERSION_PATCH
${${LIBNAME}_VERSION_PATCH} ${ARGN}
PARENT_SCOPE)
set(${LIBNAME}_VERSION
"${${LIBNAME}_VERSION_MAJOR}.${${LIBNAME}_VERSION_MINOR}.${${LIBNAME}_VERSION_PATCH}"
PARENT_SCOPE)
endfunction()
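# For example (version string chosen for illustration),
#   caffe2_parse_version_str(FOO "1.12.3-rc1")
# sets FOO_VERSION_MAJOR to 1, FOO_VERSION_MINOR to 12, FOO_VERSION_PATCH to 3
# and FOO_VERSION to "1.12.3" in the caller's scope, dropping the pre-release
# suffix.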
###
#
# Removes common indentation from a block of text to produce code suitable for
# setting to `python -c`, or using with pycmd. This allows multiline code to be
# nested nicely in the surrounding code structure.
@ -39,9 +53,8 @@ endfunction()
# This function respects Python_EXECUTABLE if it is defined, otherwise it uses
# `python` and hopes for the best. An error will be thrown if it is not found.
#
# Args:
# outvar : variable that will hold the stdout of the python command
# text : text to remove indentation from
# Args:
#   outvar : variable that will hold the stdout of the python command
#   text   : text to remove indentation from
#
function(dedent outvar text)
# Use Python_EXECUTABLE if it is defined, otherwise default to python
@ -50,7 +63,9 @@ function(dedent outvar text)
else()
set(_python_exe "${Python_EXECUTABLE}")
endif()
set(_fixup_cmd "import sys; from textwrap import dedent; print(dedent(sys.stdin.read()))")
set(_fixup_cmd
"import sys; from textwrap import dedent; print(dedent(sys.stdin.read()))"
)
file(WRITE "${CMAKE_BINARY_DIR}/indented.txt" "${text}")
execute_process(
COMMAND "${_python_exe}" -c "${_fixup_cmd}"
@ -60,14 +75,16 @@ function(dedent outvar text)
if(NOT _dedent_exitcode EQUAL 0)
message(ERROR " Failed to remove indentation from: \n\"\"\"\n${text}\n\"\"\"
Python dedent failed with error code: ${_dedent_exitcode}")
message(FATAL_ERROR " Python dedent failed with error code: ${_dedent_exitcode}")
message(
FATAL_ERROR " Python dedent failed with error code: ${_dedent_exitcode}")
endif()
# Remove superfluous newlines (artifacts of print)
string(STRIP "${_dedent_text}" _dedent_text)
set(${outvar} "${_dedent_text}" PARENT_SCOPE)
set(${outvar}
"${_dedent_text}"
PARENT_SCOPE)
endfunction()
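# Usage sketch (the embedded snippet is illustrative): indentation that merely
# reflects the surrounding CMake nesting is stripped before execution, e.g.
#   dedent(_code "
#       import sys
#       print(sys.version_info.major)
#   ")
# leaves `_code` holding the two Python lines flush-left, ready to hand to
# `python -c`.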
function(pycmd_no_exit outvar exitcode cmd)
# Use Python_EXECUTABLE if it is defined, otherwise default to python
if("${Python_EXECUTABLE}" STREQUAL "")
@ -82,12 +99,15 @@ function(pycmd_no_exit outvar exitcode cmd)
OUTPUT_VARIABLE _output)
# Remove superfluous newlines (artifacts of print)
string(STRIP "${_output}" _output)
set(${outvar} "${_output}" PARENT_SCOPE)
set(${exitcode} "${_exitcode}" PARENT_SCOPE)
set(${outvar}
"${_output}"
PARENT_SCOPE)
set(${exitcode}
"${_exitcode}"
PARENT_SCOPE)
endfunction()
###
#
# Helper function to run `python -c "<cmd>"` and capture the results of stdout
#
# Runs a python command and populates an outvar with the result of stdout.
@ -97,135 +117,140 @@ endfunction()
# This function respects Python_EXECUTABLE if it is defined, otherwise it uses
# `python` and hopes for the best. An error will be thrown if it is not found.
#
# Args:
# outvar : variable that will hold the stdout of the python command
# cmd : text representing a (possibly multiline) block of python code
# Args:
#   outvar : variable that will hold the stdout of the python command
#   cmd    : text representing a (possibly multiline) block of python code
#
function(pycmd outvar cmd)
dedent(_dedent_cmd "${cmd}")
pycmd_no_exit(_output _exitcode "${_dedent_cmd}")
if(NOT _exitcode EQUAL 0)
message(ERROR " Failed when running python code: \"\"\"\n${_dedent_cmd}\n\"\"\"")
message(ERROR
" Failed when running python code: \"\"\"\n${_dedent_cmd}\n\"\"\"")
message(FATAL_ERROR " Python command failed with error code: ${_exitcode}")
endif()
# Remove superfluous newlines (artifacts of print)
string(STRIP "${_output}" _output)
set(${outvar} "${_output}" PARENT_SCOPE)
set(${outvar}
"${_output}"
PARENT_SCOPE)
endfunction()
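# For example (the snippet is illustrative), the following captures the
# interpreter's platform tag into _tag at configure time:
#   pycmd(_tag "
#       import sysconfig
#       print(sysconfig.get_platform())
#   ")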
##############################################################################
# ##############################################################################
# Macro to update cached options.
macro(caffe2_update_option variable value)
if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO)
get_property(__help_string CACHE ${variable} PROPERTY HELPSTRING)
set(${variable} ${value} CACHE BOOL ${__help_string} FORCE)
get_property(
__help_string
CACHE ${variable}
PROPERTY HELPSTRING)
set(${variable}
${value}
CACHE BOOL ${__help_string} FORCE)
else()
set(${variable} ${value})
endif()
endmacro()
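# For example, with a hypothetical cached option USE_FOO,
#   caffe2_update_option(USE_FOO OFF)
# flips the cache entry while preserving its original help string when building
# inside the main repo, and degrades to a plain set(USE_FOO OFF) otherwise.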
##############################################################################
# ##############################################################################
# Add an interface library definition that is dependent on the source.
#
# It's probably easiest to explain why this macro exists, by describing
# what things would look like if we didn't have this macro.
# It's probably easiest to explain why this macro exists, by describing what
# things would look like if we didn't have this macro.
#
# Let's suppose we want to statically link against torch. We've defined
# a library in cmake called torch, and we might think that we just
# target_link_libraries(my-app PUBLIC torch). This will result in a
# linker argument 'libtorch.a' getting passed to the linker.
# Let's suppose we want to statically link against torch. We've defined a
# library in cmake called torch, and we might think that we just
# target_link_libraries(my-app PUBLIC torch). This will result in a linker
# argument 'libtorch.a' getting passed to the linker.
#
# Unfortunately, this link command is wrong! We have static
# initializers in libtorch.a that would get improperly pruned by
# the default link settings. What we actually need is for you
# to do -Wl,--whole-archive,libtorch.a -Wl,--no-whole-archive to ensure
# that we keep all symbols, even if they are (seemingly) not used.
# Unfortunately, this link command is wrong! We have static initializers in
# libtorch.a that would get improperly pruned by the default link settings. What
# we actually need is for you to do -Wl,--whole-archive,libtorch.a
# -Wl,--no-whole-archive to ensure that we keep all symbols, even if they are
# (seemingly) not used.
#
# What caffe2_interface_library does is create an interface library
# that indirectly depends on the real library, but sets up the link
# arguments so that you get all of the extra link settings you need.
# The result is not a "real" library, and so we have to manually
# copy over necessary properties from the original target.
# What caffe2_interface_library does is create an interface library that
# indirectly depends on the real library, but sets up the link arguments so that
# you get all of the extra link settings you need. The result is not a "real"
# library, and so we have to manually copy over necessary properties from the
# original target.
#
# (The discussion above is about static libraries, but a similar
# situation occurs for dynamic libraries: if no symbols are used from
# a dynamic library, it will be pruned unless you are --no-as-needed)
# (The discussion above is about static libraries, but a similar situation
# occurs for dynamic libraries: if no symbols are used from a dynamic library,
# it will be pruned unless you are --no-as-needed)
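# Usage sketch (target names are illustrative): given a static library target
# `foo`, calling
#   caffe2_interface_library(foo foo_library)
#   target_link_libraries(my_app PUBLIC foo_library)
# makes my_app link foo with whole-archive semantics, so its static
# initializers survive even though no symbol from foo is referenced directly.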
macro(caffe2_interface_library SRC DST)
add_library(${DST} INTERFACE)
add_dependencies(${DST} ${SRC})
# Depending on the nature of the source library as well as the compiler,
# determine the needed compilation flags.
get_target_property(__src_target_type ${SRC} TYPE)
# Depending on the type of the source library, we will set up the
# link command for the specific SRC library.
# Depending on the type of the source library, we will set up the link command
# for the specific SRC library.
if(${__src_target_type} STREQUAL "STATIC_LIBRARY")
# In the case of a static library, we will need to add whole-archive flags.
target_link_libraries(${DST} INTERFACE $<LINK_LIBRARY:WHOLE_ARCHIVE,${SRC}>)
# Link all interface link libraries of the src target as well.
# For static library, we need to explicitly depend on all the libraries
# that are the dependent library of the source library. Note that we cannot
# use the populated INTERFACE_LINK_LIBRARIES property, because if one of the
# dependent library is not a target, cmake creates a $<LINK_ONLY:src> wrapper
# and then one is not able to find target "src". For more discussions, check
# https://cmake.org/Bug/print_bug_page.php?bug_id=15415
# https://cmake.org/pipermail/cmake-developers/2013-May/019019.html
# Link all interface link libraries of the src target as well. For a static
# library, we need to explicitly depend on all the libraries that the source
# library itself depends on. Note that we cannot use the populated
# INTERFACE_LINK_LIBRARIES property, because if one of the dependent
# libraries is not a target, cmake creates a $<LINK_ONLY:src> wrapper and
# then one is not able to find target "src". For more discussion, check
# https://cmake.org/Bug/print_bug_page.php?bug_id=15415 and
# https://cmake.org/pipermail/cmake-developers/2013-May/019019.html
# Specifically the following quote
#
# """
# For STATIC libraries we can define that the PUBLIC/PRIVATE/INTERFACE keys
# are ignored for linking and that it always populates both LINK_LIBRARIES
# LINK_INTERFACE_LIBRARIES. Note that for STATIC libraries the
# LINK_LIBRARIES property will not be used for anything except build-order
# dependencies.
# """
target_link_libraries(${DST} INTERFACE
$<TARGET_PROPERTY:${SRC},LINK_LIBRARIES>)
# """ For STATIC libraries we can define that the PUBLIC/PRIVATE/INTERFACE
# keys are ignored for linking and that it always populates both
# LINK_LIBRARIES LINK_INTERFACE_LIBRARIES. Note that for STATIC libraries
# the LINK_LIBRARIES property will not be used for anything except
# build-order dependencies. """
target_link_libraries(${DST}
INTERFACE $<TARGET_PROPERTY:${SRC},LINK_LIBRARIES>)
elseif(${__src_target_type} STREQUAL "SHARED_LIBRARY")
if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
target_link_libraries(${DST} INTERFACE
"-Wl,--no-as-needed,\"$<TARGET_FILE:${SRC}>\" -Wl,--as-needed")
target_link_libraries(
${DST}
INTERFACE "-Wl,--no-as-needed,\"$<TARGET_FILE:${SRC}>\" -Wl,--as-needed"
)
else()
target_link_libraries(${DST} INTERFACE ${SRC})
endif()
# Link all interface link libraries of the src target as well.
# For shared libraries, we can simply depend on the INTERFACE_LINK_LIBRARIES
# property of the target.
target_link_libraries(${DST} INTERFACE
$<TARGET_PROPERTY:${SRC},INTERFACE_LINK_LIBRARIES>)
# Link all interface link libraries of the src target as well. For shared
# libraries, we can simply depend on the INTERFACE_LINK_LIBRARIES property
# of the target.
target_link_libraries(
${DST} INTERFACE $<TARGET_PROPERTY:${SRC},INTERFACE_LINK_LIBRARIES>)
else()
message(FATAL_ERROR
message(
FATAL_ERROR
"You made a CMake build file error: target " ${SRC}
" must be of type either STATIC_LIBRARY or SHARED_LIBRARY. However, "
"I got " ${__src_target_type} ".")
endif()
# For all other interface properties, manually inherit from the source target.
set_target_properties(${DST} PROPERTIES
INTERFACE_COMPILE_DEFINITIONS
$<TARGET_PROPERTY:${SRC},INTERFACE_COMPILE_DEFINITIONS>
INTERFACE_COMPILE_OPTIONS
$<TARGET_PROPERTY:${SRC},INTERFACE_COMPILE_OPTIONS>
INTERFACE_INCLUDE_DIRECTORIES
$<TARGET_PROPERTY:${SRC},INTERFACE_INCLUDE_DIRECTORIES>
INTERFACE_SYSTEM_INCLUDE_DIRECTORIES
$<TARGET_PROPERTY:${SRC},INTERFACE_SYSTEM_INCLUDE_DIRECTORIES>)
set_target_properties(
${DST}
PROPERTIES INTERFACE_COMPILE_DEFINITIONS
$<TARGET_PROPERTY:${SRC},INTERFACE_COMPILE_DEFINITIONS>
INTERFACE_COMPILE_OPTIONS
$<TARGET_PROPERTY:${SRC},INTERFACE_COMPILE_OPTIONS>
INTERFACE_INCLUDE_DIRECTORIES
$<TARGET_PROPERTY:${SRC},INTERFACE_INCLUDE_DIRECTORIES>
INTERFACE_SYSTEM_INCLUDE_DIRECTORIES
$<TARGET_PROPERTY:${SRC},INTERFACE_SYSTEM_INCLUDE_DIRECTORIES>)
endmacro()
##############################################################################
# ##############################################################################
# Creating a Caffe2 binary target with sources specified with relative paths.
# Usage:
# caffe2_binary_target(target_name_or_src <src1> [<src2>] [<src3>] ...)
# Usage: caffe2_binary_target(target_name_or_src <src1> [<src2>] [<src3>] ...)
# If only target_name_or_src is specified, this target is built from a single
# source file and the target name is auto-generated from the filename.
# Otherwise, the target name is given by the first argument and the rest are
# the source files to build the target.
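# For example (hypothetical sources, illustration only):
#   caffe2_binary_target(print_sizes.cc)           # name derived from the file
#   caffe2_binary_target(my_tool main.cc utils.cc) # explicit name plus sources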
function(caffe2_binary_target target_name_or_src)
# https://cmake.org/cmake/help/latest/command/function.html
# Checking that ARGC is greater than # is the only way to ensure
# that ARGV# was passed to the function as an extra argument.
# See https://cmake.org/cmake/help/latest/command/function.html. Checking that
# ARGC is greater than # is the only way to ensure that ARGV# was passed to
# the function as an extra argument.
if(ARGC GREATER 1)
set(__target ${target_name_or_src})
prepend(__srcs "${CMAKE_CURRENT_SOURCE_DIR}/" "${ARGN}")
@ -257,11 +282,9 @@ function(caffe2_hip_binary_target target_name_or_src)
target_include_directories(${__target} PRIVATE ${Caffe2_HIP_INCLUDE})
endfunction()
##############################################################################
# ##############################################################################
# Multiplex between adding libraries for CUDA versus HIP (AMD Software Stack).
# Usage:
# torch_cuda_based_add_library(cuda_target)
# Usage: torch_cuda_based_add_library(cuda_target)
#
macro(torch_cuda_based_add_library cuda_target)
if(USE_ROCM)
@ -269,34 +292,38 @@ macro(torch_cuda_based_add_library cuda_target)
elseif(USE_CUDA)
add_library(${cuda_target} ${ARGN})
else()
endif()
endmacro()
##############################################################################
# Get the HIP arch flags specified by PYTORCH_ROCM_ARCH.
# Usage:
# torch_hip_get_arch_list(variable_to_store_flags)
# ##############################################################################
# Get the HIP arch flags specified by PYTORCH_ROCM_ARCH. Usage:
# torch_hip_get_arch_list(variable_to_store_flags)
#
macro(torch_hip_get_arch_list store_var)
if(DEFINED ENV{PYTORCH_ROCM_ARCH})
set(_TMP $ENV{PYTORCH_ROCM_ARCH})
else()
# Use arch of installed GPUs as default
execute_process(COMMAND "rocm_agent_enumerator" COMMAND bash "-c" "grep -v gfx000 | sort -u | xargs | tr -d '\n'"
RESULT_VARIABLE ROCM_AGENT_ENUMERATOR_RESULT
OUTPUT_VARIABLE ROCM_ARCH_INSTALLED)
execute_process(
COMMAND "rocm_agent_enumerator"
COMMAND bash "-c" "grep -v gfx000 | sort -u | xargs | tr -d '\n'"
RESULT_VARIABLE ROCM_AGENT_ENUMERATOR_RESULT
OUTPUT_VARIABLE ROCM_ARCH_INSTALLED)
if(NOT ROCM_AGENT_ENUMERATOR_RESULT EQUAL 0)
message(FATAL_ERROR " Could not detect ROCm arch for GPUs on machine. Result: '${ROCM_AGENT_ENUMERATOR_RESULT}'")
message(
FATAL_ERROR
" Could not detect ROCm arch for GPUs on machine. Result: '${ROCM_AGENT_ENUMERATOR_RESULT}'"
)
endif()
set(_TMP ${ROCM_ARCH_INSTALLED})
endif()
string(REPLACE " " ";" ${store_var} "${_TMP}")
endmacro()
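# Illustration of torch_hip_get_arch_list (assumed environment): with
# PYTORCH_ROCM_ARCH="gfx906 gfx90a" exported before configuring,
#   torch_hip_get_arch_list(PYTORCH_ROCM_ARCH_LIST)
# stores the CMake list "gfx906;gfx90a" in PYTORCH_ROCM_ARCH_LIST.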
##############################################################################
# Get the XPU arch flags specified by TORCH_XPU_ARCH_LIST.
# Usage:
# torch_xpu_get_arch_list(variable_to_store_flags)
# ##############################################################################
# Get the XPU arch flags specified by TORCH_XPU_ARCH_LIST. Usage:
# torch_xpu_get_arch_list(variable_to_store_flags)
#
macro(torch_xpu_get_arch_list store_var)
if(DEFINED ENV{TORCH_XPU_ARCH_LIST})
@ -304,19 +331,19 @@ macro(torch_xpu_get_arch_list store_var)
endif()
endmacro()
##############################################################################
# ##############################################################################
# Get the NVCC arch flags specified by TORCH_CUDA_ARCH_LIST and CUDA_ARCH_NAME.
# Usage:
# torch_cuda_get_nvcc_gencode_flag(variable_to_store_flags)
# Usage: torch_cuda_get_nvcc_gencode_flag(variable_to_store_flags)
#
macro(torch_cuda_get_nvcc_gencode_flag store_var)
# setting nvcc arch flags
# We need to support the explicitly and conveniently defined TORCH_CUDA_ARCH_LIST
# Setting nvcc arch flags. We need to support the explicitly and conveniently
# defined TORCH_CUDA_ARCH_LIST.
if((NOT DEFINED TORCH_CUDA_ARCH_LIST) AND (DEFINED ENV{TORCH_CUDA_ARCH_LIST}))
set(TORCH_CUDA_ARCH_LIST $ENV{TORCH_CUDA_ARCH_LIST})
endif()
if(DEFINED CUDA_ARCH_NAME)
message(WARNING
message(
WARNING
"CUDA_ARCH_NAME is no longer used. Use TORCH_CUDA_ARCH_LIST instead. "
"Right now, CUDA_ARCH_NAME is ${CUDA_ARCH_NAME} and "
"TORCH_CUDA_ARCH_LIST is ${TORCH_CUDA_ARCH_LIST}.")
@ -331,11 +358,8 @@ macro(torch_cuda_get_nvcc_gencode_flag store_var)
cuda_select_nvcc_arch_flags(${store_var} ${TORCH_CUDA_ARCH_LIST})
endmacro()
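# Illustration of torch_cuda_get_nvcc_gencode_flag (assumed arch list):
#   set(TORCH_CUDA_ARCH_LIST "8.0;9.0")
#   torch_cuda_get_nvcc_gencode_flag(NVCC_GENCODE)
# after which ${NVCC_GENCODE} holds the -gencode flags to pass to nvcc.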
##############################################################################
# Add standard compile options.
# Usage:
# torch_compile_options(lib_name)
# ##############################################################################
# Add standard compile options. Usage: torch_compile_options(lib_name)
function(torch_compile_options libname)
set_property(TARGET ${libname} PROPERTY CXX_STANDARD 17)
@ -349,78 +373,85 @@ function(torch_compile_options libname)
endif()
if(${MSVC_TOOLSET_VERSION} GREATER_EQUAL 142)
# Add /permissive- flag for conformance mode to the compiler.
# This will force more strict check to the code standard.
# 1. From MS official doc: https://learn.microsoft.com/en-us/cpp/build/reference/permissive-standards-conformance?view=msvc-170#remarks
# By default, the /permissive- option is set in new projects created by Visual Studio 2017 version 15.5 and later versions.
# We set the /permissive- flag from VS 2019 (MSVC_TOOLSET_VERSION 142) to avoid compiling issues for old toolkit.
# 2. For MSVC VERSION: https://cmake.org/cmake/help/latest/variable/MSVC_TOOLSET_VERSION.html
target_compile_options(${libname} PUBLIC $<$<COMPILE_LANGUAGE:CXX>:/permissive->)
# Add the /permissive- flag to put the compiler in conformance mode. This
# enforces stricter checks against the C++ standard.
# 1. From the MS official doc:
# https://learn.microsoft.com/en-us/cpp/build/reference/permissive-standards-conformance?view=msvc-170#remarks
# By default, the /permissive- option is set in new projects created by
# Visual Studio 2017 version 15.5 and later versions. We only set the
# /permissive- flag from VS 2019 (MSVC_TOOLSET_VERSION 142) onward to avoid
# compilation issues with older toolsets.
# 2. For MSVC_TOOLSET_VERSION:
# https://cmake.org/cmake/help/latest/variable/MSVC_TOOLSET_VERSION.html
target_compile_options(${libname}
PUBLIC $<$<COMPILE_LANGUAGE:CXX>:/permissive->)
endif()
# This option enables a token-based preprocessor that conforms to C99 and C++11 and later standards.
# This option is available since VS 2017.
# For MS official doc: https://learn.microsoft.com/en-us/cpp/build/reference/zc-preprocessor
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zc:preprocessor" PARENT_SCOPE)
# This option enables a token-based preprocessor that conforms to C99 and
# C++11 and later standards. It is available since VS 2017. See the MS
# official doc:
# https://learn.microsoft.com/en-us/cpp/build/reference/zc-preprocessor
set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} /Zc:preprocessor"
PARENT_SCOPE)
if(${MSVC_TOOLSET_VERSION} GREATER_EQUAL 143)
# Add /d2implyavx512upperregs- to disable compiler over-aggressive optimization, which caused involeved AVX512 register on AVX2 machine.
# Reference: https://github.com/pytorch/pytorch/issues/145702#issuecomment-2874029459
target_compile_options(${libname} PUBLIC $<$<COMPILE_LANGUAGE:CXX>:/d2implyavx512upperregs->)
# Add /d2implyavx512upperregs- to disable an over-aggressive compiler
# optimization that caused AVX512 registers to be used on AVX2 machines.
# Reference:
# https://github.com/pytorch/pytorch/issues/145702#issuecomment-2874029459
target_compile_options(
${libname} PUBLIC $<$<COMPILE_LANGUAGE:CXX>:/d2implyavx512upperregs->)
endif()
target_compile_options(${libname} PUBLIC
$<$<COMPILE_LANGUAGE:CXX>:
${MSVC_RUNTIME_LIBRARY_OPTION}
target_compile_options(
${libname}
PUBLIC
$<$<COMPILE_LANGUAGE:CXX>: ${MSVC_RUNTIME_LIBRARY_OPTION}
$<$<OR:$<CONFIG:Debug>,$<CONFIG:RelWithDebInfo>>:${MSVC_DEBINFO_OPTION}>
/EHsc
/bigobj>
)
/EHsc /bigobj>)
else()
set(private_compile_options
-Wall
-Wextra
-Wdeprecated
-Wunused
-Wno-unused-parameter
-Wno-missing-field-initializers
-Wno-array-bounds
-Wno-unknown-pragmas
-Wno-strict-overflow
-Wno-strict-aliasing
)
-Wall
-Wextra
-Wdeprecated
-Wunused
-Wno-unused-parameter
-Wno-missing-field-initializers
-Wno-array-bounds
-Wno-unknown-pragmas
-Wno-strict-overflow
-Wno-strict-aliasing)
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
list(APPEND private_compile_options -Wredundant-move)
endif()
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
list(APPEND private_compile_options -Wextra-semi -Wmove)
else()
list(APPEND private_compile_options
list(
APPEND
private_compile_options
# Considered to be flaky. See the discussion at
# https://github.com/pytorch/pytorch/pull/9608
-Wno-maybe-uninitialized)
endif()
if(WERROR)
list(APPEND private_compile_options
list(
APPEND
private_compile_options
-Werror
-Werror=ignored-attributes
-Werror=inconsistent-missing-override
-Werror=inconsistent-missing-destructor-override
-Werror=pedantic
-Werror=unused
-Wno-error=unused-parameter
)
-Wno-error=unused-parameter)
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
list(APPEND private_compile_options -Werror=unused-but-set-variable)
endif()
endif()
endif()
target_compile_options(${libname} PRIVATE
$<$<COMPILE_LANGUAGE:CXX>:${private_compile_options}>)
target_compile_options(
${libname} PRIVATE $<$<COMPILE_LANGUAGE:CXX>:${private_compile_options}>)
if(USE_CUDA)
foreach(option IN LISTS private_compile_options)
if(CMAKE_CUDA_HOST_COMPILER_ID STREQUAL "GNU")
@ -431,7 +462,8 @@ function(torch_compile_options libname)
continue()
endif()
endif()
target_compile_options(${libname} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler ${option}>)
target_compile_options(
${libname} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler ${option}>)
endforeach()
endif()
@ -443,73 +475,90 @@ function(torch_compile_options libname)
# Unfortunately, hidden visibility messes up some ubsan warnings because
# templated classes crossing library boundaries get duplicated (but identical)
# definitions. It's easier to just disable it.
target_compile_options(${libname} PRIVATE
$<$<COMPILE_LANGUAGE:CXX>: -fvisibility=hidden>)
target_compile_options(${libname} PRIVATE $<$<COMPILE_LANGUAGE:CXX>:
-fvisibility=hidden>)
endif()
# Use -O2 for release builds (-O3 doesn't improve perf, and -Os results in perf regression)
target_compile_options(${libname} PRIVATE
$<$<AND:$<COMPILE_LANGUAGE:CXX>,$<OR:$<CONFIG:Release>,$<CONFIG:RelWithDebInfo>>>:-O2>)
# Use -O2 for release builds (-O3 doesn't improve perf, and -Os results in
# perf regression)
target_compile_options(
${libname}
PRIVATE
$<$<AND:$<COMPILE_LANGUAGE:CXX>,$<OR:$<CONFIG:Release>,$<CONFIG:RelWithDebInfo>>>:-O2>
)
endfunction()
##############################################################################
# ##############################################################################
# Set old-style FindCuda.cmake compile flags from modern CMake cuda flags.
# Usage:
# torch_update_find_cuda_flags()
# Usage: torch_update_find_cuda_flags()
function(torch_update_find_cuda_flags)
# Convert -O2 -Xcompiler="-O2 -Wall" to "-O2;-Xcompiler=-O2,-Wall"
if(USE_CUDA)
separate_arguments(FLAGS UNIX_COMMAND "${CMAKE_CUDA_FLAGS}")
string(REPLACE " " "," FLAGS "${FLAGS}")
set(CUDA_NVCC_FLAGS ${FLAGS} PARENT_SCOPE)
set(CUDA_NVCC_FLAGS
${FLAGS}
PARENT_SCOPE)
separate_arguments(FLAGS_DEBUG UNIX_COMMAND "${CMAKE_CUDA_FLAGS_DEBUG}")
string(REPLACE " " "," FLAGS_DEBUG "${FLAGS_DEBUG}")
set(CUDA_NVCC_FLAGS_DEBUG "${FLAGS_DEBUG}" PARENT_SCOPE)
set(CUDA_NVCC_FLAGS_DEBUG
"${FLAGS_DEBUG}"
PARENT_SCOPE)
separate_arguments(FLAGS_RELEASE UNIX_COMMAND "${CMAKE_CUDA_FLAGS_RELEASE}")
string(REPLACE " " "," FLAGS_RELEASE "${FLAGS_RELEASE}")
set(CUDA_NVCC_FLAGS_RELEASE "${FLAGS_RELEASE}" PARENT_SCOPE)
set(CUDA_NVCC_FLAGS_RELEASE
"${FLAGS_RELEASE}"
PARENT_SCOPE)
separate_arguments(FLAGS_MINSIZEREL UNIX_COMMAND "${CMAKE_CUDA_FLAGS_MINSIZEREL}")
separate_arguments(FLAGS_MINSIZEREL UNIX_COMMAND
"${CMAKE_CUDA_FLAGS_MINSIZEREL}")
string(REPLACE " " "," FLAGS_MINSIZEREL "${FLAGS_MINSIZEREL}")
set(CUDA_NVCC_FLAGS_MINSIZEREL "${FLAGS_MINSIZEREL}" PARENT_SCOPE)
set(CUDA_NVCC_FLAGS_MINSIZEREL
"${FLAGS_MINSIZEREL}"
PARENT_SCOPE)
separate_arguments(FLAGS_RELWITHDEBINFO UNIX_COMMAND "${CMAKE_CUDA_FLAGS_RELWITHDEBINFO}")
separate_arguments(FLAGS_RELWITHDEBINFO UNIX_COMMAND
"${CMAKE_CUDA_FLAGS_RELWITHDEBINFO}")
string(REPLACE " " "," FLAGS_RELWITHDEBINFO "${FLAGS_RELWITHDEBINFO}")
set(CUDA_NVCC_FLAGS_RELWITHDEBINFO "${FLAGS_RELWITHDEBINFO}" PARENT_SCOPE)
set(CUDA_NVCC_FLAGS_RELWITHDEBINFO
"${FLAGS_RELWITHDEBINFO}"
PARENT_SCOPE)
message(STATUS "Converting CMAKE_CUDA_FLAGS to CUDA_NVCC_FLAGS:\n"
" CUDA_NVCC_FLAGS = ${FLAGS}\n"
" CUDA_NVCC_FLAGS_DEBUG = ${FLAGS_DEBUG}\n"
" CUDA_NVCC_FLAGS_RELEASE = ${FLAGS_RELEASE}\n"
" CUDA_NVCC_FLAGS_RELWITHDEBINFO = ${FLAGS_RELWITHDEBINFO}\n"
" CUDA_NVCC_FLAGS_MINSIZEREL = ${FLAGS_MINSIZEREL}")
message(
STATUS "Converting CMAKE_CUDA_FLAGS to CUDA_NVCC_FLAGS:\n"
" CUDA_NVCC_FLAGS = ${FLAGS}\n"
" CUDA_NVCC_FLAGS_DEBUG = ${FLAGS_DEBUG}\n"
" CUDA_NVCC_FLAGS_RELEASE = ${FLAGS_RELEASE}\n"
" CUDA_NVCC_FLAGS_RELWITHDEBINFO = ${FLAGS_RELWITHDEBINFO}\n"
" CUDA_NVCC_FLAGS_MINSIZEREL = ${FLAGS_MINSIZEREL}")
endif()
endfunction()
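# Illustration (hypothetical call site): code that still consumes the
# old-style FindCUDA variables would do
#   torch_update_find_cuda_flags()
#   cuda_add_library(my_cuda_lib kernels.cu) # picks up CUDA_NVCC_FLAGS*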
include(CheckCXXCompilerFlag)
##############################################################################
# CHeck if given flag is supported and append it to provided outputvar
# Also define HAS_UPPER_CASE_FLAG_NAME variable
# Usage:
# append_cxx_flag_if_supported("-Werror" CMAKE_CXX_FLAGS)
# ##############################################################################
# Check if the given flag is supported and, if so, append it to the provided
# outputvar. Also defines a HAS_UPPER_CASE_FLAG_NAME variable. Usage:
# append_cxx_flag_if_supported("-Werror" CMAKE_CXX_FLAGS)
function(append_cxx_flag_if_supported flag outputvar)
string(TOUPPER "HAS${flag}" _FLAG_NAME)
string(REGEX REPLACE "[=-]" "_" _FLAG_NAME "${_FLAG_NAME}")
# GCC silents unknown -Wno-XXX flags, so we detect the corresponding -WXXX.
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
string(REGEX REPLACE "Wno-" "W" new_flag "${flag}")
else()
set(new_flag ${flag})
endif()
check_cxx_compiler_flag("${new_flag}" ${_FLAG_NAME})
if(${_FLAG_NAME})
string(APPEND ${outputvar} " ${flag}")
set(${outputvar} "${${outputvar}}" PARENT_SCOPE)
endif()
string(TOUPPER "HAS${flag}" _FLAG_NAME)
string(REGEX REPLACE "[=-]" "_" _FLAG_NAME "${_FLAG_NAME}")
# GCC silently accepts unknown -Wno-XXX flags, so we detect the corresponding -WXXX.
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
string(REGEX REPLACE "Wno-" "W" new_flag "${flag}")
else()
set(new_flag ${flag})
endif()
check_cxx_compiler_flag("${new_flag}" ${_FLAG_NAME})
if(${_FLAG_NAME})
string(APPEND ${outputvar} " ${flag}")
set(${outputvar}
"${${outputvar}}"
PARENT_SCOPE)
endif()
endfunction()
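# Illustration of append_cxx_flag_if_supported (hypothetical flag): a call like
#   append_cxx_flag_if_supported("-Wno-dangling-reference" CMAKE_CXX_FLAGS)
# probes -Wdangling-reference on GCC, defines HAS_WNO_DANGLING_REFERENCE, and
# appends the original flag to CMAKE_CXX_FLAGS only if the probe succeeds.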
function(target_compile_options_if_supported target flag)

View File

@ -19,18 +19,14 @@ set(PYTORCH_FOUND_XPU TRUE)
# SYCL library interface
add_library(torch::sycl INTERFACE IMPORTED)
set_property(
TARGET torch::sycl PROPERTY INTERFACE_INCLUDE_DIRECTORIES
${SYCL_INCLUDE_DIR})
set_property(
TARGET torch::sycl PROPERTY INTERFACE_LINK_LIBRARIES
${SYCL_LIBRARY})
set_property(TARGET torch::sycl PROPERTY INTERFACE_INCLUDE_DIRECTORIES
${SYCL_INCLUDE_DIR})
set_property(TARGET torch::sycl PROPERTY INTERFACE_LINK_LIBRARIES
${SYCL_LIBRARY})
# xpurt
add_library(torch::xpurt INTERFACE IMPORTED)
set_property(
TARGET torch::xpurt PROPERTY INTERFACE_LINK_LIBRARIES
torch::sycl)
set_property(TARGET torch::xpurt PROPERTY INTERFACE_LINK_LIBRARIES torch::sycl)
# setting xpu arch flags
torch_xpu_get_arch_list(XPU_ARCH_FLAGS)
@ -39,7 +35,8 @@ set(TORCH_XPU_ARCH_LIST ${XPU_ARCH_FLAGS})
# Ensure USE_XPU is enabled.
string(APPEND XPU_HOST_CXX_FLAGS " -DUSE_XPU")
string(APPEND XPU_HOST_CXX_FLAGS " -DSYCL_COMPILER_VERSION=${SYCL_COMPILER_VERSION}")
string(APPEND XPU_HOST_CXX_FLAGS
" -DSYCL_COMPILER_VERSION=${SYCL_COMPILER_VERSION}")
if(DEFINED ENV{XPU_ENABLE_KINETO})
set(XPU_ENABLE_KINETO TRUE)
@ -53,4 +50,4 @@ if(WIN32)
endif()
else()
set(XPU_ENABLE_KINETO TRUE)
endif()
endif()

View File

@ -12,10 +12,12 @@ add_library(${PROJECT_NAME} MODULE ${FT_SOURCES})
target_include_directories(${PROJECT_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
target_compile_definitions(${PROJECT_NAME} PRIVATE FUNCTORCH_BUILD_MAIN_LIB)
target_compile_definitions(${PROJECT_NAME} PRIVATE TORCH_EXTENSION_NAME=_C)
target_compile_definitions(${PROJECT_NAME} PRIVATE TORCH_API_INCLUDE_EXTENSION_H)
target_compile_definitions(${PROJECT_NAME}
PRIVATE TORCH_API_INCLUDE_EXTENSION_H)
target_compile_options(${PROJECT_NAME} PRIVATE ${TORCH_PYTHON_COMPILE_OPTIONS})
target_compile_options_if_supported(${PROJECT_NAME} "-Wmissing-prototypes")
target_compile_options_if_supported(${PROJECT_NAME} "-Werror=missing-prototypes")
target_compile_options_if_supported(${PROJECT_NAME}
"-Werror=missing-prototypes")
if(BUILD_LIBTORCHLESS)
target_link_libraries(${PROJECT_NAME} PRIVATE ${TORCH_LIB} torch_python)
else()
@ -25,13 +27,16 @@ endif()
target_link_libraries(${PROJECT_NAME} PRIVATE pybind::pybind11)
set_target_properties(${PROJECT_NAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY
${CMAKE_BINARY_DIR}/functorch)
set_target_properties(${PROJECT_NAME} PROPERTIES INSTALL_RPATH "${_rpath_portable_origin}/../torch/lib")
${CMAKE_BINARY_DIR}/functorch)
set_target_properties(
${PROJECT_NAME} PROPERTIES INSTALL_RPATH
"${_rpath_portable_origin}/../torch/lib")
# Copy-pasted prefix/suffix logic for Python extensions from
# https://github.com/pytorch/pytorch/blob/33bb8ae350611760139457b85842b1d7edf9aa11/caffe2/CMakeLists.txt#L1975
# https://github.com/pytorch/pytorch/blob/33bb8ae350611760139457b85842b1d7edf9aa11/caffe2/CMakeLists.txt#L2022
# TODO: It would be good to be able to use Python3_add_library target, but it does not work in many cases
# TODO: It would be good to be able to use Python3_add_library target, but it
# does not work in many cases
set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" DEBUG_POSTFIX "")
if(WIN32)
set_target_properties(${PROJECT_NAME} PROPERTIES SUFFIX ".pyd")
@ -40,6 +45,7 @@ else()
endif()
# Needed to link functorch on MacOS
if(NOT ${TORCH_PYTHON_LINK_FLAGS} STREQUAL "")
set_target_properties(${PROJECT_NAME} PROPERTIES LINK_FLAGS ${TORCH_PYTHON_LINK_FLAGS})
set_target_properties(${PROJECT_NAME} PROPERTIES LINK_FLAGS
${TORCH_PYTHON_LINK_FLAGS})
endif()
install(TARGETS ${PROJECT_NAME} DESTINATION "${CMAKE_CURRENT_SOURCE_DIR}")

View File

@ -2,24 +2,21 @@ set(AOTI_ABI_CHECK_TEST_ROOT ${TORCH_ROOT}/test/cpp/aoti_abi_check)
# Build the cpp gtest binary containing the cpp-only tests.
set(AOTI_ABI_CHECK_TEST_SRCS
${AOTI_ABI_CHECK_TEST_ROOT}/main.cpp
${AOTI_ABI_CHECK_TEST_ROOT}/test_cast.cpp
${AOTI_ABI_CHECK_TEST_ROOT}/test_dtype.cpp
${AOTI_ABI_CHECK_TEST_ROOT}/test_macros.cpp
${AOTI_ABI_CHECK_TEST_ROOT}/test_math.cpp
${AOTI_ABI_CHECK_TEST_ROOT}/test_rand.cpp
${AOTI_ABI_CHECK_TEST_ROOT}/test_vec.cpp
)
${AOTI_ABI_CHECK_TEST_ROOT}/main.cpp
${AOTI_ABI_CHECK_TEST_ROOT}/test_cast.cpp
${AOTI_ABI_CHECK_TEST_ROOT}/test_dtype.cpp
${AOTI_ABI_CHECK_TEST_ROOT}/test_macros.cpp
${AOTI_ABI_CHECK_TEST_ROOT}/test_math.cpp
${AOTI_ABI_CHECK_TEST_ROOT}/test_rand.cpp
${AOTI_ABI_CHECK_TEST_ROOT}/test_vec.cpp)
add_executable(test_aoti_abi_check
${AOTI_ABI_CHECK_TEST_SRCS}
)
add_executable(test_aoti_abi_check ${AOTI_ABI_CHECK_TEST_SRCS})
# TODO temporary until we can delete the old gtest polyfills.
target_compile_definitions(test_aoti_abi_check PRIVATE USE_GTEST)
# WARNING: DO NOT LINK torch!!!
# The purpose is to check if the used aten/c10 headers are writtern in a header-only way
# WARNING: DO NOT LINK torch!!! The purpose is to check whether the aten/c10
# headers used here are written in a header-only way.
target_link_libraries(test_aoti_abi_check PRIVATE gtest_main)
target_include_directories(test_aoti_abi_check PRIVATE ${ATen_CPU_INCLUDE})
@ -27,6 +24,9 @@ if(INSTALL_TEST)
install(TARGETS test_aoti_abi_check DESTINATION bin)
# Install PDB files for MSVC builds
if(MSVC AND BUILD_SHARED_LIBS)
install(FILES $<TARGET_PDB_FILE:test_aoti_abi_check> DESTINATION bin OPTIONAL)
install(
FILES $<TARGET_PDB_FILE:test_aoti_abi_check>
DESTINATION bin
OPTIONAL)
endif()
endif()

View File

@ -1,47 +1,40 @@
set(AOT_INDUCTOR_TEST_ROOT ${TORCH_ROOT}/test/cpp/aoti_inference)
# Build custom TorchScript op for AOTInductor
add_library(aoti_custom_class SHARED aoti_custom_class.cpp)
set_target_properties(aoti_custom_class PROPERTIES
LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
set_target_properties(aoti_custom_class PROPERTIES LIBRARY_OUTPUT_DIRECTORY
${CMAKE_CURRENT_BINARY_DIR})
if(USE_CUDA)
target_compile_definitions(aoti_custom_class PRIVATE USE_CUDA)
elseif(USE_ROCM)
target_compile_definitions(aoti_custom_class PRIVATE USE_ROCM)
target_compile_definitions(aoti_custom_class PRIVATE USE_ROCM)
endif()
# Link against LibTorch
target_link_libraries(aoti_custom_class torch)
# the custom command that generates the TorchScript module
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/script_data.pt
${CMAKE_CURRENT_BINARY_DIR}/script_model_cpu.pt
${CMAKE_CURRENT_BINARY_DIR}/script_model_cuda.pt
# This script requires the torch package to be installed.
COMMAND python ${AOT_INDUCTOR_TEST_ROOT}/compile_model.py
DEPENDS torch torch_python aoti_custom_class ${AOT_INDUCTOR_TEST_ROOT}/compile_model.py
)
add_custom_target(aoti_script_model ALL
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/script_data.pt
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/script_model_cpu.pt
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/script_model_cuda.pt
)
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/script_data.pt
${CMAKE_CURRENT_BINARY_DIR}/script_model_cpu.pt
${CMAKE_CURRENT_BINARY_DIR}/script_model_cuda.pt
# This script requires the torch package to be installed.
COMMAND python ${AOT_INDUCTOR_TEST_ROOT}/compile_model.py
DEPENDS torch torch_python aoti_custom_class
${AOT_INDUCTOR_TEST_ROOT}/compile_model.py)
add_custom_target(
aoti_script_model ALL
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/script_data.pt
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/script_model_cpu.pt
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/script_model_cuda.pt)
add_dependencies(aoti_script_model aoti_custom_class)
# Build the cpp gtest binary containing the cpp-only tests.
set(INDUCTOR_TEST_SRCS
${AOT_INDUCTOR_TEST_ROOT}/test.cpp
)
set(INDUCTOR_TEST_SRCS ${AOT_INDUCTOR_TEST_ROOT}/test.cpp)
add_executable(test_aoti_inference
${TORCH_ROOT}/test/cpp/common/main.cpp
${INDUCTOR_TEST_SRCS}
data.pt
script_data.pt
script_model_cpu.pt
script_model_cuda.pt
)
add_executable(
test_aoti_inference
${TORCH_ROOT}/test/cpp/common/main.cpp ${INDUCTOR_TEST_SRCS} data.pt
script_data.pt script_model_cpu.pt script_model_cuda.pt)
add_dependencies(test_aoti_inference aoti_custom_class aoti_script_model)
# TODO temporary until we can delete the old gtest polyfills.
@ -49,32 +42,32 @@ target_compile_definitions(test_aoti_inference PRIVATE USE_GTEST)
# Define a custom command to generate the library
add_custom_command(
OUTPUT data.pt
COMMAND python ${AOT_INDUCTOR_TEST_ROOT}/test.py
DEPENDS ${AOT_INDUCTOR_TEST_ROOT}/test.py
)
OUTPUT data.pt
COMMAND python ${AOT_INDUCTOR_TEST_ROOT}/test.py
DEPENDS ${AOT_INDUCTOR_TEST_ROOT}/test.py)
target_link_libraries(test_aoti_inference PRIVATE
torch
gtest_main
-Wl,--no-as-needed aoti_custom_class
)
target_link_libraries(
test_aoti_inference PRIVATE torch gtest_main -Wl,--no-as-needed
aoti_custom_class)
if(USE_CUDA)
target_include_directories(test_aoti_inference PRIVATE ${ATen_CUDA_INCLUDE})
target_compile_definitions(test_aoti_inference PRIVATE USE_CUDA)
elseif(USE_ROCM)
target_include_directories(test_aoti_inference PRIVATE ${ATen_HIP_INCLUDE})
target_compile_definitions(test_aoti_inference PRIVATE USE_ROCM)
target_include_directories(test_aoti_inference PRIVATE ${ATen_HIP_INCLUDE})
target_compile_definitions(test_aoti_inference PRIVATE USE_ROCM)
endif()
target_compile_definitions(test_aoti_inference PRIVATE
CMAKE_CURRENT_BINARY_DIR=${CMAKE_CURRENT_BINARY_DIR}
)
target_compile_definitions(
test_aoti_inference
PRIVATE CMAKE_CURRENT_BINARY_DIR=${CMAKE_CURRENT_BINARY_DIR})
if(INSTALL_TEST)
install(TARGETS test_aoti_inference DESTINATION bin)
# Install PDB files for MSVC builds
if(MSVC AND BUILD_SHARED_LIBS)
install(FILES $<TARGET_PDB_FILE:test_aoti_inference> DESTINATION bin OPTIONAL)
install(
FILES $<TARGET_PDB_FILE:test_aoti_inference>
DESTINATION bin
OPTIONAL)
endif()
endif()

View File

@ -1,49 +1,48 @@
set(TORCH_API_TEST_DIR "${TORCH_ROOT}/test/cpp/api")
set(TORCH_API_TEST_SOURCES
${TORCH_ROOT}/test/cpp/common/main.cpp
${TORCH_API_TEST_DIR}/autograd.cpp
${TORCH_API_TEST_DIR}/any.cpp
${TORCH_API_TEST_DIR}/dataloader.cpp
${TORCH_API_TEST_DIR}/enum.cpp
${TORCH_API_TEST_DIR}/expanding-array.cpp
${TORCH_API_TEST_DIR}/fft.cpp
${TORCH_API_TEST_DIR}/functional.cpp
${TORCH_API_TEST_DIR}/init.cpp
${TORCH_API_TEST_DIR}/integration.cpp
${TORCH_API_TEST_DIR}/ivalue.cpp
${TORCH_API_TEST_DIR}/jit.cpp
${TORCH_API_TEST_DIR}/memory.cpp
${TORCH_API_TEST_DIR}/meta_tensor.cpp
${TORCH_API_TEST_DIR}/misc.cpp
${TORCH_API_TEST_DIR}/module.cpp
${TORCH_API_TEST_DIR}/moduledict.cpp
${TORCH_API_TEST_DIR}/modulelist.cpp
${TORCH_API_TEST_DIR}/modules.cpp
${TORCH_API_TEST_DIR}/nested.cpp
${TORCH_API_TEST_DIR}/parameterdict.cpp
${TORCH_API_TEST_DIR}/parameterlist.cpp
${TORCH_API_TEST_DIR}/namespace.cpp
${TORCH_API_TEST_DIR}/nn_utils.cpp
${TORCH_API_TEST_DIR}/optim.cpp
${TORCH_API_TEST_DIR}/ordered_dict.cpp
${TORCH_API_TEST_DIR}/rnn.cpp
${TORCH_API_TEST_DIR}/sequential.cpp
${TORCH_API_TEST_DIR}/transformer.cpp
${TORCH_API_TEST_DIR}/serialize.cpp
${TORCH_API_TEST_DIR}/special.cpp
${TORCH_API_TEST_DIR}/static.cpp
${TORCH_API_TEST_DIR}/support.cpp
${TORCH_API_TEST_DIR}/tensor_cuda.cpp
${TORCH_API_TEST_DIR}/tensor_indexing.cpp
${TORCH_API_TEST_DIR}/tensor_options_cuda.cpp
${TORCH_API_TEST_DIR}/tensor_options.cpp
${TORCH_API_TEST_DIR}/tensor.cpp
${TORCH_API_TEST_DIR}/torch_include.cpp
${TORCH_API_TEST_DIR}/inference_mode.cpp
${TORCH_API_TEST_DIR}/grad_mode.cpp
${TORCH_API_TEST_DIR}/operations.cpp
${TORCH_API_TEST_DIR}/nested_int.cpp
)
${TORCH_ROOT}/test/cpp/common/main.cpp
${TORCH_API_TEST_DIR}/autograd.cpp
${TORCH_API_TEST_DIR}/any.cpp
${TORCH_API_TEST_DIR}/dataloader.cpp
${TORCH_API_TEST_DIR}/enum.cpp
${TORCH_API_TEST_DIR}/expanding-array.cpp
${TORCH_API_TEST_DIR}/fft.cpp
${TORCH_API_TEST_DIR}/functional.cpp
${TORCH_API_TEST_DIR}/init.cpp
${TORCH_API_TEST_DIR}/integration.cpp
${TORCH_API_TEST_DIR}/ivalue.cpp
${TORCH_API_TEST_DIR}/jit.cpp
${TORCH_API_TEST_DIR}/memory.cpp
${TORCH_API_TEST_DIR}/meta_tensor.cpp
${TORCH_API_TEST_DIR}/misc.cpp
${TORCH_API_TEST_DIR}/module.cpp
${TORCH_API_TEST_DIR}/moduledict.cpp
${TORCH_API_TEST_DIR}/modulelist.cpp
${TORCH_API_TEST_DIR}/modules.cpp
${TORCH_API_TEST_DIR}/nested.cpp
${TORCH_API_TEST_DIR}/parameterdict.cpp
${TORCH_API_TEST_DIR}/parameterlist.cpp
${TORCH_API_TEST_DIR}/namespace.cpp
${TORCH_API_TEST_DIR}/nn_utils.cpp
${TORCH_API_TEST_DIR}/optim.cpp
${TORCH_API_TEST_DIR}/ordered_dict.cpp
${TORCH_API_TEST_DIR}/rnn.cpp
${TORCH_API_TEST_DIR}/sequential.cpp
${TORCH_API_TEST_DIR}/transformer.cpp
${TORCH_API_TEST_DIR}/serialize.cpp
${TORCH_API_TEST_DIR}/special.cpp
${TORCH_API_TEST_DIR}/static.cpp
${TORCH_API_TEST_DIR}/support.cpp
${TORCH_API_TEST_DIR}/tensor_cuda.cpp
${TORCH_API_TEST_DIR}/tensor_indexing.cpp
${TORCH_API_TEST_DIR}/tensor_options_cuda.cpp
${TORCH_API_TEST_DIR}/tensor_options.cpp
${TORCH_API_TEST_DIR}/tensor.cpp
${TORCH_API_TEST_DIR}/torch_include.cpp
${TORCH_API_TEST_DIR}/inference_mode.cpp
${TORCH_API_TEST_DIR}/grad_mode.cpp
${TORCH_API_TEST_DIR}/operations.cpp
${TORCH_API_TEST_DIR}/nested_int.cpp)
if(USE_CUDA OR USE_ROCM)
list(APPEND TORCH_API_TEST_SOURCES ${TORCH_API_TEST_DIR}/parallel.cpp)
endif()
@ -57,8 +56,8 @@ if(USE_CUDA)
endif()
if(NOT MSVC)
# Clang has an unfixed bug leading to spurious missing braces
# warnings, see https://bugs.llvm.org/show_bug.cgi?id=21629
# Clang has an unfixed bug leading to spurious missing braces warnings, see
# https://bugs.llvm.org/show_bug.cgi?id=21629
target_compile_options_if_supported(test_api "-Wno-missing-braces")
# Considered to be flaky. See the discussion at
# https://github.com/pytorch/pytorch/pull/9608
@ -67,17 +66,24 @@ if(NOT MSVC)
target_compile_options_if_supported(test_api "-Wno-unused-but-set-parameter")
# Add -Wno-error=nonnull for GCC 12+
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12)
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION
VERSION_GREATER_EQUAL 12)
target_compile_options_if_supported(test_api "-Wno-error=nonnull")
endif()
endif()
if(INSTALL_TEST)
set_target_properties(test_api PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
set_target_properties(
test_api
PROPERTIES INSTALL_RPATH
"${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
install(TARGETS test_api DESTINATION bin)
# Install PDB files for MSVC builds
if(MSVC AND BUILD_SHARED_LIBS)
install(FILES $<TARGET_PDB_FILE:test_api> DESTINATION bin OPTIONAL)
install(
FILES $<TARGET_PDB_FILE:test_api>
DESTINATION bin
OPTIONAL)
endif()
endif()

View File

@ -1,6 +1,7 @@
if(USE_CUDA)
add_library(c10d_cuda_test CUDATest.cu)
target_include_directories(c10d_cuda_test PRIVATE $<BUILD_INTERFACE:${TORCH_SRC_DIR}/csrc/distributed>)
target_include_directories(
c10d_cuda_test PRIVATE $<BUILD_INTERFACE:${TORCH_SRC_DIR}/csrc/distributed>)
target_link_libraries(c10d_cuda_test torch_cuda)
add_dependencies(c10d_cuda_test torch_cuda)
endif()
@ -12,49 +13,82 @@ function(c10d_add_test test_src)
set(multiValues LINK_LIBRARIES)
include(CMakeParseArguments)
cmake_parse_arguments(${prefix} "${noValues}" "${singleValues}" "${multiValues}" ${ARGN})
cmake_parse_arguments(${prefix} "${noValues}" "${singleValues}"
"${multiValues}" ${ARGN})
get_filename_component(test_name ${test_src} NAME_WE)
add_executable(${test_name} "${test_src}")
target_include_directories(${test_name} PRIVATE
target_include_directories(
${test_name}
PRIVATE
$<BUILD_INTERFACE:${TORCH_SRC_DIR}/csrc/distributed>
$<TARGET_PROPERTY:fmt::fmt-header-only,INTERFACE_INCLUDE_DIRECTORIES>
)
target_link_libraries(${test_name} PRIVATE
fmt::fmt-header-only
${ARG_LINK_LIBRARIES}
)
$<TARGET_PROPERTY:fmt::fmt-header-only,INTERFACE_INCLUDE_DIRECTORIES>)
target_link_libraries(${test_name} PRIVATE fmt::fmt-header-only
${ARG_LINK_LIBRARIES})
add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
if(ARG_INSTALL_TEST)
set_target_properties(${test_name} PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
set_target_properties(
${test_name}
PROPERTIES INSTALL_RPATH
"${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
install(TARGETS ${test_name} DESTINATION bin)
endif()
endfunction()
c10d_add_test(BackoffTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST OFF)
c10d_add_test(FileStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST ${INSTALL_TEST})
c10d_add_test(TCPStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST ${INSTALL_TEST})
c10d_add_test(BackoffTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST
OFF)
c10d_add_test(FileStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main
INSTALL_TEST ${INSTALL_TEST})
c10d_add_test(TCPStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST
${INSTALL_TEST})
if(NOT WIN32)
c10d_add_test(HashStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST ${INSTALL_TEST})
c10d_add_test(HashStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main
INSTALL_TEST ${INSTALL_TEST})
endif()
if(USE_CUDA)
if(USE_GLOO AND USE_C10D_GLOO)
c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main INSTALL_TEST ${INSTALL_TEST})
c10d_add_test(ProcessGroupGlooAsyncTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main INSTALL_TEST ${INSTALL_TEST})
c10d_add_test(
ProcessGroupGlooTest.cpp
LINK_LIBRARIES
torch_cpu
c10d_cuda_test
gtest_main
INSTALL_TEST
${INSTALL_TEST})
c10d_add_test(
ProcessGroupGlooAsyncTest.cpp
LINK_LIBRARIES
torch_cpu
c10d_cuda_test
gtest_main
INSTALL_TEST
${INSTALL_TEST})
endif()
if(USE_NCCL AND USE_C10D_NCCL)
# NCCL is a private dependency of libtorch, but the tests include some
# private headers of libtorch, which in turn include NCCL. As a hacky
# alternative to making NCCL a public dependency of libtorch, we make it
# a private dependency of the tests as well.
# alternative to making NCCL a public dependency of libtorch, we make it a
# private dependency of the tests as well.
c10d_add_test(
ProcessGroupNCCLTest.cpp
LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_nccl INSTALL_TEST ${INSTALL_TEST})
LINK_LIBRARIES
torch_cpu
c10d_cuda_test
gtest_main
__caffe2_nccl
INSTALL_TEST
${INSTALL_TEST})
c10d_add_test(
ProcessGroupNCCLErrorsTest.cpp
LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_nccl INSTALL_TEST ${INSTALL_TEST})
LINK_LIBRARIES
torch_cpu
c10d_cuda_test
gtest_main
__caffe2_nccl
INSTALL_TEST
${INSTALL_TEST})
if(INSTALL_TEST)
install(TARGETS c10d_cuda_test DESTINATION lib)
endif()
@ -62,33 +96,45 @@ if(USE_CUDA)
if(USE_UCC AND USE_C10D_UCC)
# UCC is a private dependency of libtorch, but the tests include some
# private headers of libtorch, which in turn include UCC. As a hacky
# alternative to making UCC a public dependency of libtorch, we make it
# a private dependency of the tests as well.
# alternative to making UCC a public dependency of libtorch, we make it a
# private dependency of the tests as well.
c10d_add_test(
ProcessGroupUCCTest.cpp
LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_ucc INSTALL_TEST ${INSTALL_TEST})
LINK_LIBRARIES
torch_cpu
c10d_cuda_test
gtest_main
__caffe2_ucc
INSTALL_TEST
${INSTALL_TEST})
if(INSTALL_TEST)
install(TARGETS c10d_cuda_test DESTINATION lib)
endif()
endif()
else()
if(USE_GLOO AND USE_C10D_GLOO)
c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST OFF)
c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu gtest_main
INSTALL_TEST OFF)
endif()
endif()
if(USE_MPI AND USE_C10D_MPI)
add_definitions(-DMPIEXEC=${MPIEXEC})
# MPI is a private dependency of libtorch, but the tests include some
# private headers of libtorch, which in turn include MPI. As a hacky
# alternative to making MPI a public dependency of libtorch, we make it
# a private dependency of the tests as well.
c10d_add_test(ProcessGroupMPITest.cpp LINK_LIBRARIES torch_cpu MPI::MPI_CXX INSTALL_TEST ${INSTALL_TEST})
# MPI is a private dependency of libtorch, but the tests include some private
# headers of libtorch, which in turn include MPI. As a hacky alternative to
# making MPI a public dependency of libtorch, we make it a private dependency
# of the tests as well.
c10d_add_test(ProcessGroupMPITest.cpp LINK_LIBRARIES torch_cpu MPI::MPI_CXX
INSTALL_TEST ${INSTALL_TEST})
endif()
if(LINUX AND USE_GLOO AND USE_C10D_GLOO)
if(LINUX
AND USE_GLOO
AND USE_C10D_GLOO)
add_executable(example_allreduce example/allreduce.cpp)
target_include_directories(example_allreduce PRIVATE $<BUILD_INTERFACE:${TORCH_SRC_DIR}/csrc/distributed>)
target_include_directories(
example_allreduce
PRIVATE $<BUILD_INTERFACE:${TORCH_SRC_DIR}/csrc/distributed>)
target_link_libraries(example_allreduce torch_cpu)
if(USE_CUDA)
target_link_libraries(example_allreduce torch_cuda)

View File

@ -1,9 +1,8 @@
if(USE_DISTRIBUTED AND NOT WIN32)
set(DIST_AUTOGRAD_TEST_DIR "${TORCH_ROOT}/test/cpp/dist_autograd")
set(DIST_AUTOGRAD_TEST_SOURCES
${TORCH_ROOT}/test/cpp/common/main.cpp
${DIST_AUTOGRAD_TEST_DIR}/test_dist_autograd.cpp
)
${TORCH_ROOT}/test/cpp/common/main.cpp
${DIST_AUTOGRAD_TEST_DIR}/test_dist_autograd.cpp)
add_executable(test_dist_autograd ${DIST_AUTOGRAD_TEST_SOURCES})
target_include_directories(test_dist_autograd PRIVATE ${ATen_CPU_INCLUDE})
@ -14,11 +13,17 @@ if(USE_DISTRIBUTED AND NOT WIN32)
endif()
if(INSTALL_TEST)
set_target_properties(test_dist_autograd PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
set_target_properties(
test_dist_autograd
PROPERTIES INSTALL_RPATH
"${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
install(TARGETS test_dist_autograd DESTINATION bin)
# Install PDB files for MSVC builds
if(MSVC AND BUILD_SHARED_LIBS)
install(FILES $<TARGET_PDB_FILE:test_dist_autograd> DESTINATION bin OPTIONAL)
install(
FILES $<TARGET_PDB_FILE:test_dist_autograd>
DESTINATION bin
OPTIONAL)
endif()
endif()
endif()

View File

@ -1,33 +1,30 @@
set(JIT_TEST_ROOT ${TORCH_ROOT}/test/cpp/jit)
# Build separate libraries the define custom classes/operators used from our Python tests.
# These are intended to be used with torch.ops.load_library() in our Python test suite.
add_library(torchbind_test SHARED
${JIT_TEST_ROOT}/test_custom_class_registrations.h
${JIT_TEST_ROOT}/test_custom_class_registrations.cpp
)
# Build separate libraries that define custom classes/operators used from our
# Python tests. These are intended to be used with torch.ops.load_library() in
# our Python test suite.
add_library(
torchbind_test SHARED ${JIT_TEST_ROOT}/test_custom_class_registrations.h
${JIT_TEST_ROOT}/test_custom_class_registrations.cpp)
target_link_libraries(torchbind_test torch)
add_library(jitbackend_test SHARED ${JIT_TEST_ROOT}/test_backend_lib.cpp)
target_link_libraries(jitbackend_test torch)
set(BACKEND_WITH_COMPILER_SRCS
${JIT_TEST_ROOT}/test_backend_compiler_lib.cpp
${JIT_TEST_ROOT}/test_backend_compiler_preprocess.cpp
)
${JIT_TEST_ROOT}/test_backend_compiler_lib.cpp
${JIT_TEST_ROOT}/test_backend_compiler_preprocess.cpp)
if(USE_KINETO)
# Testing edge profiler for backend use
# profiler_edge should only be added when USE_KINETO flag is on
# Testing the edge profiler for backend use. profiler_edge should only be
# added when the USE_KINETO flag is on.
list(APPEND BACKEND_WITH_COMPILER_SRCS
${TORCH_SRC_DIR}/csrc/jit/mobile/profiler_edge.cpp)
${TORCH_SRC_DIR}/csrc/jit/mobile/profiler_edge.cpp)
endif()
add_library(backend_with_compiler SHARED
${BACKEND_WITH_COMPILER_SRCS}
)
add_library(backend_with_compiler SHARED ${BACKEND_WITH_COMPILER_SRCS})
if(USE_KINETO)
set_target_properties(backend_with_compiler PROPERTIES COMPILE_FLAGS
"-DUSE_KINETO")
"-DUSE_KINETO")
endif()
target_link_libraries(backend_with_compiler torch)
@ -39,68 +36,64 @@ endif()
# Build the cpp gtest binary containing the cpp-only tests.
set(JIT_TEST_SRCS
${JIT_TEST_ROOT}/source_range_test.cpp
${JIT_TEST_ROOT}/test_add_if_then_else.cpp
${JIT_TEST_ROOT}/test_alias_analysis.cpp
${JIT_TEST_ROOT}/test_argument_spec.cpp
${JIT_TEST_ROOT}/test_autodiff.cpp
${JIT_TEST_ROOT}/test_load_upgraders.cpp
${JIT_TEST_ROOT}/test_op_replacement.cpp
${JIT_TEST_ROOT}/test_upgrader_utils.cpp
${JIT_TEST_ROOT}/test_backend.cpp
${JIT_TEST_ROOT}/test_class_import.cpp
${JIT_TEST_ROOT}/test_class_parser.cpp
${JIT_TEST_ROOT}/test_class_type.cpp
${JIT_TEST_ROOT}/test_code_template.cpp
${JIT_TEST_ROOT}/test_concat_opt.cpp
${JIT_TEST_ROOT}/test_constant_pooling.cpp
${JIT_TEST_ROOT}/test_cleanup_passes.cpp
${JIT_TEST_ROOT}/test_create_autodiff_subgraphs.cpp
${JIT_TEST_ROOT}/test_custom_class.cpp
${JIT_TEST_ROOT}/test_custom_class_registrations.h
${JIT_TEST_ROOT}/test_custom_class_registrations.cpp
${JIT_TEST_ROOT}/test_custom_operators.cpp
${JIT_TEST_ROOT}/test_dce.cpp
${JIT_TEST_ROOT}/test_fuser.cpp
${JIT_TEST_ROOT}/test_graph_executor.cpp
${JIT_TEST_ROOT}/test_graph_iterator.cpp
${JIT_TEST_ROOT}/test_cs_debug_info_serialization.cpp
${JIT_TEST_ROOT}/test_inliner.cpp
${JIT_TEST_ROOT}/test_interface.cpp
${JIT_TEST_ROOT}/test_interpreter.cpp
${JIT_TEST_ROOT}/test_ir.cpp
${JIT_TEST_ROOT}/test_irparser.cpp
${JIT_TEST_ROOT}/test_jit_type.cpp
${JIT_TEST_ROOT}/test_lexer.cpp
${JIT_TEST_ROOT}/test_lite_interpreter.cpp
${JIT_TEST_ROOT}/test_lite_interpreter_direct.cpp
${JIT_TEST_ROOT}/test_lite_trainer.cpp
${JIT_TEST_ROOT}/test_memory_dag.cpp
${JIT_TEST_ROOT}/test_misc.cpp
${JIT_TEST_ROOT}/test_mobile_type_parser.cpp
${JIT_TEST_ROOT}/test_module_api.cpp
${JIT_TEST_ROOT}/test_peephole_optimize.cpp
${JIT_TEST_ROOT}/test_qualified_name.cpp
${JIT_TEST_ROOT}/test_save_load.cpp
${JIT_TEST_ROOT}/test_schema_info.cpp
${JIT_TEST_ROOT}/test_schema_matching.cpp
${JIT_TEST_ROOT}/test_stack_opt.cpp
${JIT_TEST_ROOT}/test_subgraph_matcher.cpp
${JIT_TEST_ROOT}/test_subgraph_rewriter.cpp
${JIT_TEST_ROOT}/test_subgraph_utils.cpp
${JIT_TEST_ROOT}/test_union.cpp
${JIT_TEST_ROOT}/test_utils.cpp
${JIT_TEST_ROOT}/test_script_profile.cpp
${JIT_TEST_ROOT}/test_shape_analysis.cpp
${JIT_TEST_ROOT}/test_jit_logging_levels.cpp
${JIT_TEST_ROOT}/test_file_format.cpp
${JIT_TEST_ROOT}/test_flatbuffer.cpp
)
${JIT_TEST_ROOT}/source_range_test.cpp
${JIT_TEST_ROOT}/test_add_if_then_else.cpp
${JIT_TEST_ROOT}/test_alias_analysis.cpp
${JIT_TEST_ROOT}/test_argument_spec.cpp
${JIT_TEST_ROOT}/test_autodiff.cpp
${JIT_TEST_ROOT}/test_load_upgraders.cpp
${JIT_TEST_ROOT}/test_op_replacement.cpp
${JIT_TEST_ROOT}/test_upgrader_utils.cpp
${JIT_TEST_ROOT}/test_backend.cpp
${JIT_TEST_ROOT}/test_class_import.cpp
${JIT_TEST_ROOT}/test_class_parser.cpp
${JIT_TEST_ROOT}/test_class_type.cpp
${JIT_TEST_ROOT}/test_code_template.cpp
${JIT_TEST_ROOT}/test_concat_opt.cpp
${JIT_TEST_ROOT}/test_constant_pooling.cpp
${JIT_TEST_ROOT}/test_cleanup_passes.cpp
${JIT_TEST_ROOT}/test_create_autodiff_subgraphs.cpp
${JIT_TEST_ROOT}/test_custom_class.cpp
${JIT_TEST_ROOT}/test_custom_class_registrations.h
${JIT_TEST_ROOT}/test_custom_class_registrations.cpp
${JIT_TEST_ROOT}/test_custom_operators.cpp
${JIT_TEST_ROOT}/test_dce.cpp
${JIT_TEST_ROOT}/test_fuser.cpp
${JIT_TEST_ROOT}/test_graph_executor.cpp
${JIT_TEST_ROOT}/test_graph_iterator.cpp
${JIT_TEST_ROOT}/test_cs_debug_info_serialization.cpp
${JIT_TEST_ROOT}/test_inliner.cpp
${JIT_TEST_ROOT}/test_interface.cpp
${JIT_TEST_ROOT}/test_interpreter.cpp
${JIT_TEST_ROOT}/test_ir.cpp
${JIT_TEST_ROOT}/test_irparser.cpp
${JIT_TEST_ROOT}/test_jit_type.cpp
${JIT_TEST_ROOT}/test_lexer.cpp
${JIT_TEST_ROOT}/test_lite_interpreter.cpp
${JIT_TEST_ROOT}/test_lite_interpreter_direct.cpp
${JIT_TEST_ROOT}/test_lite_trainer.cpp
${JIT_TEST_ROOT}/test_memory_dag.cpp
${JIT_TEST_ROOT}/test_misc.cpp
${JIT_TEST_ROOT}/test_mobile_type_parser.cpp
${JIT_TEST_ROOT}/test_module_api.cpp
${JIT_TEST_ROOT}/test_peephole_optimize.cpp
${JIT_TEST_ROOT}/test_qualified_name.cpp
${JIT_TEST_ROOT}/test_save_load.cpp
${JIT_TEST_ROOT}/test_schema_info.cpp
${JIT_TEST_ROOT}/test_schema_matching.cpp
${JIT_TEST_ROOT}/test_stack_opt.cpp
${JIT_TEST_ROOT}/test_subgraph_matcher.cpp
${JIT_TEST_ROOT}/test_subgraph_rewriter.cpp
${JIT_TEST_ROOT}/test_subgraph_utils.cpp
${JIT_TEST_ROOT}/test_union.cpp
${JIT_TEST_ROOT}/test_utils.cpp
${JIT_TEST_ROOT}/test_script_profile.cpp
${JIT_TEST_ROOT}/test_shape_analysis.cpp
${JIT_TEST_ROOT}/test_jit_logging_levels.cpp
${JIT_TEST_ROOT}/test_file_format.cpp
${JIT_TEST_ROOT}/test_flatbuffer.cpp)
add_executable(test_jit
${TORCH_ROOT}/test/cpp/common/main.cpp
${JIT_TEST_SRCS}
)
add_executable(test_jit ${TORCH_ROOT}/test/cpp/common/main.cpp ${JIT_TEST_SRCS})
# We also build with UBSAN flag in build_asan.h
if(USE_ASAN)
@ -108,9 +101,7 @@ if(USE_ASAN)
target_link_libraries(test_jit PRIVATE "-fsanitize=undefined")
endif()
target_link_libraries(
test_jit PRIVATE flatbuffers)
target_link_libraries(test_jit PRIVATE flatbuffers)
# TODO temporary until we can delete the old gtest polyfills.
target_compile_definitions(test_jit PRIVATE USE_GTEST)
@ -125,7 +116,8 @@ if(USE_MKLDNN)
target_link_libraries(test_jit PRIVATE caffe2::mkldnn)
endif()
set(JIT_TEST_DEPENDENCIES torch gtest_main jitbackend_test backend_with_compiler gmock)
set(JIT_TEST_DEPENDENCIES torch gtest_main jitbackend_test
backend_with_compiler gmock)
if(MSVC)
list(APPEND JIT_TEST_DEPENDENCIES onnx_library)
@ -135,30 +127,39 @@ target_link_libraries(test_jit PRIVATE ${JIT_TEST_DEPENDENCIES})
target_include_directories(test_jit PRIVATE ${ATen_CPU_INCLUDE})
if(LINUX)
#Update to target_link_options when CMake version can be upgraded
target_link_libraries(test_jit PRIVATE "-Wl,--no-as-needed,$<TARGET_FILE:jitbackend_test>,$<TARGET_FILE:backend_with_compiler>,--as-needed")
# Update to target_link_options when CMake version can be upgraded
target_link_libraries(
test_jit
PRIVATE
"-Wl,--no-as-needed,$<TARGET_FILE:jitbackend_test>,$<TARGET_FILE:backend_with_compiler>,--as-needed"
)
endif()
if(USE_CUDA)
target_compile_definitions(test_jit PRIVATE USE_CUDA)
# Suppress sign compare checks for NVFUSER JIT tests
if(NOT MSVC)
target_compile_options(test_jit PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-Wno-sign-compare>)
target_compile_options(test_jit
PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-Wno-sign-compare>)
endif()
elseif(USE_ROCM)
target_link_libraries(test_jit PRIVATE
hiprtc::hiprtc
hip::amdhip64
${TORCH_CUDA_LIBRARIES})
target_link_libraries(test_jit PRIVATE hiprtc::hiprtc hip::amdhip64
${TORCH_CUDA_LIBRARIES})
target_compile_definitions(test_jit PRIVATE USE_ROCM)
endif()
if(INSTALL_TEST)
set_target_properties(test_jit PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
set_target_properties(
test_jit
PROPERTIES INSTALL_RPATH
"${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
install(TARGETS test_jit DESTINATION bin)
# Install PDB files for MSVC builds
if(MSVC AND BUILD_SHARED_LIBS)
install(FILES $<TARGET_PDB_FILE:test_jit> DESTINATION bin OPTIONAL)
install(
FILES $<TARGET_PDB_FILE:test_jit>
DESTINATION bin
OPTIONAL)
endif()
endif()

View File

@ -2,28 +2,23 @@ set(LAZY_TEST_ROOT ${TORCH_ROOT}/test/cpp/lazy)
# Build the cpp gtest binary containing the cpp-only tests.
set(LAZY_TEST_SRCS
${LAZY_TEST_ROOT}/test_backend_device.cpp
${LAZY_TEST_ROOT}/test_cache.cpp
${LAZY_TEST_ROOT}/test_ir.cpp
${LAZY_TEST_ROOT}/test_ir_util.cpp
${LAZY_TEST_ROOT}/test_misc.cpp
${LAZY_TEST_ROOT}/test_permutation_util.cpp
${LAZY_TEST_ROOT}/test_shape.cpp
${LAZY_TEST_ROOT}/test_trie_cache.cpp
${LAZY_TEST_ROOT}/test_util.cpp
${LAZY_TEST_ROOT}/test_lazy_graph_executor.cpp
)
${LAZY_TEST_ROOT}/test_backend_device.cpp
${LAZY_TEST_ROOT}/test_cache.cpp
${LAZY_TEST_ROOT}/test_ir.cpp
${LAZY_TEST_ROOT}/test_ir_util.cpp
${LAZY_TEST_ROOT}/test_misc.cpp
${LAZY_TEST_ROOT}/test_permutation_util.cpp
${LAZY_TEST_ROOT}/test_shape.cpp
${LAZY_TEST_ROOT}/test_trie_cache.cpp
${LAZY_TEST_ROOT}/test_util.cpp
${LAZY_TEST_ROOT}/test_lazy_graph_executor.cpp)
if(BUILD_LAZY_TS_BACKEND)
list(APPEND LAZY_TEST_SRCS
${LAZY_TEST_ROOT}/test_lazy_ops.cpp
${LAZY_TEST_ROOT}/test_lazy_ops_util.cpp
)
list(APPEND LAZY_TEST_SRCS ${LAZY_TEST_ROOT}/test_lazy_ops.cpp
${LAZY_TEST_ROOT}/test_lazy_ops_util.cpp)
endif()
add_executable(test_lazy
${TORCH_ROOT}/test/cpp/common/main.cpp
${LAZY_TEST_SRCS}
)
add_executable(test_lazy ${TORCH_ROOT}/test/cpp/common/main.cpp
${LAZY_TEST_SRCS})
# TODO temporary until we can delete the old gtest polyfills.
target_compile_definitions(test_lazy PRIVATE USE_GTEST)
@ -36,19 +31,23 @@ target_include_directories(test_lazy PRIVATE ${ATen_CPU_INCLUDE})
if(USE_CUDA)
target_compile_definitions(test_lazy PRIVATE USE_CUDA)
elseif(USE_ROCM)
target_link_libraries(test_lazy PRIVATE
hiprtc::hiprtc
hip::amdhip64
${TORCH_CUDA_LIBRARIES})
target_link_libraries(test_lazy PRIVATE hiprtc::hiprtc hip::amdhip64
${TORCH_CUDA_LIBRARIES})
target_compile_definitions(test_lazy PRIVATE USE_ROCM)
endif()
if(INSTALL_TEST)
set_target_properties(test_lazy PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
set_target_properties(
test_lazy
PROPERTIES INSTALL_RPATH
"${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
install(TARGETS test_lazy DESTINATION bin)
# Install PDB files for MSVC builds
if(MSVC AND BUILD_SHARED_LIBS)
install(FILES $<TARGET_PDB_FILE:test_lazy> DESTINATION bin OPTIONAL)
install(
FILES $<TARGET_PDB_FILE:test_lazy>
DESTINATION bin
OPTIONAL)
endif()
endif()

View File

@ -1,30 +1,30 @@
set(
LITE_INTERPRETER_RUNTIME_TEST_DIR
"${TORCH_ROOT}/test/cpp/lite_interpreter_runtime")
set(LITE_INTERPRETER_RUNTIME_TEST_DIR
${TORCH_ROOT}/test/cpp/lite_interpreter_runtime/main.cpp
${TORCH_ROOT}/test/cpp/lite_interpreter_runtime/test_lite_interpreter_runtime.cpp
${TORCH_ROOT}/test/cpp/lite_interpreter_runtime/test_mobile_profiler.cpp
)
"${TORCH_ROOT}/test/cpp/lite_interpreter_runtime")
set(LITE_INTERPRETER_RUNTIME_TEST_DIR
${TORCH_ROOT}/test/cpp/lite_interpreter_runtime/main.cpp
${TORCH_ROOT}/test/cpp/lite_interpreter_runtime/test_lite_interpreter_runtime.cpp
${TORCH_ROOT}/test/cpp/lite_interpreter_runtime/test_mobile_profiler.cpp)
add_library(backend_with_compiler_runtime SHARED
${TORCH_ROOT}/test/cpp/jit/test_backend_compiler_lib.cpp
${TORCH_ROOT}/torch/csrc/jit/backends/backend_interface.cpp
)
add_library(
backend_with_compiler_runtime SHARED
${TORCH_ROOT}/test/cpp/jit/test_backend_compiler_lib.cpp
${TORCH_ROOT}/torch/csrc/jit/backends/backend_interface.cpp)
target_link_libraries(backend_with_compiler_runtime PRIVATE torch)
add_executable(
test_lite_interpreter_runtime
${LITE_INTERPRETER_RUNTIME_TEST_DIR})
target_include_directories(
test_lite_interpreter_runtime PRIVATE
${ATen_CPU_INCLUDE}
)
add_executable(test_lite_interpreter_runtime
${LITE_INTERPRETER_RUNTIME_TEST_DIR})
target_include_directories(test_lite_interpreter_runtime
PRIVATE ${ATen_CPU_INCLUDE})
target_link_libraries(test_lite_interpreter_runtime PRIVATE torch gtest_main backend_with_compiler_runtime)
target_link_libraries(test_lite_interpreter_runtime
PRIVATE torch gtest_main backend_with_compiler_runtime)
if(LINUX)
target_link_libraries(test_lite_interpreter_runtime PRIVATE "-Wl,--no-as-needed,$<TARGET_FILE:backend_with_compiler_runtime>,--as-needed")
target_link_libraries(
test_lite_interpreter_runtime
PRIVATE
"-Wl,--no-as-needed,$<TARGET_FILE:backend_with_compiler_runtime>,--as-needed"
)
endif()
if(INSTALL_TEST)
@ -33,6 +33,7 @@ if(INSTALL_TEST)
if(MSVC AND BUILD_SHARED_LIBS)
install(
FILES $<TARGET_PDB_FILE:test_lite_interpreter_runtime>
DESTINATION bin OPTIONAL)
DESTINATION bin
OPTIONAL)
endif()
endif()

View File

@ -4,41 +4,38 @@ file(GLOB_RECURSE NATIVERT_ALL_TEST_FILES "${NATIVERT_TEST_ROOT}/test_*.cpp")
# Build the cpp gtest binary containing the cpp-only tests.
set(NATIVERT_TEST_SRCS
${NATIVERT_ALL_TEST_FILES}
${TORCH_ROOT}/torch/nativert/graph/TensorMeta.cpp
${TORCH_ROOT}/torch/nativert/graph/Graph.cpp
${TORCH_ROOT}/torch/nativert/graph/GraphSignature.cpp
${TORCH_ROOT}/torch/nativert/graph/Serialization.cpp
${TORCH_ROOT}/torch/nativert/executor/OpKernel.cpp
${TORCH_ROOT}/torch/nativert/executor/PlacementUtils.cpp
${TORCH_ROOT}/torch/nativert/executor/Weights.cpp
${TORCH_ROOT}/torch/nativert/common/FileUtil.cpp
${TORCH_ROOT}/torch/nativert/executor/memory/FunctionSchema.cpp
${TORCH_ROOT}/torch/nativert/executor/ExecutionPlanner.cpp
${TORCH_ROOT}/torch/nativert/detail/ITree.cpp
${TORCH_ROOT}/torch/nativert/executor/ExecutionFrame.cpp
${TORCH_ROOT}/torch/nativert/kernels/C10Kernel.cpp
${TORCH_ROOT}/torch/nativert/executor/memory/GreedyBySize.cpp
${TORCH_ROOT}/torch/nativert/executor/memory/Bump.cpp
${TORCH_ROOT}/torch/nativert/executor/memory/DisjointStorageGroups.cpp
${TORCH_ROOT}/torch/nativert/executor/memory/LayoutPlanner.cpp
${TORCH_ROOT}/torch/nativert/executor/memory/LayoutManager.cpp
${TORCH_ROOT}/torch/nativert/executor/memory/AliasAnalyzer.cpp
${TORCH_ROOT}/torch/nativert/executor/Executor.cpp
${TORCH_ROOT}/torch/nativert/kernels/KernelFactory.cpp
${TORCH_ROOT}/torch/nativert/executor/ConstantFolder.cpp
${TORCH_ROOT}/torch/nativert/executor/GraphExecutorBase.cpp
${TORCH_ROOT}/torch/nativert/executor/SerialGraphExecutor.cpp
${TORCH_ROOT}/torch/nativert/executor/ParallelGraphExecutor.cpp
${TORCH_ROOT}/torch/nativert/kernels/AutoFunctionalizeKernel.cpp
${TORCH_ROOT}/torch/nativert/kernels/CallTorchBindKernel.cpp
${TORCH_ROOT}/torch/nativert/kernels/HigherOrderKernel.cpp
)
${NATIVERT_ALL_TEST_FILES}
${TORCH_ROOT}/torch/nativert/graph/TensorMeta.cpp
${TORCH_ROOT}/torch/nativert/graph/Graph.cpp
${TORCH_ROOT}/torch/nativert/graph/GraphSignature.cpp
${TORCH_ROOT}/torch/nativert/graph/Serialization.cpp
${TORCH_ROOT}/torch/nativert/executor/OpKernel.cpp
${TORCH_ROOT}/torch/nativert/executor/PlacementUtils.cpp
${TORCH_ROOT}/torch/nativert/executor/Weights.cpp
${TORCH_ROOT}/torch/nativert/common/FileUtil.cpp
${TORCH_ROOT}/torch/nativert/executor/memory/FunctionSchema.cpp
${TORCH_ROOT}/torch/nativert/executor/ExecutionPlanner.cpp
${TORCH_ROOT}/torch/nativert/detail/ITree.cpp
${TORCH_ROOT}/torch/nativert/executor/ExecutionFrame.cpp
${TORCH_ROOT}/torch/nativert/kernels/C10Kernel.cpp
${TORCH_ROOT}/torch/nativert/executor/memory/GreedyBySize.cpp
${TORCH_ROOT}/torch/nativert/executor/memory/Bump.cpp
${TORCH_ROOT}/torch/nativert/executor/memory/DisjointStorageGroups.cpp
${TORCH_ROOT}/torch/nativert/executor/memory/LayoutPlanner.cpp
${TORCH_ROOT}/torch/nativert/executor/memory/LayoutManager.cpp
${TORCH_ROOT}/torch/nativert/executor/memory/AliasAnalyzer.cpp
${TORCH_ROOT}/torch/nativert/executor/Executor.cpp
${TORCH_ROOT}/torch/nativert/kernels/KernelFactory.cpp
${TORCH_ROOT}/torch/nativert/executor/ConstantFolder.cpp
${TORCH_ROOT}/torch/nativert/executor/GraphExecutorBase.cpp
${TORCH_ROOT}/torch/nativert/executor/SerialGraphExecutor.cpp
${TORCH_ROOT}/torch/nativert/executor/ParallelGraphExecutor.cpp
${TORCH_ROOT}/torch/nativert/kernels/AutoFunctionalizeKernel.cpp
${TORCH_ROOT}/torch/nativert/kernels/CallTorchBindKernel.cpp
${TORCH_ROOT}/torch/nativert/kernels/HigherOrderKernel.cpp)
add_executable(test_nativert
${TORCH_ROOT}/test/cpp/common/main.cpp
${NATIVERT_TEST_SRCS}
)
add_executable(test_nativert ${TORCH_ROOT}/test/cpp/common/main.cpp
${NATIVERT_TEST_SRCS})
# TODO temporary until we can delete the old gtest polyfills.
target_compile_definitions(test_nativert PRIVATE USE_GTEST)
@ -52,19 +49,23 @@ target_include_directories(test_nativert PRIVATE ${ATen_CPU_INCLUDE})
if(USE_CUDA)
target_compile_definitions(test_nativert PRIVATE USE_CUDA)
elseif(USE_ROCM)
target_link_libraries(test_nativert PRIVATE
hiprtc::hiprtc
hip::amdhip64
${TORCH_CUDA_LIBRARIES})
target_link_libraries(test_nativert PRIVATE hiprtc::hiprtc hip::amdhip64
${TORCH_CUDA_LIBRARIES})
target_compile_definitions(test_nativert PRIVATE USE_ROCM)
endif()
if(INSTALL_TEST)
set_target_properties(test_nativert PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
set_target_properties(
test_nativert
PROPERTIES INSTALL_RPATH
"${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
install(TARGETS test_nativert DESTINATION bin)
# Install PDB files for MSVC builds
if(MSVC AND BUILD_SHARED_LIBS)
install(FILES $<TARGET_PDB_FILE:test_nativert> DESTINATION bin OPTIONAL)
install(
FILES $<TARGET_PDB_FILE:test_nativert>
DESTINATION bin
OPTIONAL)
endif()
endif()
View File
@ -1,35 +1,25 @@
set(TORCH_RPC_TEST_DIR "${TORCH_ROOT}/test/cpp/rpc")
set(TORCH_RPC_TEST_SOURCES
${TORCH_ROOT}/test/cpp/common/main.cpp
${TORCH_RPC_TEST_DIR}/e2e_test_base.cpp
${TORCH_RPC_TEST_DIR}/test_wire_serialization.cpp
)
set(TORCH_RPC_TEST_DEPENDENCY_LIBS
torch gtest_main
)
${TORCH_ROOT}/test/cpp/common/main.cpp
${TORCH_RPC_TEST_DIR}/e2e_test_base.cpp
${TORCH_RPC_TEST_DIR}/test_wire_serialization.cpp)
set(TORCH_RPC_TEST_DEPENDENCY_LIBS torch gtest_main)
if(USE_GLOO)
list(APPEND TORCH_RPC_TEST_SOURCES
${TORCH_RPC_TEST_DIR}/test_e2e_tensorpipe.cpp
)
${TORCH_RPC_TEST_DIR}/test_e2e_tensorpipe.cpp)
endif()
if(USE_TENSORPIPE)
list(APPEND TORCH_RPC_TEST_SOURCES
${TORCH_RPC_TEST_DIR}/test_tensorpipe_serialization.cpp
)
list(APPEND TORCH_RPC_TEST_DEPENDENCY_LIBS
tensorpipe
)
${TORCH_RPC_TEST_DIR}/test_tensorpipe_serialization.cpp)
list(APPEND TORCH_RPC_TEST_DEPENDENCY_LIBS tensorpipe)
endif()
add_executable(test_cpp_rpc ${TORCH_RPC_TEST_SOURCES})
target_include_directories(test_cpp_rpc PRIVATE ${ATen_CPU_INCLUDE})
target_include_directories(
test_cpp_rpc PRIVATE
${ATen_CPU_INCLUDE})
target_include_directories(
test_cpp_rpc PRIVATE
$<BUILD_INTERFACE:${TORCH_SRC_DIR}/csrc/distributed>)
test_cpp_rpc PRIVATE $<BUILD_INTERFACE:${TORCH_SRC_DIR}/csrc/distributed>)
target_link_libraries(test_cpp_rpc PRIVATE ${TORCH_RPC_TEST_DEPENDENCY_LIBS})
if(USE_CUDA)
@ -37,10 +27,16 @@ if(USE_CUDA)
endif()
if(INSTALL_TEST)
set_target_properties(test_cpp_rpc PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
set_target_properties(
test_cpp_rpc
PROPERTIES INSTALL_RPATH
"${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
install(TARGETS test_cpp_rpc DESTINATION bin)
# Install PDB files for MSVC builds
if(MSVC AND BUILD_SHARED_LIBS)
install(FILES $<TARGET_PDB_FILE:test_cpp_rpc> DESTINATION bin OPTIONAL)
install(
FILES $<TARGET_PDB_FILE:test_cpp_rpc>
DESTINATION bin
OPTIONAL)
endif()
endif()
View File
@ -1,30 +1,29 @@
set(TENSOREXPR_TEST_ROOT ${TORCH_ROOT}/test/cpp/tensorexpr)
set(TENSOREXPR_TEST_SRCS
${TENSOREXPR_TEST_ROOT}/test_approx.cpp
${TENSOREXPR_TEST_ROOT}/test_aten.cpp
${TENSOREXPR_TEST_ROOT}/test_boundsinference.cpp
${TENSOREXPR_TEST_ROOT}/test_conv.cpp
${TENSOREXPR_TEST_ROOT}/test_cpp_codegen.cpp
${TENSOREXPR_TEST_ROOT}/test_dynamic_shapes.cpp
${TENSOREXPR_TEST_ROOT}/test_expr.cpp
${TENSOREXPR_TEST_ROOT}/test_external_calls.cpp
${TENSOREXPR_TEST_ROOT}/test_graph_opt.cpp
${TENSOREXPR_TEST_ROOT}/test_ir_printer.cpp
${TENSOREXPR_TEST_ROOT}/test_ir_verifier.cpp
${TENSOREXPR_TEST_ROOT}/test_kernel.cpp
${TENSOREXPR_TEST_ROOT}/test_loopnest.cpp
${TENSOREXPR_TEST_ROOT}/test_memdependency.cpp
${TENSOREXPR_TEST_ROOT}/test_ops.cpp
${TENSOREXPR_TEST_ROOT}/test_quantization.cpp
${TENSOREXPR_TEST_ROOT}/test_memplanning.cpp
${TENSOREXPR_TEST_ROOT}/test_reductions.cpp
${TENSOREXPR_TEST_ROOT}/test_registerizer.cpp
${TENSOREXPR_TEST_ROOT}/test_simplify.cpp
${TENSOREXPR_TEST_ROOT}/test_te_fuser_pass.cpp
${TENSOREXPR_TEST_ROOT}/test_type.cpp
${TENSOREXPR_TEST_ROOT}/test_type_specializations.cpp
)
${TENSOREXPR_TEST_ROOT}/test_approx.cpp
${TENSOREXPR_TEST_ROOT}/test_aten.cpp
${TENSOREXPR_TEST_ROOT}/test_boundsinference.cpp
${TENSOREXPR_TEST_ROOT}/test_conv.cpp
${TENSOREXPR_TEST_ROOT}/test_cpp_codegen.cpp
${TENSOREXPR_TEST_ROOT}/test_dynamic_shapes.cpp
${TENSOREXPR_TEST_ROOT}/test_expr.cpp
${TENSOREXPR_TEST_ROOT}/test_external_calls.cpp
${TENSOREXPR_TEST_ROOT}/test_graph_opt.cpp
${TENSOREXPR_TEST_ROOT}/test_ir_printer.cpp
${TENSOREXPR_TEST_ROOT}/test_ir_verifier.cpp
${TENSOREXPR_TEST_ROOT}/test_kernel.cpp
${TENSOREXPR_TEST_ROOT}/test_loopnest.cpp
${TENSOREXPR_TEST_ROOT}/test_memdependency.cpp
${TENSOREXPR_TEST_ROOT}/test_ops.cpp
${TENSOREXPR_TEST_ROOT}/test_quantization.cpp
${TENSOREXPR_TEST_ROOT}/test_memplanning.cpp
${TENSOREXPR_TEST_ROOT}/test_reductions.cpp
${TENSOREXPR_TEST_ROOT}/test_registerizer.cpp
${TENSOREXPR_TEST_ROOT}/test_simplify.cpp
${TENSOREXPR_TEST_ROOT}/test_te_fuser_pass.cpp
${TENSOREXPR_TEST_ROOT}/test_type.cpp
${TENSOREXPR_TEST_ROOT}/test_type_specializations.cpp)
if(USE_CUDA)
list(APPEND TENSOREXPR_TEST_SRCS ${TENSOREXPR_TEST_ROOT}/test_cuda.cpp)
@ -34,10 +33,10 @@ if(USE_LLVM AND LLVM_FOUND)
list(APPEND TENSOREXPR_TEST_SRCS ${TENSOREXPR_TEST_ROOT}/test_llvm.cpp)
endif()
add_executable(test_tensorexpr
add_executable(
test_tensorexpr
${TORCH_ROOT}/test/cpp/common/main.cpp
${TENSOREXPR_TEST_ROOT}/padded_buffer.cpp
${TENSOREXPR_TEST_SRCS})
${TENSOREXPR_TEST_ROOT}/padded_buffer.cpp ${TENSOREXPR_TEST_SRCS})
target_link_libraries(test_tensorexpr PRIVATE torch gtest_main)
target_include_directories(test_tensorexpr PRIVATE ${ATen_CPU_INCLUDE})
@ -57,27 +56,36 @@ if(USE_CUDA)
target_compile_definitions(test_tensorexpr PRIVATE USE_CUDA)
target_compile_definitions(tutorial_tensorexpr PRIVATE USE_CUDA)
elseif(USE_ROCM)
target_link_libraries(test_tensorexpr PRIVATE
hiprtc::hiprtc
hip::amdhip64
${TORCH_CUDA_LIBRARIES})
target_link_libraries(test_tensorexpr PRIVATE hiprtc::hiprtc hip::amdhip64
${TORCH_CUDA_LIBRARIES})
target_compile_definitions(test_tensorexpr PRIVATE USE_ROCM)
target_link_libraries(tutorial_tensorexpr PRIVATE
hiprtc::hiprtc
hip::amdhip64
${TORCH_CUDA_LIBRARIES})
target_link_libraries(
tutorial_tensorexpr PRIVATE hiprtc::hiprtc hip::amdhip64
${TORCH_CUDA_LIBRARIES})
target_compile_definitions(tutorial_tensorexpr PRIVATE USE_ROCM)
endif()
if(INSTALL_TEST)
set_target_properties(test_tensorexpr PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
set_target_properties(
test_tensorexpr
PROPERTIES INSTALL_RPATH
"${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
install(TARGETS test_tensorexpr DESTINATION bin)
set_target_properties(tutorial_tensorexpr PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
set_target_properties(
tutorial_tensorexpr
PROPERTIES INSTALL_RPATH
"${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
install(TARGETS tutorial_tensorexpr DESTINATION bin)
# Install PDB files for MSVC builds
if(MSVC AND BUILD_SHARED_LIBS)
install(FILES $<TARGET_PDB_FILE:test_tensorexpr> DESTINATION bin OPTIONAL)
install(FILES $<TARGET_PDB_FILE:tutorial_tensorexpr> DESTINATION bin OPTIONAL)
install(
FILES $<TARGET_PDB_FILE:test_tensorexpr>
DESTINATION bin
OPTIONAL)
install(
FILES $<TARGET_PDB_FILE:tutorial_tensorexpr>
DESTINATION bin
OPTIONAL)
endif()
endif()
View File
@ -1,8 +1,6 @@
set(LIBRARY_NAME torch_openreg)
file(GLOB_RECURSE SOURCE_FILES
"${CMAKE_CURRENT_SOURCE_DIR}/*.cpp"
)
file(GLOB_RECURSE SOURCE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp")
add_library(${LIBRARY_NAME} SHARED ${SOURCE_FILES})
View File
@ -1,8 +1,6 @@
set(LIBRARY_NAME openreg)
file(GLOB_RECURSE SOURCE_FILES
"${CMAKE_CURRENT_SOURCE_DIR}/*.cpp"
)
file(GLOB_RECURSE SOURCE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp")
add_library(${LIBRARY_NAME} SHARED ${SOURCE_FILES})
View File
@ -1,8 +1,6 @@
set(LIBRARY_NAME torch_bindings)
file(GLOB_RECURSE SOURCE_FILES
"${CMAKE_CURRENT_SOURCE_DIR}/*.cpp"
)
file(GLOB_RECURSE SOURCE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp")
add_library(${LIBRARY_NAME} SHARED ${SOURCE_FILES})
View File
@ -3,8 +3,8 @@ cmake_minimum_required(VERSION 3.15 FATAL_ERROR)
project(custom_backend)
if(USE_ROCM)
include(utils)
include(LoadHIP)
include(utils)
include(LoadHIP)
endif()
find_package(Torch REQUIRED)
View File
@ -3,8 +3,8 @@ cmake_minimum_required(VERSION 3.15 FATAL_ERROR)
project(custom_ops)
if(USE_ROCM)
include(utils)
include(LoadHIP)
include(utils)
include(LoadHIP)
endif()
find_package(Torch REQUIRED)
View File
@ -1,5 +1,6 @@
# Build separate libraries the define custom classes/operators used from our Python tests.
# These are intended to be used with torch.ops.load_library() in our Python test suite.
# Build separate libraries that define custom classes/operators used from our
# Python tests. These are intended to be used with torch.ops.load_library() in
# our Python test suite.
add_library(aoti_custom_ops SHARED custom_ops.cpp)
target_link_libraries(aoti_custom_ops torch)
View File
@ -5,9 +5,8 @@ set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/test/lib)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/test/lib)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/test/bin)
# TODO(voz): Fix hack below
# Start hack
list(APPEND policies_new CMP0079)
# TODO(voz): Fix hack below. Start hack.
list(APPEND policies_new CMP0079)
foreach(policy ${policies_new})
if(POLICY ${policy})
@ -16,32 +15,37 @@ foreach(policy ${policies_new})
endforeach()
# End hack
################################
# ##############################################################################
# GTest
################################
# ##############################################################################
project(googletest-git NONE)
include(FetchContent)
FetchContent_Declare(
googletest
GIT_REPOSITORY https://github.com/google/googletest.git
GIT_TAG release-1.12.1
)
GIT_TAG release-1.12.1)
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
set(BUILD_GMOCK OFF CACHE BOOL "" FORCE)
set(BUILD_GTEST ON CACHE BOOL "" FORCE)
set(gtest_force_shared_crt
ON
CACHE BOOL "" FORCE)
set(BUILD_GMOCK
OFF
CACHE BOOL "" FORCE)
set(BUILD_GTEST
ON
CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(googletest)
################################
# ##############################################################################
# Tests
################################
# ##############################################################################
# TODO(voz): This is a little assumptive of just this one test, rewrite with real dir includes
# TODO(voz): This is a little assumptive of just this one test, rewrite with
# real dir includes
include_directories(${ATEN_INCLUDE})
add_executable(test_cpp_prefix test_cpp_prefix.cpp ../../torchinductor/codegen/cpp_prefix.h)
add_executable(test_cpp_prefix test_cpp_prefix.cpp
../../torchinductor/codegen/cpp_prefix.h)
target_link_libraries(test_cpp_prefix gtest_main)
add_test(NAME test_cpp_prefix COMMAND test_cpp_prefix)
View File
@ -3,8 +3,8 @@ cmake_minimum_required(VERSION 3.15 FATAL_ERROR)
project(jit_hooks)
if(USE_ROCM)
include(utils)
include(LoadHIP)
include(utils)
include(LoadHIP)
endif()
find_package(Torch REQUIRED)
View File
@ -2,7 +2,10 @@ cmake_minimum_required(VERSION 3.15)
project(custom_build_project)
set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ standard whose features are requested to build this target.")
set(CMAKE_CXX_STANDARD
17
CACHE STRING
"The C++ standard whose features are requested to build this target.")
# Find torch library
find_package(Torch REQUIRED)
@ -13,12 +16,12 @@ target_include_directories(Predictor PUBLIC ${TORCH_INCLUDE_DIRS})
find_package(Threads REQUIRED)
target_link_libraries(Predictor
target_link_libraries(
Predictor
-Wl,-s
-Wl,--gc-sections
-Wl,--whole-archive
${TORCH_LIBRARIES}
-Wl,--no-whole-archive
Threads::Threads
${CMAKE_DL_LIBS}
)
${CMAKE_DL_LIBS})
View File
@ -3,10 +3,8 @@ cmake_minimum_required(VERSION 3.15)
set(TORCH_ROOT ${CMAKE_CURRENT_LIST_DIR}/../../..)
set(TEST_ROOT ${TORCH_ROOT}/test/mobile/lightweight_dispatch)
add_executable(test_codegen_unboxing
${TEST_ROOT}/test_lightweight_dispatch.cpp
${TEST_ROOT}/test_codegen_unboxing.cpp
)
add_executable(test_codegen_unboxing ${TEST_ROOT}/test_lightweight_dispatch.cpp
${TEST_ROOT}/test_codegen_unboxing.cpp)
target_include_directories(test_codegen_unboxing PRIVATE ${ATen_CPU_INCLUDE})
@ -14,9 +12,8 @@ target_compile_definitions(test_codegen_unboxing PRIVATE USE_GTEST)
set(TEST_UNBOXING_DEPENDENCIES torch gtest_main)
target_link_libraries(test_codegen_unboxing PRIVATE
${TEST_UNBOXING_DEPENDENCIES}
)
target_link_libraries(test_codegen_unboxing
PRIVATE ${TEST_UNBOXING_DEPENDENCIES})
if(INSTALL_TEST)
install(TARGETS test_codegen_unboxing DESTINATION bin)
View File
@ -1,23 +1,20 @@
set(MOBILE_NNC_TEST_ROOT ${TORCH_ROOT}/test/mobile/nnc)
set(MOBILE_NNC_TEST_SRCS
${MOBILE_NNC_TEST_ROOT}/test_context.cpp
${MOBILE_NNC_TEST_ROOT}/test_nnc_backend.cpp
${MOBILE_NNC_TEST_ROOT}/test_registry.cpp
)
${MOBILE_NNC_TEST_ROOT}/test_context.cpp
${MOBILE_NNC_TEST_ROOT}/test_nnc_backend.cpp
${MOBILE_NNC_TEST_ROOT}/test_registry.cpp)
add_executable(test_mobile_nnc
${TORCH_ROOT}/test/cpp/lite_interpreter_runtime/main.cpp
${MOBILE_NNC_TEST_SRCS}
)
add_executable(
test_mobile_nnc ${TORCH_ROOT}/test/cpp/lite_interpreter_runtime/main.cpp
${MOBILE_NNC_TEST_SRCS})
target_link_libraries(test_mobile_nnc PRIVATE torch gtest_main)
target_include_directories(test_mobile_nnc PRIVATE ${ATen_CPU_INCLUDE})
target_compile_definitions(test_mobile_nnc PRIVATE USE_GTEST)
add_executable(aot_model_compiler_test
${TORCH_ROOT}/binaries/aot_model_compiler.cc
)
${TORCH_ROOT}/binaries/aot_model_compiler.cc)
target_link_libraries(aot_model_compiler_test PRIVATE torch)
target_include_directories(aot_model_compiler_test PRIVATE ${ATen_CPU_INCLUDE})
@ -27,6 +24,9 @@ if(INSTALL_TEST)
install(TARGETS aot_model_compiler_test DESTINATION bin)
# Install PDB files for MSVC builds
if(MSVC AND BUILD_SHARED_LIBS)
install(FILES $<TARGET_PDB_FILE:test_mobile_nnc> DESTINATION bin OPTIONAL)
install(
FILES $<TARGET_PDB_FILE:test_mobile_nnc>
DESTINATION bin
OPTIONAL)
endif()
endif()
View File
@ -1,5 +1,5 @@
# This file used to build libtorch.so.
# Now it only builds the Torch python bindings.
# This file used to build libtorch.so. Now it only builds the Torch python
# bindings.
if(NOT CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO)
cmake_minimum_required(VERSION 3.27 FATAL_ERROR)
@ -21,9 +21,9 @@ if(NOT TORCH_INSTALL_LIB_DIR)
endif()
if(MSVC)
set(LIBSHM_SUBDIR libshm_windows)
set(LIBSHM_SUBDIR libshm_windows)
else()
set(LIBSHM_SUBDIR libshm)
set(LIBSHM_SUBDIR libshm)
endif()
set(LIBSHM_SRCDIR ${TORCH_SRC_DIR}/lib/${LIBSHM_SUBDIR})
@ -32,49 +32,38 @@ add_subdirectory(${LIBSHM_SRCDIR})
# Generate files
set(TOOLS_PATH "${TORCH_ROOT}/tools")
set(TORCH_PYTHON_SRCS
${GENERATED_THNN_CXX}
${GENERATED_CXX_PYTHON}
)
set(TORCH_PYTHON_SRCS ${GENERATED_THNN_CXX} ${GENERATED_CXX_PYTHON})
append_filelist("libtorch_python_core_sources" TORCH_PYTHON_SRCS)
# NB: This has to match the condition under which the JIT test directory
# is included (at the time of writing that's in caffe2/CMakeLists.txt).
# NB: This has to match the condition under which the JIT test directory is
# included (at the time of writing that's in caffe2/CMakeLists.txt).
if(BUILD_TEST)
add_definitions(-DBUILDING_TESTS)
list(APPEND TORCH_PYTHON_SRCS
${TORCH_ROOT}/test/cpp/jit/torch_python_test.cpp
)
add_definitions(-DBUILDING_TESTS)
list(APPEND TORCH_PYTHON_SRCS
${TORCH_ROOT}/test/cpp/jit/torch_python_test.cpp)
endif()
set(TORCH_PYTHON_INCLUDE_DIRECTORIES
${PYTHON_INCLUDE_DIR}
${TORCH_ROOT}
${TORCH_ROOT}/aten/src
${TORCH_ROOT}/aten/src/TH
${CMAKE_BINARY_DIR}
${CMAKE_BINARY_DIR}/aten/src
${CMAKE_BINARY_DIR}/caffe2/aten/src
${CMAKE_BINARY_DIR}/third_party
${CMAKE_BINARY_DIR}/third_party/onnx
${TORCH_ROOT}/third_party/valgrind-headers
${TORCH_ROOT}/third_party/gloo
${TORCH_ROOT}/third_party/onnx
${TORCH_ROOT}/third_party/flatbuffers/include
${TORCH_ROOT}/third_party/kineto/libkineto/include
${TORCH_ROOT}/third_party/cpp-httplib
${TORCH_ROOT}/third_party/nlohmann/include
${TORCH_SRC_DIR}/csrc
${TORCH_SRC_DIR}/csrc/api/include
${TORCH_SRC_DIR}/lib
${TORCH_SRC_DIR}/standalone
)
${TORCH_SRC_DIR}/standalone)
list(APPEND TORCH_PYTHON_INCLUDE_DIRECTORIES ${LIBSHM_SRCDIR})
@ -106,231 +95,205 @@ set(TORCH_PYTHON_COMPILE_OPTIONS)
set(TORCH_PYTHON_LINK_FLAGS "")
if(MSVC)
string(APPEND TORCH_PYTHON_LINK_FLAGS " /NODEFAULTLIB:LIBCMT.LIB")
list(APPEND TORCH_PYTHON_LINK_LIBRARIES ${PYTHON_LIBRARIES} onnx_library)
if(NOT CMAKE_BUILD_TYPE MATCHES "Release")
string(APPEND TORCH_PYTHON_LINK_FLAGS " /DEBUG:FULL")
endif()
string(APPEND TORCH_PYTHON_LINK_FLAGS " /NODEFAULTLIB:LIBCMT.LIB")
list(APPEND TORCH_PYTHON_LINK_LIBRARIES ${PYTHON_LIBRARIES} onnx_library)
if(NOT CMAKE_BUILD_TYPE MATCHES "Release")
string(APPEND TORCH_PYTHON_LINK_FLAGS " /DEBUG:FULL")
endif()
elseif(APPLE)
string(APPEND TORCH_PYTHON_LINK_FLAGS " -undefined dynamic_lookup")
string(APPEND TORCH_PYTHON_LINK_FLAGS " -undefined dynamic_lookup")
else()
list(APPEND TORCH_PYTHON_COMPILE_OPTIONS
-fno-strict-aliasing
-Wno-strict-aliasing)
list(APPEND TORCH_PYTHON_COMPILE_OPTIONS -fno-strict-aliasing
-Wno-strict-aliasing)
endif()
if(USE_ITT)
list(APPEND TORCH_PYTHON_SRCS
${TORCH_SRC_DIR}/csrc/itt.cpp
)
list(APPEND TORCH_PYTHON_SRCS ${TORCH_SRC_DIR}/csrc/itt.cpp)
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_ITT)
endif()
if(USE_CUDA)
include(${TORCH_ROOT}/cmake/public/cuda.cmake)
append_filelist("libtorch_python_cuda_core_sources" TORCH_PYTHON_SRCS)
list(APPEND TORCH_PYTHON_SRCS ${GENERATED_THNN_CXX_CUDA})
include(${TORCH_ROOT}/cmake/public/cuda.cmake)
append_filelist("libtorch_python_cuda_core_sources" TORCH_PYTHON_SRCS)
list(APPEND TORCH_PYTHON_SRCS ${GENERATED_THNN_CXX_CUDA})
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_CUDA)
if(USE_CUDNN)
list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::cudnn)
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_CUDNN)
endif()
if(USE_CUSPARSELT)
list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::cusparselt)
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_CUSPARSELT)
endif()
if(USE_CUFILE)
list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::cufile)
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_CUFILE)
endif()
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_CUDA)
if(USE_CUDNN)
list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::cudnn)
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_CUDNN)
endif()
if(USE_CUSPARSELT)
list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::cusparselt)
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_CUSPARSELT)
endif()
if(USE_CUFILE)
list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::cufile)
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_CUFILE)
endif()
if(TARGET torch::nvtx3)
list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::nvtx3)
else()
if(TARGET torch::nvtoolsext)
list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::nvtoolsext)
endif()
if(TARGET torch::nvtx3)
list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::nvtx3)
else()
if(TARGET torch::nvtoolsext)
list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::nvtoolsext)
endif()
endif()
endif()
if(USE_ROCM)
append_filelist("libtorch_python_cuda_core_sources" TORCH_PYTHON_SRCS)
list(APPEND TORCH_PYTHON_SRCS ${GENERATED_THNN_CXX_CUDA})
append_filelist("libtorch_python_cuda_core_sources" TORCH_PYTHON_SRCS)
list(APPEND TORCH_PYTHON_SRCS ${GENERATED_THNN_CXX_CUDA})
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS
USE_ROCM
__HIP_PLATFORM_AMD__
)
if(NOT WIN32)
list(APPEND TORCH_PYTHON_LINK_LIBRARIES ${ROCM_ROCTX_LIB})
endif()
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_ROCM __HIP_PLATFORM_AMD__)
if(NOT WIN32)
list(APPEND TORCH_PYTHON_LINK_LIBRARIES ${ROCM_ROCTX_LIB})
endif()
endif()
if(USE_XPU)
include(${TORCH_ROOT}/cmake/public/xpu.cmake)
append_filelist("libtorch_python_xpu_sources" TORCH_PYTHON_SRCS)
include(${TORCH_ROOT}/cmake/public/xpu.cmake)
append_filelist("libtorch_python_xpu_sources" TORCH_PYTHON_SRCS)
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_XPU)
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_XPU)
endif()
if(USE_CUDNN OR USE_ROCM)
list(APPEND TORCH_PYTHON_SRCS
${TORCH_SRC_DIR}/csrc/cuda/shared/cudnn.cpp
)
if(USE_STATIC_CUDNN)
set_source_files_properties(
${TORCH_SRC_DIR}/csrc/cuda/shared/cudnn.cpp
PROPERTIES COMPILE_DEFINITIONS "USE_STATIC_CUDNN"
)
endif()
list(APPEND TORCH_PYTHON_SRCS ${TORCH_SRC_DIR}/csrc/cuda/shared/cudnn.cpp)
if(USE_STATIC_CUDNN)
set_source_files_properties(
${TORCH_SRC_DIR}/csrc/cuda/shared/cudnn.cpp PROPERTIES COMPILE_DEFINITIONS
"USE_STATIC_CUDNN")
endif()
endif()
if(USE_CUSPARSELT)
list(APPEND TORCH_PYTHON_SRCS ${TORCH_SRC_DIR}/csrc/cuda/shared/cusparselt.cpp)
list(APPEND TORCH_PYTHON_SRCS
${TORCH_SRC_DIR}/csrc/cuda/shared/cusparselt.cpp)
endif()
if(USE_MPS)
list(APPEND TORCH_PYTHON_SRCS ${MPS_PYTHON_SRCS})
list(APPEND TORCH_PYTHON_SRCS ${MPS_PYTHON_SRCS})
endif()
if(USE_VALGRIND AND NOT WIN32)
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_VALGRIND)
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_VALGRIND)
endif()
# In the most recent CMake versions, a new 'TRANSFORM' subcommand of 'list' allows much of the boilerplate of defining the lists
# of type stub files to be omitted.
# For compatibility with older CMake versions, we omit it for now, but leave it as a comment in case compatibility with the older
# CMake versions is eventually dropped.
# set(Modules
# __init__
# activation
# adaptive
# batchnorm
# container
# conv
# distance
# dropout
# fold
# instancenorm
# linear
# loss
# module
# normalization
# padding
# pixelshuffle
# pooling
# rnn
# sparse
# upsampling
# )
# In the most recent CMake versions, a new 'TRANSFORM' subcommand of 'list'
# allows much of the boilerplate of defining the lists of type stub files to be
# omitted. For compatibility with older CMake versions, we omit it for now, but
# leave it as a comment in case compatibility with the older CMake versions is
# eventually dropped.
# set(Modules __init__ activation adaptive batchnorm container conv distance
#     dropout fold instancenorm linear loss module normalization padding
#     pixelshuffle pooling rnn sparse upsampling)
# list(TRANSFORM Modules PREPEND "${TORCH_SRC_DIR}/nn/modules/")
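For illustration only, a minimal sketch of how the TRANSFORM-based variant described in the comment above could look (the exact module list and the ".pyi" suffix handling are assumptions, not part of this change):

set(Modules __init__ activation adaptive batchnorm container conv distance
    dropout fold instancenorm linear loss module normalization padding
    pixelshuffle pooling rnn sparse upsampling)
list(TRANSFORM Modules PREPEND "${TORCH_SRC_DIR}/nn/modules/") # prepend the stub directory
list(TRANSFORM Modules APPEND ".pyi") # assumed stub file suffix

Such a sketch would replace the hand-maintained per-file paths once the minimum supported CMake version guarantees list(TRANSFORM).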
add_custom_target(torch_python_stubs DEPENDS
"${TORCH_SRC_DIR}/_C/__init__.pyi"
"${TORCH_SRC_DIR}/_C/_VariableFunctions.pyi"
"${TORCH_SRC_DIR}/nn/functional.pyi"
"${TORCH_SRC_DIR}/utils/data/datapipes/datapipe.pyi"
)
add_custom_target(
torch_python_stubs
DEPENDS "${TORCH_SRC_DIR}/_C/__init__.pyi"
"${TORCH_SRC_DIR}/_C/_VariableFunctions.pyi"
"${TORCH_SRC_DIR}/nn/functional.pyi"
"${TORCH_SRC_DIR}/utils/data/datapipes/datapipe.pyi")
file(GLOB_RECURSE torchgen_python "${PROJECT_SOURCE_DIR}/torchgen/*.py")
file(GLOB_RECURSE autograd_python "${TOOLS_PATH}/autograd/*.py")
file(GLOB_RECURSE pyi_python "${TOOLS_PATH}/pyi/*.py")
add_custom_command(
OUTPUT
"${TORCH_SRC_DIR}/_C/__init__.pyi"
"${TORCH_SRC_DIR}/_C/_VariableFunctions.pyi"
"${TORCH_SRC_DIR}/nn/functional.pyi"
COMMAND
"${Python_EXECUTABLE}" -mtools.pyi.gen_pyi
--native-functions-path "aten/src/ATen/native/native_functions.yaml"
--tags-path "aten/src/ATen/native/tags.yaml"
--deprecated-functions-path "tools/autograd/deprecated.yaml"
DEPENDS
"${TORCH_SRC_DIR}/_C/__init__.pyi.in"
"${TORCH_SRC_DIR}/_C/_VariableFunctions.pyi.in"
"${TORCH_SRC_DIR}/nn/functional.pyi.in"
"${TORCH_ROOT}/aten/src/ATen/native/native_functions.yaml"
"${TORCH_ROOT}/aten/src/ATen/native/tags.yaml"
"${TORCH_ROOT}/tools/autograd/deprecated.yaml"
"${TORCH_ROOT}/torch/_torch_docs.py"
"${TORCH_ROOT}/torch/_tensor_docs.py"
${pyi_python}
${autograd_python}
${torchgen_python}
WORKING_DIRECTORY
"${TORCH_ROOT}"
)
OUTPUT "${TORCH_SRC_DIR}/_C/__init__.pyi"
"${TORCH_SRC_DIR}/_C/_VariableFunctions.pyi"
"${TORCH_SRC_DIR}/nn/functional.pyi"
COMMAND
"${Python_EXECUTABLE}" -mtools.pyi.gen_pyi --native-functions-path
"aten/src/ATen/native/native_functions.yaml" --tags-path
"aten/src/ATen/native/tags.yaml" --deprecated-functions-path
"tools/autograd/deprecated.yaml"
DEPENDS "${TORCH_SRC_DIR}/_C/__init__.pyi.in"
"${TORCH_SRC_DIR}/_C/_VariableFunctions.pyi.in"
"${TORCH_SRC_DIR}/nn/functional.pyi.in"
"${TORCH_ROOT}/aten/src/ATen/native/native_functions.yaml"
"${TORCH_ROOT}/aten/src/ATen/native/tags.yaml"
"${TORCH_ROOT}/tools/autograd/deprecated.yaml"
"${TORCH_ROOT}/torch/_torch_docs.py"
"${TORCH_ROOT}/torch/_tensor_docs.py"
${pyi_python}
${autograd_python}
${torchgen_python}
WORKING_DIRECTORY "${TORCH_ROOT}")
file(GLOB_RECURSE datapipe_files "${TORCH_SRC_DIR}/utils/data/datapipes/*.py")
add_custom_command(
OUTPUT
"${TORCH_SRC_DIR}/utils/data/datapipes/datapipe.pyi"
COMMAND
${CMAKE_COMMAND} -E env PYTHONPATH="${TORCH_ROOT}"
"${Python_EXECUTABLE}" ${TORCH_SRC_DIR}/utils/data/datapipes/gen_pyi.py
DEPENDS
"${TORCH_SRC_DIR}/utils/data/datapipes/datapipe.pyi.in"
${datapipe_files}
WORKING_DIRECTORY
"${TORCH_ROOT}"
)
OUTPUT "${TORCH_SRC_DIR}/utils/data/datapipes/datapipe.pyi"
COMMAND
${CMAKE_COMMAND} -E env PYTHONPATH="${TORCH_ROOT}" "${Python_EXECUTABLE}"
${TORCH_SRC_DIR}/utils/data/datapipes/gen_pyi.py
DEPENDS "${TORCH_SRC_DIR}/utils/data/datapipes/datapipe.pyi.in"
${datapipe_files}
WORKING_DIRECTORY "${TORCH_ROOT}")
if(USE_DISTRIBUTED)
if(WIN32)
append_filelist("libtorch_python_distributed_core_sources" TORCH_PYTHON_SRCS)
else()
append_filelist("libtorch_python_distributed_sources" TORCH_PYTHON_SRCS)
endif()
# Disable certain warnings for GCC-9.X
if(CMAKE_COMPILER_IS_GNUCXX)
set_source_files_properties(${TORCH_SRC_DIR}/csrc/distributed/autograd/init.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type")
set_source_files_properties(${TORCH_SRC_DIR}/csrc/distributed/rpc/testing/init.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type")
set_source_files_properties(${TORCH_SRC_DIR}/csrc/distributed/c10d/init.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type")
endif()
# NCCL is a private dependency of libtorch, but libtorch_python includes
# some private headers of libtorch, which in turn include NCCL. As a hacky
# alternative to making NCCL a public dependency of libtorch, we make it
# a private dependency of libtorch_python as well.
if(USE_NCCL)
list(APPEND TORCH_PYTHON_LINK_LIBRARIES __caffe2_nccl)
endif()
# Same for MPI.
if(USE_MPI)
list(APPEND TORCH_PYTHON_LINK_LIBRARIES MPI::MPI_CXX)
endif()
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_C10D)
if(WIN32)
append_filelist("libtorch_python_distributed_core_sources"
TORCH_PYTHON_SRCS)
else()
append_filelist("libtorch_python_distributed_sources" TORCH_PYTHON_SRCS)
endif()
# Disable certain warnings for GCC-9.X
if(CMAKE_COMPILER_IS_GNUCXX)
set_source_files_properties(
${TORCH_SRC_DIR}/csrc/distributed/autograd/init.cpp
PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type")
set_source_files_properties(
${TORCH_SRC_DIR}/csrc/distributed/rpc/testing/init.cpp
PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type")
set_source_files_properties(
${TORCH_SRC_DIR}/csrc/distributed/c10d/init.cpp
PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type")
endif()
# NCCL is a private dependency of libtorch, but libtorch_python includes some
# private headers of libtorch, which in turn include NCCL. As a hacky
# alternative to making NCCL a public dependency of libtorch, we make it a
# private dependency of libtorch_python as well.
if(USE_NCCL)
list(APPEND TORCH_PYTHON_LINK_LIBRARIES __caffe2_nccl)
endif()
# Same for MPI.
if(USE_MPI)
list(APPEND TORCH_PYTHON_LINK_LIBRARIES MPI::MPI_CXX)
endif()
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_C10D)
endif()
if(USE_NCCL AND NOT WIN32)
list(APPEND TORCH_PYTHON_SRCS
${TORCH_SRC_DIR}/csrc/cuda/python_nccl.cpp)
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_NCCL)
list(APPEND TORCH_PYTHON_SRCS ${TORCH_SRC_DIR}/csrc/cuda/python_nccl.cpp)
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_NCCL)
endif()
if(NOT MSVC)
# cudaProfilerInitialize must go away
set_source_files_properties(${TORCH_SRC_DIR}/csrc/cuda/shared/cudart.cpp PROPERTIES COMPILE_FLAGS "-Wno-deprecated-declarations")
set_source_files_properties(
${TORCH_SRC_DIR}/csrc/cuda/shared/cudart.cpp
PROPERTIES COMPILE_FLAGS "-Wno-deprecated-declarations")
endif()
# coreml
if(USE_COREML_DELEGATE)
list(APPEND TORCH_PYTHON_SRCS ${TORCH_SRC_DIR}/csrc/jit/backends/coreml/cpp/backend.cpp)
list(APPEND TORCH_PYTHON_SRCS ${TORCH_SRC_DIR}/csrc/jit/backends/coreml/cpp/preprocess.cpp)
list(APPEND TORCH_PYTHON_SRCS
${TORCH_SRC_DIR}/csrc/jit/backends/coreml/cpp/backend.cpp)
list(APPEND TORCH_PYTHON_SRCS
${TORCH_SRC_DIR}/csrc/jit/backends/coreml/cpp/preprocess.cpp)
endif()
add_library(torch_python SHARED ${TORCH_PYTHON_SRCS})
torch_compile_options(torch_python) # see cmake/public/utils.cmake
torch_compile_options(torch_python) # see cmake/public/utils.cmake
if(APPLE)
target_compile_options(torch_python PRIVATE
$<$<COMPILE_LANGUAGE:CXX>: -fvisibility=default>)
target_compile_options(torch_python PRIVATE $<$<COMPILE_LANGUAGE:CXX>:
-fvisibility=default>)
endif()
if(CAFFE2_USE_MKL AND BUILD_LIBTORCHLESS)
# Use the RPATH of the linked libraries
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
# we need to explicitly link caffe2::mkl in order to have the
# correct RPATH in torch_python for the split build
# we need to explicitly link caffe2::mkl in order to have the correct RPATH in
# torch_python for the split build
target_link_libraries(torch_python PRIVATE caffe2::mkl)
endif()
@ -346,8 +309,8 @@ if(USE_CUFILE AND NOT USE_ROCM)
endif()
if(HAVE_SOVERSION)
set_target_properties(torch_python PROPERTIES
VERSION ${TORCH_VERSION} SOVERSION ${TORCH_SOVERSION})
set_target_properties(torch_python PROPERTIES VERSION ${TORCH_VERSION}
SOVERSION ${TORCH_SOVERSION})
endif()
# in case of the split build we need to add compile definitions
@ -386,10 +349,15 @@ if(BUILD_LIBTORCHLESS)
target_compile_definitions(torch_python PRIVATE USE_TENSORPIPE)
endif()
set(EXPERIMENTAL_SINGLE_THREAD_POOL "0" CACHE STRING
"Experimental option to use a single thread pool for inter- and intra-op parallelism")
set(EXPERIMENTAL_SINGLE_THREAD_POOL
"0"
CACHE
STRING
"Experimental option to use a single thread pool for inter- and intra-op parallelism"
)
if("${EXPERIMENTAL_SINGLE_THREAD_POOL}")
target_compile_definitions(torch_python PRIVATE "-DAT_EXPERIMENTAL_SINGLE_THREAD_POOL=1")
target_compile_definitions(torch_python
PRIVATE "-DAT_EXPERIMENTAL_SINGLE_THREAD_POOL=1")
endif()
endif()
@ -397,54 +365,64 @@ endif()
add_dependencies(torch_python torch_python_stubs)
add_dependencies(torch_python flatbuffers)
if(USE_PRECOMPILED_HEADERS)
target_precompile_headers(torch_python PRIVATE
"$<$<COMPILE_LANGUAGE:CXX>:ATen/ATen.h>")
"$<$<COMPILE_LANGUAGE:CXX>:ATen/ATen.h>")
endif()
# Required workaround for generated sources
# See https://samthursfield.wordpress.com/2015/11/21/cmake-dependencies-between-targets-and-files-and-custom-commands/#custom-commands-in-different-directories
# Required workaround for generated sources; see
# https://samthursfield.wordpress.com/2015/11/21/cmake-dependencies-between-targets-and-files-and-custom-commands/#custom-commands-in-different-directories
add_dependencies(torch_python generate-torch-sources)
set_source_files_properties(
${GENERATED_THNN_SOURCES}
${GENERATED_CXX_PYTHON}
PROPERTIES GENERATED TRUE
)
set_source_files_properties(${GENERATED_THNN_SOURCES} ${GENERATED_CXX_PYTHON}
PROPERTIES GENERATED TRUE)
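For context, the workaround above follows the pattern from the linked post; a self-contained sketch with hypothetical names (gen.cpp, generate_sources, consumer, none of them from this build), assuming the custom command and its consumer live in different directories:

add_custom_command(
  OUTPUT ${CMAKE_BINARY_DIR}/gen/gen.cpp
  COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_BINARY_DIR}/gen
  COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_BINARY_DIR}/gen/gen.cpp)
# A named target drives the command so other directories can depend on it.
add_custom_target(generate_sources DEPENDS ${CMAKE_BINARY_DIR}/gen/gen.cpp)
add_library(consumer STATIC ${CMAKE_BINARY_DIR}/gen/gen.cpp)
add_dependencies(consumer generate_sources) # run generation before building consumer
set_source_files_properties(${CMAKE_BINARY_DIR}/gen/gen.cpp PROPERTIES GENERATED TRUE)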
# Disable certain warnings for GCC-9.X
if(CMAKE_COMPILER_IS_GNUCXX)
set_source_files_properties(${TORCH_SRC_DIR}/csrc/Module.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type")
set_source_files_properties(${TORCH_SRC_DIR}/csrc/autograd/python_variable.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type")
set_source_files_properties(
${TORCH_SRC_DIR}/csrc/Module.cpp PROPERTIES COMPILE_FLAGS
"-Wno-cast-function-type")
set_source_files_properties(
${TORCH_SRC_DIR}/csrc/autograd/python_variable.cpp
PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type")
endif()
# Preserve CUDA_GENCODE flags
if(USE_CUDA)
torch_cuda_get_nvcc_gencode_flag(_ARCH_FLAGS)
set_source_files_properties(${TORCH_SRC_DIR}/csrc/cuda/Module.cpp PROPERTIES COMPILE_FLAGS "-DCUDA_ARCH_FLAGS=\"${_ARCH_FLAGS_readable}\"")
set_source_files_properties(
${TORCH_SRC_DIR}/csrc/cuda/Module.cpp
PROPERTIES COMPILE_FLAGS "-DCUDA_ARCH_FLAGS=\"${_ARCH_FLAGS_readable}\"")
endif()
# Preserve HIP arch flags
if(USE_ROCM)
string(REPLACE ";" " " PYTORCH_ROCM_ARCH_readable "${PYTORCH_ROCM_ARCH}")
set_source_files_properties(${TORCH_SRC_DIR}/csrc/cuda/Module.cpp PROPERTIES COMPILE_FLAGS "-DCUDA_ARCH_FLAGS=\"${PYTORCH_ROCM_ARCH_readable}\"")
set_source_files_properties(
${TORCH_SRC_DIR}/csrc/cuda/Module.cpp
PROPERTIES COMPILE_FLAGS
"-DCUDA_ARCH_FLAGS=\"${PYTORCH_ROCM_ARCH_readable}\"")
endif()
# Preserve XPU arch flags
if(USE_XPU)
string(REPLACE "," " " _ARCH_FLAGS_readable "${TORCH_XPU_ARCH_LIST}")
set_source_files_properties(${TORCH_SRC_DIR}/csrc/xpu/Module.cpp PROPERTIES COMPILE_FLAGS "-DXPU_ARCH_FLAGS=\"${_ARCH_FLAGS_readable}\"")
set_source_files_properties(
${TORCH_SRC_DIR}/csrc/xpu/Module.cpp
PROPERTIES COMPILE_FLAGS "-DXPU_ARCH_FLAGS=\"${_ARCH_FLAGS_readable}\"")
endif()
target_compile_definitions(torch_python PRIVATE "-DTHP_BUILD_MAIN_LIB")
target_link_libraries(torch_python PRIVATE ${TORCH_LIB} ${TORCH_PYTHON_LINK_LIBRARIES})
target_link_libraries(torch_python PRIVATE ${TORCH_LIB}
${TORCH_PYTHON_LINK_LIBRARIES})
target_compile_definitions(torch_python PRIVATE ${TORCH_PYTHON_COMPILE_DEFINITIONS})
target_compile_definitions(torch_python
PRIVATE ${TORCH_PYTHON_COMPILE_DEFINITIONS})
target_compile_options(torch_python PRIVATE ${TORCH_PYTHON_COMPILE_OPTIONS})
target_include_directories(torch_python PUBLIC ${TORCH_PYTHON_INCLUDE_DIRECTORIES})
target_include_directories(torch_python
PUBLIC ${TORCH_PYTHON_INCLUDE_DIRECTORIES})
if(USE_UCC)
target_link_libraries(torch_python PRIVATE __caffe2_ucc)
@ -459,13 +437,15 @@ if(BUILD_ONEDNN_GRAPH)
endif()
if(${CMAKE_BUILD_TYPE} STREQUAL "RelWithAssert")
# Workaround numerous decret-without-a-gil warnings from JIT
# see https://github.com/pytorch/pytorch/issues/130073
target_compile_definitions(torch_python PRIVATE "-DPYBIND11_NO_ASSERT_GIL_HELD_INCREF_DECREF")
# Workaround numerous decref-without-a-GIL warnings from JIT; see
# https://github.com/pytorch/pytorch/issues/130073
target_compile_definitions(
torch_python PRIVATE "-DPYBIND11_NO_ASSERT_GIL_HELD_INCREF_DECREF")
endif()
if(NOT TORCH_PYTHON_LINK_FLAGS STREQUAL "")
set_target_properties(torch_python PROPERTIES LINK_FLAGS ${TORCH_PYTHON_LINK_FLAGS})
set_target_properties(torch_python PROPERTIES LINK_FLAGS
${TORCH_PYTHON_LINK_FLAGS})
endif()
install(TARGETS torch_python DESTINATION "${TORCH_INSTALL_LIB_DIR}")
@ -485,32 +465,34 @@ endif()
add_custom_target(
gen_torch_version ALL
"${Python_EXECUTABLE}" "${TOOLS_PATH}/generate_torch_version.py"
--is-debug=${TORCH_VERSION_DEBUG}
--cuda-version=${CUDA_VERSION}
--hip-version=${HIP_VERSION}
--xpu-version=${SYCL_COMPILER_VERSION}
--is-debug=${TORCH_VERSION_DEBUG} --cuda-version=${CUDA_VERSION}
--hip-version=${HIP_VERSION} --xpu-version=${SYCL_COMPILER_VERSION}
BYPRODUCTS ${TORCH_SRC_DIR}/version.py
COMMENT "Regenerating version file..."
WORKING_DIRECTORY ${TORCH_ROOT}
)
WORKING_DIRECTORY ${TORCH_ROOT})
add_dependencies(torch_python gen_torch_version)
# Skip building this library under MacOS, since it is currently failing to build on Mac
# Github issue #61930
# Skip building this library under macOS, since it is currently failing to
# build on Mac; see GitHub issue #61930.
if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
# Add Android Nnapi delegate library
add_library(nnapi_backend SHARED
${TORCH_SRC_DIR}/csrc/jit/backends/nnapi/nnapi_backend_lib.cpp
${TORCH_SRC_DIR}/csrc/jit/backends/nnapi/nnapi_backend_preprocess.cpp
)
add_library(
nnapi_backend SHARED
${TORCH_SRC_DIR}/csrc/jit/backends/nnapi/nnapi_backend_lib.cpp
${TORCH_SRC_DIR}/csrc/jit/backends/nnapi/nnapi_backend_preprocess.cpp)
# Pybind11 requires explicit linking of the torch_python library
if(BUILD_LIBTORCHLESS)
target_link_libraries(nnapi_backend PRIVATE ${TORCH_LIB})
else()
target_link_libraries(nnapi_backend PRIVATE torch)
endif()
target_link_libraries(nnapi_backend PRIVATE torch_python pybind::pybind11 fmt::fmt-header-only)
target_link_libraries(nnapi_backend PRIVATE torch_python pybind::pybind11
fmt::fmt-header-only)
endif()
set(TORCH_PYTHON_COMPILE_OPTIONS ${TORCH_PYTHON_COMPILE_OPTIONS} PARENT_SCOPE)
set(TORCH_PYTHON_LINK_FLAGS ${TORCH_PYTHON_LINK_FLAGS} PARENT_SCOPE)
set(TORCH_PYTHON_COMPILE_OPTIONS
${TORCH_PYTHON_COMPILE_OPTIONS}
PARENT_SCOPE)
set(TORCH_PYTHON_LINK_FLAGS
${TORCH_PYTHON_LINK_FLAGS}
PARENT_SCOPE)
View File
@ -1,14 +1,10 @@
set(
MODEL_TRACER_DIR
"${TORCH_ROOT}/torch/csrc/jit/mobile/model_tracer")
set(MODEL_TRACER_DIR "${TORCH_ROOT}/torch/csrc/jit/mobile/model_tracer")
list(APPEND MODEL_TRACER_SOURCES "")
append_filelist("torch_mobile_tracer_sources" MODEL_TRACER_SOURCES)
add_executable(
model_tracer
${MODEL_TRACER_SOURCES})
add_executable(model_tracer ${MODEL_TRACER_SOURCES})
target_link_libraries(model_tracer PRIVATE torch)
View File
@ -2,27 +2,28 @@ cmake_minimum_required(VERSION 3.27 FATAL_ERROR)
project(headeronly CXX)
set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ standard whose features are requested to build this target.")
set(CMAKE_CXX_STANDARD
17
CACHE STRING
"The C++ standard whose features are requested to build this target.")
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
# Main build file for torch/headeronly, except there's no build cuz this lib is header-only!
# Main build file for torch/headeronly, except there's no build because this
# lib is header-only!
# ---[ Configure macro file.
set(C10_USE_GFLAGS ${USE_GFLAGS}) # used in cmake_macros.h.in
set(C10_USE_GLOG ${USE_GLOG}) # used in cmake_macros.h.in
set(C10_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) # used in cmake_macros.h.in
set(C10_USE_NUMA ${USE_NUMA}) # used in cmake_macros.h.in
set(C10_USE_MSVC_STATIC_RUNTIME ${CAFFE2_USE_MSVC_STATIC_RUNTIME}) # used in cmake_macros.h.in
set(C10_USE_ROCM_KERNEL_ASSERT ${USE_ROCM_KERNEL_ASSERT}) # used in cmake_macros.h.in
configure_file(
${CMAKE_CURRENT_LIST_DIR}/macros/cmake_macros.h.in
${CMAKE_BINARY_DIR}/torch/headeronly/macros/cmake_macros.h)
set(C10_USE_MSVC_STATIC_RUNTIME ${CAFFE2_USE_MSVC_STATIC_RUNTIME}
)# used in cmake_macros.h.in
set(C10_USE_ROCM_KERNEL_ASSERT ${USE_ROCM_KERNEL_ASSERT}) # used in
# cmake_macros.h.in
configure_file(${CMAKE_CURRENT_LIST_DIR}/macros/cmake_macros.h.in
${CMAKE_BINARY_DIR}/torch/headeronly/macros/cmake_macros.h)
file(GLOB HEADERONLY_HEADERS
*.h
macros/*.h
util/*.h
)
file(GLOB HEADERONLY_HEADERS *.h macros/*.h util/*.h)
add_library(headeronly INTERFACE ${HEADERONLY_HEADERS})
@ -31,5 +32,8 @@ install(FILES ${CMAKE_BINARY_DIR}/torch/headeronly/macros/cmake_macros.h
if(NOT BUILD_LIBTORCHLESS)
# ---[ Installation copied from c10/CMakeLists.txt
install(TARGETS headeronly EXPORT Caffe2Targets DESTINATION lib)
install(
TARGETS headeronly
EXPORT Caffe2Targets
DESTINATION lib)
endif()
View File
@ -4,24 +4,27 @@ cmake_minimum_required(VERSION 3.27 FATAL_ERROR)
set(TORCH_ROOT ${CMAKE_CURRENT_LIST_DIR}/../../../)
if(NOT LIBSHM_INSTALL_LIB_SUBDIR)
set(LIBSHM_INSTALL_LIB_SUBDIR "lib" CACHE PATH "libshm install library directory")
set(LIBSHM_INSTALL_LIB_SUBDIR
"lib"
CACHE PATH "libshm install library directory")
endif()
add_library(shm SHARED core.cpp)
if(HAVE_SOVERSION)
set_target_properties(shm PROPERTIES
VERSION ${TORCH_VERSION} SOVERSION ${TORCH_SOVERSION})
set_target_properties(shm PROPERTIES VERSION ${TORCH_VERSION}
SOVERSION ${TORCH_SOVERSION})
endif()
target_include_directories(shm PUBLIC
${TORCH_ROOT}/torch/lib # provides "libshm/libshm.h"
target_include_directories(
shm PUBLIC ${TORCH_ROOT}/torch/lib # provides "libshm/libshm.h"
)
### Torch packages supposes libraries prefix is "lib"
set_target_properties(shm PROPERTIES
PREFIX "lib"
IMPORT_PREFIX "lib"
CXX_STANDARD 17)
# Torch packaging assumes the library prefix is "lib"
set_target_properties(
shm
PROPERTIES PREFIX "lib"
IMPORT_PREFIX "lib"
CXX_STANDARD 17)
target_link_libraries(shm PRIVATE ${TORCH_CPU_LIB})
if(UNIX AND NOT APPLE)
@ -33,22 +36,20 @@ if(UNIX AND NOT APPLE)
target_link_libraries(shm PUBLIC rt)
else()
message(STATUS "Checking if rt requires pthread")
# Sometimes, rt won't be available unless you also link against
# pthreads. In this case, the NEED_LIBRT test will fail, because
# check_library_exists isn't going to build the C file with the
# pthread file, and the build will fail, setting NEED_LIBRT to
# false (this is TOTALLY BOGUS, this situation should be an error
# situation, not a "oh, I guess rt is not supported", but it's
# not too easy to distinguish between the two situations). So,
# if it fails, we try again, but this time also with a dependency
# on pthread. If it succeeds this time, we know we not only need
# an rt dependency, but we also need pthread.
# Sometimes, rt won't be available unless you also link against pthreads. In
# this case, the NEED_LIBRT test will fail, because check_library_exists
# isn't going to build the C file with the pthread file, and the build will
# fail, setting NEED_LIBRT to false (this is TOTALLY BOGUS, this situation
# should be an error situation, not a "oh, I guess rt is not supported", but
# it's not too easy to distinguish between the two situations). So, if it
# fails, we try again, but this time also with a dependency on pthread. If
# it succeeds this time, we know we not only need an rt dependency, but we
# also need pthread.
#
# BTW, this test looks for shm_open, because that's what we
# really care about (not clock_gettime). I didn't change the
# site above though in case there was a reason we were testing
# against clock_gettime. In principle, the choice of symbol you
# test for shouldn't matter.
# BTW, this test looks for shm_open, because that's what we really care
# about (not clock_gettime). I didn't change the site above though in case
# there was a reason we were testing against clock_gettime. In principle,
# the choice of symbol you test for shouldn't matter.
set(CMAKE_REQUIRED_LIBRARIES Threads::Threads)
check_library_exists(rt shm_open "sys/mman.h" NEED_RT_AND_PTHREAD)
unset(CMAKE_REQUIRED_LIBRARIES)
@ -66,8 +67,8 @@ else()
# we need to link directly to c10 here otherwise we miss symbols
target_link_libraries(torch_shm_manager PRIVATE shm c10)
endif()
set_target_properties(torch_shm_manager PROPERTIES
INSTALL_RPATH "${_rpath_portable_origin}/../lib")
set_target_properties(
torch_shm_manager PROPERTIES INSTALL_RPATH "${_rpath_portable_origin}/../lib")
install(TARGETS shm LIBRARY DESTINATION ${LIBSHM_INSTALL_LIB_SUBDIR})
install(FILES libshm.h DESTINATION "include")
View File
@ -1,27 +1,32 @@
if(NOT LIBSHM_INSTALL_LIB_SUBDIR)
set(LIBSHM_INSTALL_BIN_SUBDIR "bin" CACHE PATH "libshm install binary directory")
set(LIBSHM_INSTALL_LIB_SUBDIR "lib" CACHE PATH "libshm install library directory")
set(LIBSHM_INSTALL_BIN_SUBDIR
"bin"
CACHE PATH "libshm install binary directory")
set(LIBSHM_INSTALL_LIB_SUBDIR
"lib"
CACHE PATH "libshm install library directory")
endif()
add_library(shm SHARED core.cpp)
target_compile_definitions(shm PRIVATE
"_CRT_SECURE_NO_DEPRECATE=1"
"SHM_EXPORTS"
)
target_compile_definitions(shm PRIVATE "_CRT_SECURE_NO_DEPRECATE=1"
"SHM_EXPORTS")
target_include_directories(shm PRIVATE
${CMAKE_BINARY_DIR}/aten/src # provides "ATen/TypeExtendedInterface.h" to ATen.h
${TORCH_ROOT}/torch/lib # provides "libshm/libshm.h"
${CMAKE_CURRENT_SOURCE_DIR}
)
target_include_directories(
shm
PRIVATE ${CMAKE_BINARY_DIR}/aten/src # provides "ATen/TypeExtendedInterface.h"
# to ATen.h
${TORCH_ROOT}/torch/lib # provides "libshm/libshm.h"
${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(shm torch c10)
install(TARGETS shm DESTINATION "${LIBSHM_INSTALL_LIB_SUBDIR}")
install(FILES libshm.h DESTINATION "include")
if(MSVC AND BUILD_SHARED_LIBS)
install(FILES $<TARGET_PDB_FILE:shm> DESTINATION "${LIBSHM_INSTALL_LIB_SUBDIR}" OPTIONAL)
install(
FILES $<TARGET_PDB_FILE:shm>
DESTINATION "${LIBSHM_INSTALL_LIB_SUBDIR}"
OPTIONAL)
endif()