mirror of
https://github.com/pytorch/pytorch.git
synced 2025-11-01 04:54:55 +08:00
Update
[ghstack-poisoned]
This commit is contained in:
110
CMakeLists.txt
110
CMakeLists.txt
@ -246,7 +246,8 @@ cmake_dependent_option(USE_STATIC_CUDNN "Use cuDNN static libraries" OFF
|
||||
"USE_CUDNN" OFF)
|
||||
cmake_dependent_option(USE_CUSPARSELT "Use cuSPARSELt" ON "USE_CUDA" OFF)
|
||||
cmake_dependent_option(USE_CUDSS "Use cuDSS" ON "USE_CUDA" OFF)
|
||||
# USE_ROCM is guarded against in Dependencies.cmake because USE_ROCM is not properly defined here
|
||||
# USE_ROCM is guarded against in Dependencies.cmake because USE_ROCM is not
|
||||
# properly defined here
|
||||
cmake_dependent_option(USE_CUFILE "Use cuFile" ON "USE_CUDA AND NOT WIN32" OFF)
|
||||
option(USE_FBGEMM "Use FBGEMM (quantized 8-bit server operators)" ON)
|
||||
option(USE_KINETO "Use Kineto profiling library" ON)
|
||||
@ -262,8 +263,7 @@ option(USE_NATIVE_ARCH "Use -march=native" OFF)
|
||||
cmake_dependent_option(USE_MPS "Use MPS for macOS build" ON "MPS_FOUND" OFF)
|
||||
cmake_dependent_option(USE_NCCL "Use NCCL" ON
|
||||
"USE_CUDA OR USE_ROCM;UNIX;NOT APPLE" OFF)
|
||||
cmake_dependent_option(USE_XCCL "Use XCCL" ON
|
||||
"USE_XPU;UNIX;NOT APPLE" OFF)
|
||||
cmake_dependent_option(USE_XCCL "Use XCCL" ON "USE_XPU;UNIX;NOT APPLE" OFF)
|
||||
cmake_dependent_option(USE_RCCL "Use RCCL" ON USE_NCCL OFF)
|
||||
cmake_dependent_option(USE_STATIC_NCCL "Use static NCCL" OFF "USE_NCCL" OFF)
|
||||
cmake_dependent_option(USE_SYSTEM_NCCL "Use system-wide NCCL" OFF "USE_NCCL"
|
||||
@ -287,10 +287,9 @@ option(USE_PROF "Use profiling" OFF)
|
||||
option(USE_PYTORCH_QNNPACK "Use ATen/QNNPACK (quantized 8-bit operators)" ON)
|
||||
option(USE_SNPE "Use Qualcomm's SNPE library" OFF)
|
||||
option(USE_SYSTEM_EIGEN_INSTALL
|
||||
"Use system Eigen instead of the one under third_party" OFF)
|
||||
cmake_dependent_option(
|
||||
USE_VALGRIND "Use Valgrind. Only available on Linux." ON
|
||||
"LINUX" OFF)
|
||||
"Use system Eigen instead of the one under third_party" OFF)
|
||||
cmake_dependent_option(USE_VALGRIND "Use Valgrind. Only available on Linux." ON
|
||||
"LINUX" OFF)
|
||||
|
||||
if(NOT DEFINED USE_VULKAN)
|
||||
cmake_dependent_option(USE_VULKAN "Use Vulkan GPU backend" ON "ANDROID" OFF)
|
||||
@ -298,9 +297,8 @@ endif()
|
||||
|
||||
option(USE_SOURCE_DEBUG_ON_MOBILE "Enable" ON)
|
||||
option(USE_LITE_INTERPRETER_PROFILER "Enable" ON)
|
||||
cmake_dependent_option(
|
||||
USE_LITE_AOTI "Include AOTI sources" OFF
|
||||
"BUILD_LITE_INTERPRETER" OFF)
|
||||
cmake_dependent_option(USE_LITE_AOTI "Include AOTI sources" OFF
|
||||
"BUILD_LITE_INTERPRETER" OFF)
|
||||
option(USE_VULKAN_FP16_INFERENCE "Vulkan - Use fp16 inference" OFF)
|
||||
option(USE_VULKAN_RELAXED_PRECISION
|
||||
"Vulkan - Use relaxed precision math in the kernels (mediump)" OFF)
|
||||
@ -333,25 +331,27 @@ cmake_dependent_option(USE_SYSTEM_UCC "Use system-wide UCC" OFF "USE_UCC" OFF)
|
||||
cmake_dependent_option(USE_C10D_UCC "USE C10D UCC" ON "USE_DISTRIBUTED;USE_UCC"
|
||||
OFF)
|
||||
cmake_dependent_option(
|
||||
USE_GLOO "Use Gloo. Only available if USE_DISTRIBUTED is on." ON
|
||||
"USE_DISTRIBUTED" OFF)
|
||||
USE_GLOO "Use Gloo. Only available if USE_DISTRIBUTED is on." ON
|
||||
"USE_DISTRIBUTED" OFF)
|
||||
cmake_dependent_option(
|
||||
USE_GLOO_WITH_OPENSSL "Use Gloo with OpenSSL. Only available if USE_GLOO is on." OFF
|
||||
"USE_GLOO AND LINUX AND NOT INTERN_BUILD_MOBILE" OFF)
|
||||
USE_GLOO_WITH_OPENSSL
|
||||
"Use Gloo with OpenSSL. Only available if USE_GLOO is on." OFF
|
||||
"USE_GLOO AND LINUX AND NOT INTERN_BUILD_MOBILE" OFF)
|
||||
cmake_dependent_option(
|
||||
USE_GLOO_IBVERBS "Use Gloo with ibverbs backend. Only available if USE_GLOO is on." OFF
|
||||
"USE_GLOO AND LINUX AND NOT INTERN_BUILD_MOBILE" OFF)
|
||||
USE_GLOO_IBVERBS
|
||||
"Use Gloo with ibverbs backend. Only available if USE_GLOO is on." OFF
|
||||
"USE_GLOO AND LINUX AND NOT INTERN_BUILD_MOBILE" OFF)
|
||||
cmake_dependent_option(USE_C10D_GLOO "USE C10D GLOO" ON
|
||||
"USE_DISTRIBUTED;USE_GLOO" OFF)
|
||||
cmake_dependent_option(USE_C10D_NCCL "USE C10D NCCL" ON
|
||||
"USE_DISTRIBUTED;USE_NCCL" OFF)
|
||||
cmake_dependent_option(USE_C10D_XCCL "USE C10D XCCL" ON
|
||||
"USE_DISTRIBUTED;USE_XCCL" OFF)
|
||||
cmake_dependent_option(USE_C10D_MPI "USE C10D MPI" ON "USE_DISTRIBUTED;USE_MPI"
|
||||
OFF)
|
||||
cmake_dependent_option(
|
||||
USE_C10D_GLOO "USE C10D GLOO" ON "USE_DISTRIBUTED;USE_GLOO" OFF)
|
||||
cmake_dependent_option(
|
||||
USE_C10D_NCCL "USE C10D NCCL" ON "USE_DISTRIBUTED;USE_NCCL" OFF)
|
||||
cmake_dependent_option(
|
||||
USE_C10D_XCCL "USE C10D XCCL" ON "USE_DISTRIBUTED;USE_XCCL" OFF)
|
||||
cmake_dependent_option(
|
||||
USE_C10D_MPI "USE C10D MPI" ON "USE_DISTRIBUTED;USE_MPI" OFF)
|
||||
cmake_dependent_option(
|
||||
USE_TENSORPIPE "Use TensorPipe. Only available if USE_DISTRIBUTED is on." ON
|
||||
"USE_DISTRIBUTED AND NOT WIN32" OFF)
|
||||
USE_TENSORPIPE "Use TensorPipe. Only available if USE_DISTRIBUTED is on." ON
|
||||
"USE_DISTRIBUTED AND NOT WIN32" OFF)
|
||||
option(ONNX_ML "Enable traditional ONNX ML API." ON)
|
||||
option(HAVE_SOVERSION "Whether to add SOVERSION to the shared objects" OFF)
|
||||
option(BUILD_LIBTORCH_CPU_WITH_DEBUG
|
||||
@ -373,8 +373,9 @@ cmake_dependent_option(
|
||||
cmake_dependent_option(BUILD_FUNCTORCH "Build Functorch" ON "BUILD_PYTHON" OFF)
|
||||
cmake_dependent_option(BUILD_BUNDLE_PTXAS "Bundle PTX into torch/bin fodler"
|
||||
OFF "USE_CUDA" OFF)
|
||||
cmake_dependent_option(USE_KLEIDIAI "Use KleidiAI for the ARM CPU & AARCH64 architecture." ON
|
||||
"CPU_AARCH64" OFF)
|
||||
cmake_dependent_option(
|
||||
USE_KLEIDIAI "Use KleidiAI for the ARM CPU & AARCH64 architecture." ON
|
||||
"CPU_AARCH64" OFF)
|
||||
|
||||
option(USE_MIMALLOC "Use mimalloc" OFF)
|
||||
# Enable third party mimalloc library to improve memory allocation performance
|
||||
@ -384,10 +385,9 @@ if(WIN32)
|
||||
set(USE_MIMALLOC ON)
|
||||
|
||||
# Not enable USE_MIMALLOC_ON_MKL due to it caused issue:
|
||||
# https://github.com/pytorch/pytorch/issues/138994
|
||||
# Will turn on when we can fix USE_STATIC_MKL lost functionality:
|
||||
# https://github.com/pytorch/pytorch/pull/138996
|
||||
# set(USE_MIMALLOC_ON_MKL ON)
|
||||
# https://github.com/pytorch/pytorch/issues/138994 Will turn on when we can
|
||||
# fix USE_STATIC_MKL lost functionality:
|
||||
# https://github.com/pytorch/pytorch/pull/138996 set(USE_MIMALLOC_ON_MKL ON)
|
||||
endif()
|
||||
|
||||
if(USE_CCACHE)
|
||||
@ -543,8 +543,7 @@ if(LINUX)
|
||||
string(STRIP "${ENV_LDFLAGS}" ENV_LDFLAGS)
|
||||
# Do not append linker flags passed via env var if they already there
|
||||
if(NOT ${CMAKE_SHARED_LINKER_FLAGS} MATCHES "${ENV_LDFLAGS}")
|
||||
set(CMAKE_SHARED_LINKER_FLAGS
|
||||
"${CMAKE_SHARED_LINKER_FLAGS} ${ENV_LDFLAGS}")
|
||||
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${ENV_LDFLAGS}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@ -698,10 +697,10 @@ if(ANDROID
|
||||
endif()
|
||||
|
||||
if(USE_KLEIDIAI AND CMAKE_C_COMPILER_VERSION)
|
||||
if(CMAKE_C_COMPILER_VERSION VERSION_LESS 11)
|
||||
set(USE_KLEIDIAI OFF)
|
||||
message(WARNING "Disabling KleidiAI: Requires at least GCC 11 or Clang 11")
|
||||
endif()
|
||||
if(CMAKE_C_COMPILER_VERSION VERSION_LESS 11)
|
||||
set(USE_KLEIDIAI OFF)
|
||||
message(WARNING "Disabling KleidiAI: Requires at least GCC 11 or Clang 11")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# INTERN_BUILD_ATEN_OPS is used to control whether to build ATen/TH operators.
|
||||
@ -835,7 +834,7 @@ include(ExternalProject)
|
||||
# ---[ Dependencies ---[ FBGEMM doesn't work on x86 32bit and
|
||||
# CMAKE_SYSTEM_PROCESSOR thinks its 64bit
|
||||
if(USE_FBGEMM
|
||||
AND((CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND CMAKE_SIZEOF_VOID_P EQUAL
|
||||
AND ((CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND CMAKE_SIZEOF_VOID_P EQUAL
|
||||
4)
|
||||
OR CMAKE_SYSTEM_PROCESSOR STREQUAL "x86"))
|
||||
set(USE_FBGEMM OFF)
|
||||
@ -877,8 +876,10 @@ cmake_dependent_option(
|
||||
cmake_dependent_option(
|
||||
USE_MEM_EFF_ATTENTION
|
||||
"Enable memory-efficient attention for scaled dot product attention.\
|
||||
Will be disabled if not supported by the platform" ON
|
||||
"USE_CUDA OR USE_ROCM" OFF)
|
||||
Will be disabled if not supported by the platform"
|
||||
ON
|
||||
"USE_CUDA OR USE_ROCM"
|
||||
OFF)
|
||||
|
||||
#
|
||||
# Cannot be put into Dependencies.cmake due circular dependency:
|
||||
@ -910,7 +911,8 @@ if(USE_PYTORCH_QNNPACK)
|
||||
endif()
|
||||
|
||||
# Enable sleef on macOS with Apple silicon by default
|
||||
if((${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") AND ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "arm64"))
|
||||
if((${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") AND ("${CMAKE_SYSTEM_PROCESSOR}"
|
||||
STREQUAL "arm64"))
|
||||
message(STATUS "Running on macOS with Apple silicon")
|
||||
string(APPEND CMAKE_CXX_FLAGS " -DAT_BUILD_ARM_VEC256_WITH_SLEEF")
|
||||
add_definitions(-DAT_BUILD_ARM_VEC256_WITH_SLEEF)
|
||||
@ -918,12 +920,11 @@ endif()
|
||||
|
||||
# Enable sleef on Arm(R) architecture by default (except Android)
|
||||
if((NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Android")
|
||||
AND("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "aarch64"))
|
||||
AND ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "aarch64"))
|
||||
string(APPEND CMAKE_CXX_FLAGS " -DAT_BUILD_ARM_VEC256_WITH_SLEEF")
|
||||
add_definitions(-DAT_BUILD_ARM_VEC256_WITH_SLEEF)
|
||||
endif()
|
||||
|
||||
|
||||
if(USE_XNNPACK)
|
||||
string(APPEND CMAKE_CXX_FLAGS " -DUSE_XNNPACK")
|
||||
endif()
|
||||
@ -1073,9 +1074,13 @@ if(NOT MSVC)
|
||||
endif()
|
||||
append_cxx_flag_if_supported("-Wno-maybe-uninitialized" CMAKE_CXX_FLAGS)
|
||||
append_cxx_flag_if_supported("-fstandalone-debug" CMAKE_CXX_FLAGS_DEBUG)
|
||||
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" AND CMAKE_CXX_COMPILER_ID MATCHES "GNU")
|
||||
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" AND CMAKE_CXX_COMPILER_ID MATCHES
|
||||
"GNU")
|
||||
if(CMAKE_BUILD_TYPE MATCHES Debug)
|
||||
message(Warning "Applying -Og optimization for aarch64 GCC debug build to workaround ICE")
|
||||
message(
|
||||
Warning
|
||||
"Applying -Og optimization for aarch64 GCC debug build to workaround ICE"
|
||||
)
|
||||
endif()
|
||||
string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fno-omit-frame-pointer -Og")
|
||||
string(APPEND CMAKE_LINKER_FLAGS_DEBUG " -fno-omit-frame-pointer -Og")
|
||||
@ -1086,9 +1091,11 @@ if(NOT MSVC)
|
||||
append_cxx_flag_if_supported("-fno-math-errno" CMAKE_CXX_FLAGS)
|
||||
append_cxx_flag_if_supported("-fno-trapping-math" CMAKE_CXX_FLAGS)
|
||||
append_cxx_flag_if_supported("-Werror=format" CMAKE_CXX_FLAGS)
|
||||
if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 13)
|
||||
if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION
|
||||
VERSION_GREATER_EQUAL 13)
|
||||
append_cxx_flag_if_supported("-Wno-dangling-reference" CMAKE_CXX_FLAGS)
|
||||
append_cxx_flag_if_supported("-Wno-error=dangling-reference" CMAKE_CXX_FLAGS)
|
||||
append_cxx_flag_if_supported("-Wno-error=dangling-reference"
|
||||
CMAKE_CXX_FLAGS)
|
||||
endif()
|
||||
else()
|
||||
# Define export functions for AOTI.
|
||||
@ -1241,7 +1248,7 @@ if(USE_MIMALLOC AND USE_MIMALLOC_ON_MKL)
|
||||
endif()
|
||||
|
||||
# ---[ Main build
|
||||
add_subdirectory(torch/headeronly) # headeronly headers
|
||||
add_subdirectory(torch/headeronly) # headeronly headers
|
||||
add_subdirectory(c10)
|
||||
add_subdirectory(caffe2)
|
||||
|
||||
@ -1260,7 +1267,7 @@ endif()
|
||||
# access to Caffe2.
|
||||
|
||||
if((NOT USE_GLOG)
|
||||
OR(NOT USE_GFLAGS)
|
||||
OR (NOT USE_GFLAGS)
|
||||
OR BUILD_CUSTOM_PROTOBUF)
|
||||
message(WARNING "Generated cmake files are only fully tested if one builds "
|
||||
"with system glog, gflags, and protobuf. Other settings may "
|
||||
@ -1368,7 +1375,8 @@ if(DEFINED USE_CUSTOM_DEBINFO)
|
||||
# care about and caffe2/ for all test targets defined there
|
||||
if(BUILD_LIBTORCHLESS)
|
||||
caffe2_update_option(USE_CUDA OFF)
|
||||
set(ALL_PT_TARGETS "torch_python;${C10_LIB};${TORCH_CPU_LIB};${TORCH_LIB}")
|
||||
set(ALL_PT_TARGETS
|
||||
"torch_python;${C10_LIB};${TORCH_CPU_LIB};${TORCH_LIB}")
|
||||
else()
|
||||
# @todo test if we can remove this
|
||||
set(ALL_PT_TARGETS "torch_python;c10;torch_cpu;torch")
|
||||
|
||||
@ -1,8 +1,7 @@
|
||||
cmake_minimum_required(VERSION 3.5)
|
||||
option(BUILD_LITE_INTERPRETER "Master flag to build pytorch_jni_lite" ON)
|
||||
message(
|
||||
STATUS
|
||||
"BUILD_LITE_INTERPRETER (pytorch_jni_lite): ${BUILD_LITE_INTERPRETER}")
|
||||
STATUS "BUILD_LITE_INTERPRETER (pytorch_jni_lite): ${BUILD_LITE_INTERPRETER}")
|
||||
|
||||
if(BUILD_LITE_INTERPRETER)
|
||||
project(pytorch_jni_lite CXX)
|
||||
@ -14,7 +13,10 @@ endif()
|
||||
|
||||
include(GNUInstallDirs)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ standard whose features are requested to build this target.")
|
||||
set(CMAKE_CXX_STANDARD
|
||||
17
|
||||
CACHE STRING
|
||||
"The C++ standard whose features are requested to build this target.")
|
||||
set(CMAKE_VERBOSE_MAKEFILE ON)
|
||||
message(STATUS "ANDROID_STL:${ANDROID_STL}")
|
||||
|
||||
@ -35,15 +37,18 @@ set(pytorch_android_DIR ${CMAKE_CURRENT_LIST_DIR}/src/main/cpp)
|
||||
|
||||
if(ANDROID_ABI)
|
||||
set(USE_VULKAN ON)
|
||||
set(libtorch_include_DIR ${pytorch_android_DIR}/libtorch_include/${ANDROID_ABI})
|
||||
set(libtorch_include_DIR
|
||||
${pytorch_android_DIR}/libtorch_include/${ANDROID_ABI})
|
||||
set(BUILD_SUBDIR ${ANDROID_ABI})
|
||||
elseif(BUILD_LIBTORCH_WITH_JNI)
|
||||
# Don't need LIBTORCH_HOME if we're building from within PyTorch.
|
||||
else()
|
||||
# Building against a pre-built libtorch.
|
||||
if(NOT LIBTORCH_HOME)
|
||||
message(FATAL_ERROR
|
||||
"pytorch_android requires LIBTORCH_HOME to be defined for non-Android builds.")
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"pytorch_android requires LIBTORCH_HOME to be defined for non-Android builds."
|
||||
)
|
||||
endif()
|
||||
set(libtorch_include_DIR ${LIBTORCH_HOME}/include)
|
||||
link_directories(${LIBTORCH_HOME}/lib)
|
||||
@ -52,39 +57,32 @@ endif()
|
||||
|
||||
message(STATUS "libtorch dir:${libtorch_DIR}")
|
||||
|
||||
configure_file(
|
||||
${pytorch_android_DIR}/cmake_macros.h.in
|
||||
${pytorch_android_DIR}/cmake_macros.h)
|
||||
|
||||
configure_file(${pytorch_android_DIR}/cmake_macros.h.in
|
||||
${pytorch_android_DIR}/cmake_macros.h)
|
||||
|
||||
if(BUILD_LITE_INTERPRETER)
|
||||
file(GLOB pytorch_android_SOURCES
|
||||
${pytorch_android_DIR}/pytorch_jni_lite.cpp
|
||||
${pytorch_android_DIR}/pytorch_jni_common.cpp
|
||||
${pytorch_android_DIR}/pytorch_jni_common.h
|
||||
)
|
||||
file(GLOB pytorch_android_SOURCES ${pytorch_android_DIR}/pytorch_jni_lite.cpp
|
||||
${pytorch_android_DIR}/pytorch_jni_common.cpp
|
||||
${pytorch_android_DIR}/pytorch_jni_common.h)
|
||||
else()
|
||||
file(GLOB pytorch_android_SOURCES
|
||||
${pytorch_android_DIR}/pytorch_jni_jit.cpp
|
||||
${pytorch_android_DIR}/pytorch_jni_common.cpp
|
||||
${pytorch_android_DIR}/pytorch_jni_common.h
|
||||
)
|
||||
file(GLOB pytorch_android_SOURCES ${pytorch_android_DIR}/pytorch_jni_jit.cpp
|
||||
${pytorch_android_DIR}/pytorch_jni_common.cpp
|
||||
${pytorch_android_DIR}/pytorch_jni_common.h)
|
||||
endif()
|
||||
add_library(${PYTORCH_JNI_TARGET} SHARED ${pytorch_android_SOURCES})
|
||||
|
||||
if(APPLE)
|
||||
# Need to add rpath so dlopen can find dependencies.
|
||||
add_custom_command(TARGET pytorch_jni
|
||||
POST_BUILD COMMAND
|
||||
${CMAKE_INSTALL_NAME_TOOL} -add_rpath "@loader_path"
|
||||
$<TARGET_FILE:pytorch_jni>)
|
||||
add_custom_command(
|
||||
TARGET pytorch_jni
|
||||
POST_BUILD
|
||||
COMMAND ${CMAKE_INSTALL_NAME_TOOL} -add_rpath "@loader_path"
|
||||
$<TARGET_FILE:pytorch_jni>)
|
||||
endif()
|
||||
|
||||
target_compile_options(${PYTORCH_JNI_TARGET} PRIVATE
|
||||
-fexceptions
|
||||
)
|
||||
target_compile_options(${PYTORCH_JNI_TARGET} PRIVATE -fexceptions)
|
||||
target_include_directories(${PYTORCH_JNI_TARGET} BEFORE
|
||||
PUBLIC $<BUILD_INTERFACE:${libtorch_include_DIR}>)
|
||||
PUBLIC $<BUILD_INTERFACE:${libtorch_include_DIR}>)
|
||||
|
||||
set(fbjni_DIR ${CMAKE_CURRENT_LIST_DIR}/../libs/fbjni/)
|
||||
set(fbjni_BUILD_DIR ${CMAKE_BINARY_DIR}/fbjni/${BUILD_SUBDIR})
|
||||
@ -102,8 +100,9 @@ if(ANDROID_ABI)
|
||||
function(import_static_lib name)
|
||||
add_library(${name} STATIC IMPORTED)
|
||||
set_property(
|
||||
TARGET ${name}
|
||||
PROPERTY IMPORTED_LOCATION
|
||||
TARGET ${name}
|
||||
PROPERTY
|
||||
IMPORTED_LOCATION
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/main/jniLibs/${ANDROID_ABI}/${name}.a)
|
||||
endfunction(import_static_lib)
|
||||
|
||||
@ -135,17 +134,10 @@ if(ANDROID_ABI)
|
||||
libpthreadpool
|
||||
libeigen_blas
|
||||
libcpuinfo
|
||||
libclog
|
||||
)
|
||||
libclog)
|
||||
else()
|
||||
# Prefer dynamic linking on the host
|
||||
set(pytorch_jni_LIBS
|
||||
fbjni
|
||||
torch
|
||||
torch_cpu
|
||||
c10
|
||||
cpuinfo
|
||||
)
|
||||
set(pytorch_jni_LIBS fbjni torch torch_cpu c10 cpuinfo)
|
||||
|
||||
if(USE_NNPACK)
|
||||
list(APPEND pytorch_jni_LIBS nnpack)
|
||||
@ -173,12 +165,16 @@ endif()
|
||||
|
||||
target_link_libraries(${PYTORCH_JNI_TARGET} ${pytorch_jni_LIBS})
|
||||
|
||||
install(TARGETS ${PYTORCH_JNI_TARGET}
|
||||
install(
|
||||
TARGETS ${PYTORCH_JNI_TARGET}
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) #For windows
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) # For windows
|
||||
|
||||
if(MSVC)
|
||||
install(FILES $<TARGET_PDB_FILE:pytorch_jni> DESTINATION ${CMAKE_INSTALL_LIBDIR} OPTIONAL)
|
||||
install(
|
||||
FILES $<TARGET_PDB_FILE:pytorch_jni>
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
OPTIONAL)
|
||||
install(TARGETS ${PYTORCH_JNI_TARGET} DESTINATION ${CMAKE_INSTALL_LIBDIR})
|
||||
endif()
|
||||
|
||||
@ -1,21 +1,19 @@
|
||||
cmake_minimum_required(VERSION 3.5)
|
||||
project(pytorch_vision_jni CXX)
|
||||
set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ standard whose features are requested to build this target.")
|
||||
set(CMAKE_CXX_STANDARD
|
||||
17
|
||||
CACHE STRING
|
||||
"The C++ standard whose features are requested to build this target.")
|
||||
set(CMAKE_VERBOSE_MAKEFILE ON)
|
||||
|
||||
set(pytorch_vision_cpp_DIR ${CMAKE_CURRENT_LIST_DIR}/src/main/cpp)
|
||||
|
||||
file(GLOB pytorch_vision_SOURCES
|
||||
${pytorch_vision_cpp_DIR}/pytorch_vision_jni.cpp
|
||||
)
|
||||
${pytorch_vision_cpp_DIR}/pytorch_vision_jni.cpp)
|
||||
|
||||
add_library(pytorch_vision_jni SHARED
|
||||
${pytorch_vision_SOURCES}
|
||||
)
|
||||
add_library(pytorch_vision_jni SHARED ${pytorch_vision_SOURCES})
|
||||
|
||||
target_compile_options(pytorch_vision_jni PRIVATE
|
||||
-fexceptions
|
||||
)
|
||||
target_compile_options(pytorch_vision_jni PRIVATE -fexceptions)
|
||||
|
||||
set(BUILD_SUBDIR ${ANDROID_ABI})
|
||||
|
||||
|
||||
@ -5,17 +5,17 @@ endif()
|
||||
# Find modules
|
||||
if(NOT INTERN_BUILD_MOBILE)
|
||||
list(APPEND CMAKE_MODULE_PATH /usr/lib/x86_64-linux-gnu/)
|
||||
list(APPEND CMAKE_LIBRARY_PATH /usr/lib/x86_64-linux-gnu/ /usr/lib/aarch64-linux-gnu/)
|
||||
list(APPEND CMAKE_LIBRARY_PATH /usr/lib/x86_64-linux-gnu/
|
||||
/usr/lib/aarch64-linux-gnu/)
|
||||
endif()
|
||||
|
||||
list(APPEND CMAKE_MODULE_PATH
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../cmake/Modules
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../cmake/public
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../cmake/Modules_CUDA_fix)
|
||||
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../cmake/Modules
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../cmake/public
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../cmake/Modules_CUDA_fix)
|
||||
|
||||
cmake_policy(SET CMP0012 NEW)
|
||||
|
||||
#############################################
|
||||
# ##############################################################################
|
||||
|
||||
set(ATen_CPU_SRCS)
|
||||
set(ATen_MTIA_SRCS)
|
||||
@ -47,20 +47,25 @@ set(ATen_CUDA_DEPENDENCY_LIBS)
|
||||
set(ATen_HIP_DEPENDENCY_LIBS)
|
||||
set(ATen_PUBLIC_CUDA_DEPENDENCY_LIBS)
|
||||
set(ATen_PUBLIC_HIP_DEPENDENCY_LIBS)
|
||||
set(ATEN_INSTALL_BIN_SUBDIR "bin" CACHE PATH "ATen install binary subdirectory")
|
||||
set(ATEN_INSTALL_LIB_SUBDIR "lib" CACHE PATH "ATen install library subdirectory")
|
||||
set(ATEN_INSTALL_INCLUDE_SUBDIR "include" CACHE PATH "ATen install include subdirectory")
|
||||
set(ATEN_INSTALL_BIN_SUBDIR
|
||||
"bin"
|
||||
CACHE PATH "ATen install binary subdirectory")
|
||||
set(ATEN_INSTALL_LIB_SUBDIR
|
||||
"lib"
|
||||
CACHE PATH "ATen install library subdirectory")
|
||||
set(ATEN_INSTALL_INCLUDE_SUBDIR
|
||||
"include"
|
||||
CACHE PATH "ATen install include subdirectory")
|
||||
set(MEM_EFF_ATTENTION_CUDA_SOURCES)
|
||||
|
||||
set(TH_LINK_STYLE STATIC)
|
||||
set(TH_CPU_INCLUDE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src
|
||||
${CMAKE_CURRENT_BINARY_DIR}/src
|
||||
${CMAKE_BINARY_DIR}/aten/src)
|
||||
set(TH_CPU_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/src
|
||||
${CMAKE_CURRENT_BINARY_DIR}/src ${CMAKE_BINARY_DIR}/aten/src)
|
||||
list(APPEND ATen_CPU_INCLUDE ${TH_CPU_INCLUDE})
|
||||
|
||||
if(USE_VULKAN)
|
||||
list(APPEND ATen_CPU_INCLUDE ${CMAKE_BINARY_DIR}/vulkan ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/VulkanMemoryAllocator)
|
||||
list(APPEND ATen_CPU_INCLUDE ${CMAKE_BINARY_DIR}/vulkan
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../third_party/VulkanMemoryAllocator)
|
||||
endif()
|
||||
|
||||
# Find the HIP package, set the HIP paths, load the HIP CMake.
|
||||
@ -73,12 +78,15 @@ endif()
|
||||
|
||||
# Both CUDA and ROCM are enabled and found. Report an error.
|
||||
if(USE_CUDA AND USE_ROCM)
|
||||
message(FATAL_ERROR "Both CUDA and ROCm are enabled and found. PyTorch can only be built with either of them. Please turn one off by using either USE_CUDA=OFF or USE_ROCM=OFF.")
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"Both CUDA and ROCm are enabled and found. PyTorch can only be built with either of them. Please turn one off by using either USE_CUDA=OFF or USE_ROCM=OFF."
|
||||
)
|
||||
endif()
|
||||
|
||||
if(USE_ROCM)
|
||||
# TODO: AT_HIP_ENABLED (change this once we represent HIP as HIP in
|
||||
# ATen proper)
|
||||
# TODO: AT_HIP_ENABLED (change this once we represent HIP as HIP in ATen
|
||||
# proper)
|
||||
set(AT_CUDA_ENABLED 1)
|
||||
add_subdirectory(src/THH)
|
||||
message("ROCm is enabled.")
|
||||
@ -109,45 +117,118 @@ else()
|
||||
set(AT_HIPSPARSELT_ENABLED 0)
|
||||
endif()
|
||||
|
||||
list(APPEND ATen_CPU_INCLUDE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src)
|
||||
list(APPEND ATen_CPU_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/src)
|
||||
add_subdirectory(src/ATen)
|
||||
|
||||
# Pass source, includes, and libs to parent
|
||||
set(ATen_CPU_SRCS ${ATen_CPU_SRCS} PARENT_SCOPE)
|
||||
set(ATen_CORE_SRCS ${ATen_CORE_SRCS} PARENT_SCOPE)
|
||||
set(ATen_MTIA_SRCS ${ATen_MTIA_SRCS} PARENT_SCOPE)
|
||||
set(ATen_XPU_SRCS ${ATen_XPU_SRCS} PARENT_SCOPE)
|
||||
set(ATen_XPU_INCLUDE ${ATen_XPU_INCLUDE} PARENT_SCOPE)
|
||||
set(ATen_CUDA_CU_SRCS ${ATen_CUDA_CU_SRCS} PARENT_SCOPE)
|
||||
set(ATen_CUDA_CPP_SRCS ${ATen_CUDA_CPP_SRCS} PARENT_SCOPE)
|
||||
set(ATen_CUDA_LINALG_SRCS ${ATen_CUDA_LINALG_SRCS} PARENT_SCOPE)
|
||||
set(ATen_CUDA_SRCS_W_SORT_BY_KEY ${ATen_CUDA_SRCS_W_SORT_BY_KEY} PARENT_SCOPE)
|
||||
set(ATen_CUDA_CU_SRCS_W_SORT_BY_KEY ${ATen_CUDA_CU_SRCS_W_SORT_BY_KEY} PARENT_SCOPE)
|
||||
set(ATen_HIP_SRCS ${ATen_HIP_SRCS} PARENT_SCOPE)
|
||||
set(ATen_MPS_SRCS ${ATen_MPS_SRCS} PARENT_SCOPE)
|
||||
set(ATen_MPS_TEST_SRCS ${ATen_MPS_TEST_SRCS} PARENT_SCOPE)
|
||||
set(ATen_HIP_SRCS_W_SORT_BY_KEY ${ATen_HIP_SRCS_W_SORT_BY_KEY} PARENT_SCOPE)
|
||||
set(ATen_XPU_SRCS ${ATen_XPU_SRCS} PARENT_SCOPE)
|
||||
set(ATen_XPU_TEST_SRCS ${ATen_XPU_TEST_SRCS} PARENT_SCOPE)
|
||||
set(ATen_NVRTC_STUB_SRCS ${ATen_NVRTC_STUB_SRCS} PARENT_SCOPE)
|
||||
set(ATen_CPU_TEST_SRCS ${ATen_CPU_TEST_SRCS} PARENT_SCOPE)
|
||||
set(ATen_CUDA_TEST_SRCS ${ATen_CUDA_TEST_SRCS} PARENT_SCOPE)
|
||||
set(ATen_HIP_TEST_SRCS ${ATen_HIP_TEST_SRCS} PARENT_SCOPE)
|
||||
set(ATen_VULKAN_TEST_SRCS ${ATen_VULKAN_TEST_SRCS} PARENT_SCOPE)
|
||||
set(ATen_MOBILE_BENCHMARK_SRCS ${ATen_MOBILE_BENCHMARK_SRCS} PARENT_SCOPE)
|
||||
set(ATen_MOBILE_TEST_SRCS ${ATen_MOBILE_TEST_SRCS} PARENT_SCOPE)
|
||||
set(ATen_VEC_TEST_SRCS ${ATen_VEC_TEST_SRCS} PARENT_SCOPE)
|
||||
set(ATen_CPU_INCLUDE ${ATen_CPU_INCLUDE} PARENT_SCOPE)
|
||||
set(ATen_CUDA_INCLUDE ${ATen_CUDA_INCLUDE} PARENT_SCOPE)
|
||||
set(ATen_HIP_INCLUDE ${ATen_HIP_INCLUDE} PARENT_SCOPE)
|
||||
set(ATen_XPU_INCLUDE ${ATen_XPU_INCLUDE} PARENT_SCOPE)
|
||||
set(ATen_THIRD_PARTY_INCLUDE ${ATen_THIRD_PARTY_INCLUDE} PARENT_SCOPE)
|
||||
set(ATen_CPU_DEPENDENCY_LIBS ${ATen_CPU_DEPENDENCY_LIBS} PARENT_SCOPE)
|
||||
set(ATen_XPU_DEPENDENCY_LIBS ${ATen_XPU_DEPENDENCY_LIBS} PARENT_SCOPE)
|
||||
set(ATen_CUDA_DEPENDENCY_LIBS ${ATen_CUDA_DEPENDENCY_LIBS} PARENT_SCOPE)
|
||||
set(ATen_HIP_DEPENDENCY_LIBS ${ATen_HIP_DEPENDENCY_LIBS} PARENT_SCOPE)
|
||||
set(ATen_CORE_TEST_SRCS ${ATen_CORE_TEST_SRCS} PARENT_SCOPE)
|
||||
set(FLASH_ATTENTION_CUDA_SOURCES ${FLASH_ATTENTION_CUDA_SOURCES} PARENT_SCOPE)
|
||||
set(MEM_EFF_ATTENTION_CUDA_SOURCES ${MEM_EFF_ATTENTION_CUDA_SOURCES} PARENT_SCOPE)
|
||||
set(ATen_ATTENTION_KERNEL_SRCS ${ATen_ATTENTION_KERNEL_SRCS} PARENT_SCOPE)
|
||||
set(ATen_CPU_SRCS
|
||||
${ATen_CPU_SRCS}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_CORE_SRCS
|
||||
${ATen_CORE_SRCS}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_MTIA_SRCS
|
||||
${ATen_MTIA_SRCS}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_XPU_SRCS
|
||||
${ATen_XPU_SRCS}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_XPU_INCLUDE
|
||||
${ATen_XPU_INCLUDE}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_CUDA_CU_SRCS
|
||||
${ATen_CUDA_CU_SRCS}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_CUDA_CPP_SRCS
|
||||
${ATen_CUDA_CPP_SRCS}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_CUDA_LINALG_SRCS
|
||||
${ATen_CUDA_LINALG_SRCS}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_CUDA_SRCS_W_SORT_BY_KEY
|
||||
${ATen_CUDA_SRCS_W_SORT_BY_KEY}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_CUDA_CU_SRCS_W_SORT_BY_KEY
|
||||
${ATen_CUDA_CU_SRCS_W_SORT_BY_KEY}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_HIP_SRCS
|
||||
${ATen_HIP_SRCS}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_MPS_SRCS
|
||||
${ATen_MPS_SRCS}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_MPS_TEST_SRCS
|
||||
${ATen_MPS_TEST_SRCS}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_HIP_SRCS_W_SORT_BY_KEY
|
||||
${ATen_HIP_SRCS_W_SORT_BY_KEY}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_XPU_SRCS
|
||||
${ATen_XPU_SRCS}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_XPU_TEST_SRCS
|
||||
${ATen_XPU_TEST_SRCS}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_NVRTC_STUB_SRCS
|
||||
${ATen_NVRTC_STUB_SRCS}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_CPU_TEST_SRCS
|
||||
${ATen_CPU_TEST_SRCS}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_CUDA_TEST_SRCS
|
||||
${ATen_CUDA_TEST_SRCS}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_HIP_TEST_SRCS
|
||||
${ATen_HIP_TEST_SRCS}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_VULKAN_TEST_SRCS
|
||||
${ATen_VULKAN_TEST_SRCS}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_MOBILE_BENCHMARK_SRCS
|
||||
${ATen_MOBILE_BENCHMARK_SRCS}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_MOBILE_TEST_SRCS
|
||||
${ATen_MOBILE_TEST_SRCS}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_VEC_TEST_SRCS
|
||||
${ATen_VEC_TEST_SRCS}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_CPU_INCLUDE
|
||||
${ATen_CPU_INCLUDE}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_CUDA_INCLUDE
|
||||
${ATen_CUDA_INCLUDE}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_HIP_INCLUDE
|
||||
${ATen_HIP_INCLUDE}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_XPU_INCLUDE
|
||||
${ATen_XPU_INCLUDE}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_THIRD_PARTY_INCLUDE
|
||||
${ATen_THIRD_PARTY_INCLUDE}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_CPU_DEPENDENCY_LIBS
|
||||
${ATen_CPU_DEPENDENCY_LIBS}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_XPU_DEPENDENCY_LIBS
|
||||
${ATen_XPU_DEPENDENCY_LIBS}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_CUDA_DEPENDENCY_LIBS
|
||||
${ATen_CUDA_DEPENDENCY_LIBS}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_HIP_DEPENDENCY_LIBS
|
||||
${ATen_HIP_DEPENDENCY_LIBS}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_CORE_TEST_SRCS
|
||||
${ATen_CORE_TEST_SRCS}
|
||||
PARENT_SCOPE)
|
||||
set(FLASH_ATTENTION_CUDA_SOURCES
|
||||
${FLASH_ATTENTION_CUDA_SOURCES}
|
||||
PARENT_SCOPE)
|
||||
set(MEM_EFF_ATTENTION_CUDA_SOURCES
|
||||
${MEM_EFF_ATTENTION_CUDA_SOURCES}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_ATTENTION_KERNEL_SRCS
|
||||
${ATen_ATTENTION_KERNEL_SRCS}
|
||||
PARENT_SCOPE)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,15 +1,15 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates.
|
||||
# All rights reserved.
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the BSD-style license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
# This source code is licensed under the BSD-style license found in the LICENSE
|
||||
# file in the root directory of this source tree.
|
||||
|
||||
cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR)
|
||||
|
||||
project(cpuinfo-download NONE)
|
||||
|
||||
include(ExternalProject)
|
||||
ExternalProject_Add(cpuinfo
|
||||
ExternalProject_Add(
|
||||
cpuinfo
|
||||
GIT_REPOSITORY https://github.com/pytorch/cpuinfo.git
|
||||
GIT_TAG master
|
||||
SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/cpuinfo"
|
||||
@ -17,5 +17,4 @@ ExternalProject_Add(cpuinfo
|
||||
CONFIGURE_COMMAND ""
|
||||
BUILD_COMMAND ""
|
||||
INSTALL_COMMAND ""
|
||||
TEST_COMMAND ""
|
||||
)
|
||||
TEST_COMMAND "")
|
||||
|
||||
@ -1,15 +1,15 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates.
|
||||
# All rights reserved.
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the BSD-style license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
# This source code is licensed under the BSD-style license found in the LICENSE
|
||||
# file in the root directory of this source tree.
|
||||
|
||||
cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR)
|
||||
|
||||
project(fp16-download NONE)
|
||||
|
||||
include(ExternalProject)
|
||||
ExternalProject_Add(fp16
|
||||
ExternalProject_Add(
|
||||
fp16
|
||||
GIT_REPOSITORY https://github.com/Maratyszcza/FP16.git
|
||||
GIT_TAG master
|
||||
SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/fp16"
|
||||
@ -17,5 +17,4 @@ ExternalProject_Add(fp16
|
||||
CONFIGURE_COMMAND ""
|
||||
BUILD_COMMAND ""
|
||||
INSTALL_COMMAND ""
|
||||
TEST_COMMAND ""
|
||||
)
|
||||
TEST_COMMAND "")
|
||||
|
||||
@ -1,15 +1,15 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates.
|
||||
# All rights reserved.
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the BSD-style license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
# This source code is licensed under the BSD-style license found in the LICENSE
|
||||
# file in the root directory of this source tree.
|
||||
|
||||
cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR)
|
||||
|
||||
project(fxdiv-download NONE)
|
||||
|
||||
include(ExternalProject)
|
||||
ExternalProject_Add(fxdiv
|
||||
ExternalProject_Add(
|
||||
fxdiv
|
||||
GIT_REPOSITORY https://github.com/Maratyszcza/FXdiv.git
|
||||
GIT_TAG master
|
||||
SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/fxdiv"
|
||||
@ -17,5 +17,4 @@ ExternalProject_Add(fxdiv
|
||||
CONFIGURE_COMMAND ""
|
||||
BUILD_COMMAND ""
|
||||
INSTALL_COMMAND ""
|
||||
TEST_COMMAND ""
|
||||
)
|
||||
TEST_COMMAND "")
|
||||
|
||||
@ -1,21 +1,21 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates.
|
||||
# All rights reserved.
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the BSD-style license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
# This source code is licensed under the BSD-style license found in the LICENSE
|
||||
# file in the root directory of this source tree.
|
||||
|
||||
cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR)
|
||||
|
||||
project(googlebenchmark-download NONE)
|
||||
|
||||
include(ExternalProject)
|
||||
ExternalProject_Add(googlebenchmark
|
||||
ExternalProject_Add(
|
||||
googlebenchmark
|
||||
URL https://github.com/google/benchmark/archive/v1.4.1.zip
|
||||
URL_HASH SHA256=61ae07eb5d4a0b02753419eb17a82b7d322786bb36ab62bd3df331a4d47c00a7
|
||||
URL_HASH
|
||||
SHA256=61ae07eb5d4a0b02753419eb17a82b7d322786bb36ab62bd3df331a4d47c00a7
|
||||
SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googlebenchmark"
|
||||
BINARY_DIR "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark"
|
||||
CONFIGURE_COMMAND ""
|
||||
BUILD_COMMAND ""
|
||||
INSTALL_COMMAND ""
|
||||
TEST_COMMAND ""
|
||||
)
|
||||
TEST_COMMAND "")
|
||||
|
||||
@ -1,21 +1,21 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates.
|
||||
# All rights reserved.
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the BSD-style license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
# This source code is licensed under the BSD-style license found in the LICENSE
|
||||
# file in the root directory of this source tree.
|
||||
|
||||
cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR)
|
||||
|
||||
project(googletest-download NONE)
|
||||
|
||||
include(ExternalProject)
|
||||
ExternalProject_Add(googletest
|
||||
ExternalProject_Add(
|
||||
googletest
|
||||
URL https://github.com/google/googletest/archive/release-1.10.0.zip
|
||||
URL_HASH SHA256=94c634d499558a76fa649edb13721dce6e98fb1e7018dfaeba3cd7a083945e91
|
||||
URL_HASH
|
||||
SHA256=94c634d499558a76fa649edb13721dce6e98fb1e7018dfaeba3cd7a083945e91
|
||||
SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googletest"
|
||||
BINARY_DIR "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest"
|
||||
CONFIGURE_COMMAND ""
|
||||
BUILD_COMMAND ""
|
||||
INSTALL_COMMAND ""
|
||||
TEST_COMMAND ""
|
||||
)
|
||||
TEST_COMMAND "")
|
||||
|
||||
@ -1,15 +1,15 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates.
|
||||
# All rights reserved.
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the BSD-style license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
# This source code is licensed under the BSD-style license found in the LICENSE
|
||||
# file in the root directory of this source tree.
|
||||
|
||||
cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR)
|
||||
|
||||
project(psimd-download NONE)
|
||||
|
||||
include(ExternalProject)
|
||||
ExternalProject_Add(psimd
|
||||
ExternalProject_Add(
|
||||
psimd
|
||||
GIT_REPOSITORY https://github.com/Maratyszcza/psimd.git
|
||||
GIT_TAG master
|
||||
SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/psimd"
|
||||
@ -17,5 +17,4 @@ ExternalProject_Add(psimd
|
||||
CONFIGURE_COMMAND ""
|
||||
BUILD_COMMAND ""
|
||||
INSTALL_COMMAND ""
|
||||
TEST_COMMAND ""
|
||||
)
|
||||
TEST_COMMAND "")
|
||||
|
||||
@ -1,15 +1,15 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates.
|
||||
# All rights reserved.
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the BSD-style license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
# This source code is licensed under the BSD-style license found in the LICENSE
|
||||
# file in the root directory of this source tree.
|
||||
|
||||
cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR)
|
||||
|
||||
project(pthreadpool-download NONE)
|
||||
|
||||
include(ExternalProject)
|
||||
ExternalProject_Add(pthreadpool
|
||||
ExternalProject_Add(
|
||||
pthreadpool
|
||||
GIT_REPOSITORY https://github.com/Maratyszcza/pthreadpool.git
|
||||
GIT_TAG master
|
||||
SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/pthreadpool"
|
||||
@ -17,5 +17,4 @@ ExternalProject_Add(pthreadpool
|
||||
CONFIGURE_COMMAND ""
|
||||
BUILD_COMMAND ""
|
||||
INSTALL_COMMAND ""
|
||||
TEST_COMMAND ""
|
||||
)
|
||||
TEST_COMMAND "")
|
||||
|
||||
@ -1,63 +1,92 @@
|
||||
# generate a list of kernels, but not actually emit files at config stage
|
||||
execute_process(
|
||||
COMMAND python3 ${CMAKE_SOURCE_DIR}/third_party/composable_kernel/example/ck_tile/01_fmha/generate.py
|
||||
--api fwd --receipt 600 --list_blobs ${CMAKE_CURRENT_LIST_DIR}/fwd_blob_list.txt
|
||||
RESULT_VARIABLE ret
|
||||
)
|
||||
COMMAND
|
||||
python3
|
||||
${CMAKE_SOURCE_DIR}/third_party/composable_kernel/example/ck_tile/01_fmha/generate.py
|
||||
--api fwd --receipt 600 --list_blobs
|
||||
${CMAKE_CURRENT_LIST_DIR}/fwd_blob_list.txt
|
||||
RESULT_VARIABLE ret)
|
||||
|
||||
if(ret AND NOT ret EQUAL 0)
|
||||
message( FATAL_ERROR "CK Tile FMHA FAILED to generate a list of FWD kernels via Python.")
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"CK Tile FMHA FAILED to generate a list of FWD kernels via Python.")
|
||||
endif()
|
||||
|
||||
execute_process(
|
||||
COMMAND python3 ${CMAKE_SOURCE_DIR}/third_party/composable_kernel/example/ck_tile/01_fmha/generate.py
|
||||
--api bwd --receipt 600 --list_blobs ${CMAKE_CURRENT_LIST_DIR}/bwd_blob_list.txt
|
||||
RESULT_VARIABLE ret
|
||||
)
|
||||
COMMAND
|
||||
python3
|
||||
${CMAKE_SOURCE_DIR}/third_party/composable_kernel/example/ck_tile/01_fmha/generate.py
|
||||
--api bwd --receipt 600 --list_blobs
|
||||
${CMAKE_CURRENT_LIST_DIR}/bwd_blob_list.txt
|
||||
RESULT_VARIABLE ret)
|
||||
|
||||
if(ret AND NOT ret EQUAL 0)
|
||||
message( FATAL_ERROR "CK Tile FMHA FAILED to generate a list of BWD kernels via Python.")
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"CK Tile FMHA FAILED to generate a list of BWD kernels via Python.")
|
||||
endif()
|
||||
|
||||
# Generate the files for both fwd and bwd
|
||||
execute_process(COMMAND python3 ${CMAKE_SOURCE_DIR}/third_party/composable_kernel/example/ck_tile/01_fmha/generate.py --api fwd --receipt 600 --output_dir ${CMAKE_CURRENT_LIST_DIR}
|
||||
)
|
||||
execute_process(
|
||||
COMMAND
|
||||
python3
|
||||
${CMAKE_SOURCE_DIR}/third_party/composable_kernel/example/ck_tile/01_fmha/generate.py
|
||||
--api fwd --receipt 600 --output_dir ${CMAKE_CURRENT_LIST_DIR})
|
||||
|
||||
if(ret AND NOT ret EQUAL 0)
|
||||
message( FATAL_ERROR "CK Tile FMHA FAILED to generate FWD kernels.")
|
||||
message(FATAL_ERROR "CK Tile FMHA FAILED to generate FWD kernels.")
|
||||
endif()
|
||||
|
||||
execute_process(COMMAND python3 ${CMAKE_SOURCE_DIR}/third_party/composable_kernel/example/ck_tile/01_fmha/generate.py --api bwd --receipt 600 --output_dir ${CMAKE_CURRENT_LIST_DIR}
|
||||
RESULT_VARIABLE ret
|
||||
)
|
||||
execute_process(
|
||||
COMMAND
|
||||
python3
|
||||
${CMAKE_SOURCE_DIR}/third_party/composable_kernel/example/ck_tile/01_fmha/generate.py
|
||||
--api bwd --receipt 600 --output_dir ${CMAKE_CURRENT_LIST_DIR}
|
||||
RESULT_VARIABLE ret)
|
||||
|
||||
if(ret AND NOT ret EQUAL 0)
|
||||
message( FATAL_ERROR "CK Tile FMHA FAILED to generate BWD kernels.")
|
||||
message(FATAL_ERROR "CK Tile FMHA FAILED to generate BWD kernels.")
|
||||
endif()
|
||||
|
||||
# Change make_kernel to make_kernel_pt for fwd
|
||||
execute_process(
|
||||
COMMAND bash -c "${CMAKE_CURRENT_LIST_DIR}/add_make_kernel_pt.sh ${CMAKE_CURRENT_LIST_DIR}/fwd_blob_list.txt"
|
||||
COMMAND
|
||||
bash -c
|
||||
"${CMAKE_CURRENT_LIST_DIR}/add_make_kernel_pt.sh ${CMAKE_CURRENT_LIST_DIR}/fwd_blob_list.txt"
|
||||
RESULT_VARIABLE ret)
|
||||
|
||||
if(ret AND NOT ret EQUAL 0)
|
||||
message( FATAL_ERROR "CK Tile FMHA FAILED to change make_kernel to make_kernel_pt for the fwd pass")
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"CK Tile FMHA FAILED to change make_kernel to make_kernel_pt for the fwd pass"
|
||||
)
|
||||
endif()
|
||||
|
||||
# Change make_kernel to make_kernel_pt for bwd
|
||||
execute_process(
|
||||
COMMAND bash -c "${CMAKE_CURRENT_LIST_DIR}/add_make_kernel_pt.sh ${CMAKE_CURRENT_LIST_DIR}/bwd_blob_list.txt"
|
||||
COMMAND
|
||||
bash -c
|
||||
"${CMAKE_CURRENT_LIST_DIR}/add_make_kernel_pt.sh ${CMAKE_CURRENT_LIST_DIR}/bwd_blob_list.txt"
|
||||
RESULT_VARIABLE ret)
|
||||
|
||||
if(ret AND NOT ret EQUAL 0)
|
||||
message( FATAL_ERROR "CK Tile FMHA FAILED to change make_kernel to make_kernel_pt for the bwd pass")
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"CK Tile FMHA FAILED to change make_kernel to make_kernel_pt for the bwd pass"
|
||||
)
|
||||
endif()
|
||||
|
||||
# Change file extensions to .hip
|
||||
execute_process(COMMAND bash -c "for file in ${CMAKE_CURRENT_LIST_DIR}/*.cpp; do mv -- \"$file\" \"\${file%.cpp}.hip\"; done"
|
||||
RESULT_VARIABLE ret
|
||||
)
|
||||
execute_process(
|
||||
COMMAND
|
||||
bash -c
|
||||
"for file in ${CMAKE_CURRENT_LIST_DIR}/*.cpp; do mv -- \"$file\" \"\${file%.cpp}.hip\"; done"
|
||||
RESULT_VARIABLE ret)
|
||||
|
||||
if(ret AND NOT ret EQUAL 0)
|
||||
message( FATAL_ERROR "CK Tile FMHA FAILED to change the generated instances extensions from .cpp to .hpp")
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"CK Tile FMHA FAILED to change the generated instances extensions from .cpp to .hpp"
|
||||
)
|
||||
endif()
|
||||
|
||||
@ -2,19 +2,26 @@ include(CMakePrintHelpers)
|
||||
|
||||
# Generate AITER/CK Asm code
|
||||
execute_process(
|
||||
COMMAND python3 ${CMAKE_SOURCE_DIR}/third_party/aiter/csrc/py_itfs_cu/fmha_v3_bwd_kernel_generate.py --receipt 1 --output_dir ${CMAKE_CURRENT_LIST_DIR}
|
||||
RESULT_VARIABLE ret
|
||||
)
|
||||
COMMAND
|
||||
python3
|
||||
${CMAKE_SOURCE_DIR}/third_party/aiter/csrc/py_itfs_cu/fmha_v3_bwd_kernel_generate.py
|
||||
--receipt 1 --output_dir ${CMAKE_CURRENT_LIST_DIR}
|
||||
RESULT_VARIABLE ret)
|
||||
|
||||
if(ret AND NOT ret EQUAL 0)
|
||||
message( FATAL_ERROR "Failed to generate FAv3 CK Kernels")
|
||||
message(FATAL_ERROR "Failed to generate FAv3 CK Kernels")
|
||||
endif()
|
||||
|
||||
execute_process(
|
||||
COMMAND python3 ${CMAKE_SOURCE_DIR}/third_party/aiter/csrc/cpp_itfs/mha_bwd_generate.py --receipt 3 --output_dir ${CMAKE_CURRENT_LIST_DIR}
|
||||
RESULT_VARIABLE ret
|
||||
)
|
||||
|
||||
COMMAND
|
||||
python3
|
||||
${CMAKE_SOURCE_DIR}/third_party/aiter/csrc/cpp_itfs/mha_bwd_generate.py
|
||||
--receipt 3 --output_dir ${CMAKE_CURRENT_LIST_DIR}
|
||||
RESULT_VARIABLE ret)
|
||||
|
||||
# Change file extensions to .hip
|
||||
execute_process(COMMAND bash -c "for file in ${CMAKE_CURRENT_LIST_DIR}/*.cpp; do mv -- \"$file\" \"\${file%.cpp}.hip\"; done")
|
||||
execute_process(
|
||||
COMMAND
|
||||
bash -c
|
||||
"for file in ${CMAKE_CURRENT_LIST_DIR}/*.cpp; do mv -- \"$file\" \"\${file%.cpp}.hip\"; done"
|
||||
)
|
||||
|
||||
@ -3,19 +3,21 @@ if(PYTORCH_NNAPI_STANDALONE)
|
||||
cmake_minimum_required(VERSION 3.5 FATAL_ERROR)
|
||||
project(pytorch_nnapi)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ standard whose features are requested to build this target.")
|
||||
set(CMAKE_CXX_STANDARD
|
||||
14
|
||||
CACHE
|
||||
STRING
|
||||
"The C++ standard whose features are requested to build this target.")
|
||||
find_package(Torch REQUIRED)
|
||||
|
||||
set(NNAPI_SRCS
|
||||
nnapi_bind.cpp
|
||||
nnapi_wrapper.cpp
|
||||
nnapi_model_loader.cpp
|
||||
)
|
||||
set(NNAPI_SRCS nnapi_bind.cpp nnapi_wrapper.cpp nnapi_model_loader.cpp)
|
||||
|
||||
add_library(pytorch_nnapi SHARED ${NNAPI_SRCS})
|
||||
target_link_libraries(pytorch_nnapi torch)
|
||||
else()
|
||||
# Building within the PyTorch tree.
|
||||
file(GLOB ATen_NNAPI_SRCS "*.cpp")
|
||||
set(ATen_NNAPI_SRCS ${ATen_NNAPI_SRCS} PARENT_SCOPE)
|
||||
set(ATen_NNAPI_SRCS
|
||||
${ATen_NNAPI_SRCS}
|
||||
PARENT_SCOPE)
|
||||
endif()
|
||||
|
||||
@ -1,9 +1,16 @@
|
||||
file(GLOB_RECURSE ATen_QUANTIZED_HEADERS "*.h")
|
||||
file(GLOB_RECURSE ATen_QUANTIZED_SRCS "*.cpp")
|
||||
file(GLOB_RECURSE ATen_QUANTIZED_TEST_SRCS "*_test.cpp")
|
||||
EXCLUDE(ATen_QUANTIZED_SRCS "${ATen_QUANTIZED_SRCS}" ${ATen_QUANTIZED_TEST_SRCS})
|
||||
exclude(ATen_QUANTIZED_SRCS "${ATen_QUANTIZED_SRCS}"
|
||||
${ATen_QUANTIZED_TEST_SRCS})
|
||||
|
||||
# Pass to parent
|
||||
set(ATen_QUANTIZED_HEADERS ${ATen_QUANTIZED_HEADERS} PARENT_SCOPE)
|
||||
set(ATen_QUANTIZED_SRCS ${ATen_QUANTIZED_SRCS} PARENT_SCOPE)
|
||||
set(ATen_QUANTIZED_TEST_SRCS ${ATen_QUANTIZED_TEST_SRCS} PARENT_SCOPE)
|
||||
set(ATen_QUANTIZED_HEADERS
|
||||
${ATen_QUANTIZED_HEADERS}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_QUANTIZED_SRCS
|
||||
${ATen_QUANTIZED_SRCS}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_QUANTIZED_TEST_SRCS
|
||||
${ATen_QUANTIZED_TEST_SRCS}
|
||||
PARENT_SCOPE)
|
||||
|
||||
@ -4,7 +4,9 @@ if(MSVC)
|
||||
endif()
|
||||
endif(MSVC)
|
||||
|
||||
list(APPEND ATen_CPU_TEST_SRCS
|
||||
list(
|
||||
APPEND
|
||||
ATen_CPU_TEST_SRCS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/Dict_test.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/Dimname_test.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/MaybeOwned_test.cpp
|
||||
@ -49,11 +51,12 @@ list(APPEND ATen_CPU_TEST_SRCS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/weakref_test.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/wrapdim_test.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/xla_tensor_test.cpp
|
||||
# Fix this.
|
||||
# ${CMAKE_CURRENT_SOURCE_DIR}/xnnpack_test.cpp
|
||||
)
|
||||
# Fix this. ${CMAKE_CURRENT_SOURCE_DIR}/xnnpack_test.cpp
|
||||
)
|
||||
|
||||
list(APPEND ATen_CUDA_TEST_SRCS
|
||||
list(
|
||||
APPEND
|
||||
ATen_CUDA_TEST_SRCS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cuda_allocator_test.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cuda_apply_test.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cuda_atomic_ops_test.cu
|
||||
@ -76,10 +79,12 @@ list(APPEND ATen_CUDA_TEST_SRCS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cuda_vectorized_test.cu)
|
||||
if(CAFFE2_USE_CUDNN)
|
||||
list(APPEND ATen_CUDA_TEST_SRCS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cuda_cudnn_test.cpp)
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cuda_cudnn_test.cpp)
|
||||
endif()
|
||||
|
||||
list(APPEND ATen_HIP_TEST_SRCS
|
||||
list(
|
||||
APPEND
|
||||
ATen_HIP_TEST_SRCS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/hip/hip_apply_test.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/hip/hip_complex_math_test.hip
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/hip/hip_complex_test.hip
|
||||
@ -92,45 +97,62 @@ list(APPEND ATen_HIP_TEST_SRCS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/hip/hip_packedtensoraccessor_test.hip
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/hip/hip_vectorized_test.hip)
|
||||
# TODO: fix and enable these
|
||||
# ${CMAKE_CURRENT_SOURCE_DIR}/hip/hip_tensor_interop_test.cpp
|
||||
# ${CMAKE_CURRENT_SOURCE_DIR}/hip/hip_stream_test.cpp
|
||||
# ${CMAKE_CURRENT_SOURCE_DIR}/hip/hip_tensor_interop_test.cpp
|
||||
# ${CMAKE_CURRENT_SOURCE_DIR}/hip/hip_stream_test.cpp
|
||||
|
||||
list(APPEND ATen_VULKAN_TEST_SRCS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/vulkan_api_test.cpp)
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/vulkan_api_test.cpp)
|
||||
|
||||
list(APPEND ATen_MOBILE_TEST_SRCS
|
||||
list(
|
||||
APPEND
|
||||
ATen_MOBILE_TEST_SRCS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cpu_caching_allocator_test.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cpu_profiling_allocator_test.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/quantized_test.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/vec_test_all_types.cpp)
|
||||
|
||||
list(APPEND ATen_VEC_TEST_SRCS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/vec_test_all_types.cpp
|
||||
)
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/vec_test_all_types.cpp)
|
||||
|
||||
list(APPEND ATen_MPS_TEST_SRCS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mps_test_print.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mps_test_allocator.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mps_test_metal_library.cpp)
|
||||
list(APPEND ATen_MPS_TEST_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/mps_test_print.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mps_test_allocator.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mps_test_metal_library.cpp)
|
||||
if(APPLE AND USE_MPS)
|
||||
list(APPEND ATen_MPS_TEST_SRCS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mps_test_objc_interface.mm)
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mps_test_objc_interface.mm)
|
||||
endif()
|
||||
|
||||
list(APPEND ATen_XPU_TEST_SRCS
|
||||
list(
|
||||
APPEND
|
||||
ATen_XPU_TEST_SRCS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/xpu_caching_host_allocator_test.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/xpu_device_test.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/xpu_event_test.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/xpu_generator_test.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/xpu_reportMemoryUsage_test.cpp
|
||||
)
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/xpu_reportMemoryUsage_test.cpp)
|
||||
|
||||
# ---[ Send the lists to the parent scope.
|
||||
set(ATen_CPU_TEST_SRCS ${ATen_CPU_TEST_SRCS} PARENT_SCOPE)
|
||||
set(ATen_CUDA_TEST_SRCS ${ATen_CUDA_TEST_SRCS} PARENT_SCOPE)
|
||||
set(ATen_HIP_TEST_SRCS ${ATen_HIP_TEST_SRCS} PARENT_SCOPE)
|
||||
set(ATen_VULKAN_TEST_SRCS ${ATen_VULKAN_TEST_SRCS} PARENT_SCOPE)
|
||||
set(ATen_MOBILE_TEST_SRCS ${ATen_MOBILE_TEST_SRCS} PARENT_SCOPE)
|
||||
set(ATen_VEC_TEST_SRCS ${ATen_VEC_TEST_SRCS} PARENT_SCOPE)
|
||||
set(ATen_MPS_TEST_SRCS ${ATen_MPS_TEST_SRCS} PARENT_SCOPE)
|
||||
set(ATen_XPU_TEST_SRCS ${ATen_XPU_TEST_SRCS} PARENT_SCOPE)
|
||||
set(ATen_CPU_TEST_SRCS
|
||||
${ATen_CPU_TEST_SRCS}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_CUDA_TEST_SRCS
|
||||
${ATen_CUDA_TEST_SRCS}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_HIP_TEST_SRCS
|
||||
${ATen_HIP_TEST_SRCS}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_VULKAN_TEST_SRCS
|
||||
${ATen_VULKAN_TEST_SRCS}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_MOBILE_TEST_SRCS
|
||||
${ATen_MOBILE_TEST_SRCS}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_VEC_TEST_SRCS
|
||||
${ATen_VEC_TEST_SRCS}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_MPS_TEST_SRCS
|
||||
${ATen_MPS_TEST_SRCS}
|
||||
PARENT_SCOPE)
|
||||
set(ATen_XPU_TEST_SRCS
|
||||
${ATen_XPU_TEST_SRCS}
|
||||
PARENT_SCOPE)
|
||||
|
||||
@ -4,7 +4,7 @@ include_directories(${ATEN_INCLUDE_DIR})
|
||||
|
||||
# C++17
|
||||
if(not MSVC)
|
||||
set(CMAKE_CXX_FLAGS "--std=c++17 ${CMAKE_CXX_FLAGS}")
|
||||
set(CMAKE_CXX_FLAGS "--std=c++17 ${CMAKE_CXX_FLAGS}")
|
||||
endif()
|
||||
add_executable(main main.cpp)
|
||||
target_link_libraries(main ${ATEN_LIBRARIES})
|
||||
|
||||
@ -1,8 +1,6 @@
|
||||
set(ATen_CUDA_INCLUDE ${ATen_CUDA_INCLUDE}
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}"
|
||||
PARENT_SCOPE)
|
||||
set(ATen_CUDA_INCLUDE
|
||||
${ATen_CUDA_INCLUDE} "${CMAKE_CURRENT_SOURCE_DIR}"
|
||||
PARENT_SCOPE)
|
||||
|
||||
install(FILES
|
||||
THCAtomics.cuh
|
||||
THCDeviceUtils.cuh
|
||||
DESTINATION "${ATEN_INSTALL_INCLUDE_SUBDIR}/THC")
|
||||
install(FILES THCAtomics.cuh THCDeviceUtils.cuh
|
||||
DESTINATION "${ATEN_INSTALL_INCLUDE_SUBDIR}/THC")
|
||||
|
||||
@ -1,10 +1,20 @@
|
||||
list(APPEND STATIC_RUNTIME_BENCHMARK_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/deep_wide_pt.cc)
|
||||
list(APPEND STATIC_RUNTIME_BENCHMARK_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/deep_wide_pt_bench.cc)
|
||||
set(STATIC_RUNTIME_BENCHMARK_SRCS ${STATIC_RUNTIME_BENCHMARK_SRCS} PARENT_SCOPE)
|
||||
list(APPEND STATIC_RUNTIME_BENCHMARK_SRCS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/deep_wide_pt.cc)
|
||||
list(APPEND STATIC_RUNTIME_BENCHMARK_SRCS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/deep_wide_pt_bench.cc)
|
||||
set(STATIC_RUNTIME_BENCHMARK_SRCS
|
||||
${STATIC_RUNTIME_BENCHMARK_SRCS}
|
||||
PARENT_SCOPE)
|
||||
|
||||
list(APPEND STATIC_RUNTIME_TEST_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/deep_wide_pt.cc)
|
||||
list(APPEND STATIC_RUNTIME_TEST_SRCS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/deep_wide_pt.cc)
|
||||
list(APPEND STATIC_RUNTIME_TEST_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/test_utils.cc)
|
||||
list(APPEND STATIC_RUNTIME_TEST_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/test_static_runtime.cc)
|
||||
list(APPEND STATIC_RUNTIME_TEST_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/test_static_module.cc)
|
||||
list(APPEND STATIC_RUNTIME_TEST_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/test_generated_ops.cc)
|
||||
set(STATIC_RUNTIME_TEST_SRCS ${STATIC_RUNTIME_TEST_SRCS} PARENT_SCOPE)
|
||||
list(APPEND STATIC_RUNTIME_TEST_SRCS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_static_runtime.cc)
|
||||
list(APPEND STATIC_RUNTIME_TEST_SRCS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_static_module.cc)
|
||||
list(APPEND STATIC_RUNTIME_TEST_SRCS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_generated_ops.cc)
|
||||
set(STATIC_RUNTIME_TEST_SRCS
|
||||
${STATIC_RUNTIME_TEST_SRCS}
|
||||
PARENT_SCOPE)
|
||||
|
||||
@ -8,12 +8,12 @@ if(INTERN_BUILD_MOBILE)
|
||||
endif()
|
||||
|
||||
caffe2_binary_target("parallel_info.cc")
|
||||
target_include_directories(parallel_info PUBLIC
|
||||
${CMAKE_BINARY_DIR}/aten/src) # provides "ATen/TypeExtendedInterface.h" to ATen.h
|
||||
target_include_directories(parallel_info PUBLIC ${CMAKE_BINARY_DIR}/aten/src
|
||||
)# provides "ATen/TypeExtendedInterface.h" to ATen.h
|
||||
|
||||
caffe2_binary_target("record_function_benchmark.cc")
|
||||
target_include_directories(record_function_benchmark PUBLIC
|
||||
${CMAKE_BINARY_DIR}/aten/src)
|
||||
target_include_directories(record_function_benchmark
|
||||
PUBLIC ${CMAKE_BINARY_DIR}/aten/src)
|
||||
|
||||
caffe2_binary_target("speed_benchmark_torch.cc")
|
||||
caffe2_binary_target("compare_models_torch.cc")
|
||||
|
||||
@ -1,55 +1,65 @@
|
||||
cmake_minimum_required(VERSION 3.27 FATAL_ERROR)
|
||||
project(c10 CXX)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ standard whose features are requested to build this target.")
|
||||
set(CMAKE_CXX_STANDARD
|
||||
17
|
||||
CACHE STRING
|
||||
"The C++ standard whose features are requested to build this target.")
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
|
||||
# Main build file for the C10 library.
|
||||
#
|
||||
# Note that the C10 library should maintain minimal dependencies - especially,
|
||||
# it should not depend on any library that is implementation specific or
|
||||
# backend specific. It should in particular NOT be dependent on any generated
|
||||
# protobuf header files, because protobuf header files will transitively force
|
||||
# one to link against a specific protobuf version.
|
||||
# it should not depend on any library that is implementation specific or backend
|
||||
# specific. It should in particular NOT be dependent on any generated protobuf
|
||||
# header files, because protobuf header files will transitively force one to
|
||||
# link against a specific protobuf version.
|
||||
|
||||
if(BUILD_LIBTORCHLESS)
|
||||
find_library(C10_LIB c10 PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH)
|
||||
find_library(
|
||||
C10_LIB c10
|
||||
PATHS $ENV{LIBTORCH_LIB_PATH}
|
||||
NO_DEFAULT_PATH)
|
||||
else()
|
||||
set(C10_LIB c10)
|
||||
endif()
|
||||
|
||||
set(C10_USE_GFLAGS ${USE_GFLAGS}) # also used in torch/headeronly
|
||||
set(C10_USE_GLOG ${USE_GLOG}) # also used in torch/headeronly
|
||||
set(C10_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) # also used in torch/headeronly
|
||||
set(C10_USE_NUMA ${USE_NUMA}) # also used in torch/headeronly
|
||||
set(C10_USE_MSVC_STATIC_RUNTIME ${CAFFE2_USE_MSVC_STATIC_RUNTIME}) # also used in torch/headeronly
|
||||
set(C10_USE_ROCM_KERNEL_ASSERT ${USE_ROCM_KERNEL_ASSERT}) # also used in torch/headeronly
|
||||
set(C10_USE_GFLAGS ${USE_GFLAGS}) # also used in torch/headeronly
|
||||
set(C10_USE_GLOG ${USE_GLOG}) # also used in torch/headeronly
|
||||
set(C10_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) # also used in torch/headeronly
|
||||
set(C10_USE_NUMA ${USE_NUMA}) # also used in torch/headeronly
|
||||
set(C10_USE_MSVC_STATIC_RUNTIME ${CAFFE2_USE_MSVC_STATIC_RUNTIME}
|
||||
)# also used in torch/headeronly
|
||||
set(C10_USE_ROCM_KERNEL_ASSERT ${USE_ROCM_KERNEL_ASSERT}) # also used in
|
||||
# torch/headeronly
|
||||
|
||||
# Note: if you want to add ANY dependency to the c10 library, make sure you
|
||||
# check with the core PyTorch developers as the dependency will be
|
||||
# transitively passed on to all libraries dependent on PyTorch.
|
||||
file(GLOB C10_SRCS
|
||||
*.cpp
|
||||
core/*.cpp
|
||||
core/impl/*.cpp
|
||||
mobile/*.cpp
|
||||
macros/*.cpp
|
||||
util/*.cpp
|
||||
)
|
||||
file(GLOB C10_HEADERS
|
||||
*.h
|
||||
core/*.h
|
||||
core/impl/*.h
|
||||
mobile/*.h
|
||||
macros/*.h
|
||||
util/*.h
|
||||
)
|
||||
# Note: if you want to add ANY dependency to the c10 library, make sure you
|
||||
# check with the core PyTorch developers as the dependency will be transitively
|
||||
# passed on to all libraries dependent on PyTorch.
|
||||
file(
|
||||
GLOB
|
||||
C10_SRCS
|
||||
*.cpp
|
||||
core/*.cpp
|
||||
core/impl/*.cpp
|
||||
mobile/*.cpp
|
||||
macros/*.cpp
|
||||
util/*.cpp)
|
||||
file(
|
||||
GLOB
|
||||
C10_HEADERS
|
||||
*.h
|
||||
core/*.h
|
||||
core/impl/*.h
|
||||
mobile/*.h
|
||||
macros/*.h
|
||||
util/*.h)
|
||||
if(NOT BUILD_LIBTORCHLESS)
|
||||
add_library(c10 ${C10_SRCS} ${C10_HEADERS})
|
||||
torch_compile_options(c10)
|
||||
if(HAVE_SOVERSION)
|
||||
set_target_properties(c10 PROPERTIES
|
||||
VERSION ${TORCH_VERSION} SOVERSION ${TORCH_SOVERSION})
|
||||
set_target_properties(c10 PROPERTIES VERSION ${TORCH_VERSION}
|
||||
SOVERSION ${TORCH_SOVERSION})
|
||||
endif()
|
||||
# If building shared library, set dllimport/dllexport proper.
|
||||
target_compile_options(c10 PRIVATE "-DC10_BUILD_MAIN_LIB")
|
||||
@ -58,7 +68,8 @@ if(NOT BUILD_LIBTORCHLESS)
|
||||
target_compile_options(c10 PRIVATE "-fvisibility=hidden")
|
||||
endif()
|
||||
|
||||
option(C10_USE_IWYU "Use include-what-you-use to clean up header inclusion" OFF)
|
||||
option(C10_USE_IWYU "Use include-what-you-use to clean up header inclusion"
|
||||
OFF)
|
||||
if(C10_USE_IWYU)
|
||||
find_program(iwyu NAMES include-what-you-use)
|
||||
if(iwyu)
|
||||
@ -71,8 +82,7 @@ if(NOT BUILD_LIBTORCHLESS)
|
||||
"-Xiwyu"
|
||||
"--prefix_header_includes=keep"
|
||||
"-Xiwyu"
|
||||
"--mapping_file=${CMAKE_CURRENT_LIST_DIR}/../tools/iwyu/all.imp"
|
||||
)
|
||||
"--mapping_file=${CMAKE_CURRENT_LIST_DIR}/../tools/iwyu/all.imp")
|
||||
set_property(TARGET c10 PROPERTY CXX_INCLUDE_WHAT_YOU_USE ${iwyu_cmd})
|
||||
endif()
|
||||
endif()
|
||||
@ -106,7 +116,8 @@ if(NOT BUILD_LIBTORCHLESS)
|
||||
message(STATUS "don't use NUMA")
|
||||
endif()
|
||||
|
||||
if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "s390x" AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64le")
|
||||
if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "s390x" AND NOT CMAKE_SYSTEM_PROCESSOR
|
||||
MATCHES "ppc64le")
|
||||
target_link_libraries(c10 PRIVATE cpuinfo)
|
||||
endif()
|
||||
|
||||
@ -134,10 +145,9 @@ if(NOT BUILD_LIBTORCHLESS)
|
||||
endif()
|
||||
|
||||
target_include_directories(
|
||||
c10 PUBLIC
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../>
|
||||
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}>
|
||||
$<INSTALL_INTERFACE:include>)
|
||||
c10
|
||||
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../>
|
||||
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}> $<INSTALL_INTERFACE:include>)
|
||||
endif()
|
||||
|
||||
add_subdirectory(test)
|
||||
@ -148,8 +158,7 @@ if(USE_CUDA)
|
||||
endif()
|
||||
|
||||
if(USE_ROCM)
|
||||
# NB: This directory is generated by the HIPIFY script; it's
|
||||
# not checked in
|
||||
# NB: This directory is generated by the HIPIFY script; it's not checked in
|
||||
add_subdirectory(hip)
|
||||
endif()
|
||||
|
||||
@ -158,17 +167,25 @@ if(USE_XPU)
|
||||
endif()
|
||||
|
||||
if(NOT BUILD_LIBTORCHLESS)
|
||||
# ---[ Installation
|
||||
# Note: for now, we will put all export path into one single Caffe2Targets group
|
||||
# to deal with the cmake deployment need. Inside the Caffe2Targets set, the
|
||||
# individual libraries like libc10.so and libcaffe2.so are still self-contained.
|
||||
install(TARGETS c10 EXPORT Caffe2Targets DESTINATION lib)
|
||||
# ---[ Installation Note: for now, we will put all export path into one single
|
||||
# Caffe2Targets group to deal with the cmake deployment need. Inside the
|
||||
# Caffe2Targets set, the individual libraries like libc10.so and libcaffe2.so
|
||||
# are still self-contained.
|
||||
install(
|
||||
TARGETS c10
|
||||
EXPORT Caffe2Targets
|
||||
DESTINATION lib)
|
||||
endif()
|
||||
|
||||
install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}
|
||||
DESTINATION include
|
||||
FILES_MATCHING PATTERN "*.h")
|
||||
install(
|
||||
DIRECTORY ${CMAKE_CURRENT_LIST_DIR}
|
||||
DESTINATION include
|
||||
FILES_MATCHING
|
||||
PATTERN "*.h")
|
||||
|
||||
if(MSVC AND C10_BUILD_SHARED_LIBS)
|
||||
install(FILES $<TARGET_PDB_FILE:c10> DESTINATION lib OPTIONAL)
|
||||
install(
|
||||
FILES $<TARGET_PDB_FILE:c10>
|
||||
DESTINATION lib
|
||||
OPTIONAL)
|
||||
endif()
|
||||
|
||||
@ -8,7 +8,10 @@ if(BUILD_TEST)
|
||||
add_executable(${bench_name} "${bench_src}")
|
||||
target_link_libraries(${bench_name} ${C10_LIB} benchmark)
|
||||
if(INSTALL_TEST)
|
||||
set_target_properties(${bench_name} PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
|
||||
set_target_properties(
|
||||
${bench_name}
|
||||
PROPERTIES INSTALL_RPATH
|
||||
"${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
|
||||
install(TARGETS ${bench_name} DESTINATION test)
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
@ -8,23 +8,24 @@ include(../../cmake/public/cuda.cmake)
|
||||
# ---[ Configure macro file.
|
||||
set(C10_CUDA_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) # used in cmake_macros.h.in
|
||||
# Probably have to do this :(
|
||||
configure_file(
|
||||
${CMAKE_CURRENT_LIST_DIR}/impl/cuda_cmake_macros.h.in
|
||||
${CMAKE_BINARY_DIR}/c10/cuda/impl/cuda_cmake_macros.h)
|
||||
configure_file(${CMAKE_CURRENT_LIST_DIR}/impl/cuda_cmake_macros.h.in
|
||||
${CMAKE_BINARY_DIR}/c10/cuda/impl/cuda_cmake_macros.h)
|
||||
|
||||
if(BUILD_LIBTORCHLESS)
|
||||
find_library(C10_CUDA_LIB c10_cuda PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH)
|
||||
find_library(
|
||||
C10_CUDA_LIB c10_cuda
|
||||
PATHS $ENV{LIBTORCH_LIB_PATH}
|
||||
NO_DEFAULT_PATH)
|
||||
else()
|
||||
set(C10_CUDA_LIB c10_cuda)
|
||||
endif()
|
||||
|
||||
# Note: if you want to add ANY dependency to the c10 library, make sure you
|
||||
# check with the core PyTorch developers as the dependency will be
|
||||
# transitively passed on to all libraries dependent on PyTorch.
|
||||
# check with the core PyTorch developers as the dependency will be transitively
|
||||
# passed on to all libraries dependent on PyTorch.
|
||||
|
||||
# Note: if you add a new source file/header, you will need to update
|
||||
# torch/utils/hipify/cuda_to_hip_mappings.py for new files
|
||||
# and headers you add
|
||||
# torch/utils/hipify/cuda_to_hip_mappings.py for new files and headers you add
|
||||
set(C10_CUDA_SRCS
|
||||
CUDAAllocatorConfig.cpp
|
||||
CUDACachingAllocator.cpp
|
||||
@ -36,8 +37,7 @@ set(C10_CUDA_SRCS
|
||||
CUDAStream.cpp
|
||||
impl/CUDAGuardImpl.cpp
|
||||
impl/CUDATest.cpp
|
||||
driver_api.cpp
|
||||
)
|
||||
driver_api.cpp)
|
||||
set(C10_CUDA_HEADERS
|
||||
CUDAAllocatorConfig.h
|
||||
CUDACachingAllocator.h
|
||||
@ -50,8 +50,7 @@ set(C10_CUDA_HEADERS
|
||||
CUDAMiscFunctions.h
|
||||
CUDAStream.h
|
||||
impl/CUDAGuardImpl.h
|
||||
impl/CUDATest.h
|
||||
)
|
||||
impl/CUDATest.h)
|
||||
set(CUDA_LINK_LIBRARIES_KEYWORD PRIVATE)
|
||||
|
||||
if(NOT BUILD_LIBTORCHLESS)
|
||||
@ -69,33 +68,39 @@ if(NOT BUILD_LIBTORCHLESS)
|
||||
target_link_libraries(c10_cuda PUBLIC ${C10_LIB} torch::cudart)
|
||||
|
||||
if(NOT WIN32)
|
||||
target_link_libraries(c10_cuda PRIVATE dl)
|
||||
target_compile_options(c10_cuda PRIVATE "-DPYTORCH_C10_DRIVER_API_SUPPORTED")
|
||||
target_link_libraries(c10_cuda PRIVATE dl)
|
||||
target_compile_options(c10_cuda
|
||||
PRIVATE "-DPYTORCH_C10_DRIVER_API_SUPPORTED")
|
||||
endif()
|
||||
|
||||
target_include_directories(
|
||||
c10_cuda PUBLIC
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../..>
|
||||
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}>
|
||||
$<INSTALL_INTERFACE:include>)
|
||||
c10_cuda
|
||||
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../..>
|
||||
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}> $<INSTALL_INTERFACE:include>)
|
||||
|
||||
# ---[ Installation
|
||||
# Note: for now, we will put all export path into one single Caffe2Targets group
|
||||
# to deal with the cmake deployment need. Inside the Caffe2Targets set, the
|
||||
# individual libraries like libc10.so and libcaffe2.so are still self-contained.
|
||||
install(TARGETS c10_cuda EXPORT Caffe2Targets DESTINATION lib)
|
||||
# ---[ Installation
|
||||
# Note: for now, we put all export paths into one single Caffe2Targets group to
|
||||
# deal with the cmake deployment need. Inside the Caffe2Targets set, the
|
||||
# individual libraries like libc10.so and libcaffe2.so are still self-contained.
|
||||
install(
|
||||
TARGETS c10_cuda
|
||||
EXPORT Caffe2Targets
|
||||
DESTINATION lib)
|
||||
|
||||
endif()
|
||||
|
||||
add_subdirectory(test)
|
||||
|
||||
foreach(file ${C10_CUDA_HEADERS})
|
||||
get_filename_component( dir ${file} DIRECTORY )
|
||||
install( FILES ${file} DESTINATION include/c10/cuda/${dir} )
|
||||
get_filename_component(dir ${file} DIRECTORY)
|
||||
install(FILES ${file} DESTINATION include/c10/cuda/${dir})
|
||||
endforeach()
|
||||
install(FILES ${CMAKE_BINARY_DIR}/c10/cuda/impl/cuda_cmake_macros.h
|
||||
DESTINATION include/c10/cuda/impl)
|
||||
DESTINATION include/c10/cuda/impl)
|
||||
|
||||
if(MSVC AND C10_CUDA_BUILD_SHARED_LIBS)
|
||||
install(FILES $<TARGET_PDB_FILE:c10_cuda> DESTINATION lib OPTIONAL)
|
||||
install(
|
||||
FILES $<TARGET_PDB_FILE:c10_cuda>
|
||||
DESTINATION lib
|
||||
OPTIONAL)
|
||||
endif()
|
||||
|
||||
@ -8,27 +8,33 @@ set(C10_CUDA_ALL_TEST_FILES
|
||||
impl/CUDAAssertionsTest_multiple_writes_from_blocks_and_threads.cu
|
||||
impl/CUDAAssertionsTest_multiple_writes_from_multiple_blocks.cu
|
||||
impl/CUDAAssertionsTest_multiple_writes_from_same_block.cu
|
||||
impl/CUDATest.cpp
|
||||
)
|
||||
impl/CUDATest.cpp)
|
||||
if(BUILD_TEST)
|
||||
foreach(test_src ${C10_CUDA_ALL_TEST_FILES})
|
||||
get_filename_component(test_file_name ${test_src} NAME_WE)
|
||||
set(test_name "c10_cuda_${test_file_name}")
|
||||
if(WIN32 AND test_src MATCHES "^.*\.hip$")
|
||||
set_source_files_properties(${test_src} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
|
||||
set_source_files_properties(${test_src}
|
||||
PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
|
||||
hip_add_executable(${test_name} "${test_src}")
|
||||
list(JOIN PYTORCH_ROCM_ARCH " " ROCM_PROPERTY_ARCH_LIST)
|
||||
set_target_properties(${test_name} PROPERTIES LINKER_LANGUAGE CXX HIP_ARCHITECTURES ${ROCM_PROPERTY_ARCH_LIST})
|
||||
set_target_properties(
|
||||
${test_name} PROPERTIES LINKER_LANGUAGE CXX HIP_ARCHITECTURES
|
||||
${ROCM_PROPERTY_ARCH_LIST})
|
||||
else()
|
||||
add_executable(${test_name} "${test_src}")
|
||||
endif()
|
||||
if(test_src MATCHES "^.*\.hip$")
|
||||
set_target_properties(${test_name} PROPERTIES LINKER_LANGUAGE CXX)
|
||||
endif()
|
||||
target_link_libraries(${test_name} ${C10_CUDA_LIB} ${C10_LIB} gmock gtest gtest_main)
|
||||
target_link_libraries(${test_name} ${C10_CUDA_LIB} ${C10_LIB} gmock gtest
|
||||
gtest_main)
|
||||
add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
|
||||
if(INSTALL_TEST)
|
||||
set_target_properties(${test_name} PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
|
||||
set_target_properties(
|
||||
${test_name}
|
||||
PROPERTIES INSTALL_RPATH
|
||||
"${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
|
||||
install(TARGETS ${test_name} DESTINATION test)
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
@ -1,33 +1,31 @@
|
||||
# Build file for the C10 HIP. This directory doesn't actually contain
|
||||
# any files; these files are copied over by the HIPIFY script in tools/amd_build
|
||||
# Build file for the C10 HIP. This directory doesn't actually contain any
|
||||
# files; these files are copied over by the HIPIFY script in tools/amd_build
|
||||
#
|
||||
# At the moment, this cmake is NOT standalone
|
||||
|
||||
include(../../cmake/public/utils.cmake)
|
||||
|
||||
if(BUILD_LIBTORCHLESS)
|
||||
find_library(C10_HIP_LIB c10_hip PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH)
|
||||
find_library(
|
||||
C10_HIP_LIB c10_hip
|
||||
PATHS $ENV{LIBTORCH_LIB_PATH}
|
||||
NO_DEFAULT_PATH)
|
||||
endif()
|
||||
|
||||
# ---[ Configure macro file.
|
||||
set(C10_HIP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) # used in cmake_macros.h.in
|
||||
configure_file(
|
||||
${CMAKE_CURRENT_LIST_DIR}/impl/hip_cmake_macros.h.in
|
||||
${CMAKE_BINARY_DIR}/c10/hip/impl/hip_cmake_macros.h)
|
||||
configure_file(${CMAKE_CURRENT_LIST_DIR}/impl/hip_cmake_macros.h.in
|
||||
${CMAKE_BINARY_DIR}/c10/hip/impl/hip_cmake_macros.h)
|
||||
|
||||
# NB: All previous cu files are renamed into cc files. This isn't tested at the
|
||||
# moment.
|
||||
file(GLOB C10_HIP_SRCS
|
||||
*.cpp
|
||||
*.cc
|
||||
impl/*.cpp
|
||||
impl/*.cc
|
||||
)
|
||||
file(GLOB C10_HIP_SRCS *.cpp *.cc impl/*.cpp impl/*.cc)
|
||||
|
||||
# Mark the cc files as HIP files, so we call the compiler. (They have to be
|
||||
# suffixed with cc, because the hcc compiler won't accept them otherwise.)
|
||||
file(GLOB __c10_hip_srcs_cpp *.cc impl/*.cc)
|
||||
set_source_files_properties(${__c10_hip_srcs_cpp} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
|
||||
set_source_files_properties(${__c10_hip_srcs_cpp}
|
||||
PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
|
||||
|
||||
file(GLOB_RECURSE C10_HIP_HEADERS *.h)
|
||||
|
||||
@ -38,8 +36,9 @@ if(NOT BUILD_LIBTORCHLESS)
|
||||
# Propagate HIP_CXX_FLAGS that were set from Dependencies.cmake
|
||||
target_compile_options(c10_hip PRIVATE ${HIP_CXX_FLAGS})
|
||||
|
||||
# caffe2_hip adds a bunch of dependencies like rocsparse, but c10/hip is supposed to be
|
||||
# minimal. I'm not sure if we need hip_hcc or not; for now leave it out
|
||||
# caffe2_hip adds a bunch of dependencies like rocsparse, but c10/hip is
|
||||
# supposed to be minimal. I'm not sure if we need hip_hcc or not; for now
|
||||
# leave it out
|
||||
|
||||
# If building shared library, set dllimport/dllexport proper.
|
||||
target_compile_options(c10_hip PRIVATE "-DC10_HIP_BUILD_MAIN_LIB")
|
||||
@ -52,19 +51,23 @@ if(NOT BUILD_LIBTORCHLESS)
|
||||
target_link_libraries(c10_hip PUBLIC ${C10_LIB} hip::amdhip64)
|
||||
|
||||
target_include_directories(
|
||||
c10_hip PUBLIC
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../..>
|
||||
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}>
|
||||
$<INSTALL_INTERFACE:include>)
|
||||
install(TARGETS c10_hip EXPORT Caffe2Targets DESTINATION lib)
|
||||
c10_hip
|
||||
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../..>
|
||||
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}> $<INSTALL_INTERFACE:include>)
|
||||
install(
|
||||
TARGETS c10_hip
|
||||
EXPORT Caffe2Targets
|
||||
DESTINATION lib)
|
||||
set(C10_HIP_LIB c10_hip)
|
||||
endif()
|
||||
|
||||
add_subdirectory(test)
|
||||
|
||||
# ---[ Installation
|
||||
install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}
|
||||
DESTINATION include
|
||||
FILES_MATCHING PATTERN "*.h")
|
||||
install(
|
||||
DIRECTORY ${CMAKE_CURRENT_LIST_DIR}
|
||||
DESTINATION include
|
||||
FILES_MATCHING
|
||||
PATTERN "*.h")
|
||||
install(FILES ${CMAKE_BINARY_DIR}/c10/hip/impl/hip_cmake_macros.h
|
||||
DESTINATION include/c10/hip/impl)
|
||||
DESTINATION include/c10/hip/impl)
|
||||
|
||||
@ -12,7 +12,10 @@ if(BUILD_TEST)
|
||||
target_link_libraries(${test_name} ${C10_LIB} gmock gtest gtest_main)
|
||||
add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
|
||||
if(INSTALL_TEST)
|
||||
set_target_properties(${test_name} PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
|
||||
set_target_properties(
|
||||
${test_name}
|
||||
PROPERTIES INSTALL_RPATH
|
||||
"${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
|
||||
install(TARGETS ${test_name} DESTINATION test)
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
@ -5,21 +5,20 @@
|
||||
include(../../cmake/public/xpu.cmake)
|
||||
|
||||
if(NOT BUILD_LIBTORCHLESS)
|
||||
find_library(C10_XPU_LIB c10_xpu PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH)
|
||||
find_library(
|
||||
C10_XPU_LIB c10_xpu
|
||||
PATHS $ENV{LIBTORCH_LIB_PATH}
|
||||
NO_DEFAULT_PATH)
|
||||
endif()
|
||||
|
||||
# ---[ Configure macro file.
|
||||
set(C10_XPU_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) # used in xpu_cmake_macros.h.in
|
||||
configure_file(
|
||||
${CMAKE_CURRENT_LIST_DIR}/impl/xpu_cmake_macros.h.in
|
||||
${CMAKE_BINARY_DIR}/c10/xpu/impl/xpu_cmake_macros.h)
|
||||
set(C10_XPU_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) # used in
|
||||
# xpu_cmake_macros.h.in
|
||||
configure_file(${CMAKE_CURRENT_LIST_DIR}/impl/xpu_cmake_macros.h.in
|
||||
${CMAKE_BINARY_DIR}/c10/xpu/impl/xpu_cmake_macros.h)
|
||||
|
||||
set(C10_XPU_SRCS
|
||||
XPUCachingAllocator.cpp
|
||||
XPUFunctions.cpp
|
||||
XPUStream.cpp
|
||||
impl/XPUGuardImpl.cpp
|
||||
)
|
||||
set(C10_XPU_SRCS XPUCachingAllocator.cpp XPUFunctions.cpp XPUStream.cpp
|
||||
impl/XPUGuardImpl.cpp)
|
||||
set(C10_XPU_HEADERS
|
||||
XPUCachingAllocator.h
|
||||
XPUDeviceProp.h
|
||||
@ -27,8 +26,7 @@ set(C10_XPU_HEADERS
|
||||
XPUFunctions.h
|
||||
XPUMacros.h
|
||||
XPUStream.h
|
||||
impl/XPUGuardImpl.h
|
||||
)
|
||||
impl/XPUGuardImpl.h)
|
||||
if(NOT BUILD_LIBTORCHLESS)
|
||||
add_library(c10_xpu ${C10_XPU_SRCS} ${C10_XPU_HEADERS})
|
||||
torch_compile_options(c10_xpu)
|
||||
@ -41,12 +39,13 @@ if(NOT BUILD_LIBTORCHLESS)
|
||||
# ---[ Dependency of c10_xpu
|
||||
target_link_libraries(c10_xpu PUBLIC c10 torch::xpurt)
|
||||
target_include_directories(
|
||||
c10_xpu PUBLIC
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../..>
|
||||
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}>
|
||||
$<INSTALL_INTERFACE:include>
|
||||
)
|
||||
install(TARGETS c10_xpu EXPORT Caffe2Targets DESTINATION lib)
|
||||
c10_xpu
|
||||
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../..>
|
||||
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}> $<INSTALL_INTERFACE:include>)
|
||||
install(
|
||||
TARGETS c10_xpu
|
||||
EXPORT Caffe2Targets
|
||||
DESTINATION lib)
|
||||
set(C10_XPU_LIB c10_xpu)
|
||||
add_subdirectory(test)
|
||||
endif()
|
||||
@ -58,8 +57,11 @@ foreach(file ${C10_XPU_HEADERS})
|
||||
install(FILES ${file} DESTINATION include/c10/xpu/${dir})
|
||||
endforeach()
|
||||
install(FILES ${CMAKE_BINARY_DIR}/c10/xpu/impl/xpu_cmake_macros.h
|
||||
DESTINATION include/c10/xpu/impl)
|
||||
DESTINATION include/c10/xpu/impl)
|
||||
|
||||
if(MSVC AND C10_XPU_BUILD_SHARED_LIBS)
|
||||
install(FILES $<TARGET_PDB_FILE:c10_xpu> DESTINATION lib OPTIONAL)
|
||||
install(
|
||||
FILES $<TARGET_PDB_FILE:c10_xpu>
|
||||
DESTINATION lib
|
||||
OPTIONAL)
|
||||
endif()
|
||||
|
||||
@ -1,11 +1,8 @@
|
||||
# ---[ Test binaries.
|
||||
|
||||
set(C10_XPU_ALL_TEST_FILES
|
||||
impl/XPUCachingAllocatorTest.cpp
|
||||
impl/XPUDeviceTest.cpp
|
||||
impl/XPUGuardTest.cpp
|
||||
impl/XPUStreamTest.cpp
|
||||
)
|
||||
impl/XPUCachingAllocatorTest.cpp impl/XPUDeviceTest.cpp
|
||||
impl/XPUGuardTest.cpp impl/XPUStreamTest.cpp)
|
||||
if(BUILD_TEST)
|
||||
foreach(test_src ${C10_XPU_ALL_TEST_FILES})
|
||||
get_filename_component(test_file_name ${test_src} NAME_WE)
|
||||
@ -14,7 +11,10 @@ if(BUILD_TEST)
|
||||
target_link_libraries(${test_name} ${C10_XPU_LIB} gmock gtest gtest_main)
|
||||
add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
|
||||
if(INSTALL_TEST)
|
||||
set_target_properties(${test_name} PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
|
||||
set_target_properties(
|
||||
${test_name}
|
||||
PROPERTIES INSTALL_RPATH
|
||||
"${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
|
||||
install(TARGETS ${test_name} DESTINATION test)
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,4 +1,4 @@
|
||||
list(APPEND Caffe2_CPU_SRCS
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/common.cc"
|
||||
)
|
||||
set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} PARENT_SCOPE)
|
||||
list(APPEND Caffe2_CPU_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/common.cc")
|
||||
set(Caffe2_CPU_SRCS
|
||||
${Caffe2_CPU_SRCS}
|
||||
PARENT_SCOPE)
|
||||
|
||||
@ -1,8 +1,9 @@
|
||||
if(INTERN_BUILD_MOBILE)
|
||||
list(APPEND Caffe2_CPU_SRCS
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/embedding_lookup_idx.cc"
|
||||
)
|
||||
set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} PARENT_SCOPE)
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/embedding_lookup_idx.cc")
|
||||
set(Caffe2_CPU_SRCS
|
||||
${Caffe2_CPU_SRCS}
|
||||
PARENT_SCOPE)
|
||||
return()
|
||||
endif()
|
||||
|
||||
@ -28,21 +29,22 @@ if(CXX_AVX2_FOUND)
|
||||
target_link_libraries(Caffe2_perfkernels_avx2 PRIVATE c10)
|
||||
|
||||
if(MSVC AND NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
|
||||
target_compile_options(Caffe2_perfkernels_avx2
|
||||
PRIVATE "/arch:AVX2"
|
||||
PRIVATE "/D__FMA__"
|
||||
PRIVATE "/D__F16C__")
|
||||
target_compile_options(
|
||||
Caffe2_perfkernels_avx2
|
||||
PRIVATE "/arch:AVX2"
|
||||
PRIVATE "/D__FMA__"
|
||||
PRIVATE "/D__F16C__")
|
||||
else()
|
||||
target_compile_options(Caffe2_perfkernels_avx2
|
||||
PRIVATE "-mavx2"
|
||||
PRIVATE "-mfma"
|
||||
PRIVATE "-mavx"
|
||||
PRIVATE "-mf16c")
|
||||
target_compile_options(
|
||||
Caffe2_perfkernels_avx2
|
||||
PRIVATE "-mavx2"
|
||||
PRIVATE "-mfma"
|
||||
PRIVATE "-mavx"
|
||||
PRIVATE "-mf16c")
|
||||
endif()
|
||||
caffe2_interface_library(
|
||||
Caffe2_perfkernels_avx2 Caffe2_perfkernels_avx2_interface)
|
||||
list(APPEND
|
||||
Caffe2_DEPENDENCY_WHOLE_LINK_LIBS
|
||||
caffe2_interface_library(Caffe2_perfkernels_avx2
|
||||
Caffe2_perfkernels_avx2_interface)
|
||||
list(APPEND Caffe2_DEPENDENCY_WHOLE_LINK_LIBS
|
||||
"Caffe2_perfkernels_avx2_interface")
|
||||
endif()
|
||||
|
||||
@ -52,23 +54,25 @@ if(CXX_SVE_FOUND)
|
||||
add_library(Caffe2_perfkernels_sve STATIC ${sve_srcs})
|
||||
target_link_libraries(Caffe2_perfkernels_sve PRIVATE c10)
|
||||
install(TARGETS Caffe2_perfkernels_sve
|
||||
ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}")
|
||||
ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}")
|
||||
|
||||
target_compile_options(Caffe2_perfkernels_sve PRIVATE "-march=armv8-a+sve")
|
||||
|
||||
caffe2_interface_library(
|
||||
Caffe2_perfkernels_sve Caffe2_perfkernels_sve_interface)
|
||||
list(APPEND
|
||||
Caffe2_DEPENDENCY_WHOLE_LINK_LIBS "Caffe2_perfkernels_sve_interface")
|
||||
caffe2_interface_library(Caffe2_perfkernels_sve
|
||||
Caffe2_perfkernels_sve_interface)
|
||||
list(APPEND Caffe2_DEPENDENCY_WHOLE_LINK_LIBS
|
||||
"Caffe2_perfkernels_sve_interface")
|
||||
endif()
|
||||
|
||||
# TODO(jiayq): currently, we only implement the very base files for the
|
||||
# perfkernels. This is because to implement avx and avx2 files, we actually
|
||||
# need to set up different compilation units and this is a bit more involved
|
||||
# in terms of CMakefile changes. This is a stop-gap solution until we get a
|
||||
# more proper implementation.
|
||||
# perfkernels. This is because to implement avx and avx2 files, we actually need
|
||||
# to set up different compilation units and this is a bit more involved in
|
||||
# terms of CMakefile changes. This is a stop-gap solution until we get a more
|
||||
# proper implementation.
|
||||
|
||||
set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} PARENT_SCOPE)
|
||||
set(Caffe2_CPU_SRCS
|
||||
${Caffe2_CPU_SRCS}
|
||||
PARENT_SCOPE)
|
||||
set(Caffe2_DEPENDENCY_WHOLE_LINK_LIBS
|
||||
${Caffe2_DEPENDENCY_WHOLE_LINK_LIBS}
|
||||
PARENT_SCOPE)
|
||||
|
||||
@ -1,7 +1,9 @@
|
||||
file(GLOB tmp *_test.cc)
|
||||
|
||||
set(Caffe2_CPU_TEST_SRCS ${Caffe2_CPU_TEST_SRCS} ${tmp})
|
||||
list(APPEND Caffe2_CPU_SRCS
|
||||
list(
|
||||
APPEND
|
||||
Caffe2_CPU_SRCS
|
||||
${PROJECT_SOURCE_DIR}/third_party/miniz-3.0.2/miniz.c
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/inline_container.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/istream_adapter.cc
|
||||
@ -10,6 +12,12 @@ list(APPEND Caffe2_CPU_SRCS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/read_adapter_interface.cc)
|
||||
list(APPEND Caffe2_CPU_INCLUDE ${PROJECT_SOURCE_DIR}/third_party/miniz-3.0.2)
|
||||
|
||||
set(Caffe2_CPU_TEST_SRCS ${Caffe2_CPU_TEST_SRCS} PARENT_SCOPE)
|
||||
set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} PARENT_SCOPE)
|
||||
set(Caffe2_CPU_INCLUDE ${Caffe2_CPU_INCLUDE} PARENT_SCOPE)
|
||||
set(Caffe2_CPU_TEST_SRCS
|
||||
${Caffe2_CPU_TEST_SRCS}
|
||||
PARENT_SCOPE)
|
||||
set(Caffe2_CPU_SRCS
|
||||
${Caffe2_CPU_SRCS}
|
||||
PARENT_SCOPE)
|
||||
set(Caffe2_CPU_INCLUDE
|
||||
${Caffe2_CPU_INCLUDE}
|
||||
PARENT_SCOPE)
|
||||
|
||||
@ -1,18 +1,14 @@
|
||||
list(APPEND Caffe2_CPU_SRCS
|
||||
utils/string_utils.cc
|
||||
utils/threadpool/ThreadPool.cc
|
||||
)
|
||||
list(APPEND Caffe2_CPU_SRCS utils/string_utils.cc
|
||||
utils/threadpool/ThreadPool.cc)
|
||||
|
||||
if(USE_PTHREADPOOL)
|
||||
list(APPEND Caffe2_CPU_SRCS
|
||||
utils/threadpool/pthreadpool-cpp.cc
|
||||
utils/threadpool/thread_pool_guard.cpp
|
||||
)
|
||||
list(APPEND Caffe2_CPU_SRCS utils/threadpool/pthreadpool-cpp.cc
|
||||
utils/threadpool/thread_pool_guard.cpp)
|
||||
endif()
|
||||
|
||||
if(NOT INTERN_BUILD_MOBILE)
|
||||
list(APPEND Caffe2_CPU_SRCS
|
||||
utils/proto_wrap.cc
|
||||
)
|
||||
list(APPEND Caffe2_CPU_SRCS utils/proto_wrap.cc)
|
||||
endif()
|
||||
set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} PARENT_SCOPE)
|
||||
set(Caffe2_CPU_SRCS
|
||||
${Caffe2_CPU_SRCS}
|
||||
PARENT_SCOPE)
|
||||
|
||||
@ -1,4 +1,3 @@
|
||||
|
||||
if(__caffe2_allowlist_included)
|
||||
return()
|
||||
endif()
|
||||
|
||||
@ -1,26 +1,28 @@
|
||||
# Push host architecture when cross-compiling otherwise check would fail
|
||||
# when cross-compiling for arm64 on x86_64
|
||||
# Push host architecture when cross-compiling otherwise check would fail when
|
||||
# cross-compiling for arm64 on x86_64
|
||||
cmake_push_check_state(RESET)
|
||||
if(CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_OSX_ARCHITECTURES MATCHES "^(x86_64|arm64)$")
|
||||
if(CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_OSX_ARCHITECTURES MATCHES
|
||||
"^(x86_64|arm64)$")
|
||||
list(APPEND CMAKE_REQUIRED_FLAGS "-arch ${CMAKE_HOST_SYSTEM_PROCESSOR}")
|
||||
endif()
|
||||
|
||||
# Set values through env variables if cross compiling
|
||||
if(CMAKE_CROSSCOMPILING)
|
||||
if("$ENV{PYTORCH_BLAS_F2C}" STREQUAL "ON")
|
||||
SET(BLAS_F2C TRUE)
|
||||
set(BLAS_F2C TRUE)
|
||||
else()
|
||||
SET(BLAS_F2C FALSE)
|
||||
set(BLAS_F2C FALSE)
|
||||
endif()
|
||||
|
||||
if("$ENV{PYTORCH_BLAS_USE_CBLAS_DOT}" STREQUAL "ON")
|
||||
SET(BLAS_USE_CBLAS_DOT TRUE)
|
||||
set(BLAS_USE_CBLAS_DOT TRUE)
|
||||
else()
|
||||
SET(BLAS_USE_CBLAS_DOT FALSE)
|
||||
set(BLAS_USE_CBLAS_DOT FALSE)
|
||||
endif()
|
||||
else()
|
||||
SET(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES})
|
||||
CHECK_C_SOURCE_RUNS("
|
||||
set(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES})
|
||||
check_c_source_runs(
|
||||
"
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
float x[4] = { 1, 2, 3, 4 };
|
||||
@ -32,8 +34,10 @@ int main() {
|
||||
int i;
|
||||
double r = sdot_(&four, x, &one, y, &one);
|
||||
exit((float)r != (float).1234);
|
||||
}" BLAS_F2C_DOUBLE_WORKS )
|
||||
CHECK_C_SOURCE_RUNS("
|
||||
}"
|
||||
BLAS_F2C_DOUBLE_WORKS)
|
||||
check_c_source_runs(
|
||||
"
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
float x[4] = { 1, 2, 3, 4 };
|
||||
@ -45,15 +49,17 @@ int main() {
|
||||
int i;
|
||||
double r = sdot_(&four, x, &one, y, &one);
|
||||
exit((float)r != (float).1234);
|
||||
}" BLAS_F2C_FLOAT_WORKS )
|
||||
}"
|
||||
BLAS_F2C_FLOAT_WORKS)
|
||||
|
||||
if(BLAS_F2C_DOUBLE_WORKS AND NOT BLAS_F2C_FLOAT_WORKS)
|
||||
MESSAGE(STATUS "This BLAS uses the F2C return conventions")
|
||||
SET(BLAS_F2C TRUE)
|
||||
message(STATUS "This BLAS uses the F2C return conventions")
|
||||
set(BLAS_F2C TRUE)
|
||||
else(BLAS_F2C_DOUBLE_WORKS AND NOT BLAS_F2C_FLOAT_WORKS)
|
||||
SET(BLAS_F2C FALSE)
|
||||
set(BLAS_F2C FALSE)
|
||||
endif(BLAS_F2C_DOUBLE_WORKS AND NOT BLAS_F2C_FLOAT_WORKS)
|
||||
CHECK_C_SOURCE_RUNS("
|
||||
check_c_source_runs(
|
||||
"
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
float x[4] = { 1, 2, 3, 4 };
|
||||
@ -63,14 +69,15 @@ int main() {
|
||||
int i;
|
||||
double r = cblas_sdot(4, x, 1, y, 1);
|
||||
exit((float)r != (float).1234);
|
||||
}" BLAS_USE_CBLAS_DOT )
|
||||
}"
|
||||
BLAS_USE_CBLAS_DOT)
|
||||
if(BLAS_USE_CBLAS_DOT)
|
||||
SET(BLAS_USE_CBLAS_DOT TRUE)
|
||||
set(BLAS_USE_CBLAS_DOT TRUE)
|
||||
else(BLAS_USE_CBLAS_DOT)
|
||||
SET(BLAS_USE_CBLAS_DOT FALSE)
|
||||
set(BLAS_USE_CBLAS_DOT FALSE)
|
||||
endif(BLAS_USE_CBLAS_DOT)
|
||||
SET(CMAKE_REQUIRED_LIBRARIES)
|
||||
set(CMAKE_REQUIRED_LIBRARIES)
|
||||
endif(CMAKE_CROSSCOMPILING)
|
||||
MESSAGE(STATUS "BLAS_USE_CBLAS_DOT: ${BLAS_USE_CBLAS_DOT}")
|
||||
MESSAGE(STATUS "BLAS_F2C: ${BLAS_F2C}")
|
||||
message(STATUS "BLAS_USE_CBLAS_DOT: ${BLAS_USE_CBLAS_DOT}")
|
||||
message(STATUS "BLAS_F2C: ${BLAS_F2C}")
|
||||
cmake_pop_check_state()
|
||||
|
||||
@ -1,14 +1,13 @@
|
||||
# ---[ Declare variables that we are going to use across the Caffe2 build.
|
||||
# This file defines common, Caffe2-wide variables that we use to collect
|
||||
# source files and other things. Each variable is annotated with their
|
||||
# intended uses.
|
||||
# Note that adding and / or deleting these variables usually involves
|
||||
# changing the whole build system, so make sure you send a PR early if you
|
||||
# want to change them.
|
||||
# ---[ Declare variables that we are going to use across the Caffe2 build. This
|
||||
# file defines common, Caffe2-wide variables that we use to collect source files
|
||||
# and other things. Each variable is annotated with their intended uses. Note
|
||||
# that adding and / or deleting these variables usually involves changing the
|
||||
# whole build system, so make sure you send a PR early if you want to change
|
||||
# them.
|
||||
|
||||
# Caffe2_{CPU,GPU}_SRCS is the list that will have all the related source
|
||||
# files for CPU and GPU respectively. They will be filled with the
|
||||
# CMakeLists.txt files under each folder respectively.
|
||||
# Caffe2_{CPU,GPU}_SRCS is the list that will have all the related source files
|
||||
# for CPU and GPU respectively. They will be filled with the CMakeLists.txt
|
||||
# files under each folder respectively.
|
||||
set(Caffe2_CPU_SRCS)
|
||||
set(Caffe2_GPU_SRCS)
|
||||
|
||||
@ -27,12 +26,12 @@ set(Caffe2_DEPENDENCY_LIBS "")
|
||||
set(Caffe2_CUDA_DEPENDENCY_LIBS "")
|
||||
# This variable contains dependency libraries of Caffe2 which requires whole
|
||||
# symbol linkage. One example is the onnx lib where we need all its schema
|
||||
# symbols. However, if the lib is whole linked in caffe2 lib, we don't want
|
||||
# it to be linked in binaries that will link caffe2 lib. Because if caffe2 lib
|
||||
# is built as dynamic library, it will result in two copies of symbols of
|
||||
# symbols. However, if the lib is whole linked in caffe2 lib, we don't want it
|
||||
# to be linked in binaries that will link caffe2 lib. Because if caffe2 lib is
|
||||
# built as dynamic library, it will result in two copies of symbols of
|
||||
# Caffe2_DEPENDENCY_WHOLE_LINK_LIBS existing in caffe2.so and the binary, which
|
||||
# will cause issues. Therefore Caffe2_DEPENDENCY_WHOLE_LINK_LIBS will only
|
||||
# be linked by caffe2 lib.
|
||||
# will cause issues. Therefore Caffe2_DEPENDENCY_WHOLE_LINK_LIBS will only be
|
||||
# linked by caffe2 lib.
|
||||
set(Caffe2_DEPENDENCY_WHOLE_LINK_LIBS "")
|
||||
|
||||
# Lists for Caffe2 public dependency libraries. These libraries will be
|
||||
@ -40,7 +39,7 @@ set(Caffe2_DEPENDENCY_WHOLE_LINK_LIBS "")
|
||||
set(Caffe2_PUBLIC_DEPENDENCY_LIBS "")
|
||||
set(Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS "")
|
||||
|
||||
# List of modules that is built as part of the main Caffe2 build. For all
|
||||
# binary targets, such as Python and native binaries, they will be linked
|
||||
# automatically with these modules.
|
||||
# List of modules that is built as part of the main Caffe2 build. For all binary
|
||||
# targets, such as Python and native binaries, they will be linked automatically
|
||||
# with these modules.
|
||||
set(Caffe2_MODULES "")
|
||||
|
||||
@ -1,60 +1,66 @@
|
||||
# This ill-named file does a number of things:
|
||||
# - Installs Caffe2 header files (this has nothing to do with code generation)
|
||||
# - Configures caffe2/core/macros.h
|
||||
# - Creates an ATen target for its generated C++ files and adds it
|
||||
# as a dependency
|
||||
# - Reads build lists defined in build_variables.bzl
|
||||
# This ill-named file does a number of things:
|
||||
# - Installs Caffe2 header files (this has nothing to do with code generation)
|
||||
# - Configures caffe2/core/macros.h
|
||||
# - Creates an ATen target for its generated C++ files and adds it as a
|
||||
#   dependency
|
||||
# - Reads build lists defined in build_variables.bzl
|
||||
|
||||
################################################################################
|
||||
# ##############################################################################
|
||||
# Helper functions
|
||||
################################################################################
|
||||
# ##############################################################################
|
||||
|
||||
function(filter_list output input)
|
||||
unset(result)
|
||||
foreach(filename ${${input}})
|
||||
foreach(pattern ${ARGN})
|
||||
if("${filename}" MATCHES "${pattern}")
|
||||
list(APPEND result "${filename}")
|
||||
endif()
|
||||
endforeach()
|
||||
unset(result)
|
||||
foreach(filename ${${input}})
|
||||
foreach(pattern ${ARGN})
|
||||
if("${filename}" MATCHES "${pattern}")
|
||||
list(APPEND result "${filename}")
|
||||
endif()
|
||||
endforeach()
|
||||
set(${output} ${result} PARENT_SCOPE)
|
||||
endforeach()
|
||||
set(${output}
|
||||
${result}
|
||||
PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
function(filter_list_exclude output input)
|
||||
unset(result)
|
||||
foreach(filename ${${input}})
|
||||
foreach(pattern ${ARGN})
|
||||
if(NOT "${filename}" MATCHES "${pattern}")
|
||||
list(APPEND result "${filename}")
|
||||
endif()
|
||||
endforeach()
|
||||
unset(result)
|
||||
foreach(filename ${${input}})
|
||||
foreach(pattern ${ARGN})
|
||||
if(NOT "${filename}" MATCHES "${pattern}")
|
||||
list(APPEND result "${filename}")
|
||||
endif()
|
||||
endforeach()
|
||||
set(${output} ${result} PARENT_SCOPE)
|
||||
endforeach()
|
||||
set(${output}
|
||||
${result}
|
||||
PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
################################################################################
|
||||
# ##############################################################################
|
||||
|
||||
# -- [ Determine commit hash
|
||||
execute_process(
|
||||
COMMAND "${Python_EXECUTABLE}" -c "from tools.generate_torch_version import get_sha;print(get_sha('.'), end='')"
|
||||
OUTPUT_VARIABLE COMMIT_SHA
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..
|
||||
)
|
||||
COMMAND
|
||||
"${Python_EXECUTABLE}" -c
|
||||
"from tools.generate_torch_version import get_sha;print(get_sha('.'), end='')"
|
||||
OUTPUT_VARIABLE COMMIT_SHA
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..)
|
||||
|
||||
# ---[ Write the macros file
|
||||
configure_file(
|
||||
${CMAKE_CURRENT_LIST_DIR}/../caffe2/core/macros.h.in
|
||||
${CMAKE_BINARY_DIR}/caffe2/core/macros.h)
|
||||
configure_file(${CMAKE_CURRENT_LIST_DIR}/../caffe2/core/macros.h.in
|
||||
${CMAKE_BINARY_DIR}/caffe2/core/macros.h)
|
||||
|
||||
# ---[ Installing the header files
|
||||
install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/../caffe2
|
||||
DESTINATION include
|
||||
FILES_MATCHING PATTERN "*.h")
|
||||
install(
|
||||
DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/../caffe2
|
||||
DESTINATION include
|
||||
FILES_MATCHING
|
||||
PATTERN "*.h")
|
||||
if(NOT INTERN_BUILD_ATEN_OPS)
|
||||
install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/core
|
||||
DESTINATION include/ATen
|
||||
FILES_MATCHING PATTERN "*.h")
|
||||
install(
|
||||
DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/core
|
||||
DESTINATION include/ATen
|
||||
FILES_MATCHING
|
||||
PATTERN "*.h")
|
||||
endif()
|
||||
install(FILES ${CMAKE_BINARY_DIR}/caffe2/core/macros.h
|
||||
DESTINATION include/caffe2/core)
|
||||
@ -71,22 +77,23 @@ if(INTERN_BUILD_ATEN_OPS)
|
||||
endif(MSVC)
|
||||
|
||||
if(NOT MSVC AND NOT "${CMAKE_C_COMPILER_ID}" MATCHES "Clang")
|
||||
set_source_files_properties(${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/MapAllocator.cpp PROPERTIES COMPILE_FLAGS "-fno-openmp")
|
||||
set_source_files_properties(
|
||||
${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/MapAllocator.cpp
|
||||
PROPERTIES COMPILE_FLAGS "-fno-openmp")
|
||||
endif()
|
||||
|
||||
file(GLOB_RECURSE all_python "${CMAKE_CURRENT_LIST_DIR}/../torchgen/*.py")
|
||||
|
||||
# Handle files that may need sm89/sm90a/sm100a flags (stable/nightly
|
||||
# builds are not built for these archs).
|
||||
# Handle files that may need sm89/sm90a/sm100a/sm120a flags (stable/nightly
|
||||
# builds are not built for these archs).
|
||||
if(USE_CUDA)
|
||||
# The stable/nightly builds do not enable some SM architectures,
|
||||
# like 89/90a/100a. Still, some files need to be built for these
|
||||
# architectures specifically. This function makes it possible to
|
||||
# enable building given file for a specific such architecture, in
|
||||
# case if PyTorch is built for corresponding other architecture;
|
||||
# for example, it will enable building for SM 90a in case PyTorch
|
||||
# built for SM 90, etc. For examples of how to use the function,
|
||||
# see below the function itself.
|
||||
# The stable/nightly builds do not enable some SM architectures, like
|
||||
# 89/90a/100a. Still, some files need to be built for these architectures
|
||||
# specifically. This function makes it possible to enable building a given
|
||||
# file for a specific such architecture if PyTorch is built for the
|
||||
# corresponding other architecture; for example, it will enable building for
|
||||
# SM 90a if PyTorch is built for SM 90, etc. For examples of how to use
|
||||
# the function, see below the function itself.
|
||||
function(_BUILD_FOR_ADDITIONAL_ARCHS file archs)
|
||||
torch_cuda_get_nvcc_gencode_flag(_existing_arch_flags)
|
||||
|
||||
@ -95,38 +102,43 @@ if(INTERN_BUILD_ATEN_OPS)
|
||||
foreach(_arch ${archs})
|
||||
if("${_arch}" STREQUAL "89")
|
||||
if(_existing_arch_flags MATCHES ".*compute_86.*")
|
||||
list(APPEND _file_compile_flags "-gencode;arch=compute_89,code=sm_89")
|
||||
list(APPEND _file_compile_flags
|
||||
"-gencode;arch=compute_89,code=sm_89")
|
||||
endif()
|
||||
endif()
|
||||
if("${_arch}" STREQUAL "90a")
|
||||
if(_existing_arch_flags MATCHES ".*compute_90.*")
|
||||
list(APPEND _file_compile_flags "-gencode;arch=compute_90a,code=sm_90a")
|
||||
list(APPEND _file_compile_flags
|
||||
"-gencode;arch=compute_90a,code=sm_90a")
|
||||
endif()
|
||||
endif()
|
||||
if("${_arch}" STREQUAL "100a")
|
||||
if(_existing_arch_flags MATCHES ".*compute_100.*")
|
||||
list(APPEND _file_compile_flags "-gencode;arch=compute_100a,code=sm_100a")
|
||||
list(APPEND _file_compile_flags
|
||||
"-gencode;arch=compute_100a,code=sm_100a")
|
||||
endif()
|
||||
endif()
|
||||
if("${_arch}" STREQUAL "120a")
|
||||
if(_existing_arch_flags MATCHES ".*compute_120.*")
|
||||
list(APPEND _file_compile_flags "-gencode;arch=compute_120a,code=sm_120a")
|
||||
list(APPEND _file_compile_flags
|
||||
"-gencode;arch=compute_120a,code=sm_120a")
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
endif()
|
||||
list(JOIN _file_compile_flags " " _file_compile_flags)
|
||||
|
||||
set_source_files_properties(${file} PROPERTIES COMPILE_FLAGS "${_file_compile_flags}")
|
||||
set_source_files_properties(${file} PROPERTIES COMPILE_FLAGS
|
||||
"${_file_compile_flags}")
|
||||
endfunction()
|
||||
|
||||
_BUILD_FOR_ADDITIONAL_ARCHS(
|
||||
_build_for_additional_archs(
|
||||
"${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/cuda/RowwiseScaledMM.cu"
|
||||
"89;90a;100a;120a")
|
||||
_BUILD_FOR_ADDITIONAL_ARCHS(
|
||||
_build_for_additional_archs(
|
||||
"${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/cuda/ScaledGroupMM.cu"
|
||||
"90a")
|
||||
_BUILD_FOR_ADDITIONAL_ARCHS(
|
||||
_build_for_additional_archs(
|
||||
"${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/cuda/GroupMM.cu"
|
||||
"90a;100a")
|
||||
|
||||
@ -150,7 +162,8 @@ if(INTERN_BUILD_ATEN_OPS)
|
||||
set(CUSTOM_BUILD_FLAGS)
|
||||
if(INTERN_BUILD_MOBILE)
|
||||
if(USE_VULKAN)
|
||||
list(APPEND CUSTOM_BUILD_FLAGS --backend_whitelist CPU QuantizedCPU Vulkan)
|
||||
list(APPEND CUSTOM_BUILD_FLAGS --backend_whitelist CPU QuantizedCPU
|
||||
Vulkan)
|
||||
else()
|
||||
list(APPEND CUSTOM_BUILD_FLAGS --backend_whitelist CPU QuantizedCPU)
|
||||
endif()
|
||||
@ -158,64 +171,69 @@ if(INTERN_BUILD_ATEN_OPS)
|
||||
|
||||
if(SELECTED_OP_LIST)
|
||||
if(TRACING_BASED)
|
||||
message(STATUS "Running tracing-based selective build given operator list: ${SELECTED_OP_LIST}")
|
||||
list(APPEND CUSTOM_BUILD_FLAGS
|
||||
--op_selection_yaml_path ${SELECTED_OP_LIST})
|
||||
elseif(NOT STATIC_DISPATCH_BACKEND)
|
||||
message(WARNING
|
||||
"You have to run tracing-based selective build with dynamic dispatch.\n"
|
||||
"Switching to STATIC_DISPATCH_BACKEND=CPU."
|
||||
message(
|
||||
STATUS
|
||||
"Running tracing-based selective build given operator list: ${SELECTED_OP_LIST}"
|
||||
)
|
||||
list(APPEND CUSTOM_BUILD_FLAGS --op_selection_yaml_path
|
||||
${SELECTED_OP_LIST})
|
||||
elseif(NOT STATIC_DISPATCH_BACKEND)
|
||||
message(
|
||||
WARNING
|
||||
"You have to run tracing-based selective build with dynamic dispatch.\n"
|
||||
"Switching to STATIC_DISPATCH_BACKEND=CPU.")
|
||||
set(STATIC_DISPATCH_BACKEND CPU)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(STATIC_DISPATCH_BACKEND)
|
||||
message(STATUS "Custom build with static dispatch backends: ${STATIC_DISPATCH_BACKEND}")
|
||||
message(
|
||||
STATUS
|
||||
"Custom build with static dispatch backends: ${STATIC_DISPATCH_BACKEND}"
|
||||
)
|
||||
list(LENGTH STATIC_DISPATCH_BACKEND len)
|
||||
list(APPEND CUSTOM_BUILD_FLAGS
|
||||
--static_dispatch_backend ${STATIC_DISPATCH_BACKEND})
|
||||
list(APPEND CUSTOM_BUILD_FLAGS --static_dispatch_backend
|
||||
${STATIC_DISPATCH_BACKEND})
|
||||
endif()
|
||||
|
||||
# Codegen unboxing
|
||||
if(USE_LIGHTWEIGHT_DISPATCH)
|
||||
file(GLOB_RECURSE all_unboxing_script "${CMAKE_CURRENT_LIST_DIR}/../tools/jit/*.py")
|
||||
file(GLOB_RECURSE all_unboxing_script
|
||||
"${CMAKE_CURRENT_LIST_DIR}/../tools/jit/*.py")
|
||||
list(APPEND CUSTOM_BUILD_FLAGS --skip_dispatcher_op_registration)
|
||||
set(GEN_UNBOXING_COMMAND
|
||||
"${Python_EXECUTABLE}" -m tools.jit.gen_unboxing
|
||||
--source-path ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen
|
||||
--install_dir ${CMAKE_BINARY_DIR}/aten/src/ATen
|
||||
)
|
||||
"${Python_EXECUTABLE}" -m tools.jit.gen_unboxing --source-path
|
||||
${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen --install_dir
|
||||
${CMAKE_BINARY_DIR}/aten/src/ATen)
|
||||
if(SELECTED_OP_LIST)
|
||||
list(APPEND GEN_UNBOXING_COMMAND
|
||||
--TEST_ONLY_op_registration_allowlist_yaml_path "${SELECTED_OP_LIST}")
|
||||
--TEST_ONLY_op_registration_allowlist_yaml_path
|
||||
"${SELECTED_OP_LIST}")
|
||||
endif()
|
||||
set("GEN_UNBOXING_COMMAND_sources"
|
||||
${GEN_UNBOXING_COMMAND}
|
||||
--output-dependencies ${CMAKE_BINARY_DIR}/aten/src/ATen/generated_unboxing_sources.cmake
|
||||
)
|
||||
${GEN_UNBOXING_COMMAND} --output-dependencies
|
||||
${CMAKE_BINARY_DIR}/aten/src/ATen/generated_unboxing_sources.cmake)
|
||||
message(STATUS "Generating sources for lightweight dispatch")
|
||||
execute_process(
|
||||
COMMAND ${GEN_UNBOXING_COMMAND_sources} --dry-run
|
||||
RESULT_VARIABLE RETURN_VALUE
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..
|
||||
)
|
||||
COMMAND ${GEN_UNBOXING_COMMAND_sources} --dry-run
|
||||
RESULT_VARIABLE RETURN_VALUE
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..)
|
||||
if(NOT RETURN_VALUE EQUAL 0)
|
||||
message(FATAL_ERROR "Failed to get generated_unboxing_sources list")
|
||||
endif()
|
||||
|
||||
include("${CMAKE_BINARY_DIR}/aten/src/ATen/generated_unboxing_sources.cmake")
|
||||
include(
|
||||
"${CMAKE_BINARY_DIR}/aten/src/ATen/generated_unboxing_sources.cmake")
|
||||
add_custom_command(
|
||||
COMMENT "Generating ATen unboxing sources"
|
||||
OUTPUT
|
||||
${generated_unboxing_sources}
|
||||
${CMAKE_BINARY_DIR}/aten/src/ATen/generated_unboxing_sources.cmake
|
||||
COMMAND ${GEN_UNBOXING_COMMAND_sources}
|
||||
DEPENDS ${all_unboxing_script} ${sources_templates}
|
||||
COMMENT "Generating ATen unboxing sources"
|
||||
OUTPUT ${generated_unboxing_sources}
|
||||
${CMAKE_BINARY_DIR}/aten/src/ATen/generated_unboxing_sources.cmake
|
||||
COMMAND ${GEN_UNBOXING_COMMAND_sources}
|
||||
DEPENDS
|
||||
${all_unboxing_script} ${sources_templates}
|
||||
${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/native_functions.yaml
|
||||
${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/tags.yaml
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..
|
||||
)
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..)
|
||||
else() # Otherwise do not generate or include sources into build.
|
||||
set(generated_unboxing_sources "")
|
||||
endif()
|
||||
@ -226,116 +244,125 @@ if(INTERN_BUILD_ATEN_OPS)
|
||||
endif()
|
||||
|
||||
set(GEN_COMMAND
|
||||
"${Python_EXECUTABLE}" -m torchgen.gen
|
||||
--source-path ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen
|
||||
--install_dir ${CMAKE_BINARY_DIR}/aten/src/ATen
|
||||
${GEN_PER_OPERATOR_FLAG}
|
||||
${GEN_ROCM_FLAG}
|
||||
${GEN_MPS_FLAG}
|
||||
${GEN_XPU_FLAG}
|
||||
${CUSTOM_BUILD_FLAGS}
|
||||
)
|
||||
"${Python_EXECUTABLE}" -m torchgen.gen --source-path
|
||||
${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen --install_dir
|
||||
${CMAKE_BINARY_DIR}/aten/src/ATen ${GEN_PER_OPERATOR_FLAG}
|
||||
${GEN_ROCM_FLAG} ${GEN_MPS_FLAG} ${GEN_XPU_FLAG} ${CUSTOM_BUILD_FLAGS})
|
||||
|
||||
file(GLOB_RECURSE headers_templates "${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/templates/*\.h")
|
||||
file(GLOB_RECURSE sources_templates "${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/templates/*\.cpp")
|
||||
file(GLOB_RECURSE headers_templates
|
||||
"${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/templates/*\.h")
|
||||
file(GLOB_RECURSE sources_templates
|
||||
"${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/templates/*\.cpp")
|
||||
set(declarations_yaml_templates "")
|
||||
|
||||
foreach(gen_type "headers" "sources" "declarations_yaml")
|
||||
# The codegen outputs may change dynamically as PyTorch is
|
||||
# developed, but add_custom_command only supports dynamic inputs.
|
||||
# The codegen outputs may change dynamically as PyTorch is developed, but
|
||||
# add_custom_command only supports dynamic inputs.
|
||||
#
|
||||
# We work around this by generating a .cmake file which is
|
||||
# included below to set the list of output files. If that file
|
||||
# ever changes then cmake will be re-run automatically because it
|
||||
# was included and so we get fully dynamic outputs.
|
||||
# We work around this by generating a .cmake file which is included below to
|
||||
# set the list of output files. If that file ever changes then cmake will be
|
||||
# re-run automatically because it was included and so we get fully dynamic
|
||||
# outputs.
|
||||
|
||||
set("GEN_COMMAND_${gen_type}"
|
||||
${GEN_COMMAND}
|
||||
--generate ${gen_type}
|
||||
--output-dependencies ${CMAKE_BINARY_DIR}/aten/src/ATen/generated_${gen_type}.cmake
|
||||
)
|
||||
${GEN_COMMAND} --generate ${gen_type} --output-dependencies
|
||||
${CMAKE_BINARY_DIR}/aten/src/ATen/generated_${gen_type}.cmake)
|
||||
|
||||
# Dry run to bootstrap the output variables
|
||||
execute_process(
|
||||
COMMAND ${GEN_COMMAND_${gen_type}} --dry-run
|
||||
RESULT_VARIABLE RETURN_VALUE
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..
|
||||
)
|
||||
COMMAND ${GEN_COMMAND_${gen_type}} --dry-run
|
||||
RESULT_VARIABLE RETURN_VALUE
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..)
|
||||
|
||||
if(NOT RETURN_VALUE EQUAL 0)
|
||||
message(FATAL_ERROR "Failed to get generated_${gen_type} list")
|
||||
endif()
|
||||
|
||||
include("${CMAKE_BINARY_DIR}/aten/src/ATen/generated_${gen_type}.cmake")
|
||||
include("${CMAKE_BINARY_DIR}/aten/src/ATen/core_generated_${gen_type}.cmake")
|
||||
include("${CMAKE_BINARY_DIR}/aten/src/ATen/cpu_vec_generated_${gen_type}.cmake")
|
||||
include("${CMAKE_BINARY_DIR}/aten/src/ATen/cuda_generated_${gen_type}.cmake")
|
||||
include(
|
||||
"${CMAKE_BINARY_DIR}/aten/src/ATen/core_generated_${gen_type}.cmake")
|
||||
include(
|
||||
"${CMAKE_BINARY_DIR}/aten/src/ATen/cpu_vec_generated_${gen_type}.cmake")
|
||||
include(
|
||||
"${CMAKE_BINARY_DIR}/aten/src/ATen/cuda_generated_${gen_type}.cmake")
|
||||
include("${CMAKE_BINARY_DIR}/aten/src/ATen/ops_generated_${gen_type}.cmake")
|
||||
if(USE_XPU)
|
||||
include("${CMAKE_BINARY_DIR}/aten/src/ATen/xpu_generated_${gen_type}.cmake")
|
||||
include(
|
||||
"${CMAKE_BINARY_DIR}/aten/src/ATen/xpu_generated_${gen_type}.cmake")
|
||||
endif()
|
||||
message(STATUS "${gen_type} outputs: ${gen_outputs}")
|
||||
set(OUTPUT_LIST
|
||||
${generated_${gen_type}}
|
||||
${cuda_generated_${gen_type}}
|
||||
${core_generated_${gen_type}}
|
||||
${cpu_vec_generated_${gen_type}}
|
||||
${ops_generated_${gen_type}}
|
||||
${CMAKE_BINARY_DIR}/aten/src/ATen/generated_${gen_type}.cmake
|
||||
${CMAKE_BINARY_DIR}/aten/src/ATen/ops_generated_${gen_type}.cmake
|
||||
${CMAKE_BINARY_DIR}/aten/src/ATen/core_generated_${gen_type}.cmake
|
||||
${CMAKE_BINARY_DIR}/aten/src/ATen/cpu_vec_generated_${gen_type}.cmake
|
||||
${CMAKE_BINARY_DIR}/aten/src/ATen/cuda_generated_${gen_type}.cmake)
|
||||
${generated_${gen_type}}
|
||||
${cuda_generated_${gen_type}}
|
||||
${core_generated_${gen_type}}
|
||||
${cpu_vec_generated_${gen_type}}
|
||||
${ops_generated_${gen_type}}
|
||||
${CMAKE_BINARY_DIR}/aten/src/ATen/generated_${gen_type}.cmake
|
||||
${CMAKE_BINARY_DIR}/aten/src/ATen/ops_generated_${gen_type}.cmake
|
||||
${CMAKE_BINARY_DIR}/aten/src/ATen/core_generated_${gen_type}.cmake
|
||||
${CMAKE_BINARY_DIR}/aten/src/ATen/cpu_vec_generated_${gen_type}.cmake
|
||||
${CMAKE_BINARY_DIR}/aten/src/ATen/cuda_generated_${gen_type}.cmake)
|
||||
if(USE_XPU)
|
||||
list(APPEND OUTPUT_LIST
|
||||
${xpu_generated_${gen_type}}
|
||||
${CMAKE_BINARY_DIR}/aten/src/ATen/xpu_generated_${gen_type}.cmake
|
||||
)
|
||||
list(APPEND OUTPUT_LIST ${xpu_generated_${gen_type}}
|
||||
${CMAKE_BINARY_DIR}/aten/src/ATen/xpu_generated_${gen_type}.cmake)
|
||||
endif()
|
||||
|
||||
add_custom_command(
|
||||
COMMENT "Generating ATen ${gen_type}"
|
||||
OUTPUT ${OUTPUT_LIST}
|
||||
COMMAND ${GEN_COMMAND_${gen_type}}
|
||||
DEPENDS ${all_python} ${${gen_type}_templates}
|
||||
DEPENDS
|
||||
${all_python} ${${gen_type}_templates}
|
||||
${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/native_functions.yaml
|
||||
${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/tags.yaml
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..
|
||||
)
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/..)
|
||||
endforeach()
|
||||
|
||||
# Generated headers used from a CUDA (.cu) file are
|
||||
# not tracked correctly in CMake. We make the libATen.so depend explicitly
|
||||
# on building the generated ATen files to workaround.
|
||||
add_custom_target(ATEN_CPU_FILES_GEN_TARGET DEPENDS
|
||||
${generated_headers} ${core_generated_headers} ${cpu_vec_generated_headers} ${ops_generated_headers}
|
||||
${generated_sources} ${core_generated_sources} ${cpu_vec_generated_sources} ${ops_generated_sources}
|
||||
${generated_declarations_yaml} ${generated_unboxing_sources})
|
||||
add_custom_target(ATEN_CUDA_FILES_GEN_TARGET DEPENDS
|
||||
${cuda_generated_headers} ${cuda_generated_sources})
|
||||
# Generated headers used from a CUDA (.cu) file are not tracked correctly in
|
||||
# CMake. We make the libATen.so depend explicitly on building the generated
|
||||
# ATen files to workaround.
|
||||
add_custom_target(
|
||||
ATEN_CPU_FILES_GEN_TARGET
|
||||
DEPENDS ${generated_headers}
|
||||
${core_generated_headers}
|
||||
${cpu_vec_generated_headers}
|
||||
${ops_generated_headers}
|
||||
${generated_sources}
|
||||
${core_generated_sources}
|
||||
${cpu_vec_generated_sources}
|
||||
${ops_generated_sources}
|
||||
${generated_declarations_yaml}
|
||||
${generated_unboxing_sources})
|
||||
add_custom_target(ATEN_CUDA_FILES_GEN_TARGET
|
||||
DEPENDS ${cuda_generated_headers} ${cuda_generated_sources})
|
||||
add_library(ATEN_CPU_FILES_GEN_LIB INTERFACE)
|
||||
add_library(ATEN_CUDA_FILES_GEN_LIB INTERFACE)
|
||||
add_dependencies(ATEN_CPU_FILES_GEN_LIB ATEN_CPU_FILES_GEN_TARGET)
|
||||
add_dependencies(ATEN_CUDA_FILES_GEN_LIB ATEN_CUDA_FILES_GEN_TARGET)
|
||||
|
||||
if(USE_PER_OPERATOR_HEADERS)
|
||||
target_compile_definitions(ATEN_CPU_FILES_GEN_LIB INTERFACE AT_PER_OPERATOR_HEADERS)
|
||||
target_compile_definitions(ATEN_CUDA_FILES_GEN_LIB INTERFACE AT_PER_OPERATOR_HEADERS)
|
||||
target_compile_definitions(ATEN_CPU_FILES_GEN_LIB
|
||||
INTERFACE AT_PER_OPERATOR_HEADERS)
|
||||
target_compile_definitions(ATEN_CUDA_FILES_GEN_LIB
|
||||
INTERFACE AT_PER_OPERATOR_HEADERS)
|
||||
endif()
|
||||
|
||||
if(USE_XPU)
|
||||
add_custom_target(ATEN_XPU_FILES_GEN_TARGET DEPENDS
|
||||
${xpu_generated_headers} ${xpu_generated_sources})
|
||||
add_custom_target(ATEN_XPU_FILES_GEN_TARGET
|
||||
DEPENDS ${xpu_generated_headers} ${xpu_generated_sources})
|
||||
add_library(ATEN_XPU_FILES_GEN_LIB INTERFACE)
|
||||
add_dependencies(ATEN_XPU_FILES_GEN_LIB ATEN_XPU_FILES_GEN_TARGET)
|
||||
|
||||
if(USE_PER_OPERATOR_HEADERS)
|
||||
target_compile_definitions(ATEN_XPU_FILES_GEN_LIB INTERFACE AT_PER_OPERATOR_HEADERS)
|
||||
target_compile_definitions(ATEN_XPU_FILES_GEN_LIB
|
||||
INTERFACE AT_PER_OPERATOR_HEADERS)
|
||||
endif()
|
||||
endif()
|
||||
# Handle source files that need to be compiled multiple times for
|
||||
# different vectorization options
|
||||
file(GLOB cpu_kernel_cpp_in "${PROJECT_SOURCE_DIR}/aten/src/ATen/native/cpu/*.cpp" "${PROJECT_SOURCE_DIR}/aten/src/ATen/native/quantized/cpu/kernels/*.cpp")
|
||||
# Handle source files that need to be compiled multiple times for different
|
||||
# vectorization options
|
||||
file(GLOB cpu_kernel_cpp_in
|
||||
"${PROJECT_SOURCE_DIR}/aten/src/ATen/native/cpu/*.cpp"
|
||||
"${PROJECT_SOURCE_DIR}/aten/src/ATen/native/quantized/cpu/kernels/*.cpp")
|
||||
|
||||
list(APPEND CPU_CAPABILITY_NAMES "DEFAULT")
|
||||
list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG}")
|
||||
@ -346,7 +373,8 @@ if(INTERN_BUILD_ATEN_OPS)
|
||||
if(MSVC)
|
||||
list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG}/arch:AVX512")
|
||||
else(MSVC)
|
||||
list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} -mavx512f -mavx512bw -mavx512vl -mavx512dq -mfma")
|
||||
list(APPEND CPU_CAPABILITY_FLAGS
|
||||
"${OPT_FLAG} -mavx512f -mavx512bw -mavx512vl -mavx512dq -mfma")
|
||||
endif(MSVC)
|
||||
endif(CXX_AVX512_FOUND)
|
||||
|
||||
@ -354,12 +382,15 @@ if(INTERN_BUILD_ATEN_OPS)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_AVX2_CPU_DEFINITION")
|
||||
|
||||
# Some versions of GCC pessimistically split unaligned load and store
|
||||
# instructions when using the default tuning. This is a bad choice on
|
||||
# new Intel and AMD processors so we disable it when compiling with AVX2.
|
||||
# See https://stackoverflow.com/questions/52626726/why-doesnt-gcc-resolve-mm256-loadu-pd-as-single-vmovupd#tab-top
|
||||
check_cxx_compiler_flag("-mno-avx256-split-unaligned-load -mno-avx256-split-unaligned-store" COMPILER_SUPPORTS_NO_AVX256_SPLIT)
|
||||
# instructions when using the default tuning. This is a bad choice on new
|
||||
# Intel and AMD processors so we disable it when compiling with AVX2. See
|
||||
# https://stackoverflow.com/questions/52626726/why-doesnt-gcc-resolve-mm256-loadu-pd-as-single-vmovupd#tab-top
|
||||
check_cxx_compiler_flag(
|
||||
"-mno-avx256-split-unaligned-load -mno-avx256-split-unaligned-store"
|
||||
COMPILER_SUPPORTS_NO_AVX256_SPLIT)
|
||||
if(COMPILER_SUPPORTS_NO_AVX256_SPLIT)
|
||||
set(CPU_NO_AVX256_SPLIT_FLAGS "-mno-avx256-split-unaligned-load -mno-avx256-split-unaligned-store")
|
||||
set(CPU_NO_AVX256_SPLIT_FLAGS
|
||||
"-mno-avx256-split-unaligned-load -mno-avx256-split-unaligned-store")
|
||||
endif(COMPILER_SUPPORTS_NO_AVX256_SPLIT)
|
||||
|
||||
list(APPEND CPU_CAPABILITY_NAMES "AVX2")
|
||||
@ -370,7 +401,8 @@ if(INTERN_BUILD_ATEN_OPS)
|
||||
if(MSVC)
|
||||
list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG}/arch:AVX512")
|
||||
else(MSVC)
|
||||
list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} -march=native ${CPU_NO_AVX256_SPLIT_FLAGS}")
|
||||
list(APPEND CPU_CAPABILITY_FLAGS
|
||||
"${OPT_FLAG} -march=native ${CPU_NO_AVX256_SPLIT_FLAGS}")
|
||||
endif(MSVC)
|
||||
endif(CXX_AVX512_FOUND)
|
||||
endif()
|
||||
@ -378,68 +410,94 @@ if(INTERN_BUILD_ATEN_OPS)
|
||||
if(MSVC)
|
||||
list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG}/arch:AVX2")
|
||||
else(MSVC)
|
||||
list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} -mavx2 -mfma -mf16c ${CPU_NO_AVX256_SPLIT_FLAGS}")
|
||||
list(APPEND CPU_CAPABILITY_FLAGS
|
||||
"${OPT_FLAG} -mavx2 -mfma -mf16c ${CPU_NO_AVX256_SPLIT_FLAGS}")
|
||||
endif(MSVC)
|
||||
endif()
|
||||
endif(CXX_AVX2_FOUND)
|
||||
|
||||
if(CXX_VSX_FOUND)
|
||||
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_VSX_CPU_DEFINITION")
|
||||
LIST(APPEND CPU_CAPABILITY_NAMES "VSX")
|
||||
LIST(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} ${CXX_VSX_FLAGS}")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_VSX_CPU_DEFINITION")
|
||||
list(APPEND CPU_CAPABILITY_NAMES "VSX")
|
||||
list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} ${CXX_VSX_FLAGS}")
|
||||
endif(CXX_VSX_FOUND)
|
||||
|
||||
if(CXX_ZVECTOR_FOUND)
|
||||
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_ZVECTOR_CPU_DEFINITION")
|
||||
LIST(APPEND CPU_CAPABILITY_NAMES "ZVECTOR")
|
||||
LIST(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} ${CXX_ZVECTOR_FLAGS}")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_ZVECTOR_CPU_DEFINITION")
|
||||
list(APPEND CPU_CAPABILITY_NAMES "ZVECTOR")
|
||||
list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} ${CXX_ZVECTOR_FLAGS}")
|
||||
endif(CXX_ZVECTOR_FOUND)
|
||||
|
||||
if(CXX_SVE_FOUND AND CXX_SVE256_FOUND AND CXX_ARM_BF16_FOUND)
|
||||
if(CXX_SVE_FOUND
|
||||
AND CXX_SVE256_FOUND
|
||||
AND CXX_ARM_BF16_FOUND)
|
||||
list(APPEND CPU_CAPABILITY_NAMES "SVE256")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_SVE_CPU_DEFINITION -DHAVE_SVE256_CPU_DEFINITION -DHAVE_ARM_BF16_CPU_DEFINITION")
|
||||
set(CMAKE_CXX_FLAGS
|
||||
"${CMAKE_CXX_FLAGS} -DHAVE_SVE_CPU_DEFINITION -DHAVE_SVE256_CPU_DEFINITION -DHAVE_ARM_BF16_CPU_DEFINITION"
|
||||
)
|
||||
if("${CMAKE_C_COMPILER_ID}" MATCHES "Clang")
|
||||
list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} -O2 -march=armv8-a+sve+bf16 -D__ARM_FEATURE_BF16 -DCPU_CAPABILITY_SVE -msve-vector-bits=256")
|
||||
list(
|
||||
APPEND
|
||||
CPU_CAPABILITY_FLAGS
|
||||
"${OPT_FLAG} -O2 -march=armv8-a+sve+bf16 -D__ARM_FEATURE_BF16 -DCPU_CAPABILITY_SVE -msve-vector-bits=256"
|
||||
)
|
||||
else()
|
||||
list(APPEND CPU_CAPABILITY_FLAGS "${OPT_FLAG} -march=armv8-a+sve+bf16 -D__ARM_FEATURE_BF16 -DCPU_CAPABILITY_SVE -msve-vector-bits=256")
|
||||
list(
|
||||
APPEND
|
||||
CPU_CAPABILITY_FLAGS
|
||||
"${OPT_FLAG} -march=armv8-a+sve+bf16 -D__ARM_FEATURE_BF16 -DCPU_CAPABILITY_SVE -msve-vector-bits=256"
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
list(LENGTH CPU_CAPABILITY_NAMES NUM_CPU_CAPABILITY_NAMES)
|
||||
math(EXPR NUM_CPU_CAPABILITY_NAMES "${NUM_CPU_CAPABILITY_NAMES}-1")
|
||||
|
||||
# The sources list might get reordered later based on the capabilities.
|
||||
# See NOTE [ Linking AVX and non-AVX files ]
|
||||
# The sources list might get reordered later based on the capabilities. See
|
||||
# NOTE [ Linking AVX and non-AVX files ]
|
||||
foreach(i RANGE ${NUM_CPU_CAPABILITY_NAMES})
|
||||
function(process_vec NAME)
|
||||
list(GET CPU_CAPABILITY_NAMES ${i} CPU_CAPABILITY)
|
||||
set(NEW_IMPL ${CMAKE_BINARY_DIR}/aten/src/ATen/${NAME}.${CPU_CAPABILITY}.cpp)
|
||||
configure_file("${PROJECT_SOURCE_DIR}/cmake/IncludeSource.cpp.in" ${NEW_IMPL})
|
||||
set(cpu_kernel_cpp ${NEW_IMPL} ${cpu_kernel_cpp} PARENT_SCOPE) # Create list of copies
|
||||
set(NEW_IMPL
|
||||
${CMAKE_BINARY_DIR}/aten/src/ATen/${NAME}.${CPU_CAPABILITY}.cpp)
|
||||
configure_file("${PROJECT_SOURCE_DIR}/cmake/IncludeSource.cpp.in"
|
||||
${NEW_IMPL})
|
||||
set(cpu_kernel_cpp
|
||||
${NEW_IMPL} ${cpu_kernel_cpp}
|
||||
PARENT_SCOPE) # Create list of copies
|
||||
list(GET CPU_CAPABILITY_FLAGS ${i} FLAGS)
|
||||
if(MSVC)
|
||||
set(EXTRA_FLAGS "/DCPU_CAPABILITY=${CPU_CAPABILITY} /DCPU_CAPABILITY_${CPU_CAPABILITY}")
|
||||
set(EXTRA_FLAGS
|
||||
"/DCPU_CAPABILITY=${CPU_CAPABILITY} /DCPU_CAPABILITY_${CPU_CAPABILITY}"
|
||||
)
|
||||
else(MSVC)
|
||||
set(EXTRA_FLAGS "-DCPU_CAPABILITY=${CPU_CAPABILITY} -DCPU_CAPABILITY_${CPU_CAPABILITY}")
|
||||
set(EXTRA_FLAGS
|
||||
"-DCPU_CAPABILITY=${CPU_CAPABILITY} -DCPU_CAPABILITY_${CPU_CAPABILITY}"
|
||||
)
|
||||
endif(MSVC)
|
||||
|
||||
# Only parallelize the SortingKernel for now to avoid side effects
|
||||
if(${NAME} STREQUAL "native/cpu/SortingKernel.cpp" AND NOT MSVC AND USE_OMP)
|
||||
if(${NAME} STREQUAL "native/cpu/SortingKernel.cpp"
|
||||
AND NOT MSVC
|
||||
AND USE_OMP)
|
||||
string(APPEND EXTRA_FLAGS " -D_GLIBCXX_PARALLEL")
|
||||
endif()
|
||||
|
||||
# Disable certain warnings for GCC-9.X
|
||||
if(CMAKE_COMPILER_IS_GNUCXX)
|
||||
if(("${NAME}" STREQUAL "native/cpu/GridSamplerKernel.cpp") AND ("${CPU_CAPABILITY}" STREQUAL "DEFAULT"))
|
||||
if(("${NAME}" STREQUAL "native/cpu/GridSamplerKernel.cpp")
|
||||
AND ("${CPU_CAPABILITY}" STREQUAL "DEFAULT"))
|
||||
# See https://github.com/pytorch/pytorch/issues/38855
|
||||
set(EXTRA_FLAGS "${EXTRA_FLAGS} -Wno-uninitialized")
|
||||
endif()
|
||||
if("${NAME}" STREQUAL "native/quantized/cpu/kernels/QuantizedOpKernels.cpp")
|
||||
if("${NAME}" STREQUAL
|
||||
"native/quantized/cpu/kernels/QuantizedOpKernels.cpp")
|
||||
# See https://github.com/pytorch/pytorch/issues/38854
|
||||
set(EXTRA_FLAGS "${EXTRA_FLAGS} -Wno-deprecated-copy")
|
||||
endif()
|
||||
endif()
|
||||
set_source_files_properties(${NEW_IMPL} PROPERTIES COMPILE_FLAGS "${FLAGS} ${EXTRA_FLAGS}")
|
||||
set_source_files_properties(
|
||||
${NEW_IMPL} PROPERTIES COMPILE_FLAGS "${FLAGS} ${EXTRA_FLAGS}")
|
||||
endfunction()
|
||||
foreach(IMPL ${cpu_kernel_cpp_in})
|
||||
file(RELATIVE_PATH NAME "${PROJECT_SOURCE_DIR}/aten/src/ATen/" "${IMPL}")
|
||||
@ -456,22 +514,29 @@ endif()
|
||||
function(append_filelist name outputvar)
|
||||
set(_rootdir "${Torch_SOURCE_DIR}/")
|
||||
# configure_file adds its input to the list of CMAKE_RERUN dependencies
|
||||
configure_file(
|
||||
${PROJECT_SOURCE_DIR}/build_variables.bzl
|
||||
${PROJECT_BINARY_DIR}/caffe2/build_variables.bzl)
|
||||
configure_file(${PROJECT_SOURCE_DIR}/build_variables.bzl
|
||||
${PROJECT_BINARY_DIR}/caffe2/build_variables.bzl)
|
||||
execute_process(
|
||||
COMMAND "${Python_EXECUTABLE}" -c
|
||||
"exec(open('${PROJECT_SOURCE_DIR}/build_variables.bzl').read());print(';'.join(['${_rootdir}' + x for x in ${name}]))"
|
||||
COMMAND
|
||||
"${Python_EXECUTABLE}" -c
|
||||
"exec(open('${PROJECT_SOURCE_DIR}/build_variables.bzl').read());print(';'.join(['${_rootdir}' + x for x in ${name}]))"
|
||||
WORKING_DIRECTORY "${_rootdir}"
|
||||
RESULT_VARIABLE _retval
|
||||
OUTPUT_VARIABLE _tempvar)
|
||||
if(NOT _retval EQUAL 0)
|
||||
message(FATAL_ERROR "Failed to fetch filelist ${name} from build_variables.bzl")
|
||||
message(
|
||||
FATAL_ERROR "Failed to fetch filelist ${name} from build_variables.bzl")
|
||||
endif()
|
||||
string(REPLACE "\n" "" _tempvar "${_tempvar}")
|
||||
list(APPEND ${outputvar} ${_tempvar})
|
||||
set(${outputvar} "${${outputvar}}" PARENT_SCOPE)
|
||||
set(${outputvar}
|
||||
"${${outputvar}}"
|
||||
PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
set(NUM_CPU_CAPABILITY_NAMES ${NUM_CPU_CAPABILITY_NAMES} PARENT_SCOPE)
|
||||
set(CPU_CAPABILITY_FLAGS ${CPU_CAPABILITY_FLAGS} PARENT_SCOPE)
|
||||
set(NUM_CPU_CAPABILITY_NAMES
|
||||
${NUM_CPU_CAPABILITY_NAMES}
|
||||
PARENT_SCOPE)
|
||||
set(CPU_CAPABILITY_FLAGS
|
||||
${CPU_CAPABILITY_FLAGS}
|
||||
PARENT_SCOPE)
|
||||
|
||||
@ -1,26 +1,34 @@
|
||||
function(print_target_properties tgt)
|
||||
# Get all properties that cmake supports
|
||||
execute_process(COMMAND cmake --help-property-list OUTPUT_VARIABLE CMAKE_PROPERTY_LIST)
|
||||
execute_process(COMMAND cmake --help-property-list
|
||||
OUTPUT_VARIABLE CMAKE_PROPERTY_LIST)
|
||||
|
||||
# Convert command output into a CMake list
|
||||
STRING(REGEX REPLACE ";" "\\\\;" CMAKE_PROPERTY_LIST "${CMAKE_PROPERTY_LIST}")
|
||||
STRING(REGEX REPLACE "\n" ";" CMAKE_PROPERTY_LIST "${CMAKE_PROPERTY_LIST}")
|
||||
if(NOT TARGET ${tgt})
|
||||
message("There is no target named '${tgt}'")
|
||||
return()
|
||||
endif()
|
||||
string(REGEX REPLACE ";" "\\\\;" CMAKE_PROPERTY_LIST "${CMAKE_PROPERTY_LIST}")
|
||||
string(REGEX REPLACE "\n" ";" CMAKE_PROPERTY_LIST "${CMAKE_PROPERTY_LIST}")
|
||||
if(NOT TARGET ${tgt})
|
||||
message("There is no target named '${tgt}'")
|
||||
return()
|
||||
endif()
|
||||
|
||||
foreach(prop ${CMAKE_PROPERTY_LIST})
|
||||
string(REPLACE "<CONFIG>" "${CMAKE_BUILD_TYPE}" prop ${prop})
|
||||
# Fix https://stackoverflow.com/questions/32197663/how-can-i-remove-the-the-location-property-may-not-be-read-from-target-error-i
|
||||
if(prop STREQUAL "LOCATION" OR prop MATCHES "^LOCATION_" OR prop MATCHES "_LOCATION$")
|
||||
continue()
|
||||
foreach(prop ${CMAKE_PROPERTY_LIST})
|
||||
string(REPLACE "<CONFIG>" "${CMAKE_BUILD_TYPE}" prop ${prop})
|
||||
# Fix
|
||||
# https://stackoverflow.com/questions/32197663/how-can-i-remove-the-the-location-property-may-not-be-read-from-target-error-i
|
||||
if(prop STREQUAL "LOCATION"
|
||||
OR prop MATCHES "^LOCATION_"
|
||||
OR prop MATCHES "_LOCATION$")
|
||||
continue()
|
||||
endif()
|
||||
# message ("Checking ${prop}")
|
||||
get_property(propval TARGET ${tgt} PROPERTY ${prop} SET)
|
||||
if(propval)
|
||||
get_target_property(propval ${tgt} ${prop})
|
||||
message("${tgt} ${prop} = ${propval}")
|
||||
endif()
|
||||
endforeach(prop)
|
||||
# message ("Checking ${prop}")
|
||||
get_property(
|
||||
propval
|
||||
TARGET ${tgt}
|
||||
PROPERTY ${prop}
|
||||
SET)
|
||||
if(propval)
|
||||
get_target_property(propval ${tgt} ${prop})
|
||||
message("${tgt} ${prop} = ${propval}")
|
||||
endif()
|
||||
endforeach(prop)
|
||||
endfunction(print_target_properties)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
73
cmake/External/EigenBLAS.cmake
vendored
73
cmake/External/EigenBLAS.cmake
vendored
@ -7,47 +7,50 @@ if(NOT INTERN_BUILD_MOBILE OR NOT INTERN_USE_EIGEN_BLAS)
|
||||
return()
|
||||
endif()
|
||||
|
||||
##############################################################################
|
||||
# Eigen BLAS is built together with Libtorch mobile.
|
||||
# By default, it builds code from third-party/eigen/blas submodule.
|
||||
##############################################################################
|
||||
# ##############################################################################
|
||||
# Eigen BLAS is built together with Libtorch mobile. By default, it builds code
|
||||
# from third-party/eigen/blas submodule.
|
||||
# ##############################################################################
|
||||
|
||||
set(CAFFE2_THIRD_PARTY_ROOT ${PROJECT_SOURCE_DIR}/third_party)
|
||||
set(EIGEN_BLAS_SRC_DIR "${CAFFE2_THIRD_PARTY_ROOT}/eigen/blas" CACHE STRING "Eigen BLAS source directory")
|
||||
set(EIGEN_BLAS_SRC_DIR
|
||||
"${CAFFE2_THIRD_PARTY_ROOT}/eigen/blas"
|
||||
CACHE STRING "Eigen BLAS source directory")
|
||||
|
||||
set(EigenBlas_SRCS
|
||||
${EIGEN_BLAS_SRC_DIR}/single.cpp
|
||||
${EIGEN_BLAS_SRC_DIR}/double.cpp
|
||||
${EIGEN_BLAS_SRC_DIR}/complex_single.cpp
|
||||
${EIGEN_BLAS_SRC_DIR}/complex_double.cpp
|
||||
${EIGEN_BLAS_SRC_DIR}/xerbla.cpp
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/srotm.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/srotmg.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/drotm.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/drotmg.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/lsame.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/dspmv.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/ssbmv.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/chbmv.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/sspmv.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/zhbmv.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/chpmv.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/dsbmv.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/zhpmv.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/dtbmv.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/stbmv.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/ctbmv.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/ztbmv.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/d_cnjg.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/r_cnjg.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/complexdots.c
|
||||
)
|
||||
${EIGEN_BLAS_SRC_DIR}/single.cpp
|
||||
${EIGEN_BLAS_SRC_DIR}/double.cpp
|
||||
${EIGEN_BLAS_SRC_DIR}/complex_single.cpp
|
||||
${EIGEN_BLAS_SRC_DIR}/complex_double.cpp
|
||||
${EIGEN_BLAS_SRC_DIR}/xerbla.cpp
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/srotm.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/srotmg.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/drotm.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/drotmg.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/lsame.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/dspmv.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/ssbmv.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/chbmv.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/sspmv.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/zhbmv.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/chpmv.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/dsbmv.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/zhpmv.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/dtbmv.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/stbmv.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/ctbmv.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/ztbmv.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/d_cnjg.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/r_cnjg.c
|
||||
${EIGEN_BLAS_SRC_DIR}/f2c/complexdots.c)
|
||||
|
||||
add_library(eigen_blas STATIC ${EigenBlas_SRCS})
|
||||
|
||||
# We build static versions of eigen blas but link into a shared library, so they need PIC.
|
||||
# We build static versions of eigen blas but link into a shared library, so they
|
||||
# need PIC.
|
||||
set_property(TARGET eigen_blas PROPERTY POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
install(TARGETS eigen_blas
|
||||
LIBRARY DESTINATION lib
|
||||
ARCHIVE DESTINATION lib)
|
||||
install(
|
||||
TARGETS eigen_blas
|
||||
LIBRARY DESTINATION lib
|
||||
ARCHIVE DESTINATION lib)
|
||||
|
||||
110
cmake/External/aotriton.cmake
vendored
110
cmake/External/aotriton.cmake
vendored
@ -5,103 +5,113 @@ if(NOT __AOTRITON_INCLUDED)
|
||||
set(__AOTRITON_INSTALL_DIR "${PROJECT_SOURCE_DIR}/torch")
|
||||
add_library(__caffe2_aotriton INTERFACE)
|
||||
|
||||
# AOTriton package information from GitHub Release Pages
|
||||
# Replaces .ci/docker/aotriton_version.txt
|
||||
# Note packages information may have versions skipped (due to no ABI breaks)
|
||||
# But they must be listed from lower version to higher version
|
||||
# AOTriton package information from GitHub Release Pages Replaces
|
||||
# .ci/docker/aotriton_version.txt Note packages information may have versions
|
||||
# skipped (due to no ABI breaks) But they must be listed from lower version to
|
||||
# higher version
|
||||
set(__AOTRITON_VER "0.10b")
|
||||
set(__AOTRITON_MANYLINUX_LIST
|
||||
"manylinux_2_28" # rocm6.3
|
||||
"manylinux_2_28" # rocm6.4
|
||||
"manylinux_2_28" # rocm6.5
|
||||
"manylinux_2_28" # rocm7.0
|
||||
)
|
||||
set(__AOTRITON_ROCM_LIST
|
||||
"rocm6.3"
|
||||
"rocm6.4"
|
||||
"rocm6.5"
|
||||
"rocm7.0"
|
||||
)
|
||||
"manylinux_2_28" # rocm6.3
|
||||
"manylinux_2_28" # rocm6.4
|
||||
"manylinux_2_28" # rocm6.5
|
||||
"manylinux_2_28" # rocm7.0
|
||||
)
|
||||
set(__AOTRITON_ROCM_LIST "rocm6.3" "rocm6.4" "rocm6.5" "rocm7.0")
|
||||
set(__AOTRITON_CI_COMMIT "6fca155f4deeb8d9529326f7b69f350aeeb93477")
|
||||
set(__AOTRITON_SHA256_LIST
|
||||
"861cd9f7479eec943933c27cb86920247e5b5dd139bc7c1376c81808abb7d7fe" # rocm6.3
|
||||
"acea7d811a2d3bbe718b6e07fc2a9f739e49eecd60b4b6a36fcb3fe8edf85d78" # rocm6.4
|
||||
"7e29c325d5bd33ba896ddb106f5d4fc7d715274dca7fe937f724fffa82017838" # rocm6.5
|
||||
"1e9b3dddf0c7fc07131c6f0f5266129e83ce2331f459fa2be8c63f4ae91b0f5b" # rocm7.0
|
||||
)
|
||||
"861cd9f7479eec943933c27cb86920247e5b5dd139bc7c1376c81808abb7d7fe" # rocm6.3
|
||||
"acea7d811a2d3bbe718b6e07fc2a9f739e49eecd60b4b6a36fcb3fe8edf85d78" # rocm6.4
|
||||
"7e29c325d5bd33ba896ddb106f5d4fc7d715274dca7fe937f724fffa82017838" # rocm6.5
|
||||
"1e9b3dddf0c7fc07131c6f0f5266129e83ce2331f459fa2be8c63f4ae91b0f5b" # rocm7.0
|
||||
)
|
||||
set(__AOTRITON_Z "gz")
|
||||
|
||||
# Note it is INSTALL"ED"
|
||||
if(DEFINED ENV{AOTRITON_INSTALLED_PREFIX})
|
||||
install(DIRECTORY
|
||||
$ENV{AOTRITON_INSTALLED_PREFIX}/lib
|
||||
$ENV{AOTRITON_INSTALLED_PREFIX}/include
|
||||
install(DIRECTORY $ENV{AOTRITON_INSTALLED_PREFIX}/lib
|
||||
$ENV{AOTRITON_INSTALLED_PREFIX}/include
|
||||
DESTINATION ${__AOTRITON_INSTALL_DIR})
|
||||
set(__AOTRITON_INSTALL_DIR "$ENV{AOTRITON_INSTALLED_PREFIX}")
|
||||
message(STATUS "Using Preinstalled AOTriton at ${__AOTRITON_INSTALL_DIR}")
|
||||
elseif(DEFINED ENV{AOTRITON_INSTALL_FROM_SOURCE})
|
||||
ExternalProject_Add(aotriton_external
|
||||
ExternalProject_Add(
|
||||
aotriton_external
|
||||
GIT_REPOSITORY https://github.com/ROCm/aotriton.git
|
||||
GIT_TAG ${__AOTRITON_CI_COMMIT}
|
||||
PREFIX ${__AOTRITON_EXTERN_PREFIX}
|
||||
INSTALL_DIR ${__AOTRITON_INSTALL_DIR}
|
||||
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${__AOTRITON_INSTALL_DIR}
|
||||
-DAOTRITON_TARGET_ARCH:STRING=${PYTORCH_ROCM_ARCH}
|
||||
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
|
||||
-DAOTRITON_NO_PYTHON=ON
|
||||
-DAOTRITON_NO_SHARED=OFF
|
||||
-DAOTRITON_TARGET_ARCH:STRING=${PYTORCH_ROCM_ARCH}
|
||||
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
|
||||
-DAOTRITON_NO_PYTHON=ON
|
||||
-DAOTRITON_NO_SHARED=OFF
|
||||
# CONFIGURE_COMMAND ""
|
||||
BUILD_COMMAND "" # No build, install command will repeat the build process due to problems in the build system.
|
||||
BUILD_COMMAND "" # No build, install command will repeat the build process
|
||||
# due to problems in the build system.
|
||||
BUILD_BYPRODUCTS "${__AOTRITON_INSTALL_DIR}/lib/libaotriton_v2.so"
|
||||
USES_TERMINAL_DOWNLOAD TRUE
|
||||
USES_TERMINAL_CONFIGURE TRUE
|
||||
USES_TERMINAL_BUILD TRUE
|
||||
USES_TERMINAL_INSTALL TRUE
|
||||
# INSTALL_COMMAND ${MAKE_COMMAND} install
|
||||
)
|
||||
)
|
||||
add_dependencies(__caffe2_aotriton aotriton_external)
|
||||
message(STATUS "Using AOTriton compiled from source directory ${__AOTRITON_EXTERN_PREFIX}")
|
||||
message(
|
||||
STATUS
|
||||
"Using AOTriton compiled from source directory ${__AOTRITON_EXTERN_PREFIX}"
|
||||
)
|
||||
else()
|
||||
set(__AOTRITON_SYSTEM_ROCM "${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR}")
|
||||
list(GET __AOTRITON_ROCM_LIST 0 __AOTRITON_ROCM_DEFAULT_STR)
|
||||
# Initialize __AOTRITON_ROCM to lowest version, in case all builds > system's ROCM
|
||||
# Initialize __AOTRITON_ROCM to lowest version, in case all builds >
|
||||
# system's ROCM
|
||||
string(SUBSTRING ${__AOTRITON_ROCM_DEFAULT_STR} 4 -1 __AOTRITON_ROCM)
|
||||
foreach(AOTRITON_ROCM_BUILD_STR IN LISTS __AOTRITON_ROCM_LIST)
|
||||
# len("rocm") == 4
|
||||
string(SUBSTRING ${AOTRITON_ROCM_BUILD_STR} 4 -1 AOTRITON_ROCM_BUILD)
|
||||
# Find the last build that <= system's ROCM
|
||||
# Assume the list is from lower to higher
|
||||
# Find the last build that <= system's ROCM Assume the list is from lower
|
||||
# to higher
|
||||
if(AOTRITON_ROCM_BUILD VERSION_GREATER __AOTRITON_SYSTEM_ROCM)
|
||||
break()
|
||||
endif()
|
||||
set(__AOTRITON_ROCM ${AOTRITON_ROCM_BUILD})
|
||||
endforeach()
|
||||
list(FIND __AOTRITON_ROCM_LIST "rocm${__AOTRITON_ROCM}" __AOTRITON_ROCM_INDEX)
|
||||
list(FIND __AOTRITON_ROCM_LIST "rocm${__AOTRITON_ROCM}"
|
||||
__AOTRITON_ROCM_INDEX)
|
||||
list(GET __AOTRITON_SHA256_LIST ${__AOTRITON_ROCM_INDEX} __AOTRITON_SHA256)
|
||||
list(GET __AOTRITON_MANYLINUX_LIST ${__AOTRITON_ROCM_INDEX} __AOTRITON_MANYLINUX)
|
||||
list(GET __AOTRITON_MANYLINUX_LIST ${__AOTRITON_ROCM_INDEX}
|
||||
__AOTRITON_MANYLINUX)
|
||||
set(__AOTRITON_ARCH ${CMAKE_HOST_SYSTEM_PROCESSOR})
|
||||
string(CONCAT __AOTRITON_FILE "aotriton-"
|
||||
"${__AOTRITON_VER}-${__AOTRITON_MANYLINUX}"
|
||||
"_${__AOTRITON_ARCH}-rocm${__AOTRITON_ROCM}"
|
||||
"-shared.tar.${__AOTRITON_Z}")
|
||||
string(CONCAT __AOTRITON_URL "https://github.com/ROCm/aotriton/releases/download/" # @lint-ignore
|
||||
"${__AOTRITON_VER}/${__AOTRITON_FILE}")
|
||||
ExternalProject_Add(aotriton_external
|
||||
string(
|
||||
CONCAT __AOTRITON_FILE
|
||||
"aotriton-"
|
||||
"${__AOTRITON_VER}-${__AOTRITON_MANYLINUX}"
|
||||
"_${__AOTRITON_ARCH}-rocm${__AOTRITON_ROCM}"
|
||||
"-shared.tar.${__AOTRITON_Z}")
|
||||
string(
|
||||
CONCAT
|
||||
__AOTRITON_URL
|
||||
"https://github.com/ROCm/aotriton/releases/download/" # @lint-ignore
|
||||
"${__AOTRITON_VER}/${__AOTRITON_FILE}")
|
||||
ExternalProject_Add(
|
||||
aotriton_external
|
||||
URL "${__AOTRITON_URL}"
|
||||
URL_HASH SHA256=${__AOTRITON_SHA256}
|
||||
SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/aotriton_tarball
|
||||
CONFIGURE_COMMAND ""
|
||||
BUILD_COMMAND ""
|
||||
INSTALL_COMMAND ${CMAKE_COMMAND} -E copy_directory
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/aotriton_tarball"
|
||||
"${__AOTRITON_INSTALL_DIR}"
|
||||
BUILD_BYPRODUCTS "${__AOTRITON_INSTALL_DIR}/lib/libaotriton_v2.so"
|
||||
)
|
||||
INSTALL_COMMAND
|
||||
${CMAKE_COMMAND} -E copy_directory
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/aotriton_tarball"
|
||||
"${__AOTRITON_INSTALL_DIR}"
|
||||
BUILD_BYPRODUCTS "${__AOTRITON_INSTALL_DIR}/lib/libaotriton_v2.so")
|
||||
add_dependencies(__caffe2_aotriton aotriton_external)
|
||||
message(STATUS "Using AOTriton from pre-compiled binary ${__AOTRITON_URL}.\
|
||||
Set env variables AOTRITON_INSTALL_FROM_SOURCE=1 to build from source.")
|
||||
endif()
|
||||
target_link_libraries(__caffe2_aotriton INTERFACE ${__AOTRITON_INSTALL_DIR}/lib/libaotriton_v2.so)
|
||||
target_include_directories(__caffe2_aotriton INTERFACE ${__AOTRITON_INSTALL_DIR}/include)
|
||||
target_link_libraries(
|
||||
__caffe2_aotriton INTERFACE ${__AOTRITON_INSTALL_DIR}/lib/libaotriton_v2.so)
|
||||
target_include_directories(__caffe2_aotriton
|
||||
INTERFACE ${__AOTRITON_INSTALL_DIR}/include)
|
||||
set(AOTRITON_FOUND TRUE)
|
||||
endif() # __AOTRITON_INCLUDED
|
||||
|
||||
31
cmake/External/nccl.cmake
vendored
31
cmake/External/nccl.cmake
vendored
@ -2,7 +2,8 @@ if(NOT __NCCL_INCLUDED)
|
||||
set(__NCCL_INCLUDED TRUE)
|
||||
|
||||
if(USE_SYSTEM_NCCL)
|
||||
# NCCL_ROOT, NCCL_LIB_DIR, NCCL_INCLUDE_DIR will be accounted in the following line.
|
||||
# NCCL_ROOT, NCCL_LIB_DIR, NCCL_INCLUDE_DIR will be accounted in the
|
||||
# following line.
|
||||
find_package(NCCL REQUIRED)
|
||||
if(NCCL_FOUND)
|
||||
add_library(__caffe2_nccl INTERFACE)
|
||||
@ -30,7 +31,8 @@ if(NOT __NCCL_INCLUDED)
|
||||
|
||||
if("${CMAKE_GENERATOR}" MATCHES "Make")
|
||||
# Recursive make with jobserver for parallelism, and also put a load limit
|
||||
# here to avoid flaky OOM, https://www.gnu.org/software/make/manual/html_node/Parallel.html
|
||||
# here to avoid flaky OOM,
|
||||
# https://www.gnu.org/software/make/manual/html_node/Parallel.html
|
||||
set(MAKE_COMMAND "$(MAKE)" "-l${MAX_JOBS}")
|
||||
else()
|
||||
# Parallel build with CPU load limit to avoid oversubscription
|
||||
@ -38,35 +40,32 @@ if(NOT __NCCL_INCLUDED)
|
||||
endif()
|
||||
|
||||
set(__NCCL_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/nccl")
|
||||
ExternalProject_Add(nccl_external
|
||||
ExternalProject_Add(
|
||||
nccl_external
|
||||
SOURCE_DIR ${PROJECT_SOURCE_DIR}/third_party/nccl
|
||||
BUILD_IN_SOURCE 1
|
||||
CONFIGURE_COMMAND ""
|
||||
BUILD_COMMAND
|
||||
${MAKE_COMMAND}
|
||||
"CXX=${CMAKE_CXX_COMPILER}"
|
||||
"CUDA_HOME=${CUDA_TOOLKIT_ROOT_DIR}"
|
||||
"NVCC=${CUDA_NVCC_EXECUTABLE}"
|
||||
"NVCC_GENCODE=${NVCC_GENCODE}"
|
||||
"BUILDDIR=${__NCCL_BUILD_DIR}"
|
||||
"VERBOSE=0"
|
||||
"DEBUG=0"
|
||||
${MAKE_COMMAND} "CXX=${CMAKE_CXX_COMPILER}"
|
||||
"CUDA_HOME=${CUDA_TOOLKIT_ROOT_DIR}" "NVCC=${CUDA_NVCC_EXECUTABLE}"
|
||||
"NVCC_GENCODE=${NVCC_GENCODE}" "BUILDDIR=${__NCCL_BUILD_DIR}"
|
||||
"VERBOSE=0" "DEBUG=0"
|
||||
BUILD_BYPRODUCTS "${__NCCL_BUILD_DIR}/lib/libnccl_static.a"
|
||||
INSTALL_COMMAND ""
|
||||
)
|
||||
INSTALL_COMMAND "")
|
||||
|
||||
set(__NCCL_LIBRARY_DEP nccl_external)
|
||||
set(NCCL_LIBRARIES ${__NCCL_BUILD_DIR}/lib/libnccl_static.a)
|
||||
|
||||
set(NCCL_FOUND TRUE)
|
||||
add_library(__caffe2_nccl INTERFACE)
|
||||
# The following old-style variables are set so that other libs, such as Gloo,
|
||||
# can still use it.
|
||||
# The following old-style variables are set so that other libs, such as
|
||||
# Gloo, can still use it.
|
||||
set(NCCL_INCLUDE_DIRS ${__NCCL_BUILD_DIR}/include)
|
||||
add_dependencies(__caffe2_nccl ${__NCCL_LIBRARY_DEP})
|
||||
target_link_libraries(__caffe2_nccl INTERFACE ${NCCL_LIBRARIES})
|
||||
target_include_directories(__caffe2_nccl INTERFACE ${NCCL_INCLUDE_DIRS})
|
||||
# nccl includes calls to shm_open/shm_close and therefore must depend on librt on Linux
|
||||
# nccl includes calls to shm_open/shm_close and therefore must depend on
|
||||
# librt on Linux
|
||||
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
|
||||
target_link_libraries(__caffe2_nccl INTERFACE rt)
|
||||
endif()
|
||||
|
||||
113
cmake/External/nnpack.cmake
vendored
113
cmake/External/nnpack.cmake
vendored
@ -7,73 +7,107 @@ if(NOT USE_NNPACK)
|
||||
return()
|
||||
endif()
|
||||
|
||||
##############################################################################
|
||||
# NNPACK is built together with Caffe2
|
||||
# By default, it builds code from third-party/NNPACK submodule.
|
||||
# Define NNPACK_SOURCE_DIR to build with a different version.
|
||||
##############################################################################
|
||||
# ##############################################################################
|
||||
# NNPACK is built together with Caffe2 By default, it builds code from
|
||||
# third-party/NNPACK submodule. Define NNPACK_SOURCE_DIR to build with a
|
||||
# different version.
|
||||
# ##############################################################################
|
||||
|
||||
##############################################################################
|
||||
# ##############################################################################
|
||||
# (1) MSVC - unsupported
|
||||
##############################################################################
|
||||
# ##############################################################################
|
||||
|
||||
if(MSVC)
|
||||
message(WARNING "NNPACK not supported on MSVC yet. Turn this warning off by USE_NNPACK=OFF.")
|
||||
message(
|
||||
WARNING
|
||||
"NNPACK not supported on MSVC yet. Turn this warning off by USE_NNPACK=OFF."
|
||||
)
|
||||
set(USE_NNPACK OFF)
|
||||
return()
|
||||
endif()
|
||||
|
||||
##############################################################################
|
||||
# ##############################################################################
|
||||
# (2) Anything but x86, x86-64, ARM, ARM64 - unsupported
|
||||
##############################################################################
|
||||
# ##############################################################################
|
||||
if(CMAKE_SYSTEM_PROCESSOR)
|
||||
if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "^(i686|x86_64|armv5te|armv7-a|armv7l|arm64|aarch64)$")
|
||||
message(WARNING "NNPACK is not supported on ${CMAKE_SYSTEM_PROCESSOR} processors. "
|
||||
"The only supported architectures are x86, x86-64, ARM, and ARM64. "
|
||||
"Turn this warning off by USE_NNPACK=OFF.")
|
||||
if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES
|
||||
"^(i686|x86_64|armv5te|armv7-a|armv7l|arm64|aarch64)$")
|
||||
message(
|
||||
WARNING
|
||||
"NNPACK is not supported on ${CMAKE_SYSTEM_PROCESSOR} processors. "
|
||||
"The only supported architectures are x86, x86-64, ARM, and ARM64. "
|
||||
"Turn this warning off by USE_NNPACK=OFF.")
|
||||
set(USE_NNPACK OFF)
|
||||
return()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
##############################################################################
|
||||
# ##############################################################################
|
||||
# (3) Android, iOS, Linux, macOS - supported
|
||||
##############################################################################
|
||||
# ##############################################################################
|
||||
|
||||
if(ANDROID OR IOS OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux" OR ${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
|
||||
if(ANDROID
|
||||
OR IOS
|
||||
OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux"
|
||||
OR ${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
|
||||
message(STATUS "Brace yourself, we are building NNPACK")
|
||||
set(CAFFE2_THIRD_PARTY_ROOT ${PROJECT_SOURCE_DIR}/third_party)
|
||||
|
||||
# Directories for NNPACK dependencies submoduled in Caffe2
|
||||
set(PYTHON_PEACHPY_SOURCE_DIR "${CAFFE2_THIRD_PARTY_ROOT}/python-peachpy" CACHE STRING "PeachPy (Python package) source directory")
|
||||
set(PYTHON_PEACHPY_SOURCE_DIR
|
||||
"${CAFFE2_THIRD_PARTY_ROOT}/python-peachpy"
|
||||
CACHE STRING "PeachPy (Python package) source directory")
|
||||
if(NOT DEFINED CPUINFO_SOURCE_DIR)
|
||||
set(CPUINFO_SOURCE_DIR "${CAFFE2_THIRD_PARTY_ROOT}/cpuinfo" CACHE STRING "cpuinfo source directory")
|
||||
set(CPUINFO_SOURCE_DIR
|
||||
"${CAFFE2_THIRD_PARTY_ROOT}/cpuinfo"
|
||||
CACHE STRING "cpuinfo source directory")
|
||||
endif()
|
||||
set(NNPACK_SOURCE_DIR "${CAFFE2_THIRD_PARTY_ROOT}/NNPACK" CACHE STRING "NNPACK source directory")
|
||||
set(FP16_SOURCE_DIR "${CAFFE2_THIRD_PARTY_ROOT}/FP16" CACHE STRING "FP16 source directory")
|
||||
set(FXDIV_SOURCE_DIR "${CAFFE2_THIRD_PARTY_ROOT}/FXdiv" CACHE STRING "FXdiv source directory")
|
||||
set(PSIMD_SOURCE_DIR "${CAFFE2_THIRD_PARTY_ROOT}/psimd" CACHE STRING "PSimd source directory")
|
||||
set(PTHREADPOOL_SOURCE_DIR "${CAFFE2_THIRD_PARTY_ROOT}/pthreadpool" CACHE STRING "pthreadpool source directory")
|
||||
set(GOOGLETEST_SOURCE_DIR "${CAFFE2_THIRD_PARTY_ROOT}/googletest" CACHE STRING "Google Test source directory")
|
||||
set(NNPACK_SOURCE_DIR
|
||||
"${CAFFE2_THIRD_PARTY_ROOT}/NNPACK"
|
||||
CACHE STRING "NNPACK source directory")
|
||||
set(FP16_SOURCE_DIR
|
||||
"${CAFFE2_THIRD_PARTY_ROOT}/FP16"
|
||||
CACHE STRING "FP16 source directory")
|
||||
set(FXDIV_SOURCE_DIR
|
||||
"${CAFFE2_THIRD_PARTY_ROOT}/FXdiv"
|
||||
CACHE STRING "FXdiv source directory")
|
||||
set(PSIMD_SOURCE_DIR
|
||||
"${CAFFE2_THIRD_PARTY_ROOT}/psimd"
|
||||
CACHE STRING "PSimd source directory")
|
||||
set(PTHREADPOOL_SOURCE_DIR
|
||||
"${CAFFE2_THIRD_PARTY_ROOT}/pthreadpool"
|
||||
CACHE STRING "pthreadpool source directory")
|
||||
set(GOOGLETEST_SOURCE_DIR
|
||||
"${CAFFE2_THIRD_PARTY_ROOT}/googletest"
|
||||
CACHE STRING "Google Test source directory")
|
||||
|
||||
if(NOT TARGET nnpack)
|
||||
set(NNPACK_BUILD_TESTS OFF CACHE BOOL "")
|
||||
set(NNPACK_BUILD_BENCHMARKS OFF CACHE BOOL "")
|
||||
set(NNPACK_LIBRARY_TYPE "static" CACHE STRING "")
|
||||
set(PTHREADPOOL_LIBRARY_TYPE "static" CACHE STRING "")
|
||||
set(CPUINFO_LIBRARY_TYPE "static" CACHE STRING "")
|
||||
set(NNPACK_BUILD_TESTS
|
||||
OFF
|
||||
CACHE BOOL "")
|
||||
set(NNPACK_BUILD_BENCHMARKS
|
||||
OFF
|
||||
CACHE BOOL "")
|
||||
set(NNPACK_LIBRARY_TYPE
|
||||
"static"
|
||||
CACHE STRING "")
|
||||
set(PTHREADPOOL_LIBRARY_TYPE
|
||||
"static"
|
||||
CACHE STRING "")
|
||||
set(CPUINFO_LIBRARY_TYPE
|
||||
"static"
|
||||
CACHE STRING "")
|
||||
if(CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
|
||||
message(WARNING "Ancient nnpack forces CMake compatibility")
|
||||
set(CMAKE_POLICY_VERSION_MINIMUM 3.5)
|
||||
endif()
|
||||
add_subdirectory(
|
||||
"${NNPACK_SOURCE_DIR}"
|
||||
"${CONFU_DEPENDENCIES_BINARY_DIR}/NNPACK")
|
||||
add_subdirectory("${NNPACK_SOURCE_DIR}"
|
||||
"${CONFU_DEPENDENCIES_BINARY_DIR}/NNPACK")
|
||||
if(CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
|
||||
unset(CMAKE_POLICY_VERSION_MINIMUM)
|
||||
endif()
|
||||
# We build static versions of nnpack and pthreadpool but link
|
||||
# them into a shared library for Caffe2, so they need PIC.
|
||||
# We build static versions of nnpack and pthreadpool but link them into a
|
||||
# shared library for Caffe2, so they need PIC.
|
||||
set_property(TARGET nnpack PROPERTY POSITION_INDEPENDENT_CODE ON)
|
||||
set_property(TARGET pthreadpool PROPERTY POSITION_INDEPENDENT_CODE ON)
|
||||
set_property(TARGET cpuinfo PROPERTY POSITION_INDEPENDENT_CODE ON)
|
||||
@ -82,17 +116,16 @@ if(ANDROID OR IOS OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux" OR ${CMAKE_SYSTEM_NAM
|
||||
|
||||
set(NNPACK_FOUND TRUE)
|
||||
if(TARGET nnpack)
|
||||
set(NNPACK_INCLUDE_DIRS
|
||||
$<TARGET_PROPERTY:nnpack,INCLUDE_DIRECTORIES>
|
||||
$<TARGET_PROPERTY:pthreadpool,INCLUDE_DIRECTORIES>)
|
||||
set(NNPACK_INCLUDE_DIRS $<TARGET_PROPERTY:nnpack,INCLUDE_DIRECTORIES>
|
||||
$<TARGET_PROPERTY:pthreadpool,INCLUDE_DIRECTORIES>)
|
||||
set(NNPACK_LIBRARIES $<TARGET_OBJECTS:nnpack> $<TARGET_OBJECTS:cpuinfo>)
|
||||
endif()
|
||||
return()
|
||||
endif()
|
||||
|
||||
##############################################################################
|
||||
# ##############################################################################
|
||||
# (4) Catch-all: not supported.
|
||||
##############################################################################
|
||||
# ##############################################################################
|
||||
|
||||
message(WARNING "Unknown platform - I don't know how to build NNPACK. "
|
||||
"See cmake/External/nnpack.cmake for details.")
|
||||
|
||||
3
cmake/External/rccl.cmake
vendored
3
cmake/External/rccl.cmake
vendored
@ -2,7 +2,8 @@ if(NOT __NCCL_INCLUDED)
|
||||
set(__NCCL_INCLUDED TRUE)
|
||||
|
||||
if(USE_SYSTEM_NCCL)
|
||||
# NCCL_ROOT, NCCL_LIB_DIR, NCCL_INCLUDE_DIR will be accounted in the following line.
|
||||
# NCCL_ROOT, NCCL_LIB_DIR, NCCL_INCLUDE_DIR will be accounted in the
|
||||
# following line.
|
||||
find_package(rccl REQUIRED)
|
||||
if(rccl_FOUND)
|
||||
message(STATUS "RCCL Found!")
|
||||
|
||||
3
cmake/External/ucc.cmake
vendored
3
cmake/External/ucc.cmake
vendored
@ -10,6 +10,7 @@ if(NOT __UCC_INCLUDED)
|
||||
target_include_directories(__caffe2_ucc INTERFACE ${UCC_INCLUDE_DIRS})
|
||||
endif()
|
||||
else()
|
||||
message(FATAL_ERROR "USE_SYSTEM_UCC=OFF is not supported yet when using UCC")
|
||||
message(
|
||||
FATAL_ERROR "USE_SYSTEM_UCC=OFF is not supported yet when using UCC")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@ -1,10 +1,5 @@
|
||||
set(FlatBuffers_Include ${PROJECT_SOURCE_DIR}/third_party/flatbuffers/include)
|
||||
file(GLOB FlatBuffers_Library_SRCS
|
||||
${FlatBuffers_Include}/flatbuffers/*.h
|
||||
)
|
||||
file(GLOB FlatBuffers_Library_SRCS ${FlatBuffers_Include}/flatbuffers/*.h)
|
||||
add_library(flatbuffers INTERFACE)
|
||||
target_sources(
|
||||
flatbuffers
|
||||
INTERFACE ${FlatBuffers_Library_SRCS}
|
||||
)
|
||||
target_sources(flatbuffers INTERFACE ${FlatBuffers_Library_SRCS})
|
||||
target_include_directories(flatbuffers INTERFACE ${FlatBuffers_Include})
|
||||
|
||||
@ -1,107 +1,135 @@
|
||||
if(NOT APPLE)
|
||||
return()
|
||||
return()
|
||||
endif()
|
||||
|
||||
set(METAL_CFLAGS -Wall -Wextra -fno-fast-math)
|
||||
if(WERROR)
|
||||
string(APPEND METAL_CFLAGS -Werror)
|
||||
string(APPEND METAL_CFLAGS -Werror)
|
||||
endif()
|
||||
|
||||
function(metal_to_air SRC TARGET FLAGS)
|
||||
add_custom_command(COMMAND xcrun metal -c ${SRC} -I ${CMAKE_SOURCE_DIR} -I ${CMAKE_SOURCE_DIR}/aten/src -o ${TARGET} ${FLAGS} ${METAL_CFLAGS}
|
||||
DEPENDS ${SRC}
|
||||
OUTPUT ${TARGET}
|
||||
COMMENT "Compiling ${SRC} to ${TARGET}"
|
||||
VERBATIM)
|
||||
add_custom_command(
|
||||
COMMAND xcrun metal -c ${SRC} -I ${CMAKE_SOURCE_DIR} -I
|
||||
${CMAKE_SOURCE_DIR}/aten/src -o ${TARGET} ${FLAGS} ${METAL_CFLAGS}
|
||||
DEPENDS ${SRC}
|
||||
OUTPUT ${TARGET}
|
||||
COMMENT "Compiling ${SRC} to ${TARGET}"
|
||||
VERBATIM)
|
||||
endfunction()
|
||||
|
||||
function(air_to_metallib TARGET OBJECTS)
|
||||
set(_OBJECTS ${OBJECTS} ${ARGN})
|
||||
add_custom_command(COMMAND xcrun metallib -o ${TARGET} ${_OBJECTS}
|
||||
DEPENDS ${_OBJECTS}
|
||||
OUTPUT ${TARGET}
|
||||
COMMENT "Linking ${TARGET}"
|
||||
VERBATIM)
|
||||
set(_OBJECTS ${OBJECTS} ${ARGN})
|
||||
add_custom_command(
|
||||
COMMAND xcrun metallib -o ${TARGET} ${_OBJECTS}
|
||||
DEPENDS ${_OBJECTS}
|
||||
OUTPUT ${TARGET}
|
||||
COMMENT "Linking ${TARGET}"
|
||||
VERBATIM)
|
||||
endfunction()
|
||||
|
||||
function(metal_to_metallib_h SRC TGT)
|
||||
execute_process(COMMAND ${Python_EXECUTABLE} torch/utils/_cpp_embed_headers.py ${SRC}
|
||||
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
|
||||
OUTPUT_VARIABLE SHADER_CONTENT
|
||||
RESULT_VARIABLE _exitcode)
|
||||
if(NOT _exitcode EQUAL 0)
|
||||
message(FATAL_ERROR "Failed to preprocess Metal shader ${SRC}")
|
||||
return()
|
||||
endif()
|
||||
file(WRITE ${TGT} "#include <ATen/native/mps/OperationUtils.h>\n")
|
||||
file(APPEND ${TGT} "static ::at::native::mps::MetalShaderLibrary lib(R\"SHDR(\n")
|
||||
file(APPEND ${TGT} "${SHADER_CONTENT}")
|
||||
file(APPEND ${TGT} ")SHDR\");\n")
|
||||
execute_process(
|
||||
COMMAND ${Python_EXECUTABLE} torch/utils/_cpp_embed_headers.py ${SRC}
|
||||
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
|
||||
OUTPUT_VARIABLE SHADER_CONTENT
|
||||
RESULT_VARIABLE _exitcode)
|
||||
if(NOT _exitcode EQUAL 0)
|
||||
message(FATAL_ERROR "Failed to preprocess Metal shader ${SRC}")
|
||||
return()
|
||||
endif()
|
||||
file(WRITE ${TGT} "#include <ATen/native/mps/OperationUtils.h>\n")
|
||||
file(APPEND ${TGT}
|
||||
"static ::at::native::mps::MetalShaderLibrary lib(R\"SHDR(\n")
|
||||
file(APPEND ${TGT} "${SHADER_CONTENT}")
|
||||
file(APPEND ${TGT} ")SHDR\");\n")
|
||||
endfunction()
|
||||
|
||||
set(BFLOAT_METAL_CODE "
|
||||
set(BFLOAT_METAL_CODE
|
||||
"
|
||||
kernel void inc(device bfloat* ptr,
|
||||
uint idx [[thread_position_in_grid]]) {
|
||||
ptr[idx] += 1;
|
||||
}
|
||||
")
|
||||
if(NOT CAN_COMPILE_METAL_FOUND)
|
||||
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/bfloat_inc.metal" "${BFLOAT_METAL_CODE}")
|
||||
execute_process(COMMAND xcrun metal -std=metal3.1 bfloat_inc.metal
|
||||
WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
|
||||
OUTPUT_VARIABLE XCRUN_OUTPUT
|
||||
ERROR_VARIABLE XCRUN_OUTPUT
|
||||
RESULT_VARIABLE XCRUN_RC)
|
||||
if(${XCRUN_RC} EQUAL 0)
|
||||
message(STATUS "Machine can compile metal shaders")
|
||||
set(CAN_COMPILE_METAL YES CACHE BOOL "Host can compile metal shaders")
|
||||
else()
|
||||
message(WARNING "Machine can not compile metal shaders, fails with ${XCRUN_OUTPUT}")
|
||||
set(CAN_COMPILE_METAL NO CACHE BOOL "Host can compile metal shaders")
|
||||
endif()
|
||||
set(CAN_COMPILE_METAL_FOUND YES CACHE INTERNAL "Run check for shader compiler")
|
||||
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/bfloat_inc.metal"
|
||||
"${BFLOAT_METAL_CODE}")
|
||||
execute_process(
|
||||
COMMAND xcrun metal -std=metal3.1 bfloat_inc.metal
|
||||
WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
|
||||
OUTPUT_VARIABLE XCRUN_OUTPUT
|
||||
ERROR_VARIABLE XCRUN_OUTPUT
|
||||
RESULT_VARIABLE XCRUN_RC)
|
||||
if(${XCRUN_RC} EQUAL 0)
|
||||
message(STATUS "Machine can compile metal shaders")
|
||||
set(CAN_COMPILE_METAL
|
||||
YES
|
||||
CACHE BOOL "Host can compile metal shaders")
|
||||
else()
|
||||
message(
|
||||
WARNING
|
||||
"Machine can not compile metal shaders, fails with ${XCRUN_OUTPUT}")
|
||||
set(CAN_COMPILE_METAL
|
||||
NO
|
||||
CACHE BOOL "Host can compile metal shaders")
|
||||
endif()
|
||||
set(CAN_COMPILE_METAL_FOUND
|
||||
YES
|
||||
CACHE INTERNAL "Run check for shader compiler")
|
||||
endif()
|
||||
|
||||
if(NOT USE_PYTORCH_METAL)
|
||||
return()
|
||||
return()
|
||||
endif()
|
||||
|
||||
if(IOS OR INTERN_BUILD_MOBILE)
|
||||
return()
|
||||
return()
|
||||
endif()
|
||||
|
||||
set(OSX_PLATFORM "MacOSX.platform")
|
||||
exec_program(/usr/bin/xcode-select ARGS -print-path OUTPUT_VARIABLE CMAKE_XCODE_DEVELOPER_DIR)
|
||||
set(XCODE_POST_43_ROOT "${CMAKE_XCODE_DEVELOPER_DIR}/Platforms/${OSX_PLATFORM}/Developer")
|
||||
exec_program(
|
||||
/usr/bin/xcode-select ARGS
|
||||
-print-path
|
||||
OUTPUT_VARIABLE CMAKE_XCODE_DEVELOPER_DIR)
|
||||
set(XCODE_POST_43_ROOT
|
||||
"${CMAKE_XCODE_DEVELOPER_DIR}/Platforms/${OSX_PLATFORM}/Developer")
|
||||
set(XCODE_PRE_43_ROOT "/Developer/Platforms/${OSX_PLATFORM}/Developer")
|
||||
if(NOT DEFINED CMAKE_OSX_DEVELOPER_ROOT)
|
||||
if(EXISTS ${XCODE_POST_43_ROOT})
|
||||
set(CMAKE_OSX_DEVELOPER_ROOT ${XCODE_POST_43_ROOT})
|
||||
elseif(EXISTS ${XCODE_PRE_43_ROOT})
|
||||
set(CMAKE_OSX_DEVELOPER_ROOT ${XCODE_PRE_43_ROOT})
|
||||
elseif(EXISTS ${CMAKE_XCODE_DEVELOPER_DIR} AND ${CMAKE_XCODE_DEVELOPER_DIR} STREQUAL "/Library/Developer/CommandLineTools")
|
||||
set(CMAKE_OSX_DEVELOPER_ROOT ${CMAKE_XCODE_DEVELOPER_DIR})
|
||||
endif()
|
||||
if(EXISTS ${XCODE_POST_43_ROOT})
|
||||
set(CMAKE_OSX_DEVELOPER_ROOT ${XCODE_POST_43_ROOT})
|
||||
elseif(EXISTS ${XCODE_PRE_43_ROOT})
|
||||
set(CMAKE_OSX_DEVELOPER_ROOT ${XCODE_PRE_43_ROOT})
|
||||
elseif(EXISTS ${CMAKE_XCODE_DEVELOPER_DIR}
|
||||
AND ${CMAKE_XCODE_DEVELOPER_DIR} STREQUAL
|
||||
"/Library/Developer/CommandLineTools")
|
||||
set(CMAKE_OSX_DEVELOPER_ROOT ${CMAKE_XCODE_DEVELOPER_DIR})
|
||||
endif()
|
||||
endif(NOT DEFINED CMAKE_OSX_DEVELOPER_ROOT)
|
||||
set(CMAKE_OSX_DEVELOPER_ROOT ${CMAKE_OSX_DEVELOPER_ROOT} CACHE PATH "Location of OSX SDKs root directory")
|
||||
set(CMAKE_OSX_DEVELOPER_ROOT
|
||||
${CMAKE_OSX_DEVELOPER_ROOT}
|
||||
CACHE PATH "Location of OSX SDKs root directory")
|
||||
|
||||
if(NOT DEFINED CMAKE_OSX_SDK_ROOT)
|
||||
file(GLOB _CMAKE_OSX_SDKS "${CMAKE_OSX_DEVELOPER_ROOT}/SDKs/*")
|
||||
if(_CMAKE_OSX_SDKS)
|
||||
list(SORT _CMAKE_OSX_SDKS)
|
||||
list(REVERSE _CMAKE_OSX_SDKS)
|
||||
list(GET _CMAKE_OSX_SDKS 0 CMAKE_OSX_SDK_ROOT)
|
||||
message(STATUS "_CMAKE_OSX_SDKS: ${_CMAKE_OSX_SDKS}")
|
||||
else(_CMAKE_OSX_SDKS)
|
||||
message(FATAL_ERROR "No OSX SDK's found in default search path ${CMAKE_OSX_DEVELOPER_ROOT}.")
|
||||
endif(_CMAKE_OSX_SDKS)
|
||||
message(STATUS "Toolchain using default OSX SDK: ${CMAKE_OSX_SDK_ROOT}")
|
||||
file(GLOB _CMAKE_OSX_SDKS "${CMAKE_OSX_DEVELOPER_ROOT}/SDKs/*")
|
||||
if(_CMAKE_OSX_SDKS)
|
||||
list(SORT _CMAKE_OSX_SDKS)
|
||||
list(REVERSE _CMAKE_OSX_SDKS)
|
||||
list(GET _CMAKE_OSX_SDKS 0 CMAKE_OSX_SDK_ROOT)
|
||||
message(STATUS "_CMAKE_OSX_SDKS: ${_CMAKE_OSX_SDKS}")
|
||||
else(_CMAKE_OSX_SDKS)
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"No OSX SDK's found in default search path ${CMAKE_OSX_DEVELOPER_ROOT}."
|
||||
)
|
||||
endif(_CMAKE_OSX_SDKS)
|
||||
message(STATUS "Toolchain using default OSX SDK: ${CMAKE_OSX_SDK_ROOT}")
|
||||
endif(NOT DEFINED CMAKE_OSX_SDK_ROOT)
|
||||
set(CMAKE_OSX_SDK_ROOT ${CMAKE_OSX_SDK_ROOT} CACHE PATH "Location of the selected OSX SDK")
|
||||
set(CMAKE_OSX_SDK_ROOT
|
||||
${CMAKE_OSX_SDK_ROOT}
|
||||
CACHE PATH "Location of the selected OSX SDK")
|
||||
set(CMAKE_FRAMEWORK_PATH
|
||||
${CMAKE_OSX_SDK_ROOT}/System/Library/Frameworks
|
||||
${CMAKE_OSX_SDK_ROOT}/System/Library/PrivateFrameworks
|
||||
${CMAKE_OSX_SDK_ROOT}/Developer/Library/Frameworks
|
||||
)
|
||||
${CMAKE_OSX_SDK_ROOT}/Developer/Library/Frameworks)
|
||||
message(STATUS "CMAKE_FRAMEWORK_PATH: ${CMAKE_FRAMEWORK_PATH}")
|
||||
set(CMAKE_FIND_FRAMEWORK FIRST)
|
||||
|
||||
@ -6,12 +6,14 @@ include(CMakePushCheckState)
|
||||
if(USE_GLOG)
|
||||
cmake_push_check_state(RESET)
|
||||
set(CMAKE_REQUIRED_FLAGS "-std=c++17")
|
||||
CHECK_CXX_SOURCE_COMPILES(
|
||||
"#include <glog/stl_logging.h>
|
||||
check_cxx_source_compiles(
|
||||
"#include <glog/stl_logging.h>
|
||||
int main(int argc, char** argv) {
|
||||
return 0;
|
||||
}" CAFFE2_NEED_TO_TURN_OFF_DEPRECATION_WARNING
|
||||
FAIL_REGEX ".*-Wno-deprecated.*")
|
||||
}"
|
||||
CAFFE2_NEED_TO_TURN_OFF_DEPRECATION_WARNING
|
||||
FAIL_REGEX
|
||||
".*-Wno-deprecated.*")
|
||||
|
||||
if(NOT CAFFE2_NEED_TO_TURN_OFF_DEPRECATION_WARNING AND NOT MSVC)
|
||||
message(STATUS "Turning off deprecation warning due to glog.")
|
||||
@ -24,7 +26,9 @@ endif()
|
||||
if(NOT INTERN_BUILD_MOBILE)
|
||||
find_package(AVX) # checks AVX and AVX2
|
||||
if(CXX_AVX2_FOUND)
|
||||
message(STATUS "Current compiler supports avx2 extension. Will build perfkernels.")
|
||||
message(
|
||||
STATUS "Current compiler supports avx2 extension. Will build perfkernels."
|
||||
)
|
||||
# Also see CMakeLists.txt under caffe2/perfkernels.
|
||||
set(CAFFE2_PERF_WITH_AVX 1)
|
||||
set(CAFFE2_PERF_WITH_AVX2 1)
|
||||
@ -39,14 +43,13 @@ if(MSVC AND NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
|
||||
set(CMAKE_REQUIRED_FLAGS "/D__AVX512F__ /D__AVX512DQ__ /D__AVX512VL__")
|
||||
else()
|
||||
# We only consider the case where all of avx512f, avx512dq, and avx512vl are
|
||||
# supported.
|
||||
# Platforms where avx512f is supported by not avx512dq and avx512vl as of
|
||||
# Jan 15 2019 : linux_manywheel_2.7mu_cpu_build and
|
||||
# supported. Platforms where avx512f is supported by not avx512dq and avx512vl
|
||||
# as of Jan 15 2019 : linux_manywheel_2.7mu_cpu_build and
|
||||
# linux_conda_3.7_cu100_build
|
||||
set(CMAKE_REQUIRED_FLAGS "-mavx512f -mavx512dq -mavx512vl")
|
||||
endif()
|
||||
CHECK_CXX_SOURCE_COMPILES(
|
||||
"#if defined(_MSC_VER)
|
||||
check_cxx_source_compiles(
|
||||
"#if defined(_MSC_VER)
|
||||
#include <intrin.h>
|
||||
#else
|
||||
#include <immintrin.h>
|
||||
@ -65,24 +68,27 @@ CHECK_CXX_SOURCE_COMPILES(
|
||||
ymm = _mm256_abs_epi64(ymm); // check avx512vl
|
||||
__mmask16 m = _mm512_cmp_epi32_mask(a, a, _MM_CMPINT_EQ);
|
||||
__m512i r = _mm512_andnot_si512(a, a);
|
||||
}" CAFFE2_COMPILER_SUPPORTS_AVX512_EXTENSIONS)
|
||||
}"
|
||||
CAFFE2_COMPILER_SUPPORTS_AVX512_EXTENSIONS)
|
||||
if(CAFFE2_COMPILER_SUPPORTS_AVX512_EXTENSIONS)
|
||||
message(STATUS "Current compiler supports avx512f extension. Will build fbgemm.")
|
||||
message(
|
||||
STATUS "Current compiler supports avx512f extension. Will build fbgemm.")
|
||||
endif()
|
||||
cmake_pop_check_state()
|
||||
|
||||
# ---[ Checks if compiler supports -fvisibility=hidden
|
||||
check_cxx_compiler_flag("-fvisibility=hidden" COMPILER_SUPPORTS_HIDDEN_VISIBILITY)
|
||||
check_cxx_compiler_flag("-fvisibility-inlines-hidden" COMPILER_SUPPORTS_HIDDEN_INLINE_VISIBILITY)
|
||||
check_cxx_compiler_flag("-fvisibility=hidden"
|
||||
COMPILER_SUPPORTS_HIDDEN_VISIBILITY)
|
||||
check_cxx_compiler_flag("-fvisibility-inlines-hidden"
|
||||
COMPILER_SUPPORTS_HIDDEN_INLINE_VISIBILITY)
|
||||
if(${COMPILER_SUPPORTS_HIDDEN_INLINE_VISIBILITY})
|
||||
set(CAFFE2_VISIBILITY_FLAG "-fvisibility-inlines-hidden")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CAFFE2_VISIBILITY_FLAG}")
|
||||
endif()
|
||||
|
||||
# ---[ Checks if linker supports -rdynamic. `-rdynamic` tells linker
|
||||
# -to add all (including unused) symbols into the dynamic symbol
|
||||
# -table. We need this to get symbols when generating backtrace at
|
||||
# -runtime.
|
||||
# ---[ Checks if linker supports -rdynamic. `-rdynamic` tells linker -to add all
|
||||
# (including unused) symbols into the dynamic symbol -table. We need this to get
|
||||
# symbols when generating backtrace at -runtime.
|
||||
if(NOT MSVC)
|
||||
check_cxx_compiler_flag("-rdynamic" COMPILER_SUPPORTS_RDYNAMIC)
|
||||
if(${COMPILER_SUPPORTS_RDYNAMIC})
|
||||
@ -92,12 +98,10 @@ if(NOT MSVC)
|
||||
endif()
|
||||
|
||||
# ---[ If we are building on ios, or building with opengl support, we will
|
||||
# enable -mfpu=neon-fp16 for iOS Metal build. For Android, this fpu setting
|
||||
# is going to be done with android-cmake by setting
|
||||
# -DANDROID_ABI="armeabi-v7a with NEON FP16"
|
||||
# in the build command.
|
||||
# Also, we will turn off deprecated-declarations
|
||||
# due to protobuf.
|
||||
# enable -mfpu=neon-fp16 for iOS Metal build. For Android, this fpu setting is
|
||||
# going to be done with android-cmake by setting -DANDROID_ABI="armeabi-v7a with
|
||||
# NEON FP16" in the build command. Also, we will turn off
|
||||
# deprecated-declarations due to protobuf.
|
||||
|
||||
# ---[ Check if the compiler has SVE support.
|
||||
find_package(ARM) # checks SVE
|
||||
@ -106,7 +110,9 @@ if(CXX_SVE_FOUND)
|
||||
# Also see CMakeLists.txt under caffe2/perfkernels.
|
||||
add_compile_definitions(CAFFE2_PERF_WITH_SVE=1)
|
||||
else()
|
||||
message(STATUS "Compiler does not support SVE extension. Will not build perfkernels.")
|
||||
message(
|
||||
STATUS
|
||||
"Compiler does not support SVE extension. Will not build perfkernels.")
|
||||
endif()
|
||||
|
||||
if(IOS AND (${IOS_ARCH} MATCHES "armv7*"))
|
||||
@ -124,7 +130,7 @@ if(USE_NATIVE_ARCH AND NOT MSVC)
|
||||
add_definitions("-march=native")
|
||||
else()
|
||||
message(
|
||||
WARNING
|
||||
WARNING
|
||||
"Your compiler does not support -march=native. Turn off this warning "
|
||||
"by setting -DUSE_NATIVE_ARCH=OFF.")
|
||||
endif()
|
||||
|
||||
@ -1,13 +1,13 @@
|
||||
# Finds Google Protocol Buffers library and compilers and extends
|
||||
# the standard cmake script with version and python generation support
|
||||
# Finds Google Protocol Buffers library and compilers and extends the standard
|
||||
# cmake script with version and python generation support
|
||||
macro(custom_protobuf_find)
|
||||
message(STATUS "Use custom protobuf build.")
|
||||
option(protobuf_BUILD_TESTS "" OFF)
|
||||
option(protobuf_BUILD_EXAMPLES "" OFF)
|
||||
option(protobuf_WITH_ZLIB "" OFF)
|
||||
if(${CAFFE2_LINK_LOCAL_PROTOBUF})
|
||||
# If we are going to link protobuf locally, we will need to turn off
|
||||
# shared libs build for protobuf.
|
||||
# If we are going to link protobuf locally, we will need to turn off shared
|
||||
# libs build for protobuf.
|
||||
option(protobuf_BUILD_SHARED_LIBS "" OFF)
|
||||
else()
|
||||
# If we are building Caffe2 as shared libs, we will also build protobuf as
|
||||
@ -18,7 +18,8 @@ macro(custom_protobuf_find)
|
||||
option(protobuf_MSVC_STATIC_RUNTIME "" ${CAFFE2_USE_MSVC_STATIC_RUNTIME})
|
||||
|
||||
if(${CAFFE2_LINK_LOCAL_PROTOBUF})
|
||||
set(__caffe2_CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ${CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS})
|
||||
set(__caffe2_CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS
|
||||
${CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS})
|
||||
set(__caffe2_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
|
||||
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS OFF)
|
||||
set(BUILD_SHARED_LIBS OFF)
|
||||
@ -30,7 +31,8 @@ macro(custom_protobuf_find)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(__caffe2_CMAKE_POSITION_INDEPENDENT_CODE ${CMAKE_POSITION_INDEPENDENT_CODE})
|
||||
set(__caffe2_CMAKE_POSITION_INDEPENDENT_CODE
|
||||
${CMAKE_POSITION_INDEPENDENT_CODE})
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
if(CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
|
||||
@ -42,22 +44,24 @@ macro(custom_protobuf_find)
|
||||
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/protobuf/cmake)
|
||||
endif()
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ${__caffe2_CMAKE_POSITION_INDEPENDENT_CODE})
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE
|
||||
${__caffe2_CMAKE_POSITION_INDEPENDENT_CODE})
|
||||
|
||||
if(${CAFFE2_LINK_LOCAL_PROTOBUF})
|
||||
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ${__caffe2_CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS})
|
||||
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS
|
||||
${__caffe2_CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS})
|
||||
set(BUILD_SHARED_LIBS ON)
|
||||
set(CMAKE_CXX_FLAGS ${__caffe2_CMAKE_CXX_FLAGS})
|
||||
endif()
|
||||
|
||||
# Protobuf "namespaced" target is only added post protobuf 3.5.1. As a
|
||||
# result, for older versions, we will manually add alias.
|
||||
# Protobuf "namespaced" target is only added post protobuf 3.5.1. As a result,
|
||||
# for older versions, we will manually add alias.
|
||||
if(NOT TARGET protobuf::libprotobuf)
|
||||
add_library(protobuf::libprotobuf ALIAS libprotobuf)
|
||||
add_library(protobuf::libprotobuf-lite ALIAS libprotobuf-lite)
|
||||
# There is link error when cross compiling protoc on mobile:
|
||||
# https://github.com/protocolbuffers/protobuf/issues/2719
|
||||
# And protoc is very unlikely needed for mobile builds.
|
||||
# https://github.com/protocolbuffers/protobuf/issues/2719 And protoc is very
|
||||
# unlikely needed for mobile builds.
|
||||
if(NOT (ANDROID OR IOS))
|
||||
add_executable(protobuf::protoc ALIAS protoc)
|
||||
endif()
|
||||
@ -65,23 +69,27 @@ macro(custom_protobuf_find)
|
||||
endmacro()
|
||||
|
||||
# Main entry for protobuf. If we are building on Android, iOS or we have hard
|
||||
# coded BUILD_CUSTOM_PROTOBUF, we will hard code the use of custom protobuf
|
||||
# in the submodule.
|
||||
# coded BUILD_CUSTOM_PROTOBUF, we will hard code the use of custom protobuf in
|
||||
# the submodule.
|
||||
if(ANDROID OR IOS)
|
||||
if(NOT BUILD_CUSTOM_PROTOBUF)
|
||||
message(WARNING
|
||||
"For Android and iOS cross compilation, I am automatically using "
|
||||
"custom protobuf under third party. Note that this behavior may "
|
||||
"change in the future, and you will need to specify "
|
||||
"-DBUILD_CUSTOM_PROTOBUF=ON explicitly.")
|
||||
message(
|
||||
WARNING "For Android and iOS cross compilation, I am automatically using "
|
||||
"custom protobuf under third party. Note that this behavior may "
|
||||
"change in the future, and you will need to specify "
|
||||
"-DBUILD_CUSTOM_PROTOBUF=ON explicitly.")
|
||||
endif()
|
||||
# There is link error when cross compiling protoc on mobile:
|
||||
# https://github.com/protocolbuffers/protobuf/issues/2719
|
||||
# And protoc is very unlikely needed for mobile builds.
|
||||
# https://github.com/protocolbuffers/protobuf/issues/2719 And protoc is very
|
||||
# unlikely needed for mobile builds.
|
||||
set(__caffe2_protobuf_BUILD_PROTOC_BINARIES ${protobuf_BUILD_PROTOC_BINARIES})
|
||||
set(protobuf_BUILD_PROTOC_BINARIES OFF CACHE BOOL "" FORCE)
|
||||
set(protobuf_BUILD_PROTOC_BINARIES
|
||||
OFF
|
||||
CACHE BOOL "" FORCE)
|
||||
custom_protobuf_find()
|
||||
set(protobuf_BUILD_PROTOC_BINARIES ${__caffe2_protobuf_BUILD_PROTOC_BINARIES} CACHE BOOL "" FORCE)
|
||||
set(protobuf_BUILD_PROTOC_BINARIES
|
||||
${__caffe2_protobuf_BUILD_PROTOC_BINARIES}
|
||||
CACHE BOOL "" FORCE)
|
||||
elseif(BUILD_CUSTOM_PROTOBUF)
|
||||
message(STATUS "Building using own protobuf under third_party per request.")
|
||||
custom_protobuf_find()
|
||||
@ -89,20 +97,22 @@ else()
|
||||
include(cmake/public/protobuf.cmake)
|
||||
endif()
|
||||
|
||||
if((NOT TARGET protobuf::libprotobuf) AND (NOT TARGET protobuf::libprotobuf-lite))
|
||||
message(WARNING
|
||||
if((NOT TARGET protobuf::libprotobuf) AND (NOT TARGET protobuf::libprotobuf-lite
|
||||
))
|
||||
message(
|
||||
WARNING
|
||||
"Protobuf cannot be found. Caffe2 will automatically switch to use "
|
||||
"own protobuf under third_party. Note that this behavior may change in "
|
||||
"the future, and you will need to specify -DBUILD_CUSTOM_PROTOBUF=ON "
|
||||
"explicitly.")
|
||||
custom_protobuf_find()
|
||||
|
||||
# TODO(jiayq): enable this in the future, when Jenkins Mac support is
|
||||
# properly set up with protobuf installs.
|
||||
# TODO(jiayq): enable this in the future, when Jenkins Mac support is properly
|
||||
# set up with protobuf installs.
|
||||
|
||||
# message(FATAL_ERROR
|
||||
# "Protobuf cannot be found. Caffe2 will have to build with libprotobuf. "
|
||||
# "Please set the proper paths so that I can find protobuf correctly.")
|
||||
# message(FATAL_ERROR "Protobuf cannot be found. Caffe2 will have to build
|
||||
# with libprotobuf. " "Please set the proper paths so that I can find protobuf
|
||||
# correctly.")
|
||||
endif()
|
||||
|
||||
get_target_property(__tmp protobuf::libprotobuf INTERFACE_INCLUDE_DIRECTORIES)
|
||||
@ -119,28 +129,28 @@ else()
|
||||
set(Protobuf_VERSION "Protobuf_VERSION_NOTFOUND")
|
||||
endif()
|
||||
|
||||
|
||||
# Figure out which protoc to use.
|
||||
# If CAFFE2_CUSTOM_PROTOC_EXECUTABLE is set, we assume the user knows
|
||||
# what they're doing and we blindly use the specified protoc. This
|
||||
# is typically the case when cross-compiling where protoc must be
|
||||
# compiled for the host architecture and libprotobuf must be
|
||||
# compiled for the target architecture.
|
||||
# If CAFFE2_CUSTOM_PROTOC_EXECUTABLE is NOT set, we use the protoc
|
||||
# target that is built as part of including the protobuf project.
|
||||
# Figure out which protoc to use. If CAFFE2_CUSTOM_PROTOC_EXECUTABLE is set, we
|
||||
# assume the user knows what they're doing and we blindly use the specified
|
||||
# protoc. This is typically the case when cross-compiling where protoc must be
|
||||
# compiled for the host architecture and libprotobuf must be compiled for the
|
||||
# target architecture. If CAFFE2_CUSTOM_PROTOC_EXECUTABLE is NOT set, we use the
|
||||
# protoc target that is built as part of including the protobuf project.
|
||||
if(EXISTS "${CAFFE2_CUSTOM_PROTOC_EXECUTABLE}")
|
||||
set(CAFFE2_PROTOC_EXECUTABLE ${CAFFE2_CUSTOM_PROTOC_EXECUTABLE})
|
||||
else()
|
||||
set(CAFFE2_PROTOC_EXECUTABLE protobuf::protoc)
|
||||
endif()
|
||||
|
||||
################################################################################################
|
||||
# Modification of standard 'protobuf_generate_cpp()' with output dir parameter and python support
|
||||
# Usage:
|
||||
# caffe2_protobuf_generate_cpp_py(<srcs_var> <hdrs_var> <python_var> <proto_files>)
|
||||
# ##############################################################################
|
||||
# Modification of standard 'protobuf_generate_cpp()' with output dir parameter
|
||||
# and python support Usage: caffe2_protobuf_generate_cpp_py(<srcs_var>
|
||||
# <hdrs_var> <python_var> <proto_files>)
|
||||
function(caffe2_protobuf_generate_cpp_py srcs_var hdrs_var python_var)
|
||||
if(NOT ARGN)
|
||||
message(SEND_ERROR "Error: caffe_protobuf_generate_cpp_py() called without any proto files")
|
||||
message(
|
||||
SEND_ERROR
|
||||
"Error: caffe_protobuf_generate_cpp_py() called without any proto files"
|
||||
)
|
||||
return()
|
||||
endif()
|
||||
|
||||
@ -158,31 +168,34 @@ function(caffe2_protobuf_generate_cpp_py srcs_var hdrs_var python_var)
|
||||
# Add TORCH_API prefix to protobuf classes and methods in all cases
|
||||
set(DLLEXPORT_STR "dllexport_decl=TORCH_API:")
|
||||
|
||||
# Note: the following depends on PROTOBUF_PROTOC_EXECUTABLE. This
|
||||
# is done to make sure protoc is built before attempting to
|
||||
# generate sources if we're using protoc from the third_party
|
||||
# directory and are building it as part of the Caffe2 build. If
|
||||
# points to an existing path, it is a no-op.
|
||||
# Note: the following depends on PROTOBUF_PROTOC_EXECUTABLE. This is done to
|
||||
# make sure protoc is built before attempting to generate sources if we're
|
||||
# using protoc from the third_party directory and are building it as part of
|
||||
# the Caffe2 build. If points to an existing path, it is a no-op.
|
||||
|
||||
if(${CAFFE2_LINK_LOCAL_PROTOBUF})
|
||||
# We need to rewrite the pb.h files to route GetEmptyStringAlreadyInited
|
||||
# through our wrapper in proto_utils so the memory location test
|
||||
# is correct.
|
||||
# through our wrapper in proto_utils so the memory location test is
|
||||
# correct.
|
||||
add_custom_command(
|
||||
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${fil_we}.pb.cc"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/${fil_we}.pb.h"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/${fil_we}_pb2.py"
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_CURRENT_BINARY_DIR}"
|
||||
COMMAND ${CAFFE2_PROTOC_EXECUTABLE} -I${PROJECT_SOURCE_DIR} --cpp_out=${DLLEXPORT_STR}${PROJECT_BINARY_DIR} ${abs_fil}
|
||||
COMMAND ${CAFFE2_PROTOC_EXECUTABLE} -I${PROJECT_SOURCE_DIR} --python_out "${PROJECT_BINARY_DIR}" ${abs_fil}
|
||||
|
||||
# If we remove all reference to these pb.h files from external
|
||||
# libraries and binaries this rewrite can be removed.
|
||||
COMMAND ${CMAKE_COMMAND} -DFILENAME=${CMAKE_CURRENT_BINARY_DIR}/${fil_we}.pb.h -DNAMESPACES=caffe\;caffe2\;onnx\;torch -P ${PROJECT_SOURCE_DIR}/cmake/ProtoBufPatch.cmake
|
||||
|
||||
COMMAND ${CAFFE2_PROTOC_EXECUTABLE} -I${PROJECT_SOURCE_DIR}
|
||||
--cpp_out=${DLLEXPORT_STR}${PROJECT_BINARY_DIR} ${abs_fil}
|
||||
COMMAND ${CAFFE2_PROTOC_EXECUTABLE} -I${PROJECT_SOURCE_DIR} --python_out
|
||||
"${PROJECT_BINARY_DIR}" ${abs_fil}
|
||||
# If we remove all reference to these pb.h files from external libraries
|
||||
# and binaries this rewrite can be removed.
|
||||
COMMAND
|
||||
${CMAKE_COMMAND} -DFILENAME=${CMAKE_CURRENT_BINARY_DIR}/${fil_we}.pb.h
|
||||
-DNAMESPACES=caffe\;caffe2\;onnx\;torch -P
|
||||
${PROJECT_SOURCE_DIR}/cmake/ProtoBufPatch.cmake
|
||||
DEPENDS ${CAFFE2_PROTOC_EXECUTABLE} ${abs_fil}
|
||||
COMMENT "Running C++/Python protocol buffer compiler on ${fil}" VERBATIM )
|
||||
COMMENT "Running C++/Python protocol buffer compiler on ${fil}"
|
||||
VERBATIM)
|
||||
else()
|
||||
add_custom_command(
|
||||
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${fil_we}.pb.cc"
|
||||
@ -190,16 +203,29 @@ function(caffe2_protobuf_generate_cpp_py srcs_var hdrs_var python_var)
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/${fil_we}_pb2.py"
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_CURRENT_BINARY_DIR}"
|
||||
COMMAND ${CAFFE2_PROTOC_EXECUTABLE} -I${PROJECT_SOURCE_DIR} --cpp_out=${DLLEXPORT_STR}${PROJECT_BINARY_DIR} ${abs_fil}
|
||||
COMMAND ${CAFFE2_PROTOC_EXECUTABLE} -I${PROJECT_SOURCE_DIR} --python_out "${PROJECT_BINARY_DIR}" ${abs_fil}
|
||||
COMMAND ${CMAKE_COMMAND} -DFILENAME=${CMAKE_CURRENT_BINARY_DIR}/${fil_we}.pb.h -DNAMESPACES=caffe\;caffe2\;onnx\;torch -DSYSTEM_PROTOBUF=YES -P ${PROJECT_SOURCE_DIR}/cmake/ProtoBufPatch.cmake
|
||||
COMMAND ${CAFFE2_PROTOC_EXECUTABLE} -I${PROJECT_SOURCE_DIR}
|
||||
--cpp_out=${DLLEXPORT_STR}${PROJECT_BINARY_DIR} ${abs_fil}
|
||||
COMMAND ${CAFFE2_PROTOC_EXECUTABLE} -I${PROJECT_SOURCE_DIR} --python_out
|
||||
"${PROJECT_BINARY_DIR}" ${abs_fil}
|
||||
COMMAND
|
||||
${CMAKE_COMMAND} -DFILENAME=${CMAKE_CURRENT_BINARY_DIR}/${fil_we}.pb.h
|
||||
-DNAMESPACES=caffe\;caffe2\;onnx\;torch -DSYSTEM_PROTOBUF=YES -P
|
||||
${PROJECT_SOURCE_DIR}/cmake/ProtoBufPatch.cmake
|
||||
DEPENDS ${CAFFE2_PROTOC_EXECUTABLE} ${abs_fil}
|
||||
COMMENT "Running C++/Python protocol buffer compiler on ${fil}" VERBATIM )
|
||||
COMMENT "Running C++/Python protocol buffer compiler on ${fil}"
|
||||
VERBATIM)
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
set_source_files_properties(${${srcs_var}} ${${hdrs_var}} ${${python_var}} PROPERTIES GENERATED TRUE)
|
||||
set(${srcs_var} ${${srcs_var}} PARENT_SCOPE)
|
||||
set(${hdrs_var} ${${hdrs_var}} PARENT_SCOPE)
|
||||
set(${python_var} ${${python_var}} PARENT_SCOPE)
|
||||
set_source_files_properties(${${srcs_var}} ${${hdrs_var}} ${${python_var}}
|
||||
PROPERTIES GENERATED TRUE)
|
||||
set(${srcs_var}
|
||||
${${srcs_var}}
|
||||
PARENT_SCOPE)
|
||||
set(${hdrs_var}
|
||||
${${hdrs_var}}
|
||||
PARENT_SCOPE)
|
||||
set(${python_var}
|
||||
${${python_var}}
|
||||
PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
@ -1,48 +1,31 @@
|
||||
# CMake file to replace the string contents in ONNX, Caffe, and Caffe2 proto.
|
||||
# Usage example:
|
||||
# cmake -DFILENAME=caffe2.pb.h -DLOCAL_PROTOBUF=ON -P ProtoBufPatch.cmake
|
||||
# Usage example: cmake -DFILENAME=caffe2.pb.h -DLOCAL_PROTOBUF=ON -P
|
||||
# ProtoBufPatch.cmake
|
||||
|
||||
file(READ ${FILENAME} content)
|
||||
|
||||
if(NOT SYSTEM_PROTOBUF)
|
||||
# protobuf-3.6.0 pattern
|
||||
string(
|
||||
REPLACE
|
||||
"::google::protobuf::internal::GetEmptyStringAlreadyInited"
|
||||
"GetEmptyStringAlreadyInited"
|
||||
content
|
||||
"${content}")
|
||||
string(REPLACE "::google::protobuf::internal::GetEmptyStringAlreadyInited"
|
||||
"GetEmptyStringAlreadyInited" content "${content}")
|
||||
|
||||
# protobuf-3.8.0+ pattern
|
||||
string(
|
||||
REPLACE
|
||||
"::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited"
|
||||
"GetEmptyStringAlreadyInited"
|
||||
content
|
||||
"${content}")
|
||||
REPLACE "::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited"
|
||||
"GetEmptyStringAlreadyInited" content "${content}")
|
||||
|
||||
string(
|
||||
REPLACE
|
||||
"PROTOBUF_CONSTEXPR"
|
||||
""
|
||||
content
|
||||
"${content}")
|
||||
string(REPLACE "PROTOBUF_CONSTEXPR" "" content "${content}")
|
||||
|
||||
# https://github.com/protocolbuffers/protobuf/commit/0400cca3236de1ca303af38bf81eab332d042b7c
|
||||
# changes PROTOBUF_CONSTEXPR to constexpr, which breaks windows
|
||||
# build.
|
||||
# changes PROTOBUF_CONSTEXPR to constexpr, which breaks windows build.
|
||||
if(MSVC)
|
||||
string(
|
||||
REGEX REPLACE
|
||||
"static constexpr ([^ ]+) ([^ ]+) ="
|
||||
"static \\1 const \\2 ="
|
||||
content
|
||||
"${content}")
|
||||
string(REGEX REPLACE "static constexpr ([^ ]+) ([^ ]+) ="
|
||||
"static \\1 const \\2 =" content "${content}")
|
||||
endif()
|
||||
|
||||
foreach(ns ${NAMESPACES})
|
||||
# Insert "const ::std::string& GetEmptyStringAlreadyInited();" within
|
||||
# the namespace and make sure we only do it once in the file. Unfortunately
|
||||
# Insert "const ::std::string& GetEmptyStringAlreadyInited();" within the
|
||||
# namespace and make sure we only do it once in the file. Unfortunately
|
||||
# using string(REPLACE ...) doesn't work because it will replace at all
|
||||
# locations and there might be multiple declarations of the namespace
|
||||
# depending on how the proto is structured.
|
||||
@ -53,48 +36,47 @@ if(NOT SYSTEM_PROTOBUF)
|
||||
math(EXPR pos "${pos}+${search_len}")
|
||||
string(SUBSTRING "${content}" 0 ${pos} content_pre)
|
||||
string(SUBSTRING "${content}" ${pos} -1 content_post)
|
||||
string(
|
||||
CONCAT
|
||||
content
|
||||
"${content_pre}"
|
||||
" const ::std::string& GetEmptyStringAlreadyInited(); "
|
||||
"${content_post}")
|
||||
string(CONCAT content "${content_pre}"
|
||||
" const ::std::string& GetEmptyStringAlreadyInited(); "
|
||||
"${content_post}")
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
# The moving constructor is defined in the header file, which will cause
|
||||
# a link error that claims that the vftable is not found. Luckily, we
|
||||
# could move the definition into the source file to solve the problem.
|
||||
# The moving constructor is defined in the header file, which will cause a
|
||||
# link error that claims that the vftable is not found. Luckily, we could move
|
||||
# the definition into the source file to solve the problem.
|
||||
list(LENGTH NAMESPACES ns_count)
|
||||
if("${FILENAME}" MATCHES ".pb.h" AND ns_count EQUAL 1)
|
||||
string(REPLACE ".pb.h" ".pb.cc" SOURCE_FILENAME ${FILENAME})
|
||||
file(READ ${SOURCE_FILENAME} content_cc_origin)
|
||||
|
||||
string(REGEX MATCHALL "([a-zA-Z_]+)\\([a-zA-Z_]+&& from\\) noexcept[^}]*}" content_cc "${content}")
|
||||
string(REGEX MATCHALL "([a-zA-Z_]+)\\([a-zA-Z_]+&& from\\) noexcept[^}]*}"
|
||||
content_cc "${content}")
|
||||
string(REGEX REPLACE "};" "}\n" content_cc "${content_cc}")
|
||||
string(REGEX REPLACE "([a-zA-Z_]+)\\([a-zA-Z_]+&& from\\) noexcept" " \\1::\\1(\\1&& from) noexcept" content_cc "${content_cc}")
|
||||
set(content_cc "${content_cc_origin}\nnamespace ${NAMESPACES} {\n#if LANG_CXX11\n${content_cc}\n#endif\n}")
|
||||
string(REGEX
|
||||
REPLACE "([a-zA-Z_]+)\\([a-zA-Z_]+&& from\\) noexcept"
|
||||
" \\1::\\1(\\1&& from) noexcept" content_cc "${content_cc}")
|
||||
set(content_cc
|
||||
"${content_cc_origin}\nnamespace ${NAMESPACES} {\n#if LANG_CXX11\n${content_cc}\n#endif\n}"
|
||||
)
|
||||
|
||||
string(REGEX REPLACE "([a-zA-Z_]+)\\([a-zA-Z_]+&& from\\) noexcept([^}]*)}" "\\1(\\1&& from) noexcept;" content "${content}")
|
||||
string(REGEX REPLACE "([a-zA-Z_]+)\\([a-zA-Z_]+&& from\\) noexcept([^}]*)}"
|
||||
"\\1(\\1&& from) noexcept;" content "${content}")
|
||||
|
||||
file(WRITE ${SOURCE_FILENAME} "${content_cc}")
|
||||
endif()
|
||||
endif(NOT SYSTEM_PROTOBUF)
|
||||
|
||||
# constexpr int TensorBoundShape_DimType_DimType_ARRAYSIZE = TensorBoundShape_DimType_DimType_MAX + 1;
|
||||
# throws
|
||||
# error: more than one operator "+" matches these operands:
|
||||
# built-in operator "arithmetic + arithmetic"
|
||||
# function "c10::operator+(int, c10::BFloat16)"
|
||||
# function "c10::operator+(c10::BFloat16, int)"
|
||||
# function "c10::operator+(int, c10::Half)"
|
||||
# function "c10::operator+(c10::Half, int)"
|
||||
# operand types are: const caffe2::ExternalDataProto_SourceType + int
|
||||
# constexpr int TensorBoundShape_DimType_DimType_ARRAYSIZE =
|
||||
# TensorBoundShape_DimType_DimType_MAX + 1; throws error: more than one operator
|
||||
# "+" matches these operands: built-in operator "arithmetic + arithmetic"
|
||||
# function "c10::operator+(int, c10::BFloat16)" function
|
||||
# "c10::operator+(c10::BFloat16, int)" function "c10::operator+(int, c10::Half)"
|
||||
# function "c10::operator+(c10::Half, int)" operand types are: const
|
||||
# caffe2::ExternalDataProto_SourceType + int
|
||||
string(
|
||||
REGEX REPLACE
|
||||
"constexpr ([^ ]+) ([^ ]+_ARRAYSIZE) = ([^ ]+_MAX) \\+ 1;"
|
||||
"constexpr \\1 \\2 = static_cast<\\1>(\\3) + 1;"
|
||||
content
|
||||
"${content}")
|
||||
REGEX
|
||||
REPLACE "constexpr ([^ ]+) ([^ ]+_ARRAYSIZE) = ([^ ]+_MAX) \\+ 1;"
|
||||
"constexpr \\1 \\2 = static_cast<\\1>(\\3) + 1;" content "${content}")
|
||||
|
||||
file(WRITE ${FILENAME} "${content}")
|
||||
|
||||
@ -18,7 +18,8 @@ function(caffe2_print_configuration_summary)
|
||||
message(STATUS " Static LD flags : ${CMAKE_STATIC_LINKER_FLAGS}")
|
||||
message(STATUS " Module LD flags : ${CMAKE_MODULE_LINKER_FLAGS}")
|
||||
message(STATUS " Build type : ${CMAKE_BUILD_TYPE}")
|
||||
get_directory_property(tmp DIRECTORY ${PROJECT_SOURCE_DIR} COMPILE_DEFINITIONS)
|
||||
get_directory_property(tmp DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||
COMPILE_DEFINITIONS)
|
||||
message(STATUS " Compile definitions : ${tmp}")
|
||||
message(STATUS " CMAKE_PREFIX_PATH : ${CMAKE_PREFIX_PATH}")
|
||||
message(STATUS " CMAKE_INSTALL_PREFIX : ${CMAKE_INSTALL_PREFIX}")
|
||||
@ -26,7 +27,9 @@ function(caffe2_print_configuration_summary)
|
||||
message(STATUS "")
|
||||
|
||||
message(STATUS " TORCH_VERSION : ${TORCH_VERSION}")
|
||||
message(STATUS " BUILD_STATIC_RUNTIME_BENCHMARK: ${BUILD_STATIC_RUNTIME_BENCHMARK}")
|
||||
message(
|
||||
STATUS " BUILD_STATIC_RUNTIME_BENCHMARK: ${BUILD_STATIC_RUNTIME_BENCHMARK}"
|
||||
)
|
||||
message(STATUS " BUILD_BINARY : ${BUILD_BINARY}")
|
||||
message(STATUS " BUILD_CUSTOM_PROTOBUF : ${BUILD_CUSTOM_PROTOBUF}")
|
||||
if(${CAFFE2_LINK_LOCAL_PROTOBUF})
|
||||
@ -45,7 +48,10 @@ function(caffe2_print_configuration_summary)
|
||||
message(STATUS " Python site-package : ${Python_SITELIB}")
|
||||
endif()
|
||||
message(STATUS " BUILD_SHARED_LIBS : ${BUILD_SHARED_LIBS}")
|
||||
message(STATUS " CAFFE2_USE_MSVC_STATIC_RUNTIME : ${CAFFE2_USE_MSVC_STATIC_RUNTIME}")
|
||||
message(
|
||||
STATUS
|
||||
" CAFFE2_USE_MSVC_STATIC_RUNTIME : ${CAFFE2_USE_MSVC_STATIC_RUNTIME}"
|
||||
)
|
||||
message(STATUS " BUILD_TEST : ${BUILD_TEST}")
|
||||
message(STATUS " BUILD_JNI : ${BUILD_JNI}")
|
||||
message(STATUS " BUILD_MOBILE_AUTOGRAD : ${BUILD_MOBILE_AUTOGRAD}")
|
||||
@ -183,8 +189,11 @@ function(caffe2_print_configuration_summary)
|
||||
endif()
|
||||
message(STATUS " USE_VULKAN : ${USE_VULKAN}")
|
||||
if(${USE_VULKAN})
|
||||
message(STATUS " USE_VULKAN_FP16_INFERENCE : ${USE_VULKAN_FP16_INFERENCE}")
|
||||
message(STATUS " USE_VULKAN_RELAXED_PRECISION : ${USE_VULKAN_RELAXED_PRECISION}")
|
||||
message(
|
||||
STATUS " USE_VULKAN_FP16_INFERENCE : ${USE_VULKAN_FP16_INFERENCE}")
|
||||
message(
|
||||
STATUS
|
||||
" USE_VULKAN_RELAXED_PRECISION : ${USE_VULKAN_RELAXED_PRECISION}")
|
||||
endif()
|
||||
message(STATUS " USE_PROF : ${USE_PROF}")
|
||||
message(STATUS " USE_PYTORCH_QNNPACK : ${USE_PYTORCH_QNNPACK}")
|
||||
@ -202,7 +211,8 @@ function(caffe2_print_configuration_summary)
|
||||
endif()
|
||||
message(STATUS " Public Dependencies : ${Caffe2_PUBLIC_DEPENDENCY_LIBS}")
|
||||
message(STATUS " Private Dependencies : ${Caffe2_DEPENDENCY_LIBS}")
|
||||
message(STATUS " Public CUDA Deps. : ${Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS}")
|
||||
message(
|
||||
STATUS " Public CUDA Deps. : ${Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS}")
|
||||
message(STATUS " Private CUDA Deps. : ${Caffe2_CUDA_DEPENDENCY_LIBS}")
|
||||
# coreml
|
||||
message(STATUS " USE_COREML_DELEGATE : ${USE_COREML_DELEGATE}")
|
||||
|
||||
@ -21,15 +21,14 @@ if(ANDROID)
|
||||
message(FATAL_ERROR "ANDROID_NDK not set")
|
||||
endif()
|
||||
|
||||
set(GLSLC_PATH "${ANDROID_NDK}/shader-tools/${ANDROID_NDK_HOST_SYSTEM_NAME}/glslc")
|
||||
set(GLSLC_PATH
|
||||
"${ANDROID_NDK}/shader-tools/${ANDROID_NDK_HOST_SYSTEM_NAME}/glslc")
|
||||
else()
|
||||
find_program(
|
||||
GLSLC_PATH glslc
|
||||
PATHS
|
||||
ENV VULKAN_SDK
|
||||
PATHS ENV VULKAN_SDK
|
||||
PATHS "$ENV{VULKAN_SDK}/${CMAKE_HOST_SYSTEM_PROCESSOR}/bin"
|
||||
PATHS "$ENV{VULKAN_SDK}/bin"
|
||||
)
|
||||
PATHS "$ENV{VULKAN_SDK}/bin")
|
||||
|
||||
if(NOT GLSLC_PATH)
|
||||
message(FATAL_ERROR "USE_VULKAN glslc not found")
|
||||
@ -42,18 +41,18 @@ list(APPEND NEW_PYTHONPATH "${CMAKE_CURRENT_LIST_DIR}/..")
|
||||
set(ENV{PYTHONPATH} ${NEW_PYTHONPATH})
|
||||
execute_process(
|
||||
COMMAND
|
||||
"${Python_EXECUTABLE}"
|
||||
${CMAKE_CURRENT_LIST_DIR}/../tools/gen_vulkan_spv.py
|
||||
--glsl-path ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/vulkan/glsl
|
||||
--output-path ${VULKAN_GEN_OUTPUT_PATH}
|
||||
--glslc-path=${GLSLC_PATH}
|
||||
--tmp-dir-path=${CMAKE_BINARY_DIR}/vulkan/spv
|
||||
--env ${VULKAN_GEN_ARG_ENV}
|
||||
"${Python_EXECUTABLE}" ${CMAKE_CURRENT_LIST_DIR}/../tools/gen_vulkan_spv.py
|
||||
--glsl-path ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/vulkan/glsl
|
||||
--output-path ${VULKAN_GEN_OUTPUT_PATH} --glslc-path=${GLSLC_PATH}
|
||||
--tmp-dir-path=${CMAKE_BINARY_DIR}/vulkan/spv --env ${VULKAN_GEN_ARG_ENV}
|
||||
RESULT_VARIABLE error_code)
|
||||
set(ENV{PYTHONPATH} ${PYTHONPATH})
|
||||
|
||||
if(error_code)
|
||||
message(FATAL_ERROR "Failed to gen spv.h and spv.cpp with precompiled shaders for Vulkan backend")
|
||||
endif()
|
||||
if(error_code)
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"Failed to gen spv.h and spv.cpp with precompiled shaders for Vulkan backend"
|
||||
)
|
||||
endif()
|
||||
|
||||
set(vulkan_generated_cpp ${VULKAN_GEN_OUTPUT_PATH}/spv.cpp)
|
||||
|
||||
@ -8,18 +8,18 @@ if(ANDROID)
|
||||
endif()
|
||||
|
||||
# Vulkan from ANDROID_NDK
|
||||
set(VULKAN_INCLUDE_DIR "${ANDROID_NDK}/sources/third_party/vulkan/src/include")
|
||||
set(VULKAN_INCLUDE_DIR
|
||||
"${ANDROID_NDK}/sources/third_party/vulkan/src/include")
|
||||
message(STATUS "VULKAN_INCLUDE_DIR:${VULKAN_INCLUDE_DIR}")
|
||||
|
||||
set(VULKAN_ANDROID_NDK_WRAPPER_DIR "${ANDROID_NDK}/sources/third_party/vulkan/src/common")
|
||||
message(STATUS "Vulkan_ANDROID_NDK_WRAPPER_DIR:${VULKAN_ANDROID_NDK_WRAPPER_DIR}")
|
||||
set(VULKAN_ANDROID_NDK_WRAPPER_DIR
|
||||
"${ANDROID_NDK}/sources/third_party/vulkan/src/common")
|
||||
message(
|
||||
STATUS "Vulkan_ANDROID_NDK_WRAPPER_DIR:${VULKAN_ANDROID_NDK_WRAPPER_DIR}")
|
||||
set(VULKAN_WRAPPER_DIR "${VULKAN_ANDROID_NDK_WRAPPER_DIR}")
|
||||
|
||||
add_library(
|
||||
VulkanWrapper
|
||||
STATIC
|
||||
${VULKAN_WRAPPER_DIR}/vulkan_wrapper.h
|
||||
${VULKAN_WRAPPER_DIR}/vulkan_wrapper.cpp)
|
||||
add_library(VulkanWrapper STATIC ${VULKAN_WRAPPER_DIR}/vulkan_wrapper.h
|
||||
${VULKAN_WRAPPER_DIR}/vulkan_wrapper.cpp)
|
||||
|
||||
target_include_directories(VulkanWrapper PUBLIC .)
|
||||
target_include_directories(VulkanWrapper PUBLIC "${VULKAN_INCLUDE_DIR}")
|
||||
@ -33,7 +33,10 @@ else()
|
||||
find_package(Vulkan)
|
||||
|
||||
if(NOT Vulkan_FOUND)
|
||||
message(FATAL_ERROR "USE_VULKAN requires either Vulkan installed on system path or environment var VULKAN_SDK set.")
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"USE_VULKAN requires either Vulkan installed on system path or environment var VULKAN_SDK set."
|
||||
)
|
||||
endif()
|
||||
|
||||
list(APPEND Vulkan_INCLUDES ${Vulkan_INCLUDE_DIRS})
|
||||
|
||||
@ -1,11 +1,14 @@
|
||||
if(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
|
||||
message(FATAL_ERROR "Cannot find install manifest: @CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"Cannot find install manifest: @CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt"
|
||||
)
|
||||
endif(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
|
||||
|
||||
if(NOT DEFINED CMAKE_INSTALL_PREFIX)
|
||||
set (CMAKE_INSTALL_PREFIX "@CMAKE_INSTALL_PREFIX@")
|
||||
set(CMAKE_INSTALL_PREFIX "@CMAKE_INSTALL_PREFIX@")
|
||||
endif()
|
||||
message(${CMAKE_INSTALL_PREFIX})
|
||||
message(${CMAKE_INSTALL_PREFIX})
|
||||
|
||||
file(READ "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt" files)
|
||||
string(REGEX REPLACE "\n" ";" files "${files}")
|
||||
@ -13,10 +16,10 @@ foreach(file ${files})
|
||||
message(STATUS "Uninstalling $ENV{DESTDIR}${file}")
|
||||
if(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}")
|
||||
exec_program(
|
||||
"@CMAKE_COMMAND@" ARGS "-E remove \"$ENV{DESTDIR}${file}\""
|
||||
"@CMAKE_COMMAND@" ARGS
|
||||
"-E remove \"$ENV{DESTDIR}${file}\""
|
||||
OUTPUT_VARIABLE rm_out
|
||||
RETURN_VALUE rm_retval
|
||||
)
|
||||
RETURN_VALUE rm_retval)
|
||||
if(NOT "${rm_retval}" STREQUAL 0)
|
||||
message(FATAL_ERROR "Problem when removing $ENV{DESTDIR}${file}")
|
||||
endif(NOT "${rm_retval}" STREQUAL 0)
|
||||
|
||||
249
cmake/iOS.cmake
249
cmake/iOS.cmake
@ -1,32 +1,36 @@
|
||||
# This file is based off of the Platform/Darwin.cmake and Platform/UnixPaths.cmake
|
||||
# files which are included with CMake 2.8.4
|
||||
# It has been altered for iOS development
|
||||
# This file is based off of the Platform/Darwin.cmake and
|
||||
# Platform/UnixPaths.cmake files which are included with CMake 2.8.4 It has been
|
||||
# altered for iOS development
|
||||
|
||||
# Options:
|
||||
#
|
||||
# IOS_PLATFORM = OS (default) or SIMULATOR
|
||||
# This decides if SDKS will be selected from the iPhoneOS.platform or iPhoneSimulator.platform folders
|
||||
# OS - the default, used to build for iPhone and iPad physical devices, which have an arm arch.
|
||||
# SIMULATOR - used to build for the Simulator platforms, which now uses arm64 arch.
|
||||
# IOS_PLATFORM = OS (default) or SIMULATOR This decides if SDKS will be selected
|
||||
# from the iPhoneOS.platform or iPhoneSimulator.platform folders OS - the
|
||||
# default, used to build for iPhone and iPad physical devices, which have an arm
|
||||
# arch. SIMULATOR - used to build for the Simulator platforms, which now uses
|
||||
# arm64 arch.
|
||||
#
|
||||
# CMAKE_IOS_DEVELOPER_ROOT = automatic(default) or /path/to/platform/Developer folder
|
||||
# By default this location is automatcially chosen based on the IOS_PLATFORM value above.
|
||||
# If set manually, it will override the default location and force the user of a particular Developer Platform
|
||||
# CMAKE_IOS_DEVELOPER_ROOT = automatic(default) or /path/to/platform/Developer
|
||||
# folder By default this location is automatcially chosen based on the
|
||||
# IOS_PLATFORM value above. If set manually, it will override the default
|
||||
# location and force the user of a particular Developer Platform
|
||||
#
|
||||
# CMAKE_IOS_SDK_ROOT = automatic(default) or /path/to/platform/Developer/SDKs/SDK folder
|
||||
# By default this location is automatcially chosen based on the CMAKE_IOS_DEVELOPER_ROOT value.
|
||||
# In this case it will always be the most up-to-date SDK found in the CMAKE_IOS_DEVELOPER_ROOT path.
|
||||
# If set manually, this will force the use of a specific SDK version
|
||||
# CMAKE_IOS_SDK_ROOT = automatic(default) or
|
||||
# /path/to/platform/Developer/SDKs/SDK folder By default this location is
|
||||
# automatcially chosen based on the CMAKE_IOS_DEVELOPER_ROOT value. In this case
|
||||
# it will always be the most up-to-date SDK found in the
|
||||
# CMAKE_IOS_DEVELOPER_ROOT path. If set manually, this will force the use of a
|
||||
# specific SDK version
|
||||
|
||||
# Macros:
|
||||
#
|
||||
# set_xcode_property (TARGET XCODE_PROPERTY XCODE_VALUE)
|
||||
# A convenience macro for setting xcode specific properties on targets
|
||||
# example: set_xcode_property (myioslib IPHONEOS_DEPLOYMENT_TARGET "3.1")
|
||||
# set_xcode_property (TARGET XCODE_PROPERTY XCODE_VALUE) A convenience macro for
|
||||
# setting xcode specific properties on targets example: set_xcode_property
|
||||
# (myioslib IPHONEOS_DEPLOYMENT_TARGET "3.1")
|
||||
#
|
||||
# find_host_package (PROGRAM ARGS)
|
||||
# A macro used to find executable programs on the host system, not within the iOS environment.
|
||||
# Thanks to the android-cmake project for providing the command
|
||||
# find_host_package (PROGRAM ARGS) A macro used to find executable programs on
|
||||
# the host system, not within the iOS environment. Thanks to the android-cmake
|
||||
# project for providing the command
|
||||
|
||||
# Standard settings
|
||||
set(CMAKE_SYSTEM_NAME Darwin)
|
||||
@ -36,51 +40,68 @@ set(APPLE True)
|
||||
set(IOS True)
|
||||
|
||||
# Required as of cmake 2.8.10
|
||||
set(CMAKE_OSX_DEPLOYMENT_TARGET "" CACHE STRING "Force unset of the deployment target for iOS" FORCE)
|
||||
set(CMAKE_OSX_DEPLOYMENT_TARGET
|
||||
""
|
||||
CACHE STRING "Force unset of the deployment target for iOS" FORCE)
|
||||
|
||||
# Determine the cmake host system version so we know where to find the iOS SDKs
|
||||
find_program(CMAKE_UNAME uname /bin /usr/bin /usr/local/bin)
|
||||
if(CMAKE_UNAME)
|
||||
execute_process(COMMAND uname -r OUTPUT_VARIABLE CMAKE_HOST_SYSTEM_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
string(REGEX REPLACE "^([0-9]+)\\.([0-9]+).*$" "\\1" DARWIN_MAJOR_VERSION "${CMAKE_HOST_SYSTEM_VERSION}")
|
||||
execute_process(
|
||||
COMMAND uname -r
|
||||
OUTPUT_VARIABLE CMAKE_HOST_SYSTEM_VERSION
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
string(REGEX REPLACE "^([0-9]+)\\.([0-9]+).*$" "\\1" DARWIN_MAJOR_VERSION
|
||||
"${CMAKE_HOST_SYSTEM_VERSION}")
|
||||
endif(CMAKE_UNAME)
|
||||
|
||||
# Force the compilers to gcc for iOS
|
||||
set(CMAKE_C_COMPILER /usr/bin/gcc CACHE STRING "")
|
||||
set(CMAKE_CXX_COMPILER /usr/bin/g++ CACHE STRING "")
|
||||
set(CMAKE_AR ar CACHE FILEPATH "" FORCE)
|
||||
set(CMAKE_RANLIB ranlib CACHE FILEPATH "" FORCE)
|
||||
set(PKG_CONFIG_EXECUTABLE pkg-config CACHE FILEPATH "" FORCE)
|
||||
set(CMAKE_C_COMPILER
|
||||
/usr/bin/gcc
|
||||
CACHE STRING "")
|
||||
set(CMAKE_CXX_COMPILER
|
||||
/usr/bin/g++
|
||||
CACHE STRING "")
|
||||
set(CMAKE_AR
|
||||
ar
|
||||
CACHE FILEPATH "" FORCE)
|
||||
set(CMAKE_RANLIB
|
||||
ranlib
|
||||
CACHE FILEPATH "" FORCE)
|
||||
set(PKG_CONFIG_EXECUTABLE
|
||||
pkg-config
|
||||
CACHE FILEPATH "" FORCE)
|
||||
|
||||
# Setup iOS platform unless specified manually with IOS_PLATFORM
|
||||
if(NOT IOS_PLATFORM)
|
||||
set(IOS_PLATFORM "OS")
|
||||
set(IOS_PLATFORM "OS")
|
||||
endif(NOT IOS_PLATFORM)
|
||||
set(IOS_PLATFORM ${IOS_PLATFORM} CACHE STRING "Type of iOS Platform")
|
||||
set(IOS_PLATFORM
|
||||
${IOS_PLATFORM}
|
||||
CACHE STRING "Type of iOS Platform")
|
||||
|
||||
# Check the platform selection and setup for developer root
|
||||
if(${IOS_PLATFORM} STREQUAL "OS")
|
||||
set(IOS_PLATFORM_LOCATION "iPhoneOS.platform")
|
||||
set(XCODE_IOS_PLATFORM iphoneos)
|
||||
set(IOS_PLATFORM_LOCATION "iPhoneOS.platform")
|
||||
set(XCODE_IOS_PLATFORM iphoneos)
|
||||
|
||||
# This causes the installers to properly locate the output libraries
|
||||
set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-iphoneos")
|
||||
# This causes the installers to properly locate the output libraries
|
||||
set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-iphoneos")
|
||||
elseif(${IOS_PLATFORM} STREQUAL "SIMULATOR")
|
||||
set(IOS_PLATFORM_LOCATION "iPhoneSimulator.platform")
|
||||
set(XCODE_IOS_PLATFORM iphonesimulator)
|
||||
set(IOS_PLATFORM_LOCATION "iPhoneSimulator.platform")
|
||||
set(XCODE_IOS_PLATFORM iphonesimulator)
|
||||
|
||||
# This causes the installers to properly locate the output libraries
|
||||
set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-iphonesimulator")
|
||||
# This causes the installers to properly locate the output libraries
|
||||
set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-iphonesimulator")
|
||||
elseif(${IOS_PLATFORM} STREQUAL "WATCHOS")
|
||||
set(IOS_PLATFORM_LOCATION "WatchOS.platform")
|
||||
set(XCODE_IOS_PLATFORM watchos)
|
||||
set(IOS_PLATFORM_LOCATION "WatchOS.platform")
|
||||
set(XCODE_IOS_PLATFORM watchos)
|
||||
|
||||
# This causes the installers to properly locate the output libraries
|
||||
set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-watchos")
|
||||
# This causes the installers to properly locate the output libraries
|
||||
set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-watchos")
|
||||
else(${IOS_PLATFORM} STREQUAL "OS")
|
||||
message(FATAL_ERROR
|
||||
"Unsupported IOS_PLATFORM value selected. "
|
||||
"Please choose OS, SIMULATOR, or WATCHOS.")
|
||||
message(FATAL_ERROR "Unsupported IOS_PLATFORM value selected. "
|
||||
"Please choose OS, SIMULATOR, or WATCHOS.")
|
||||
endif()
|
||||
|
||||
# All iOS/Darwin specific settings - some may be redundant
|
||||
@ -93,87 +114,115 @@ set(CMAKE_DL_LIBS "")
|
||||
|
||||
set(CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG "-compatibility_version ")
|
||||
set(CMAKE_C_OSX_CURRENT_VERSION_FLAG "-current_version ")
|
||||
set(CMAKE_CXX_OSX_COMPATIBILITY_VERSION_FLAG "${CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG}")
|
||||
set(CMAKE_CXX_OSX_COMPATIBILITY_VERSION_FLAG
|
||||
"${CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG}")
|
||||
set(CMAKE_CXX_OSX_CURRENT_VERSION_FLAG "${CMAKE_C_OSX_CURRENT_VERSION_FLAG}")
|
||||
|
||||
if(IOS_DEPLOYMENT_TARGET)
|
||||
set(XCODE_IOS_PLATFORM_VERSION_FLAGS "-m${XCODE_IOS_PLATFORM}-version-min=${IOS_DEPLOYMENT_TARGET}")
|
||||
set(XCODE_IOS_PLATFORM_VERSION_FLAGS
|
||||
"-m${XCODE_IOS_PLATFORM}-version-min=${IOS_DEPLOYMENT_TARGET}")
|
||||
endif()
|
||||
|
||||
# Hidden visibility is required for cxx on iOS
|
||||
set(CMAKE_C_FLAGS_INIT "${XCODE_IOS_PLATFORM_VERSION_FLAGS}")
|
||||
set(CMAKE_CXX_FLAGS_INIT "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -fvisibility-inlines-hidden")
|
||||
set(CMAKE_CXX_FLAGS_INIT
|
||||
"${XCODE_IOS_PLATFORM_VERSION_FLAGS} -fvisibility-inlines-hidden")
|
||||
|
||||
set(CMAKE_C_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_C_LINK_FLAGS}")
|
||||
set(CMAKE_CXX_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_CXX_LINK_FLAGS}")
|
||||
set(CMAKE_C_LINK_FLAGS
|
||||
"${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_C_LINK_FLAGS}"
|
||||
)
|
||||
set(CMAKE_CXX_LINK_FLAGS
|
||||
"${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_CXX_LINK_FLAGS}"
|
||||
)
|
||||
|
||||
set(CMAKE_PLATFORM_HAS_INSTALLNAME 1)
|
||||
set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-dynamiclib -headerpad_max_install_names")
|
||||
set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS
|
||||
"-dynamiclib -headerpad_max_install_names")
|
||||
set(CMAKE_SHARED_MODULE_CREATE_C_FLAGS "-bundle -headerpad_max_install_names")
|
||||
set(CMAKE_SHARED_MODULE_LOADER_C_FLAG "-Wl,-bundle_loader,")
|
||||
set(CMAKE_SHARED_MODULE_LOADER_CXX_FLAG "-Wl,-bundle_loader,")
|
||||
set(CMAKE_FIND_LIBRARY_SUFFIXES ".dylib" ".so" ".a")
|
||||
|
||||
# hack: if a new cmake (which uses CMAKE_INSTALL_NAME_TOOL) runs on an old build tree
|
||||
# (where install_name_tool was hardcoded) and where CMAKE_INSTALL_NAME_TOOL isn't in the cache
|
||||
# and still cmake didn't fail in CMakeFindBinUtils.cmake (because it isn't rerun)
|
||||
# hardcode CMAKE_INSTALL_NAME_TOOL here to install_name_tool, so it behaves as it did before, Alex
|
||||
# hack: if a new cmake (which uses CMAKE_INSTALL_NAME_TOOL) runs on an old build
|
||||
# tree (where install_name_tool was hardcoded) and where CMAKE_INSTALL_NAME_TOOL
|
||||
# isn't in the cache and still cmake didn't fail in CMakeFindBinUtils.cmake
|
||||
# (because it isn't rerun) hardcode CMAKE_INSTALL_NAME_TOOL here to
|
||||
# install_name_tool, so it behaves as it did before, Alex
|
||||
if(NOT CMAKE_INSTALL_NAME_TOOL)
|
||||
find_program(CMAKE_INSTALL_NAME_TOOL install_name_tool)
|
||||
find_program(CMAKE_INSTALL_NAME_TOOL install_name_tool)
|
||||
endif(NOT CMAKE_INSTALL_NAME_TOOL)
|
||||
|
||||
# Setup iOS deployment target
|
||||
set(IOS_DEPLOYMENT_TARGET ${IOS_DEPLOYMENT_TARGET} CACHE STRING "Minimum iOS version")
|
||||
set(IOS_DEPLOYMENT_TARGET
|
||||
${IOS_DEPLOYMENT_TARGET}
|
||||
CACHE STRING "Minimum iOS version")
|
||||
|
||||
# Setup iOS developer location unless specified manually with CMAKE_IOS_DEVELOPER_ROOT
|
||||
# Note Xcode 4.3 changed the installation location, choose the most recent one available
|
||||
# Setup iOS developer location unless specified manually with
|
||||
# CMAKE_IOS_DEVELOPER_ROOT Note Xcode 4.3 changed the installation location,
|
||||
# choose the most recent one available
|
||||
execute_process(
|
||||
COMMAND /usr/bin/xcode-select -print-path
|
||||
OUTPUT_VARIABLE CMAKE_XCODE_DEVELOPER_DIR
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||
)
|
||||
set(XCODE_POST_43_ROOT "${CMAKE_XCODE_DEVELOPER_DIR}/Platforms/${IOS_PLATFORM_LOCATION}/Developer")
|
||||
COMMAND /usr/bin/xcode-select -print-path
|
||||
OUTPUT_VARIABLE CMAKE_XCODE_DEVELOPER_DIR
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
set(XCODE_POST_43_ROOT
|
||||
"${CMAKE_XCODE_DEVELOPER_DIR}/Platforms/${IOS_PLATFORM_LOCATION}/Developer")
|
||||
set(XCODE_PRE_43_ROOT "/Developer/Platforms/${IOS_PLATFORM_LOCATION}/Developer")
|
||||
if(NOT CMAKE_IOS_DEVELOPER_ROOT)
|
||||
if(EXISTS ${XCODE_POST_43_ROOT})
|
||||
set(CMAKE_IOS_DEVELOPER_ROOT ${XCODE_POST_43_ROOT})
|
||||
elseif(EXISTS ${XCODE_PRE_43_ROOT})
|
||||
set(CMAKE_IOS_DEVELOPER_ROOT ${XCODE_PRE_43_ROOT})
|
||||
endif(EXISTS ${XCODE_POST_43_ROOT})
|
||||
if(EXISTS ${XCODE_POST_43_ROOT})
|
||||
set(CMAKE_IOS_DEVELOPER_ROOT ${XCODE_POST_43_ROOT})
|
||||
elseif(EXISTS ${XCODE_PRE_43_ROOT})
|
||||
set(CMAKE_IOS_DEVELOPER_ROOT ${XCODE_PRE_43_ROOT})
|
||||
endif(EXISTS ${XCODE_POST_43_ROOT})
|
||||
endif(NOT CMAKE_IOS_DEVELOPER_ROOT)
|
||||
set(CMAKE_IOS_DEVELOPER_ROOT ${CMAKE_IOS_DEVELOPER_ROOT} CACHE PATH "Location of iOS Platform")
|
||||
set(CMAKE_IOS_DEVELOPER_ROOT
|
||||
${CMAKE_IOS_DEVELOPER_ROOT}
|
||||
CACHE PATH "Location of iOS Platform")
|
||||
|
||||
# Find and use the most recent iOS sdk unless specified manually with CMAKE_IOS_SDK_ROOT
|
||||
# Find and use the most recent iOS sdk unless specified manually with
|
||||
# CMAKE_IOS_SDK_ROOT
|
||||
if(NOT CMAKE_IOS_SDK_ROOT)
|
||||
file(GLOB _CMAKE_IOS_SDKS "${CMAKE_IOS_DEVELOPER_ROOT}/SDKs/*")
|
||||
if(_CMAKE_IOS_SDKS)
|
||||
list(SORT _CMAKE_IOS_SDKS)
|
||||
list(REVERSE _CMAKE_IOS_SDKS)
|
||||
list(GET _CMAKE_IOS_SDKS 0 CMAKE_IOS_SDK_ROOT)
|
||||
else(_CMAKE_IOS_SDKS)
|
||||
message(FATAL_ERROR "No iOS SDK's found in default search path ${CMAKE_IOS_DEVELOPER_ROOT}. Manually set CMAKE_IOS_SDK_ROOT or install the iOS SDK.")
|
||||
endif(_CMAKE_IOS_SDKS)
|
||||
message(STATUS "Toolchain using default iOS SDK: ${CMAKE_IOS_SDK_ROOT}")
|
||||
file(GLOB _CMAKE_IOS_SDKS "${CMAKE_IOS_DEVELOPER_ROOT}/SDKs/*")
|
||||
if(_CMAKE_IOS_SDKS)
|
||||
list(SORT _CMAKE_IOS_SDKS)
|
||||
list(REVERSE _CMAKE_IOS_SDKS)
|
||||
list(GET _CMAKE_IOS_SDKS 0 CMAKE_IOS_SDK_ROOT)
|
||||
else(_CMAKE_IOS_SDKS)
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"No iOS SDK's found in default search path ${CMAKE_IOS_DEVELOPER_ROOT}. Manually set CMAKE_IOS_SDK_ROOT or install the iOS SDK."
|
||||
)
|
||||
endif(_CMAKE_IOS_SDKS)
|
||||
message(STATUS "Toolchain using default iOS SDK: ${CMAKE_IOS_SDK_ROOT}")
|
||||
endif(NOT CMAKE_IOS_SDK_ROOT)
|
||||
set(CMAKE_IOS_SDK_ROOT ${CMAKE_IOS_SDK_ROOT} CACHE PATH "Location of the selected iOS SDK")
|
||||
set(CMAKE_IOS_SDK_ROOT
|
||||
${CMAKE_IOS_SDK_ROOT}
|
||||
CACHE PATH "Location of the selected iOS SDK")
|
||||
|
||||
# Set the sysroot default to the most recent SDK
|
||||
set(CMAKE_OSX_SYSROOT ${CMAKE_IOS_SDK_ROOT} CACHE PATH "Sysroot used for iOS support")
|
||||
set(CMAKE_OSX_SYSROOT
|
||||
${CMAKE_IOS_SDK_ROOT}
|
||||
CACHE PATH "Sysroot used for iOS support")
|
||||
|
||||
# set the architecture for iOS
|
||||
if(IOS_PLATFORM STREQUAL "OS")
|
||||
set(DEFAULT_IOS_ARCH "arm64")
|
||||
set(DEFAULT_IOS_ARCH "arm64")
|
||||
elseif(IOS_PLATFORM STREQUAL "SIMULATOR")
|
||||
set(DEFAULT_IOS_ARCH "arm64")
|
||||
set(DEFAULT_IOS_ARCH "arm64")
|
||||
elseif(IOS_PLATFORM STREQUAL "WATCHOS")
|
||||
set(DEFAULT_IOS_ARCH "armv7k;arm64_32")
|
||||
set(DEFAULT_IOS_ARCH "armv7k;arm64_32")
|
||||
endif()
|
||||
|
||||
set(IOS_ARCH ${DEFAULT_IOS_ARCH} CACHE STRING "Build architecture for iOS")
|
||||
set(CMAKE_OSX_ARCHITECTURES ${IOS_ARCH} CACHE STRING "Build architecture for iOS")
|
||||
set(IOS_ARCH
|
||||
${DEFAULT_IOS_ARCH}
|
||||
CACHE STRING "Build architecture for iOS")
|
||||
set(CMAKE_OSX_ARCHITECTURES
|
||||
${IOS_ARCH}
|
||||
CACHE STRING "Build architecture for iOS")
|
||||
|
||||
# Set the find root to the iOS developer roots and to user defined paths
|
||||
set(CMAKE_FIND_ROOT_PATH ${CMAKE_IOS_DEVELOPER_ROOT} ${CMAKE_IOS_SDK_ROOT} ${CMAKE_PREFIX_PATH} CACHE STRING "iOS find search path root")
|
||||
set(CMAKE_FIND_ROOT_PATH
|
||||
${CMAKE_IOS_DEVELOPER_ROOT} ${CMAKE_IOS_SDK_ROOT} ${CMAKE_PREFIX_PATH}
|
||||
CACHE STRING "iOS find search path root")
|
||||
|
||||
# default to searching for frameworks first
|
||||
set(CMAKE_FIND_FRAMEWORK FIRST)
|
||||
@ -182,8 +231,7 @@ set(CMAKE_FIND_FRAMEWORK FIRST)
|
||||
set(CMAKE_SYSTEM_FRAMEWORK_PATH
|
||||
${CMAKE_IOS_SDK_ROOT}/System/Library/Frameworks
|
||||
${CMAKE_IOS_SDK_ROOT}/System/Library/PrivateFrameworks
|
||||
${CMAKE_IOS_SDK_ROOT}/Developer/Library/Frameworks
|
||||
)
|
||||
${CMAKE_IOS_SDK_ROOT}/Developer/Library/Frameworks)
|
||||
|
||||
# only search the iOS sdks, not the remainder of the host filesystem
|
||||
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY)
|
||||
@ -192,20 +240,21 @@ set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
|
||||
|
||||
# This little macro lets you set any XCode specific property
|
||||
macro(set_xcode_property TARGET XCODE_PROPERTY XCODE_VALUE)
|
||||
set_property(TARGET ${TARGET} PROPERTY XCODE_ATTRIBUTE_${XCODE_PROPERTY} ${XCODE_VALUE})
|
||||
set_property(TARGET ${TARGET} PROPERTY XCODE_ATTRIBUTE_${XCODE_PROPERTY}
|
||||
${XCODE_VALUE})
|
||||
endmacro(set_xcode_property)
|
||||
|
||||
# This macro lets you find executable programs on the host system
|
||||
macro(find_host_package)
|
||||
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
|
||||
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER)
|
||||
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER)
|
||||
set(IOS FALSE)
|
||||
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
|
||||
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER)
|
||||
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER)
|
||||
set(IOS FALSE)
|
||||
|
||||
find_package(${ARGN})
|
||||
find_package(${ARGN})
|
||||
|
||||
set(IOS TRUE)
|
||||
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY)
|
||||
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
|
||||
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
|
||||
set(IOS TRUE)
|
||||
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY)
|
||||
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
|
||||
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
|
||||
endmacro(find_host_package)
|
||||
|
||||
@ -1,34 +1,47 @@
|
||||
# Build with Compute Library backend for the Arm architecture
|
||||
# Note: Compute Library is available from: https://github.com/ARM-software/ComputeLibrary
|
||||
# and must be built separately. The location of the Compute Library build
|
||||
# must be set with the env var ACL_ROOT_DIR. This path will be checked later
|
||||
# as part of FindACL.cmake in oneDNN.
|
||||
# Build with Compute Library backend for the Arm architecture Note: Compute
|
||||
# Library is available from: https://github.com/ARM-software/ComputeLibrary and
|
||||
# must be built separately. The location of the Compute Library build must be
|
||||
# set with the env var ACL_ROOT_DIR. This path will be checked later as part of
|
||||
# FindACL.cmake in oneDNN.
|
||||
|
||||
if(NOT USE_MKLDNN_ACL)
|
||||
RETURN()
|
||||
return()
|
||||
endif()
|
||||
|
||||
set(DNNL_AARCH64_USE_ACL ON CACHE BOOL "" FORCE)
|
||||
set(DNNL_AARCH64_USE_ACL
|
||||
ON
|
||||
CACHE BOOL "" FORCE)
|
||||
|
||||
# Check the Compute Library version number.
|
||||
# Note: oneDNN / MKL-DNN v2.2 onwards will check the Compute Library version
|
||||
# the version check here can be removed once PyTorch transitions to v2.2.
|
||||
# Check the Compute Library version number. Note: oneDNN / MKL-DNN v2.2 onwards
|
||||
# will check the Compute Library version the version check here can be removed
|
||||
# once PyTorch transitions to v2.2.
|
||||
set(ACL_MINIMUM_VERSION "21.02")
|
||||
|
||||
file(GLOB_RECURSE ACL_VERSION_FILE $ENV{ACL_ROOT_DIR}/*/arm_compute_version.embed)
|
||||
file(GLOB_RECURSE ACL_VERSION_FILE
|
||||
$ENV{ACL_ROOT_DIR}/*/arm_compute_version.embed)
|
||||
|
||||
if("${ACL_VERSION_FILE}" STREQUAL "")
|
||||
message(WARNING "Build may fail: Could not determine ACL version (minimum required is ${ACL_MINIMUM_VERSION})")
|
||||
message(
|
||||
WARNING
|
||||
"Build may fail: Could not determine ACL version (minimum required is ${ACL_MINIMUM_VERSION})"
|
||||
)
|
||||
else()
|
||||
file(READ ${ACL_VERSION_FILE} ACL_VERSION_STRING)
|
||||
string(REGEX MATCH "v([0-9]+\\.[0-9]+)" ACL_VERSION "${ACL_VERSION_STRING}")
|
||||
set(ACL_VERSION "${CMAKE_MATCH_1}")
|
||||
|
||||
if("${ACL_VERSION}" VERSION_EQUAL "0.0")
|
||||
# Unreleased ACL versions come with version string "v0.0-unreleased", and may not be compatible with oneDNN.
|
||||
# It is recommended to use the latest release of ACL.
|
||||
message(WARNING "Build may fail: Using unreleased ACL version (minimum required is ${ACL_MINIMUM_VERSION})")
|
||||
# Unreleased ACL versions come with version string "v0.0-unreleased", and
|
||||
# may not be compatible with oneDNN. It is recommended to use the latest
|
||||
# release of ACL.
|
||||
message(
|
||||
WARNING
|
||||
"Build may fail: Using unreleased ACL version (minimum required is ${ACL_MINIMUM_VERSION})"
|
||||
)
|
||||
elseif(${ACL_VERSION} VERSION_LESS ${ACL_MINIMUM_VERSION})
|
||||
message(FATAL_ERROR "Detected ACL version ${ACL_VERSION}, but minimum required is ${ACL_MINIMUM_VERSION}")
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"Detected ACL version ${ACL_VERSION}, but minimum required is ${ACL_MINIMUM_VERSION}"
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@ -1,16 +1,16 @@
|
||||
set(PYTORCH_FOUND_HIP FALSE)
|
||||
|
||||
# If ROCM_PATH is set, assume intention is to compile with
|
||||
# ROCm support and error out if the ROCM_PATH does not exist.
|
||||
# Else ROCM_PATH does not exist, assume a default of /opt/rocm
|
||||
# In the latter case, if /opt/rocm does not exist emit status
|
||||
# message and return.
|
||||
# If ROCM_PATH is set, assume intention is to compile with ROCm support and
|
||||
# error out if the ROCM_PATH does not exist. Else ROCM_PATH does not exist,
|
||||
# assume a default of /opt/rocm In the latter case, if /opt/rocm does not exist
|
||||
# emit status message and return.
|
||||
if(DEFINED ENV{ROCM_PATH})
|
||||
set(ROCM_PATH $ENV{ROCM_PATH})
|
||||
if(NOT EXISTS ${ROCM_PATH})
|
||||
message(FATAL_ERROR
|
||||
"ROCM_PATH environment variable is set to ${ROCM_PATH} but does not exist.\n"
|
||||
"Set a valid ROCM_PATH or unset ROCM_PATH environment variable to fix.")
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"ROCM_PATH environment variable is set to ${ROCM_PATH} but does not exist.\n"
|
||||
"Set a valid ROCM_PATH or unset ROCM_PATH environment variable to fix.")
|
||||
endif()
|
||||
else()
|
||||
if(UNIX)
|
||||
@ -19,7 +19,8 @@ else()
|
||||
set(ROCM_PATH C:/opt/rocm)
|
||||
endif()
|
||||
if(NOT EXISTS ${ROCM_PATH})
|
||||
message(STATUS
|
||||
message(
|
||||
STATUS
|
||||
"ROCM_PATH environment variable is not set and ${ROCM_PATH} does not exist.\n"
|
||||
"Building without ROCm support.")
|
||||
return()
|
||||
@ -46,12 +47,15 @@ endif()
|
||||
|
||||
torch_hip_get_arch_list(PYTORCH_ROCM_ARCH)
|
||||
if(PYTORCH_ROCM_ARCH STREQUAL "")
|
||||
message(FATAL_ERROR "No GPU arch specified for ROCm build. Please use PYTORCH_ROCM_ARCH environment variable to specify GPU archs to build for.")
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"No GPU arch specified for ROCm build. Please use PYTORCH_ROCM_ARCH environment variable to specify GPU archs to build for."
|
||||
)
|
||||
endif()
|
||||
message("Building PyTorch for GPU arch: ${PYTORCH_ROCM_ARCH}")
|
||||
|
||||
# Add HIP to the CMAKE Module Path
|
||||
# needed because the find_package call to this module uses the Module mode search
|
||||
# Add HIP to the CMAKE Module Path needed because the find_package call to this
|
||||
# module uses the Module mode search
|
||||
# https://cmake.org/cmake/help/latest/command/find_package.html#search-modes
|
||||
if(UNIX)
|
||||
set(CMAKE_MODULE_PATH ${ROCM_PATH}/lib/cmake/hip ${CMAKE_MODULE_PATH})
|
||||
@ -59,8 +63,8 @@ else() # Win32
|
||||
set(CMAKE_MODULE_PATH ${ROCM_PATH}/cmake/ ${CMAKE_MODULE_PATH})
|
||||
endif()
|
||||
|
||||
# Add ROCM_PATH to CMAKE_PREFIX_PATH, needed because the find_package
|
||||
# call to individual ROCM components uses the Config mode search
|
||||
# Add ROCM_PATH to CMAKE_PREFIX_PATH, needed because the find_package call to
|
||||
# individual ROCM components uses the Config mode search
|
||||
list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH})
|
||||
|
||||
macro(find_package_and_print_version PACKAGE_NAME)
|
||||
@ -75,28 +79,28 @@ macro(find_package_and_print_version PACKAGE_NAME)
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
# Find the HIP Package
|
||||
# MODULE argument is added for clarity that CMake is searching
|
||||
# for FindHIP.cmake in Module mode
|
||||
# Find the HIP Package MODULE argument is added for clarity that CMake is
|
||||
# searching for FindHIP.cmake in Module mode
|
||||
find_package_and_print_version(HIP 1.0 MODULE)
|
||||
|
||||
if(HIP_FOUND)
|
||||
set(PYTORCH_FOUND_HIP TRUE)
|
||||
find_package_and_print_version(hip REQUIRED CONFIG)
|
||||
|
||||
# The rocm-core package was only introduced in ROCm 6.4, so we make it optional.
|
||||
# The rocm-core package was only introduced in ROCm 6.4, so we make it
|
||||
# optional.
|
||||
find_package(rocm-core CONFIG)
|
||||
|
||||
# Some old consumer HIP SDKs do not distribute rocm_version.h, so we allow
|
||||
# falling back to the hip version, which everyone should have.
|
||||
# rocm_version.h lives in the rocm-core package and hip_version.h lives in the
|
||||
# hip (lower-case) package. Both are probed above and will be in
|
||||
# ROCM_INCLUDE_DIRS if available.
|
||||
find_file(ROCM_VERSION_HEADER_PATH
|
||||
# falling back to the hip version, which everyone should have. rocm_version.h
|
||||
# lives in the rocm-core package and hip_version.h lives in the hip
|
||||
# (lower-case) package. Both are probed above and will be in ROCM_INCLUDE_DIRS
|
||||
# if available.
|
||||
find_file(
|
||||
ROCM_VERSION_HEADER_PATH
|
||||
NAMES rocm-core/rocm_version.h hip/hip_version.h
|
||||
NO_DEFAULT_PATH
|
||||
PATHS ${ROCM_INCLUDE_DIRS}
|
||||
)
|
||||
PATHS ${ROCM_INCLUDE_DIRS})
|
||||
if(ROCM_VERSION_HEADER_PATH MATCHES "rocm-core/rocm_version.h$")
|
||||
set(ROCM_LIB_NAME "ROCM")
|
||||
else()
|
||||
@ -104,14 +108,20 @@ if(HIP_FOUND)
|
||||
endif()
|
||||
|
||||
if(NOT ROCM_VERSION_HEADER_PATH)
|
||||
message(FATAL_ERROR "Could not find hip/hip_version.h or rocm-core/rocm_version.h in ${ROCM_INCLUDE_DIRS}")
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"Could not find hip/hip_version.h or rocm-core/rocm_version.h in ${ROCM_INCLUDE_DIRS}"
|
||||
)
|
||||
endif()
|
||||
get_filename_component(ROCM_HEADER_NAME ${ROCM_VERSION_HEADER_PATH} NAME)
|
||||
|
||||
if(EXISTS ${ROCM_VERSION_HEADER_PATH})
|
||||
set(ROCM_HEADER_FILE ${ROCM_VERSION_HEADER_PATH})
|
||||
else()
|
||||
message(FATAL_ERROR "********************* ${ROCM_HEADER_NAME} could not be found ******************\n")
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"********************* ${ROCM_HEADER_NAME} could not be found ******************\n"
|
||||
)
|
||||
endif()
|
||||
|
||||
# Read the ROCM headerfile into a variable
|
||||
@ -119,28 +129,34 @@ if(HIP_FOUND)
|
||||
message(STATUS "Content: ${ROCM_HEADER_CONTENT}")
|
||||
file(READ "${ROCM_HEADER_FILE}" ROCM_HEADER_CONTENT)
|
||||
|
||||
# Below we use a RegEx to find ROCM version numbers.
|
||||
# Note that CMake does not support \s for blank space. That is
|
||||
# why in the regular expressions below we have a blank space in
|
||||
# the square brackets.
|
||||
# There are three steps:
|
||||
# 1. Match regular expression
|
||||
# 2. Strip the non-numerical part of the string
|
||||
# 3. Strip leading and trailing spaces
|
||||
# Below we use a RegEx to find ROCM version numbers. Note that CMake does not
|
||||
# support \s for blank space. That is why in the regular expressions below we
|
||||
# have a blank space in the square brackets. There are three steps: 1. Match
|
||||
# regular expression 2. Strip the non-numerical part of the string 3. Strip
|
||||
# leading and trailing spaces
|
||||
|
||||
string(REGEX MATCH "${ROCM_LIB_NAME}_VERSION_MAJOR[ ]+[0-9]+" TEMP1 ${ROCM_HEADER_CONTENT})
|
||||
string(REGEX MATCH "${ROCM_LIB_NAME}_VERSION_MAJOR[ ]+[0-9]+" TEMP1
|
||||
${ROCM_HEADER_CONTENT})
|
||||
string(REPLACE "${ROCM_LIB_NAME}_VERSION_MAJOR" "" TEMP2 ${TEMP1})
|
||||
string(STRIP ${TEMP2} ROCM_VERSION_DEV_MAJOR)
|
||||
string(REGEX MATCH "${ROCM_LIB_NAME}_VERSION_MINOR[ ]+[0-9]+" TEMP1 ${ROCM_HEADER_CONTENT})
|
||||
string(REGEX MATCH "${ROCM_LIB_NAME}_VERSION_MINOR[ ]+[0-9]+" TEMP1
|
||||
${ROCM_HEADER_CONTENT})
|
||||
string(REPLACE "${ROCM_LIB_NAME}_VERSION_MINOR" "" TEMP2 ${TEMP1})
|
||||
string(STRIP ${TEMP2} ROCM_VERSION_DEV_MINOR)
|
||||
string(REGEX MATCH "${ROCM_LIB_NAME}_VERSION_PATCH[ ]+[0-9]+" TEMP1 ${ROCM_HEADER_CONTENT})
|
||||
string(REGEX MATCH "${ROCM_LIB_NAME}_VERSION_PATCH[ ]+[0-9]+" TEMP1
|
||||
${ROCM_HEADER_CONTENT})
|
||||
string(REPLACE "${ROCM_LIB_NAME}_VERSION_PATCH" "" TEMP2 ${TEMP1})
|
||||
string(STRIP ${TEMP2} ROCM_VERSION_DEV_PATCH)
|
||||
|
||||
# Create ROCM_VERSION_DEV_INT which is later used as a preprocessor macros
|
||||
set(ROCM_VERSION_DEV "${ROCM_VERSION_DEV_MAJOR}.${ROCM_VERSION_DEV_MINOR}.${ROCM_VERSION_DEV_PATCH}")
|
||||
math(EXPR ROCM_VERSION_DEV_INT "(${ROCM_VERSION_DEV_MAJOR}*10000) + (${ROCM_VERSION_DEV_MINOR}*100) + ${ROCM_VERSION_DEV_PATCH}")
|
||||
set(ROCM_VERSION_DEV
|
||||
"${ROCM_VERSION_DEV_MAJOR}.${ROCM_VERSION_DEV_MINOR}.${ROCM_VERSION_DEV_PATCH}"
|
||||
)
|
||||
math(
|
||||
EXPR
|
||||
ROCM_VERSION_DEV_INT
|
||||
"(${ROCM_VERSION_DEV_MAJOR}*10000) + (${ROCM_VERSION_DEV_MINOR}*100) + ${ROCM_VERSION_DEV_PATCH}"
|
||||
)
|
||||
|
||||
message("\n***** ROCm version from ${ROCM_HEADER_NAME} ****\n")
|
||||
message("ROCM_VERSION_DEV: ${ROCM_VERSION_DEV}")
|
||||
@ -149,13 +165,14 @@ if(HIP_FOUND)
|
||||
message("ROCM_VERSION_DEV_PATCH: ${ROCM_VERSION_DEV_PATCH}")
|
||||
message("ROCM_VERSION_DEV_INT: ${ROCM_VERSION_DEV_INT}")
|
||||
|
||||
math(EXPR TORCH_HIP_VERSION "(${HIP_VERSION_MAJOR} * 100) + ${HIP_VERSION_MINOR}")
|
||||
math(EXPR TORCH_HIP_VERSION
|
||||
"(${HIP_VERSION_MAJOR} * 100) + ${HIP_VERSION_MINOR}")
|
||||
message("HIP_VERSION_MAJOR: ${HIP_VERSION_MAJOR}")
|
||||
message("HIP_VERSION_MINOR: ${HIP_VERSION_MINOR}")
|
||||
message("TORCH_HIP_VERSION: ${TORCH_HIP_VERSION}")
|
||||
|
||||
# Find ROCM components using Config mode
|
||||
# These components will be searced for recursively in ${ROCM_PATH}
|
||||
# Find ROCM components using Config mode These components will be searced for
|
||||
# recursively in ${ROCM_PATH}
|
||||
message("\n***** Library versions from cmake find_package *****\n")
|
||||
find_package_and_print_version(amd_comgr REQUIRED)
|
||||
find_package_and_print_version(rocrand REQUIRED)
|
||||
@ -187,7 +204,7 @@ if(HIP_FOUND)
|
||||
endif()
|
||||
|
||||
# Optional components.
|
||||
find_package_and_print_version(hipsparselt) # Will be required when ready.
|
||||
find_package_and_print_version(hipsparselt) # Will be required when ready.
|
||||
|
||||
list(REMOVE_DUPLICATES ROCM_INCLUDE_DIRS)
|
||||
|
||||
@ -198,32 +215,40 @@ if(HIP_FOUND)
|
||||
set(PROJECT_RANDOM_BINARY_DIR "${PROJECT_BINARY_DIR}")
|
||||
|
||||
if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "5.7.0")
|
||||
# check whether hipblaslt provides HIPBLASLT_MATMUL_MATRIX_SCALE_OUTER_VEC_32F
|
||||
# check whether hipblaslt provides
|
||||
# HIPBLASLT_MATMUL_MATRIX_SCALE_OUTER_VEC_32F
|
||||
set(file "${PROJECT_BINARY_DIR}/hipblaslt_test_outer_vec.cc")
|
||||
file(WRITE ${file} ""
|
||||
file(
|
||||
WRITE ${file}
|
||||
""
|
||||
"#define LEGACY_HIPBLAS_DIRECT\n"
|
||||
"#include <hipblaslt/hipblaslt.h>\n"
|
||||
"int main() {\n"
|
||||
" hipblasLtMatmulMatrixScale_t attr = HIPBLASLT_MATMUL_MATRIX_SCALE_OUTER_VEC_32F;\n"
|
||||
" return 0;\n"
|
||||
"}\n"
|
||||
)
|
||||
try_compile(hipblaslt_compile_result_outer_vec ${PROJECT_RANDOM_BINARY_DIR} ${file}
|
||||
"}\n")
|
||||
try_compile(
|
||||
hipblaslt_compile_result_outer_vec ${PROJECT_RANDOM_BINARY_DIR}
|
||||
${file}
|
||||
CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${ROCM_INCLUDE_DIRS}"
|
||||
COMPILE_DEFINITIONS -D__HIP_PLATFORM_AMD__ -D__HIP_PLATFORM_HCC__
|
||||
OUTPUT_VARIABLE hipblaslt_compile_output_outer_vec)
|
||||
|
||||
# check whether hipblaslt provides HIPBLASLT_MATMUL_DESC_A_SCALE_POINTER_VEC_EXT
|
||||
# check whether hipblaslt provides
|
||||
# HIPBLASLT_MATMUL_DESC_A_SCALE_POINTER_VEC_EXT
|
||||
set(file "${PROJECT_BINARY_DIR}/hipblaslt_test_vec_ext.cc")
|
||||
file(WRITE ${file} ""
|
||||
file(
|
||||
WRITE ${file}
|
||||
""
|
||||
"#define LEGACY_HIPBLAS_DIRECT\n"
|
||||
"#include <hipblaslt/hipblaslt.h>\n"
|
||||
"int main() {\n"
|
||||
" hipblasLtMatmulDescAttributes_t attr = HIPBLASLT_MATMUL_DESC_A_SCALE_POINTER_VEC_EXT;\n"
|
||||
" return 0;\n"
|
||||
"}\n"
|
||||
)
|
||||
try_compile(hipblaslt_compile_result_vec_ext ${PROJECT_RANDOM_BINARY_DIR} ${file}
|
||||
"}\n")
|
||||
try_compile(
|
||||
hipblaslt_compile_result_vec_ext ${PROJECT_RANDOM_BINARY_DIR}
|
||||
${file}
|
||||
CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${ROCM_INCLUDE_DIRS}"
|
||||
COMPILE_DEFINITIONS -D__HIP_PLATFORM_AMD__ -D__HIP_PLATFORM_HCC__
|
||||
OUTPUT_VARIABLE hipblaslt_compile_output_vec_ext)
|
||||
@ -239,8 +264,12 @@ if(HIP_FOUND)
|
||||
else()
|
||||
set(HIPBLASLT_OUTER_VEC OFF)
|
||||
set(HIPBLASLT_VEC_EXT OFF)
|
||||
message("hipblaslt is NOT using scale pointer outer vec: ${hipblaslt_compile_output_outer_vec}")
|
||||
message("hipblaslt is NOT using scale pointer vec ext: ${hipblaslt_compile_output_vec_ext}")
|
||||
message(
|
||||
"hipblaslt is NOT using scale pointer outer vec: ${hipblaslt_compile_output_outer_vec}"
|
||||
)
|
||||
message(
|
||||
"hipblaslt is NOT using scale pointer vec ext: ${hipblaslt_compile_output_vec_ext}"
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@ -6,46 +6,47 @@ if(TARGET torch::cudart)
|
||||
endif()
|
||||
|
||||
# sccache is only supported in CMake master and not in the newest official
|
||||
# release (3.11.3) yet. Hence we need our own Modules_CUDA_fix to enable sccache.
|
||||
# release (3.11.3) yet. Hence we need our own Modules_CUDA_fix to enable
|
||||
# sccache.
|
||||
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/../Modules_CUDA_fix)
|
||||
|
||||
# We don't want to statically link cudart, because we rely on it's dynamic linkage in
|
||||
# python (follow along torch/cuda/__init__.py and usage of cudaGetErrorName).
|
||||
# Technically, we can link cudart here statically, and link libtorch_python.so
|
||||
# to a dynamic libcudart.so, but that's just wasteful.
|
||||
# However, on Windows, if this one gets switched off, the error "cuda: unknown error"
|
||||
# will be raised when running the following code:
|
||||
# >>> import torch
|
||||
# >>> torch.cuda.is_available()
|
||||
# >>> torch.cuda.current_device()
|
||||
# More details can be found in the following links.
|
||||
# https://github.com/pytorch/pytorch/issues/20635
|
||||
# We don't want to statically link cudart, because we rely on it's dynamic
|
||||
# linkage in python (follow along torch/cuda/__init__.py and usage of
|
||||
# cudaGetErrorName). Technically, we can link cudart here statically, and link
|
||||
# libtorch_python.so to a dynamic libcudart.so, but that's just wasteful.
|
||||
# However, on Windows, if this one gets switched off, the error "cuda: unknown
|
||||
# error" will be raised when running the following code: >>> import torch >>>
|
||||
# torch.cuda.is_available() >>> torch.cuda.current_device() More details can be
|
||||
# found in the following links. https://github.com/pytorch/pytorch/issues/20635
|
||||
# https://github.com/pytorch/pytorch/issues/17108
|
||||
if(NOT MSVC)
|
||||
set(CUDA_USE_STATIC_CUDA_RUNTIME OFF CACHE INTERNAL "")
|
||||
set(CUDA_USE_STATIC_CUDA_RUNTIME
|
||||
OFF
|
||||
CACHE INTERNAL "")
|
||||
endif()
|
||||
|
||||
# Find CUDA.
|
||||
find_package(CUDA)
|
||||
if(NOT CUDA_FOUND)
|
||||
message(WARNING
|
||||
"PyTorch: CUDA cannot be found. Depending on whether you are building "
|
||||
"PyTorch or a PyTorch dependent library, the next warning / error will "
|
||||
"give you more info.")
|
||||
message(
|
||||
WARNING
|
||||
"PyTorch: CUDA cannot be found. Depending on whether you are building "
|
||||
"PyTorch or a PyTorch dependent library, the next warning / error will "
|
||||
"give you more info.")
|
||||
set(CAFFE2_USE_CUDA OFF)
|
||||
return()
|
||||
endif()
|
||||
|
||||
# Enable CUDA language support
|
||||
set(CUDAToolkit_ROOT "${CUDA_TOOLKIT_ROOT_DIR}")
|
||||
# Pass clang as host compiler, which according to the docs
|
||||
# Must be done before CUDA language is enabled, see
|
||||
# Pass clang as host compiler, which according to the docs Must be done before
|
||||
# CUDA language is enabled, see
|
||||
# https://cmake.org/cmake/help/v3.15/variable/CMAKE_CUDA_HOST_COMPILER.html
|
||||
if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
|
||||
set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}")
|
||||
endif()
|
||||
enable_language(CUDA)
|
||||
if("X${CMAKE_CUDA_STANDARD}" STREQUAL "X" )
|
||||
if("X${CMAKE_CUDA_STANDARD}" STREQUAL "X")
|
||||
set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD})
|
||||
endif()
|
||||
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
|
||||
@ -61,9 +62,11 @@ find_package(CUDAToolkit REQUIRED)
|
||||
cmake_policy(POP)
|
||||
|
||||
if(NOT CMAKE_CUDA_COMPILER_VERSION VERSION_EQUAL CUDAToolkit_VERSION)
|
||||
message(FATAL_ERROR "Found two conflicting CUDA versions:\n"
|
||||
"V${CMAKE_CUDA_COMPILER_VERSION} in '${CUDA_INCLUDE_DIRS}' and\n"
|
||||
"V${CUDAToolkit_VERSION} in '${CUDAToolkit_INCLUDE_DIRS}'")
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"Found two conflicting CUDA versions:\n"
|
||||
"V${CMAKE_CUDA_COMPILER_VERSION} in '${CUDA_INCLUDE_DIRS}' and\n"
|
||||
"V${CUDAToolkit_VERSION} in '${CUDAToolkit_INCLUDE_DIRS}'")
|
||||
endif()
|
||||
|
||||
message(STATUS "PyTorch: CUDA detected: " ${CUDA_VERSION})
|
||||
@ -74,10 +77,10 @@ if(CUDA_VERSION VERSION_LESS 12.0)
|
||||
endif()
|
||||
|
||||
if(CUDA_FOUND)
|
||||
# Sometimes, we may mismatch nvcc with the CUDA headers we are
|
||||
# compiling with, e.g., if a ccache nvcc is fed to us by CUDA_NVCC_EXECUTABLE
|
||||
# but the PATH is not consistent with CUDA_HOME. It's better safe
|
||||
# than sorry: make sure everything is consistent.
|
||||
# Sometimes, we may mismatch nvcc with the CUDA headers we are compiling with,
|
||||
# e.g., if a ccache nvcc is fed to us by CUDA_NVCC_EXECUTABLE but the PATH is
|
||||
# not consistent with CUDA_HOME. It's better safe than sorry: make sure
|
||||
# everything is consistent.
|
||||
if(MSVC AND CMAKE_GENERATOR MATCHES "Visual Studio")
|
||||
# When using Visual Studio, it attempts to lock the whole binary dir when
|
||||
# `try_run` is called, which will cause the build to fail.
|
||||
@ -87,40 +90,44 @@ if(CUDA_FOUND)
|
||||
set(PROJECT_RANDOM_BINARY_DIR "${PROJECT_BINARY_DIR}")
|
||||
endif()
|
||||
set(file "${PROJECT_BINARY_DIR}/detect_cuda_version.cc")
|
||||
file(WRITE ${file} ""
|
||||
file(
|
||||
WRITE ${file}
|
||||
""
|
||||
"#include <cuda.h>\n"
|
||||
"#include <cstdio>\n"
|
||||
"int main() {\n"
|
||||
" printf(\"%d.%d\", CUDA_VERSION / 1000, (CUDA_VERSION / 10) % 100);\n"
|
||||
" return 0;\n"
|
||||
"}\n"
|
||||
)
|
||||
"}\n")
|
||||
if(NOT CMAKE_CROSSCOMPILING)
|
||||
try_run(run_result compile_result ${PROJECT_RANDOM_BINARY_DIR} ${file}
|
||||
CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${CUDA_INCLUDE_DIRS}"
|
||||
LINK_LIBRARIES ${CUDA_LIBRARIES}
|
||||
try_run(
|
||||
run_result compile_result ${PROJECT_RANDOM_BINARY_DIR} ${file}
|
||||
CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${CUDA_INCLUDE_DIRS}" LINK_LIBRARIES
|
||||
${CUDA_LIBRARIES}
|
||||
RUN_OUTPUT_VARIABLE cuda_version_from_header
|
||||
COMPILE_OUTPUT_VARIABLE output_var
|
||||
)
|
||||
COMPILE_OUTPUT_VARIABLE output_var)
|
||||
if(NOT compile_result)
|
||||
message(FATAL_ERROR "PyTorch: Couldn't determine version from header: " ${output_var})
|
||||
message(FATAL_ERROR "PyTorch: Couldn't determine version from header: "
|
||||
${output_var})
|
||||
endif()
|
||||
message(STATUS "PyTorch: Header version is: " ${cuda_version_from_header})
|
||||
if(NOT cuda_version_from_header STREQUAL ${CUDA_VERSION_STRING})
|
||||
# Force CUDA to be processed for again next time
|
||||
# TODO: I'm not sure if this counts as an implementation detail of
|
||||
# FindCUDA
|
||||
# Force CUDA to be processed for again next time TODO: I'm not sure if
|
||||
# this counts as an implementation detail of FindCUDA
|
||||
set(cuda_version_from_findcuda ${CUDA_VERSION_STRING})
|
||||
unset(CUDA_TOOLKIT_ROOT_DIR_INTERNAL CACHE)
|
||||
# Not strictly necessary, but for good luck.
|
||||
unset(CUDA_VERSION CACHE)
|
||||
# Error out
|
||||
message(FATAL_ERROR "FindCUDA says CUDA version is ${cuda_version_from_findcuda} (usually determined by nvcc), "
|
||||
"but the CUDA headers say the version is ${cuda_version_from_header}. This often occurs "
|
||||
"when you set both CUDA_HOME and CUDA_NVCC_EXECUTABLE to "
|
||||
"non-standard locations, without also setting PATH to point to the correct nvcc. "
|
||||
"Perhaps, try re-running this command again with PATH=${CUDA_TOOLKIT_ROOT_DIR}/bin:$PATH. "
|
||||
"See above log messages for more diagnostics, and see https://github.com/pytorch/pytorch/issues/8092 for more details.")
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"FindCUDA says CUDA version is ${cuda_version_from_findcuda} (usually determined by nvcc), "
|
||||
"but the CUDA headers say the version is ${cuda_version_from_header}. This often occurs "
|
||||
"when you set both CUDA_HOME and CUDA_NVCC_EXECUTABLE to "
|
||||
"non-standard locations, without also setting PATH to point to the correct nvcc. "
|
||||
"Perhaps, try re-running this command again with PATH=${CUDA_TOOLKIT_ROOT_DIR}/bin:$PATH. "
|
||||
"See above log messages for more diagnostics, and see https://github.com/pytorch/pytorch/issues/8092 for more details."
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
@ -128,12 +135,15 @@ endif()
|
||||
# ---[ CUDA libraries wrapper
|
||||
|
||||
# find lbnvrtc.so
|
||||
set(CUDA_NVRTC_LIB "${CUDA_nvrtc_LIBRARY}" CACHE FILEPATH "")
|
||||
set(CUDA_NVRTC_LIB
|
||||
"${CUDA_nvrtc_LIBRARY}"
|
||||
CACHE FILEPATH "")
|
||||
if(CUDA_NVRTC_LIB AND NOT CUDA_NVRTC_SHORTHASH)
|
||||
find_package(Python COMPONENTS Interpreter)
|
||||
execute_process(
|
||||
COMMAND Python::Interpreter -c
|
||||
"import hashlib;hash=hashlib.sha256();hash.update(open('${CUDA_NVRTC_LIB}','rb').read());print(hash.hexdigest()[:8])"
|
||||
COMMAND
|
||||
Python::Interpreter -c
|
||||
"import hashlib;hash=hashlib.sha256();hash.update(open('${CUDA_NVRTC_LIB}','rb').read());print(hash.hexdigest()[:8])"
|
||||
RESULT_VARIABLE _retval
|
||||
OUTPUT_VARIABLE CUDA_NVRTC_SHORTHASH)
|
||||
if(NOT _retval EQUAL 0)
|
||||
@ -145,61 +155,60 @@ if(CUDA_NVRTC_LIB AND NOT CUDA_NVRTC_SHORTHASH)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Create new style imported libraries.
|
||||
# Several of these libraries have a hardcoded path if CAFFE2_STATIC_LINK_CUDA
|
||||
# is set. This path is where sane CUDA installations have their static
|
||||
# libraries installed. This flag should only be used for binary builds, so
|
||||
# end-users should never have this flag set.
|
||||
# Create new style imported libraries. Several of these libraries have a
|
||||
# hardcoded path if CAFFE2_STATIC_LINK_CUDA is set. This path is where sane CUDA
|
||||
# installations have their static libraries installed. This flag should only be
|
||||
# used for binary builds, so end-users should never have this flag set.
|
||||
|
||||
# cuda
|
||||
add_library(caffe2::cuda INTERFACE IMPORTED)
|
||||
set_property(
|
||||
TARGET caffe2::cuda PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::cuda_driver)
|
||||
set_property(TARGET caffe2::cuda PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::cuda_driver)
|
||||
|
||||
# cudart
|
||||
add_library(torch::cudart INTERFACE IMPORTED)
|
||||
if(CAFFE2_STATIC_LINK_CUDA)
|
||||
set_property(
|
||||
TARGET torch::cudart PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::cudart_static)
|
||||
set_property(TARGET torch::cudart PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::cudart_static)
|
||||
else()
|
||||
set_property(
|
||||
TARGET torch::cudart PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::cudart)
|
||||
set_property(TARGET torch::cudart PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::cudart)
|
||||
endif()
|
||||
|
||||
|
||||
# cublas
|
||||
add_library(caffe2::cublas INTERFACE IMPORTED)
|
||||
if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
|
||||
set_property(
|
||||
TARGET caffe2::cublas PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
# NOTE: cublas is always linked dynamically
|
||||
CUDA::cublas CUDA::cublasLt)
|
||||
set_property(
|
||||
TARGET caffe2::cublas APPEND PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::cudart_static rt)
|
||||
set_property(
|
||||
TARGET caffe2::cublas
|
||||
PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
# NOTE: cublas is always linked dynamically
|
||||
CUDA::cublas CUDA::cublasLt)
|
||||
set_property(
|
||||
TARGET caffe2::cublas
|
||||
APPEND
|
||||
PROPERTY INTERFACE_LINK_LIBRARIES CUDA::cudart_static rt)
|
||||
else()
|
||||
set_property(
|
||||
TARGET caffe2::cublas PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::cublas CUDA::cublasLt)
|
||||
set_property(TARGET caffe2::cublas PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::cublas CUDA::cublasLt)
|
||||
endif()
|
||||
|
||||
# cudnn interface
|
||||
# static linking is handled by USE_STATIC_CUDNN environment variable
|
||||
# cudnn interface static linking is handled by USE_STATIC_CUDNN environment
|
||||
# variable
|
||||
if(CAFFE2_USE_CUDNN)
|
||||
if(USE_STATIC_CUDNN)
|
||||
set(CUDNN_STATIC ON CACHE BOOL "")
|
||||
set(CUDNN_STATIC
|
||||
ON
|
||||
CACHE BOOL "")
|
||||
else()
|
||||
set(CUDNN_STATIC OFF CACHE BOOL "")
|
||||
set(CUDNN_STATIC
|
||||
OFF
|
||||
CACHE BOOL "")
|
||||
endif()
|
||||
|
||||
find_package(CUDNN)
|
||||
|
||||
if(NOT CUDNN_FOUND)
|
||||
message(WARNING
|
||||
"Cannot find cuDNN library. Turning the option off")
|
||||
message(WARNING "Cannot find cuDNN library. Turning the option off")
|
||||
set(CAFFE2_USE_CUDNN OFF)
|
||||
else()
|
||||
if(CUDNN_VERSION VERSION_LESS "8.1.0")
|
||||
@ -211,7 +220,7 @@ if(CAFFE2_USE_CUDNN)
|
||||
target_include_directories(torch::cudnn INTERFACE ${CUDNN_INCLUDE_PATH})
|
||||
if(CUDNN_STATIC AND NOT WIN32)
|
||||
target_link_options(torch::cudnn INTERFACE
|
||||
"-Wl,--exclude-libs,libcudnn_static.a")
|
||||
"-Wl,--exclude-libs,libcudnn_static.a")
|
||||
else()
|
||||
target_link_libraries(torch::cudnn INTERFACE ${CUDNN_LIBRARY_PATH})
|
||||
endif()
|
||||
@ -223,24 +232,25 @@ if(CAFFE2_USE_CUSPARSELT)
|
||||
find_package(CUSPARSELT)
|
||||
|
||||
if(NOT CUSPARSELT_FOUND)
|
||||
message(WARNING
|
||||
"Cannot find cuSPARSELt library. Turning the option off")
|
||||
message(WARNING "Cannot find cuSPARSELt library. Turning the option off")
|
||||
set(CAFFE2_USE_CUSPARSELT OFF)
|
||||
else()
|
||||
add_library(torch::cusparselt INTERFACE IMPORTED)
|
||||
target_include_directories(torch::cusparselt INTERFACE ${CUSPARSELT_INCLUDE_PATH})
|
||||
target_link_libraries(torch::cusparselt INTERFACE ${CUSPARSELT_LIBRARY_PATH})
|
||||
target_include_directories(torch::cusparselt
|
||||
INTERFACE ${CUSPARSELT_INCLUDE_PATH})
|
||||
target_link_libraries(torch::cusparselt
|
||||
INTERFACE ${CUSPARSELT_LIBRARY_PATH})
|
||||
endif()
|
||||
else()
|
||||
message(STATUS "USE_CUSPARSELT is set to 0. Compiling without cuSPARSELt support")
|
||||
message(
|
||||
STATUS "USE_CUSPARSELT is set to 0. Compiling without cuSPARSELt support")
|
||||
endif()
|
||||
|
||||
if(USE_CUDSS)
|
||||
find_package(CUDSS)
|
||||
|
||||
if(NOT CUDSS_FOUND)
|
||||
message(WARNING
|
||||
"Cannot find CUDSS library. Turning the option off")
|
||||
message(WARNING "Cannot find CUDSS library. Turning the option off")
|
||||
set(USE_CUDSS OFF)
|
||||
else()
|
||||
add_library(torch::cudss INTERFACE IMPORTED)
|
||||
@ -255,13 +265,11 @@ endif()
|
||||
if(CAFFE2_USE_CUFILE)
|
||||
add_library(torch::cufile INTERFACE IMPORTED)
|
||||
if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
|
||||
set_property(
|
||||
TARGET torch::cufile PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::cuFile_static)
|
||||
set_property(TARGET torch::cufile PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::cuFile_static)
|
||||
else()
|
||||
set_property(
|
||||
TARGET torch::cufile PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::cuFile)
|
||||
set_property(TARGET torch::cufile PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::cuFile)
|
||||
endif()
|
||||
else()
|
||||
message(STATUS "USE_CUFILE is set to 0. Compiling without cuFile support")
|
||||
@ -270,32 +278,27 @@ endif()
|
||||
# curand
|
||||
add_library(caffe2::curand INTERFACE IMPORTED)
|
||||
if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
|
||||
set_property(
|
||||
TARGET caffe2::curand PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::curand_static)
|
||||
set_property(TARGET caffe2::curand PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::curand_static)
|
||||
else()
|
||||
set_property(
|
||||
TARGET caffe2::curand PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::curand)
|
||||
set_property(TARGET caffe2::curand PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::curand)
|
||||
endif()
|
||||
|
||||
# cufft
|
||||
add_library(caffe2::cufft INTERFACE IMPORTED)
|
||||
if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
|
||||
set_property(
|
||||
TARGET caffe2::cufft PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::cufft_static_nocallback)
|
||||
set_property(TARGET caffe2::cufft PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::cufft_static_nocallback)
|
||||
else()
|
||||
set_property(
|
||||
TARGET caffe2::cufft PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::cufft)
|
||||
set_property(TARGET caffe2::cufft PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::cufft)
|
||||
endif()
|
||||
|
||||
# nvrtc
|
||||
add_library(caffe2::nvrtc INTERFACE IMPORTED)
|
||||
set_property(
|
||||
TARGET caffe2::nvrtc PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
CUDA::nvrtc caffe2::cuda)
|
||||
set_property(TARGET caffe2::nvrtc PROPERTY INTERFACE_LINK_LIBRARIES CUDA::nvrtc
|
||||
caffe2::cuda)
|
||||
|
||||
# Add onnx namespace definition to nvcc
|
||||
if(ONNX_NAMESPACE)
|
||||
@ -304,19 +307,23 @@ else()
|
||||
list(APPEND CUDA_NVCC_FLAGS "-DONNX_NAMESPACE=onnx_c2")
|
||||
endif()
|
||||
|
||||
# Don't activate VC env again for Ninja generators with MSVC on Windows if CUDAHOSTCXX is not defined
|
||||
# by adding --use-local-env.
|
||||
if(MSVC AND CMAKE_GENERATOR STREQUAL "Ninja" AND NOT DEFINED ENV{CUDAHOSTCXX})
|
||||
# Don't activate VC env again for Ninja generators with MSVC on Windows if
|
||||
# CUDAHOSTCXX is not defined by adding --use-local-env.
|
||||
if(MSVC
|
||||
AND CMAKE_GENERATOR STREQUAL "Ninja"
|
||||
AND NOT DEFINED ENV{CUDAHOSTCXX})
|
||||
list(APPEND CUDA_NVCC_FLAGS "--use-local-env")
|
||||
endif()
|
||||
|
||||
# setting nvcc arch flags
|
||||
torch_cuda_get_nvcc_gencode_flag(NVCC_FLAGS_EXTRA)
|
||||
# CMake 3.18 adds integrated support for architecture selection, but we can't rely on it
|
||||
# CMake 3.18 adds integrated support for architecture selection, but we can't
|
||||
# rely on it
|
||||
if(DEFINED CMAKE_CUDA_ARCHITECTURES)
|
||||
message(WARNING
|
||||
"pytorch is not compatible with `CMAKE_CUDA_ARCHITECTURES` and will ignore its value. "
|
||||
"Please configure `TORCH_CUDA_ARCH_LIST` instead.")
|
||||
message(
|
||||
WARNING
|
||||
"pytorch is not compatible with `CMAKE_CUDA_ARCHITECTURES` and will ignore its value. "
|
||||
"Please configure `TORCH_CUDA_ARCH_LIST` instead.")
|
||||
set(CMAKE_CUDA_ARCHITECTURES OFF)
|
||||
endif()
|
||||
|
||||
@ -324,12 +331,11 @@ list(APPEND CUDA_NVCC_FLAGS ${NVCC_FLAGS_EXTRA})
|
||||
message(STATUS "Added CUDA NVCC flags for: ${NVCC_FLAGS_EXTRA}")
|
||||
|
||||
# disable some nvcc diagnostic that appears in boost, glog, glags, opencv, etc.
|
||||
foreach(diag cc_clobber_ignored
|
||||
field_without_dll_interface
|
||||
base_class_has_different_dll_interface
|
||||
dll_interface_conflict_none_assumed
|
||||
dll_interface_conflict_dllexport_assumed
|
||||
bad_friend_decl)
|
||||
foreach(
|
||||
diag
|
||||
cc_clobber_ignored field_without_dll_interface
|
||||
base_class_has_different_dll_interface dll_interface_conflict_none_assumed
|
||||
dll_interface_conflict_dllexport_assumed bad_friend_decl)
|
||||
list(APPEND SUPPRESS_WARNING_FLAGS --diag_suppress=${diag})
|
||||
endforeach()
|
||||
string(REPLACE ";" "," SUPPRESS_WARNING_FLAGS "${SUPPRESS_WARNING_FLAGS}")
|
||||
@ -358,7 +364,8 @@ if(MSVC)
|
||||
list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "-FS")
|
||||
endif()
|
||||
elseif(CUDA_DEVICE_DEBUG)
|
||||
list(APPEND CUDA_NVCC_FLAGS "-g" "-G") # -G enables device code debugging symbols
|
||||
list(APPEND CUDA_NVCC_FLAGS "-g" "-G") # -G enables device code debugging
|
||||
# symbols
|
||||
endif()
|
||||
|
||||
# Set expt-relaxed-constexpr to suppress Eigen warnings
|
||||
|
||||
@ -11,72 +11,67 @@ if(TARGET gflags)
|
||||
elseif(GFLAGS_FOUND)
|
||||
message(STATUS "Caffe2: Found gflags with old-style gflag starget.")
|
||||
add_library(gflags UNKNOWN IMPORTED)
|
||||
set_property(
|
||||
TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARY})
|
||||
set_property(
|
||||
TARGET gflags PROPERTY INTERFACE_INCLUDE_DIRECTORIES
|
||||
${GFLAGS_INCLUDE_DIR})
|
||||
set_property(TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARY})
|
||||
set_property(TARGET gflags PROPERTY INTERFACE_INCLUDE_DIRECTORIES
|
||||
${GFLAGS_INCLUDE_DIR})
|
||||
else()
|
||||
message(STATUS
|
||||
"Caffe2: Cannot find gflags automatically. Using legacy find.")
|
||||
message(STATUS "Caffe2: Cannot find gflags automatically. Using legacy find.")
|
||||
|
||||
# - Try to find GFLAGS in the legacy way.
|
||||
# * Try to find GFLAGS in the legacy way.
|
||||
#
|
||||
# The following variables are optionally searched for defaults
|
||||
# GFLAGS_ROOT_DIR: Base directory where all GFLAGS components are found
|
||||
# GFLAGS_ROOT_DIR: Base directory where all GFLAGS components are found
|
||||
#
|
||||
# The following are set after configuration is done:
|
||||
# GFLAGS_FOUND
|
||||
# GFLAGS_INCLUDE_DIRS
|
||||
# GFLAGS_LIBRARIES
|
||||
# GFLAGS_LIBRARYRARY_DIRS
|
||||
# The following are set after configuration is done: GFLAGS_FOUND
|
||||
# GFLAGS_INCLUDE_DIRS GFLAGS_LIBRARIES GFLAGS_LIBRARYRARY_DIRS
|
||||
include(FindPackageHandleStandardArgs)
|
||||
set(GFLAGS_ROOT_DIR "" CACHE PATH "Folder contains Gflags")
|
||||
set(GFLAGS_ROOT_DIR
|
||||
""
|
||||
CACHE PATH "Folder contains Gflags")
|
||||
|
||||
# We are testing only a couple of files in the include directories
|
||||
if(WIN32)
|
||||
find_path(GFLAGS_INCLUDE_DIR gflags/gflags.h
|
||||
PATHS ${GFLAGS_ROOT_DIR}/src/windows)
|
||||
PATHS ${GFLAGS_ROOT_DIR}/src/windows)
|
||||
else()
|
||||
find_path(GFLAGS_INCLUDE_DIR gflags/gflags.h
|
||||
PATHS ${GFLAGS_ROOT_DIR})
|
||||
find_path(GFLAGS_INCLUDE_DIR gflags/gflags.h PATHS ${GFLAGS_ROOT_DIR})
|
||||
endif()
|
||||
|
||||
if(WIN32)
|
||||
find_library(GFLAGS_LIBRARY_RELEASE
|
||||
NAMES libgflags
|
||||
PATHS ${GFLAGS_ROOT_DIR}
|
||||
PATH_SUFFIXES Release)
|
||||
find_library(
|
||||
GFLAGS_LIBRARY_RELEASE
|
||||
NAMES libgflags
|
||||
PATHS ${GFLAGS_ROOT_DIR}
|
||||
PATH_SUFFIXES Release)
|
||||
|
||||
find_library(GFLAGS_LIBRARY_DEBUG
|
||||
NAMES libgflags-debug
|
||||
PATHS ${GFLAGS_ROOT_DIR}
|
||||
PATH_SUFFIXES Debug)
|
||||
set(GFLAGS_LIBRARY optimized ${GFLAGS_LIBRARY_RELEASE} debug ${GFLAGS_LIBRARY_DEBUG})
|
||||
find_library(
|
||||
GFLAGS_LIBRARY_DEBUG
|
||||
NAMES libgflags-debug
|
||||
PATHS ${GFLAGS_ROOT_DIR}
|
||||
PATH_SUFFIXES Debug)
|
||||
set(GFLAGS_LIBRARY optimized ${GFLAGS_LIBRARY_RELEASE} debug
|
||||
${GFLAGS_LIBRARY_DEBUG})
|
||||
else()
|
||||
find_library(GFLAGS_LIBRARY gflags)
|
||||
endif()
|
||||
|
||||
find_package_handle_standard_args(
|
||||
gflags DEFAULT_MSG GFLAGS_INCLUDE_DIR GFLAGS_LIBRARY)
|
||||
find_package_handle_standard_args(gflags DEFAULT_MSG GFLAGS_INCLUDE_DIR
|
||||
GFLAGS_LIBRARY)
|
||||
|
||||
if(GFLAGS_FOUND)
|
||||
message(
|
||||
STATUS
|
||||
"Caffe2: Found gflags (include: ${GFLAGS_INCLUDE_DIR}, "
|
||||
"library: ${GFLAGS_LIBRARY})")
|
||||
message(STATUS "Caffe2: Found gflags (include: ${GFLAGS_INCLUDE_DIR}, "
|
||||
"library: ${GFLAGS_LIBRARY})")
|
||||
add_library(gflags UNKNOWN IMPORTED)
|
||||
set_property(
|
||||
TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARY})
|
||||
set_property(
|
||||
TARGET gflags PROPERTY INTERFACE_INCLUDE_DIRECTORIES
|
||||
${GFLAGS_INCLUDE_DIR})
|
||||
set_property(TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARY})
|
||||
set_property(TARGET gflags PROPERTY INTERFACE_INCLUDE_DIRECTORIES
|
||||
${GFLAGS_INCLUDE_DIR})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# After above, we should have the gflags target now.
|
||||
if(NOT TARGET gflags)
|
||||
message(WARNING
|
||||
message(
|
||||
WARNING
|
||||
"Caffe2: gflags cannot be found. Depending on whether you are building "
|
||||
"Caffe2 or a Caffe2 dependent library, the next warning / error will "
|
||||
"give you more info.")
|
||||
|
||||
@ -10,60 +10,56 @@ if(TARGET glog::glog)
|
||||
message(STATUS "Caffe2: Found glog with new-style glog target.")
|
||||
elseif(GLOG_FOUND)
|
||||
message(
|
||||
STATUS
|
||||
STATUS
|
||||
"Caffe2: Found glog with old-style glog starget. Glog never shipped "
|
||||
"old style glog targets, so somewhere in your cmake path there might "
|
||||
"be a custom Findglog.cmake file that got triggered. We will make a "
|
||||
"best effort to create the new style glog target for you.")
|
||||
add_library(glog::glog UNKNOWN IMPORTED)
|
||||
set_property(
|
||||
TARGET glog::glog PROPERTY IMPORTED_LOCATION ${GLOG_LIBRARY})
|
||||
set_property(
|
||||
TARGET glog::glog PROPERTY INTERFACE_INCLUDE_DIRECTORIES
|
||||
${GLOG_INCLUDE_DIR})
|
||||
set_property(TARGET glog::glog PROPERTY IMPORTED_LOCATION ${GLOG_LIBRARY})
|
||||
set_property(TARGET glog::glog PROPERTY INTERFACE_INCLUDE_DIRECTORIES
|
||||
${GLOG_INCLUDE_DIR})
|
||||
else()
|
||||
message(STATUS "Caffe2: Cannot find glog automatically. Using legacy find.")
|
||||
|
||||
# - Try to find Glog
|
||||
# * Try to find Glog
|
||||
#
|
||||
# The following variables are optionally searched for defaults
|
||||
# GLOG_ROOT_DIR: Base directory where all GLOG components are found
|
||||
# The following variables are optionally searched for defaults GLOG_ROOT_DIR:
|
||||
# Base directory where all GLOG components are found
|
||||
#
|
||||
# The following are set after configuration is done:
|
||||
# GLOG_FOUND
|
||||
# GLOG_INCLUDE_DIRS
|
||||
# GLOG_LIBRARIES
|
||||
# GLOG_LIBRARYRARY_DIRS
|
||||
# The following are set after configuration is done: GLOG_FOUND
|
||||
# GLOG_INCLUDE_DIRS GLOG_LIBRARIES GLOG_LIBRARYRARY_DIRS
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
set(GLOG_ROOT_DIR "" CACHE PATH "Folder contains Google glog")
|
||||
set(GLOG_ROOT_DIR
|
||||
""
|
||||
CACHE PATH "Folder contains Google glog")
|
||||
if(NOT WIN32)
|
||||
find_path(GLOG_INCLUDE_DIR glog/logging.h
|
||||
PATHS ${GLOG_ROOT_DIR})
|
||||
find_path(GLOG_INCLUDE_DIR glog/logging.h PATHS ${GLOG_ROOT_DIR})
|
||||
endif()
|
||||
|
||||
find_library(GLOG_LIBRARY glog
|
||||
PATHS ${GLOG_ROOT_DIR}
|
||||
PATH_SUFFIXES lib lib64)
|
||||
find_library(
|
||||
GLOG_LIBRARY glog
|
||||
PATHS ${GLOG_ROOT_DIR}
|
||||
PATH_SUFFIXES lib lib64)
|
||||
|
||||
find_package_handle_standard_args(glog DEFAULT_MSG GLOG_INCLUDE_DIR GLOG_LIBRARY)
|
||||
find_package_handle_standard_args(glog DEFAULT_MSG GLOG_INCLUDE_DIR
|
||||
GLOG_LIBRARY)
|
||||
|
||||
if(GLOG_FOUND)
|
||||
message(STATUS
|
||||
"Caffe2: Found glog (include: ${GLOG_INCLUDE_DIR}, "
|
||||
"library: ${GLOG_LIBRARY})")
|
||||
message(STATUS "Caffe2: Found glog (include: ${GLOG_INCLUDE_DIR}, "
|
||||
"library: ${GLOG_LIBRARY})")
|
||||
add_library(glog::glog UNKNOWN IMPORTED)
|
||||
set_property(
|
||||
TARGET glog::glog PROPERTY IMPORTED_LOCATION ${GLOG_LIBRARY})
|
||||
set_property(
|
||||
TARGET glog::glog PROPERTY INTERFACE_INCLUDE_DIRECTORIES
|
||||
${GLOG_INCLUDE_DIR})
|
||||
set_property(TARGET glog::glog PROPERTY IMPORTED_LOCATION ${GLOG_LIBRARY})
|
||||
set_property(TARGET glog::glog PROPERTY INTERFACE_INCLUDE_DIRECTORIES
|
||||
${GLOG_INCLUDE_DIR})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# After above, we should have the glog::glog target now.
|
||||
if(NOT TARGET glog::glog)
|
||||
message(WARNING
|
||||
message(
|
||||
WARNING
|
||||
"Caffe2: glog cannot be found. Depending on whether you are building "
|
||||
"Caffe2 or a Caffe2 dependent library, the next warning / error will "
|
||||
"give you more info.")
|
||||
|
||||
@ -16,11 +16,12 @@ foreach(MKL_LIB IN LISTS MKL_LIBRARIES)
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
# TODO: This is a hack, it will not pick up architecture dependent
|
||||
# MKL libraries correctly; see https://github.com/pytorch/pytorch/issues/73008
|
||||
# TODO: This is a hack, it will not pick up architecture dependent MKL libraries
|
||||
# correctly; see https://github.com/pytorch/pytorch/issues/73008
|
||||
set_property(
|
||||
TARGET caffe2::mkl PROPERTY INTERFACE_LINK_DIRECTORIES
|
||||
${MKL_ROOT}/lib ${MKL_ROOT}/lib/intel64 ${MKL_ROOT}/lib/intel64_win ${MKL_ROOT}/lib/win-x64)
|
||||
TARGET caffe2::mkl
|
||||
PROPERTY INTERFACE_LINK_DIRECTORIES ${MKL_ROOT}/lib ${MKL_ROOT}/lib/intel64
|
||||
${MKL_ROOT}/lib/intel64_win ${MKL_ROOT}/lib/win-x64)
|
||||
|
||||
if(UNIX)
|
||||
if(USE_STATIC_MKL)
|
||||
@ -33,7 +34,8 @@ if(UNIX)
|
||||
|
||||
# Match archive libraries starting with "libmkl_"
|
||||
if(MKL_LIB_NAME MATCHES "^libmkl_" AND MKL_LIB_NAME MATCHES ".a$")
|
||||
target_link_options(caffe2::mkl INTERFACE "-Wl,--exclude-libs,${MKL_LIB_NAME}")
|
||||
target_link_options(caffe2::mkl INTERFACE
|
||||
"-Wl,--exclude-libs,${MKL_LIB_NAME}")
|
||||
endif()
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
@ -10,9 +10,7 @@ if(NOT TARGET caffe2::mkldnn)
|
||||
add_library(caffe2::mkldnn INTERFACE IMPORTED)
|
||||
endif()
|
||||
|
||||
set_property(
|
||||
TARGET caffe2::mkldnn PROPERTY INTERFACE_INCLUDE_DIRECTORIES
|
||||
${MKLDNN_INCLUDE_DIR})
|
||||
set_property(
|
||||
TARGET caffe2::mkldnn PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
${MKLDNN_LIBRARIES})
|
||||
set_property(TARGET caffe2::mkldnn PROPERTY INTERFACE_INCLUDE_DIRECTORIES
|
||||
${MKLDNN_INCLUDE_DIR})
|
||||
set_property(TARGET caffe2::mkldnn PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
${MKLDNN_LIBRARIES})
|
||||
|
||||
@ -6,71 +6,92 @@ if(NOT Protobuf_FOUND)
|
||||
find_package(Protobuf MODULE QUIET)
|
||||
endif()
|
||||
|
||||
if((TARGET protobuf::libprotobuf OR TARGET protobuf::libprotobuf-lite) AND TARGET protobuf::protoc)
|
||||
if((TARGET protobuf::libprotobuf OR TARGET protobuf::libprotobuf-lite)
|
||||
AND TARGET protobuf::protoc)
|
||||
# Hooray. This is the most ideal situation, meaning that you either have a
|
||||
# Protobuf config file installed (like on Windows), or you are using a
|
||||
# modern CMake that ships with a FindProtobuf.cmake file that produces
|
||||
# modern targets.
|
||||
# Protobuf config file installed (like on Windows), or you are using a modern
|
||||
# CMake that ships with a FindProtobuf.cmake file that produces modern
|
||||
# targets.
|
||||
message(STATUS "Caffe2: Found protobuf with new-style protobuf targets.")
|
||||
elseif(Protobuf_FOUND OR PROTOBUF_FOUND)
|
||||
# If the modern targets are not present, we will generate them for you for
|
||||
# backward compatibility. This is backported from CMake's new FindProtobuf.cmake
|
||||
# content.
|
||||
# backward compatibility. This is backported from CMake's new
|
||||
# FindProtobuf.cmake content.
|
||||
if((NOT PROTOBUF_LIBRARY) AND (NOT PROTOBUF_LITE_LIBRARY))
|
||||
message(FATAL_ERROR
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"Caffe2: Found protobuf with old style targets, but could not find targets."
|
||||
" PROTOBUF_LIBRARY: " ${PROTOBUF_LIBRARY}
|
||||
" PROTOBUF_LITE_LIBRARY: " ${PROTOBUF_LITE_LIBRARY}
|
||||
" Protobuf_LIBRARY: " ${Protobuf_LIBRARY}
|
||||
" Protobuf_LITE_LIBRARY: " ${Protobuf_LITE_LIBRARY})
|
||||
" PROTOBUF_LIBRARY: "
|
||||
${PROTOBUF_LIBRARY}
|
||||
" PROTOBUF_LITE_LIBRARY: "
|
||||
${PROTOBUF_LITE_LIBRARY}
|
||||
" Protobuf_LIBRARY: "
|
||||
${Protobuf_LIBRARY}
|
||||
" Protobuf_LITE_LIBRARY: "
|
||||
${Protobuf_LITE_LIBRARY})
|
||||
endif()
|
||||
message(STATUS "Caffe2: Found protobuf with old-style protobuf targets.")
|
||||
|
||||
if(PROTOBUF_LIBRARY)
|
||||
if(NOT TARGET protobuf::libprotobuf)
|
||||
add_library(protobuf::libprotobuf UNKNOWN IMPORTED)
|
||||
set_target_properties(protobuf::libprotobuf PROPERTIES
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${PROTOBUF_INCLUDE_DIRS}")
|
||||
set_target_properties(
|
||||
protobuf::libprotobuf PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
|
||||
"${PROTOBUF_INCLUDE_DIRS}")
|
||||
endif()
|
||||
if(EXISTS "${PROTOBUF_LIBRARY}")
|
||||
set_target_properties(protobuf::libprotobuf PROPERTIES
|
||||
IMPORTED_LOCATION "${PROTOBUF_LIBRARY}")
|
||||
set_target_properties(protobuf::libprotobuf
|
||||
PROPERTIES IMPORTED_LOCATION "${PROTOBUF_LIBRARY}")
|
||||
endif()
|
||||
if(EXISTS "${PROTOBUF_LIBRARY_RELEASE}")
|
||||
set_property(TARGET protobuf::libprotobuf APPEND PROPERTY
|
||||
IMPORTED_CONFIGURATIONS RELEASE)
|
||||
set_target_properties(protobuf::libprotobuf PROPERTIES
|
||||
IMPORTED_LOCATION_RELEASE "${PROTOBUF_LIBRARY_RELEASE}")
|
||||
set_property(
|
||||
TARGET protobuf::libprotobuf
|
||||
APPEND
|
||||
PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
|
||||
set_target_properties(
|
||||
protobuf::libprotobuf PROPERTIES IMPORTED_LOCATION_RELEASE
|
||||
"${PROTOBUF_LIBRARY_RELEASE}")
|
||||
endif()
|
||||
if(EXISTS "${PROTOBUF_LIBRARY_DEBUG}")
|
||||
set_property(TARGET protobuf::libprotobuf APPEND PROPERTY
|
||||
IMPORTED_CONFIGURATIONS DEBUG)
|
||||
set_target_properties(protobuf::libprotobuf PROPERTIES
|
||||
IMPORTED_LOCATION_DEBUG "${PROTOBUF_LIBRARY_DEBUG}")
|
||||
set_property(
|
||||
TARGET protobuf::libprotobuf
|
||||
APPEND
|
||||
PROPERTY IMPORTED_CONFIGURATIONS DEBUG)
|
||||
set_target_properties(
|
||||
protobuf::libprotobuf PROPERTIES IMPORTED_LOCATION_DEBUG
|
||||
"${PROTOBUF_LIBRARY_DEBUG}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(PROTOBUF_LITE_LIBRARY)
|
||||
if(NOT TARGET protobuf::libprotobuf-lite)
|
||||
add_library(protobuf::libprotobuf-lite UNKNOWN IMPORTED)
|
||||
set_target_properties(protobuf::libprotobuf-lite PROPERTIES
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${PROTOBUF_INCLUDE_DIRS}")
|
||||
set_target_properties(
|
||||
protobuf::libprotobuf-lite PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
|
||||
"${PROTOBUF_INCLUDE_DIRS}")
|
||||
endif()
|
||||
if(EXISTS "${PROTOBUF_LITE_LIBRARY}")
|
||||
set_target_properties(protobuf::libprotobuf-lite PROPERTIES
|
||||
IMPORTED_LOCATION "${PROTOBUF_LITE_LIBRARY}")
|
||||
set_target_properties(
|
||||
protobuf::libprotobuf-lite PROPERTIES IMPORTED_LOCATION
|
||||
"${PROTOBUF_LITE_LIBRARY}")
|
||||
endif()
|
||||
if(EXISTS "${PROTOBUF_LITE_LIBRARY_RELEASE}")
|
||||
set_property(TARGET protobuf::libprotobuf-lite APPEND PROPERTY
|
||||
IMPORTED_CONFIGURATIONS RELEASE)
|
||||
set_target_properties(protobuf::libprotobuf-lite PROPERTIES
|
||||
IMPORTED_LOCATION_RELEASE "${PROTOBUF_LITE_LIBRARY_RELEASE}")
|
||||
set_property(
|
||||
TARGET protobuf::libprotobuf-lite
|
||||
APPEND
|
||||
PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
|
||||
set_target_properties(
|
||||
protobuf::libprotobuf-lite
|
||||
PROPERTIES IMPORTED_LOCATION_RELEASE "${PROTOBUF_LITE_LIBRARY_RELEASE}")
|
||||
endif()
|
||||
if(EXISTS "${PROTOBUF_LITE_LIBRARY_DEBUG}")
|
||||
set_property(TARGET protobuf::libprotobuf-lite APPEND PROPERTY
|
||||
IMPORTED_CONFIGURATIONS DEBUG)
|
||||
set_target_properties(protobuf::libprotobuf-lite PROPERTIES
|
||||
IMPORTED_LOCATION_DEBUG "${PROTOBUF_LITE_LIBRARY_DEBUG}")
|
||||
set_property(
|
||||
TARGET protobuf::libprotobuf-lite
|
||||
APPEND
|
||||
PROPERTY IMPORTED_CONFIGURATIONS DEBUG)
|
||||
set_target_properties(
|
||||
protobuf::libprotobuf-lite PROPERTIES IMPORTED_LOCATION_DEBUG
|
||||
"${PROTOBUF_LITE_LIBRARY_DEBUG}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@ -78,14 +99,16 @@ elseif(Protobuf_FOUND OR PROTOBUF_FOUND)
|
||||
if(NOT TARGET protobuf::protoc)
|
||||
add_executable(protobuf::protoc IMPORTED)
|
||||
endif()
|
||||
set_property(TARGET protobuf::protoc PROPERTY
|
||||
IMPORTED_LOCATION ${PROTOBUF_PROTOC_EXECUTABLE})
|
||||
set_property(TARGET protobuf::protoc PROPERTY IMPORTED_LOCATION
|
||||
${PROTOBUF_PROTOC_EXECUTABLE})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# After above, we should have the protobuf related target now.
|
||||
if((NOT TARGET protobuf::libprotobuf) AND (NOT TARGET protobuf::libprotobuf-lite))
|
||||
message(WARNING
|
||||
if((NOT TARGET protobuf::libprotobuf) AND (NOT TARGET protobuf::libprotobuf-lite
|
||||
))
|
||||
message(
|
||||
WARNING
|
||||
"Protobuf cannot be found. Depending on whether you are building Caffe2 "
|
||||
"or a Caffe2 dependent library, the next warning / error will give you "
|
||||
"more info.")
|
||||
|
||||
@ -1,37 +1,51 @@
|
||||
################################################################################################
|
||||
# ##############################################################################
|
||||
# Exclude and prepend functionalities
|
||||
function(exclude OUTPUT INPUT)
|
||||
set(EXCLUDES ${ARGN})
|
||||
foreach(EXCLUDE ${EXCLUDES})
|
||||
list(REMOVE_ITEM INPUT "${EXCLUDE}")
|
||||
endforeach()
|
||||
set(${OUTPUT} ${INPUT} PARENT_SCOPE)
|
||||
set(EXCLUDES ${ARGN})
|
||||
foreach(EXCLUDE ${EXCLUDES})
|
||||
list(REMOVE_ITEM INPUT "${EXCLUDE}")
|
||||
endforeach()
|
||||
set(${OUTPUT}
|
||||
${INPUT}
|
||||
PARENT_SCOPE)
|
||||
endfunction(exclude)
|
||||
|
||||
function(prepend OUTPUT PREPEND)
|
||||
set(OUT "")
|
||||
foreach(ITEM ${ARGN})
|
||||
list(APPEND OUT "${PREPEND}${ITEM}")
|
||||
endforeach()
|
||||
set(${OUTPUT} ${OUT} PARENT_SCOPE)
|
||||
set(OUT "")
|
||||
foreach(ITEM ${ARGN})
|
||||
list(APPEND OUT "${PREPEND}${ITEM}")
|
||||
endforeach()
|
||||
set(${OUTPUT}
|
||||
${OUT}
|
||||
PARENT_SCOPE)
|
||||
endfunction(prepend)
|
||||
|
||||
################################################################################################
|
||||
# ##############################################################################
|
||||
# Parses a version string that might have values beyond major, minor, and patch
|
||||
# and set version variables for the library.
|
||||
# Usage:
|
||||
# caffe2_parse_version_str(<library_name> <version_string>)
|
||||
# and set version variables for the library. Usage:
|
||||
# caffe2_parse_version_str(<library_name> <version_string>)
|
||||
function(caffe2_parse_version_str LIBNAME VERSIONSTR)
|
||||
string(REGEX REPLACE "^([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_MAJOR "${VERSIONSTR}")
|
||||
string(REGEX REPLACE "^[0-9]+\\.([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_MINOR "${VERSIONSTR}")
|
||||
string(REGEX REPLACE "[0-9]+\\.[0-9]+\\.([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_PATCH "${VERSIONSTR}")
|
||||
set(${LIBNAME}_VERSION_MAJOR ${${LIBNAME}_VERSION_MAJOR} ${ARGN} PARENT_SCOPE)
|
||||
set(${LIBNAME}_VERSION_MINOR ${${LIBNAME}_VERSION_MINOR} ${ARGN} PARENT_SCOPE)
|
||||
set(${LIBNAME}_VERSION_PATCH ${${LIBNAME}_VERSION_PATCH} ${ARGN} PARENT_SCOPE)
|
||||
set(${LIBNAME}_VERSION "${${LIBNAME}_VERSION_MAJOR}.${${LIBNAME}_VERSION_MINOR}.${${LIBNAME}_VERSION_PATCH}" PARENT_SCOPE)
|
||||
string(REGEX REPLACE "^([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_MAJOR
|
||||
"${VERSIONSTR}")
|
||||
string(REGEX REPLACE "^[0-9]+\\.([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_MINOR
|
||||
"${VERSIONSTR}")
|
||||
string(REGEX REPLACE "[0-9]+\\.[0-9]+\\.([0-9]+).*$" "\\1"
|
||||
${LIBNAME}_VERSION_PATCH "${VERSIONSTR}")
|
||||
set(${LIBNAME}_VERSION_MAJOR
|
||||
${${LIBNAME}_VERSION_MAJOR} ${ARGN}
|
||||
PARENT_SCOPE)
|
||||
set(${LIBNAME}_VERSION_MINOR
|
||||
${${LIBNAME}_VERSION_MINOR} ${ARGN}
|
||||
PARENT_SCOPE)
|
||||
set(${LIBNAME}_VERSION_PATCH
|
||||
${${LIBNAME}_VERSION_PATCH} ${ARGN}
|
||||
PARENT_SCOPE)
|
||||
set(${LIBNAME}_VERSION
|
||||
"${${LIBNAME}_VERSION_MAJOR}.${${LIBNAME}_VERSION_MINOR}.${${LIBNAME}_VERSION_PATCH}"
|
||||
PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
###
|
||||
#
|
||||
# Removes common indentation from a block of text to produce code suitable for
|
||||
# setting to `python -c`, or using with pycmd. This allows multiline code to be
|
||||
# nested nicely in the surrounding code structure.
|
||||
@ -39,9 +53,8 @@ endfunction()
|
||||
# This function respsects Python_EXECUTABLE if it defined, otherwise it uses
|
||||
# `python` and hopes for the best. An error will be thrown if it is not found.
|
||||
#
|
||||
# Args:
|
||||
# outvar : variable that will hold the stdout of the python command
|
||||
# text : text to remove indentation from
|
||||
# Args: outvar : variable that will hold the stdout of the python command text :
|
||||
# text to remove indentation from
|
||||
#
|
||||
function(dedent outvar text)
|
||||
# Use Python_EXECUTABLE if it is defined, otherwise default to python
|
||||
@ -50,7 +63,9 @@ function(dedent outvar text)
|
||||
else()
|
||||
set(_python_exe "${Python_EXECUTABLE}")
|
||||
endif()
|
||||
set(_fixup_cmd "import sys; from textwrap import dedent; print(dedent(sys.stdin.read()))")
|
||||
set(_fixup_cmd
|
||||
"import sys; from textwrap import dedent; print(dedent(sys.stdin.read()))"
|
||||
)
|
||||
file(WRITE "${CMAKE_BINARY_DIR}/indented.txt" "${text}")
|
||||
execute_process(
|
||||
COMMAND "${_python_exe}" -c "${_fixup_cmd}"
|
||||
@ -60,14 +75,16 @@ function(dedent outvar text)
|
||||
if(NOT _dedent_exitcode EQUAL 0)
|
||||
message(ERROR " Failed to remove indentation from: \n\"\"\"\n${text}\n\"\"\"
|
||||
Python dedent failed with error code: ${_dedent_exitcode}")
|
||||
message(FATAL_ERROR " Python dedent failed with error code: ${_dedent_exitcode}")
|
||||
message(
|
||||
FATAL_ERROR " Python dedent failed with error code: ${_dedent_exitcode}")
|
||||
endif()
|
||||
# Remove supurflous newlines (artifacts of print)
|
||||
string(STRIP "${_dedent_text}" _dedent_text)
|
||||
set(${outvar} "${_dedent_text}" PARENT_SCOPE)
|
||||
set(${outvar}
|
||||
"${_dedent_text}"
|
||||
PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
|
||||
function(pycmd_no_exit outvar exitcode cmd)
|
||||
# Use Python_EXECUTABLE if it is defined, otherwise default to python
|
||||
if("${Python_EXECUTABLE}" STREQUAL "")
|
||||
@ -82,12 +99,15 @@ function(pycmd_no_exit outvar exitcode cmd)
|
||||
OUTPUT_VARIABLE _output)
|
||||
# Remove supurflous newlines (artifacts of print)
|
||||
string(STRIP "${_output}" _output)
|
||||
set(${outvar} "${_output}" PARENT_SCOPE)
|
||||
set(${exitcode} "${_exitcode}" PARENT_SCOPE)
|
||||
set(${outvar}
|
||||
"${_output}"
|
||||
PARENT_SCOPE)
|
||||
set(${exitcode}
|
||||
"${_exitcode}"
|
||||
PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
|
||||
###
|
||||
#
|
||||
# Helper function to run `python -c "<cmd>"` and capture the results of stdout
|
||||
#
|
||||
# Runs a python command and populates an outvar with the result of stdout.
|
||||
@ -97,135 +117,140 @@ endfunction()
|
||||
# This function respsects Python_EXECUTABLE if it defined, otherwise it uses
|
||||
# `python` and hopes for the best. An error will be thrown if it is not found.
|
||||
#
|
||||
# Args:
|
||||
# outvar : variable that will hold the stdout of the python command
|
||||
# cmd : text representing a (possibly multiline) block of python code
|
||||
# Args: outvar : variable that will hold the stdout of the python command cmd :
|
||||
# text representing a (possibly multiline) block of python code
|
||||
#
|
||||
function(pycmd outvar cmd)
|
||||
dedent(_dedent_cmd "${cmd}")
|
||||
pycmd_no_exit(_output _exitcode "${_dedent_cmd}")
|
||||
|
||||
if(NOT _exitcode EQUAL 0)
|
||||
message(ERROR " Failed when running python code: \"\"\"\n${_dedent_cmd}\n\"\"\"")
|
||||
message(ERROR
|
||||
" Failed when running python code: \"\"\"\n${_dedent_cmd}\n\"\"\"")
|
||||
message(FATAL_ERROR " Python command failed with error code: ${_exitcode}")
|
||||
endif()
|
||||
# Remove supurflous newlines (artifacts of print)
|
||||
string(STRIP "${_output}" _output)
|
||||
set(${outvar} "${_output}" PARENT_SCOPE)
|
||||
set(${outvar}
|
||||
"${_output}"
|
||||
PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
|
||||
##############################################################################
|
||||
# ##############################################################################
|
||||
# Macro to update cached options.
|
||||
macro(caffe2_update_option variable value)
|
||||
if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO)
|
||||
get_property(__help_string CACHE ${variable} PROPERTY HELPSTRING)
|
||||
set(${variable} ${value} CACHE BOOL ${__help_string} FORCE)
|
||||
get_property(
|
||||
__help_string
|
||||
CACHE ${variable}
|
||||
PROPERTY HELPSTRING)
|
||||
set(${variable}
|
||||
${value}
|
||||
CACHE BOOL ${__help_string} FORCE)
|
||||
else()
|
||||
set(${variable} ${value})
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
|
||||
##############################################################################
|
||||
# ##############################################################################
|
||||
# Add an interface library definition that is dependent on the source.
|
||||
#
|
||||
# It's probably easiest to explain why this macro exists, by describing
|
||||
# what things would look like if we didn't have this macro.
|
||||
# It's probably easiest to explain why this macro exists, by describing what
|
||||
# things would look like if we didn't have this macro.
|
||||
#
|
||||
# Let's suppose we want to statically link against torch. We've defined
|
||||
# a library in cmake called torch, and we might think that we just
|
||||
# target_link_libraries(my-app PUBLIC torch). This will result in a
|
||||
# linker argument 'libtorch.a' getting passed to the linker.
|
||||
# Let's suppose we want to statically link against torch. We've defined a
|
||||
# library in cmake called torch, and we might think that we just
|
||||
# target_link_libraries(my-app PUBLIC torch). This will result in a linker
|
||||
# argument 'libtorch.a' getting passed to the linker.
|
||||
#
|
||||
# Unfortunately, this link command is wrong! We have static
|
||||
# initializers in libtorch.a that would get improperly pruned by
|
||||
# the default link settings. What we actually need is for you
|
||||
# to do -Wl,--whole-archive,libtorch.a -Wl,--no-whole-archive to ensure
|
||||
# that we keep all symbols, even if they are (seemingly) not used.
|
||||
# Unfortunately, this link command is wrong! We have static initializers in
|
||||
# libtorch.a that would get improperly pruned by the default link settings. What
|
||||
# we actually need is for you to do -Wl,--whole-archive,libtorch.a
|
||||
# -Wl,--no-whole-archive to ensure that we keep all symbols, even if they are
|
||||
# (seemingly) not used.
|
||||
#
|
||||
# What caffe2_interface_library does is create an interface library
|
||||
# that indirectly depends on the real library, but sets up the link
|
||||
# arguments so that you get all of the extra link settings you need.
|
||||
# The result is not a "real" library, and so we have to manually
|
||||
# copy over necessary properties from the original target.
|
||||
# What caffe2_interface_library does is create an interface library that
|
||||
# indirectly depends on the real library, but sets up the link arguments so that
|
||||
# you get all of the extra link settings you need. The result is not a "real"
|
||||
# library, and so we have to manually copy over necessary properties from the
|
||||
# original target.
|
||||
#
|
||||
# (The discussion above is about static libraries, but a similar
|
||||
# situation occurs for dynamic libraries: if no symbols are used from
|
||||
# a dynamic library, it will be pruned unless you are --no-as-needed)
|
||||
# (The discussion above is about static libraries, but a similar situation
|
||||
# occurs for dynamic libraries: if no symbols are used from a dynamic library,
|
||||
# it will be pruned unless you are --no-as-needed)
|
||||
macro(caffe2_interface_library SRC DST)
|
||||
add_library(${DST} INTERFACE)
|
||||
add_dependencies(${DST} ${SRC})
|
||||
# Depending on the nature of the source library as well as the compiler,
|
||||
# determine the needed compilation flags.
|
||||
get_target_property(__src_target_type ${SRC} TYPE)
|
||||
# Depending on the type of the source library, we will set up the
|
||||
# link command for the specific SRC library.
|
||||
# Depending on the type of the source library, we will set up the link command
|
||||
# for the specific SRC library.
|
||||
if(${__src_target_type} STREQUAL "STATIC_LIBRARY")
|
||||
# In the case of static library, we will need to add whole-static flags.
|
||||
target_link_libraries(${DST} INTERFACE $<LINK_LIBRARY:WHOLE_ARCHIVE,${SRC}>)
|
||||
# Link all interface link libraries of the src target as well.
|
||||
# For static library, we need to explicitly depend on all the libraries
|
||||
# that are the dependent library of the source library. Note that we cannot
|
||||
# use the populated INTERFACE_LINK_LIBRARIES property, because if one of the
|
||||
# dependent library is not a target, cmake creates a $<LINK_ONLY:src> wrapper
|
||||
# and then one is not able to find target "src". For more discussions, check
|
||||
# https://cmake.org/Bug/print_bug_page.php?bug_id=15415
|
||||
# https://cmake.org/pipermail/cmake-developers/2013-May/019019.html
|
||||
# Link all interface link libraries of the src target as well. For static
|
||||
# library, we need to explicitly depend on all the libraries that are the
|
||||
# dependent library of the source library. Note that we cannot use the
|
||||
# populated INTERFACE_LINK_LIBRARIES property, because if one of the
|
||||
# dependent library is not a target, cmake creates a $<LINK_ONLY:src>
|
||||
# wrapper and then one is not able to find target "src". For more
|
||||
# discussions, check https://cmake.org/Bug/print_bug_page.php?bug_id=15415
|
||||
# https://cmake.org/pipermail/cmake-developers/2013-May/019019.html
|
||||
# Specifically the following quote
|
||||
#
|
||||
# """
|
||||
# For STATIC libraries we can define that the PUBLIC/PRIVATE/INTERFACE keys
|
||||
# are ignored for linking and that it always populates both LINK_LIBRARIES
|
||||
# LINK_INTERFACE_LIBRARIES. Note that for STATIC libraries the
|
||||
# LINK_LIBRARIES property will not be used for anything except build-order
|
||||
# dependencies.
|
||||
# """
|
||||
target_link_libraries(${DST} INTERFACE
|
||||
$<TARGET_PROPERTY:${SRC},LINK_LIBRARIES>)
|
||||
# """ For STATIC libraries we can define that the PUBLIC/PRIVATE/INTERFACE
|
||||
# keys are ignored for linking and that it always populates both
|
||||
# LINK_LIBRARIES LINK_INTERFACE_LIBRARIES. Note that for STATIC libraries
|
||||
# the LINK_LIBRARIES property will not be used for anything except
|
||||
# build-order dependencies. """
|
||||
target_link_libraries(${DST}
|
||||
INTERFACE $<TARGET_PROPERTY:${SRC},LINK_LIBRARIES>)
|
||||
elseif(${__src_target_type} STREQUAL "SHARED_LIBRARY")
|
||||
if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
|
||||
target_link_libraries(${DST} INTERFACE
|
||||
"-Wl,--no-as-needed,\"$<TARGET_FILE:${SRC}>\" -Wl,--as-needed")
|
||||
target_link_libraries(
|
||||
${DST}
|
||||
INTERFACE "-Wl,--no-as-needed,\"$<TARGET_FILE:${SRC}>\" -Wl,--as-needed"
|
||||
)
|
||||
else()
|
||||
target_link_libraries(${DST} INTERFACE ${SRC})
|
||||
endif()
|
||||
# Link all interface link libraries of the src target as well.
|
||||
# For shared libraries, we can simply depend on the INTERFACE_LINK_LIBRARIES
|
||||
# property of the target.
|
||||
target_link_libraries(${DST} INTERFACE
|
||||
$<TARGET_PROPERTY:${SRC},INTERFACE_LINK_LIBRARIES>)
|
||||
# Link all interface link libraries of the src target as well. For shared
|
||||
# libraries, we can simply depend on the INTERFACE_LINK_LIBRARIES property
|
||||
# of the target.
|
||||
target_link_libraries(
|
||||
${DST} INTERFACE $<TARGET_PROPERTY:${SRC},INTERFACE_LINK_LIBRARIES>)
|
||||
else()
|
||||
message(FATAL_ERROR
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"You made a CMake build file error: target " ${SRC}
|
||||
" must be of type either STATIC_LIBRARY or SHARED_LIBRARY. However, "
|
||||
"I got " ${__src_target_type} ".")
|
||||
endif()
|
||||
# For all other interface properties, manually inherit from the source target.
|
||||
set_target_properties(${DST} PROPERTIES
|
||||
INTERFACE_COMPILE_DEFINITIONS
|
||||
$<TARGET_PROPERTY:${SRC},INTERFACE_COMPILE_DEFINITIONS>
|
||||
INTERFACE_COMPILE_OPTIONS
|
||||
$<TARGET_PROPERTY:${SRC},INTERFACE_COMPILE_OPTIONS>
|
||||
INTERFACE_INCLUDE_DIRECTORIES
|
||||
$<TARGET_PROPERTY:${SRC},INTERFACE_INCLUDE_DIRECTORIES>
|
||||
INTERFACE_SYSTEM_INCLUDE_DIRECTORIES
|
||||
$<TARGET_PROPERTY:${SRC},INTERFACE_SYSTEM_INCLUDE_DIRECTORIES>)
|
||||
set_target_properties(
|
||||
${DST}
|
||||
PROPERTIES INTERFACE_COMPILE_DEFINITIONS
|
||||
$<TARGET_PROPERTY:${SRC},INTERFACE_COMPILE_DEFINITIONS>
|
||||
INTERFACE_COMPILE_OPTIONS
|
||||
$<TARGET_PROPERTY:${SRC},INTERFACE_COMPILE_OPTIONS>
|
||||
INTERFACE_INCLUDE_DIRECTORIES
|
||||
$<TARGET_PROPERTY:${SRC},INTERFACE_INCLUDE_DIRECTORIES>
|
||||
INTERFACE_SYSTEM_INCLUDE_DIRECTORIES
|
||||
$<TARGET_PROPERTY:${SRC},INTERFACE_SYSTEM_INCLUDE_DIRECTORIES>)
|
||||
endmacro()
|
||||
|
||||
|
||||
##############################################################################
|
||||
# ##############################################################################
|
||||
# Creating a Caffe2 binary target with sources specified with relative path.
|
||||
# Usage:
|
||||
# caffe2_binary_target(target_name_or_src <src1> [<src2>] [<src3>] ...)
|
||||
# Usage: caffe2_binary_target(target_name_or_src <src1> [<src2>] [<src3>] ...)
|
||||
# If only target_name_or_src is specified, this target is build with one single
|
||||
# source file and the target name is autogen from the filename. Otherwise, the
|
||||
# target name is given by the first argument and the rest are the source files
|
||||
# to build the target.
|
||||
function(caffe2_binary_target target_name_or_src)
|
||||
# https://cmake.org/cmake/help/latest/command/function.html
|
||||
# Checking that ARGC is greater than # is the only way to ensure
|
||||
# that ARGV# was passed to the function as an extra argument.
|
||||
# https://cmake.org/cmake/help/latest/command/function.html Checking that ARGC
|
||||
# is greater than # is the only way to ensure that ARGV# was passed to the
|
||||
# function as an extra argument.
|
||||
if(ARGC GREATER 1)
|
||||
set(__target ${target_name_or_src})
|
||||
prepend(__srcs "${CMAKE_CURRENT_SOURCE_DIR}/" "${ARGN}")
|
||||
@ -257,11 +282,9 @@ function(caffe2_hip_binary_target target_name_or_src)
|
||||
target_include_directories(${__target} PRIVATE ${Caffe2_HIP_INCLUDE})
|
||||
endfunction()
|
||||
|
||||
|
||||
##############################################################################
|
||||
# ##############################################################################
|
||||
# Multiplex between adding libraries for CUDA versus HIP (AMD Software Stack).
|
||||
# Usage:
|
||||
# torch_cuda_based_add_library(cuda_target)
|
||||
# Usage: torch_cuda_based_add_library(cuda_target)
|
||||
#
|
||||
macro(torch_cuda_based_add_library cuda_target)
|
||||
if(USE_ROCM)
|
||||
@ -269,34 +292,38 @@ macro(torch_cuda_based_add_library cuda_target)
|
||||
elseif(USE_CUDA)
|
||||
add_library(${cuda_target} ${ARGN})
|
||||
else()
|
||||
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
##############################################################################
|
||||
# Get the HIP arch flags specified by PYTORCH_ROCM_ARCH.
|
||||
# Usage:
|
||||
# torch_hip_get_arch_list(variable_to_store_flags)
|
||||
# ##############################################################################
|
||||
# Get the HIP arch flags specified by PYTORCH_ROCM_ARCH. Usage:
|
||||
# torch_hip_get_arch_list(variable_to_store_flags)
|
||||
#
|
||||
macro(torch_hip_get_arch_list store_var)
|
||||
if(DEFINED ENV{PYTORCH_ROCM_ARCH})
|
||||
set(_TMP $ENV{PYTORCH_ROCM_ARCH})
|
||||
else()
|
||||
# Use arch of installed GPUs as default
|
||||
execute_process(COMMAND "rocm_agent_enumerator" COMMAND bash "-c" "grep -v gfx000 | sort -u | xargs | tr -d '\n'"
|
||||
RESULT_VARIABLE ROCM_AGENT_ENUMERATOR_RESULT
|
||||
OUTPUT_VARIABLE ROCM_ARCH_INSTALLED)
|
||||
execute_process(
|
||||
COMMAND "rocm_agent_enumerator"
|
||||
COMMAND bash "-c" "grep -v gfx000 | sort -u | xargs | tr -d '\n'"
|
||||
RESULT_VARIABLE ROCM_AGENT_ENUMERATOR_RESULT
|
||||
OUTPUT_VARIABLE ROCM_ARCH_INSTALLED)
|
||||
if(NOT ROCM_AGENT_ENUMERATOR_RESULT EQUAL 0)
|
||||
message(FATAL_ERROR " Could not detect ROCm arch for GPUs on machine. Result: '${ROCM_AGENT_ENUMERATOR_RESULT}'")
|
||||
message(
|
||||
FATAL_ERROR
|
||||
" Could not detect ROCm arch for GPUs on machine. Result: '${ROCM_AGENT_ENUMERATOR_RESULT}'"
|
||||
)
|
||||
endif()
|
||||
set(_TMP ${ROCM_ARCH_INSTALLED})
|
||||
endif()
|
||||
string(REPLACE " " ";" ${store_var} "${_TMP}")
|
||||
endmacro()
|
||||
|
||||
##############################################################################
|
||||
# Get the XPU arch flags specified by TORCH_XPU_ARCH_LIST.
|
||||
# Usage:
|
||||
# torch_xpu_get_arch_list(variable_to_store_flags)
|
||||
# ##############################################################################
|
||||
# Get the XPU arch flags specified by TORCH_XPU_ARCH_LIST. Usage:
|
||||
# torch_xpu_get_arch_list(variable_to_store_flags)
|
||||
#
|
||||
macro(torch_xpu_get_arch_list store_var)
|
||||
if(DEFINED ENV{TORCH_XPU_ARCH_LIST})
|
||||
@ -304,19 +331,19 @@ macro(torch_xpu_get_arch_list store_var)
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
##############################################################################
|
||||
# ##############################################################################
|
||||
# Get the NVCC arch flags specified by TORCH_CUDA_ARCH_LIST and CUDA_ARCH_NAME.
|
||||
# Usage:
|
||||
# torch_cuda_get_nvcc_gencode_flag(variable_to_store_flags)
|
||||
# Usage: torch_cuda_get_nvcc_gencode_flag(variable_to_store_flags)
|
||||
#
|
||||
macro(torch_cuda_get_nvcc_gencode_flag store_var)
|
||||
# setting nvcc arch flags
|
||||
# We need to support the explicitly and conveniently defined TORCH_CUDA_ARCH_LIST
|
||||
# setting nvcc arch flags We need to support the explicitly and conveniently
|
||||
# defined TORCH_CUDA_ARCH_LIST
|
||||
if((NOT DEFINED TORCH_CUDA_ARCH_LIST) AND (DEFINED ENV{TORCH_CUDA_ARCH_LIST}))
|
||||
set(TORCH_CUDA_ARCH_LIST $ENV{TORCH_CUDA_ARCH_LIST})
|
||||
endif()
|
||||
if(DEFINED CUDA_ARCH_NAME)
|
||||
message(WARNING
|
||||
message(
|
||||
WARNING
|
||||
"CUDA_ARCH_NAME is no longer used. Use TORCH_CUDA_ARCH_LIST instead. "
|
||||
"Right now, CUDA_ARCH_NAME is ${CUDA_ARCH_NAME} and "
|
||||
"TORCH_CUDA_ARCH_LIST is ${TORCH_CUDA_ARCH_LIST}.")
|
||||
@ -331,11 +358,8 @@ macro(torch_cuda_get_nvcc_gencode_flag store_var)
|
||||
cuda_select_nvcc_arch_flags(${store_var} ${TORCH_CUDA_ARCH_LIST})
|
||||
endmacro()
|
||||
|
||||
|
||||
##############################################################################
|
||||
# Add standard compile options.
|
||||
# Usage:
|
||||
# torch_compile_options(lib_name)
|
||||
# ##############################################################################
|
||||
# Add standard compile options. Usage: torch_compile_options(lib_name)
|
||||
function(torch_compile_options libname)
|
||||
set_property(TARGET ${libname} PROPERTY CXX_STANDARD 17)
|
||||
|
||||
@ -349,78 +373,85 @@ function(torch_compile_options libname)
|
||||
endif()
|
||||
|
||||
if(${MSVC_TOOLSET_VERSION} GREATER_EQUAL 142)
|
||||
# Add /permissive- flag for conformance mode to the compiler.
|
||||
# This will force more strict check to the code standard.
|
||||
# 1. From MS official doc: https://learn.microsoft.com/en-us/cpp/build/reference/permissive-standards-conformance?view=msvc-170#remarks
|
||||
# By default, the /permissive- option is set in new projects created by Visual Studio 2017 version 15.5 and later versions.
|
||||
# We set the /permissive- flag from VS 2019 (MSVC_TOOLSET_VERSION 142) to avoid compiling issues for old toolkit.
|
||||
# 2. For MSVC VERSION: https://cmake.org/cmake/help/latest/variable/MSVC_TOOLSET_VERSION.html
|
||||
target_compile_options(${libname} PUBLIC $<$<COMPILE_LANGUAGE:CXX>:/permissive->)
|
||||
# Add /permissive- flag for conformance mode to the compiler. This will
|
||||
# force more strict check to the code standard. 1. From MS official doc:
|
||||
# https://learn.microsoft.com/en-us/cpp/build/reference/permissive-standards-conformance?view=msvc-170#remarks
|
||||
# By default, the /permissive- option is set in new projects created by
|
||||
# Visual Studio 2017 version 15.5 and later versions. We set the
|
||||
# /permissive- flag from VS 2019 (MSVC_TOOLSET_VERSION 142) to avoid
|
||||
# compiling issues for old toolkit. 2. For MSVC VERSION:
|
||||
# https://cmake.org/cmake/help/latest/variable/MSVC_TOOLSET_VERSION.html
|
||||
target_compile_options(${libname}
|
||||
PUBLIC $<$<COMPILE_LANGUAGE:CXX>:/permissive->)
|
||||
endif()
|
||||
# This option enables a token-based preprocessor that conforms to C99 and C++11 and later standards.
|
||||
# This option is available since VS 2017.
|
||||
# For MS official doc: https://learn.microsoft.com/en-us/cpp/build/reference/zc-preprocessor
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zc:preprocessor" PARENT_SCOPE)
|
||||
# This option enables a token-based preprocessor that conforms to C99 and
|
||||
# C++11 and later standards. This option is available since VS 2017. For MS
|
||||
# official doc:
|
||||
# https://learn.microsoft.com/en-us/cpp/build/reference/zc-preprocessor
|
||||
set(CMAKE_CXX_FLAGS
|
||||
"${CMAKE_CXX_FLAGS} /Zc:preprocessor"
|
||||
PARENT_SCOPE)
|
||||
|
||||
if(${MSVC_TOOLSET_VERSION} GREATER_EQUAL 143)
|
||||
# Add /d2implyavx512upperregs- to disable the compiler's over-aggressive optimization, which caused AVX512 registers to be used on AVX2 machines.
|
||||
# Reference: https://github.com/pytorch/pytorch/issues/145702#issuecomment-2874029459
|
||||
target_compile_options(${libname} PUBLIC $<$<COMPILE_LANGUAGE:CXX>:/d2implyavx512upperregs->)
|
||||
# Add /d2implyavx512upperregs- to disable compiler over-aggressive
|
||||
# optimization, which caused AVX512 registers to be used on AVX2 machines.
|
||||
# Reference:
|
||||
# https://github.com/pytorch/pytorch/issues/145702#issuecomment-2874029459
|
||||
target_compile_options(
|
||||
${libname} PUBLIC $<$<COMPILE_LANGUAGE:CXX>:/d2implyavx512upperregs->)
|
||||
endif()
|
||||
|
||||
|
||||
|
||||
target_compile_options(${libname} PUBLIC
|
||||
$<$<COMPILE_LANGUAGE:CXX>:
|
||||
${MSVC_RUNTIME_LIBRARY_OPTION}
|
||||
target_compile_options(
|
||||
${libname}
|
||||
PUBLIC
|
||||
$<$<COMPILE_LANGUAGE:CXX>: ${MSVC_RUNTIME_LIBRARY_OPTION}
|
||||
$<$<OR:$<CONFIG:Debug>,$<CONFIG:RelWithDebInfo>>:${MSVC_DEBINFO_OPTION}>
|
||||
/EHsc
|
||||
/bigobj>
|
||||
)
|
||||
/EHsc /bigobj>)
|
||||
else()
|
||||
set(private_compile_options
|
||||
-Wall
|
||||
-Wextra
|
||||
-Wdeprecated
|
||||
-Wunused
|
||||
-Wno-unused-parameter
|
||||
-Wno-missing-field-initializers
|
||||
-Wno-array-bounds
|
||||
-Wno-unknown-pragmas
|
||||
-Wno-strict-overflow
|
||||
-Wno-strict-aliasing
|
||||
)
|
||||
-Wall
|
||||
-Wextra
|
||||
-Wdeprecated
|
||||
-Wunused
|
||||
-Wno-unused-parameter
|
||||
-Wno-missing-field-initializers
|
||||
-Wno-array-bounds
|
||||
-Wno-unknown-pragmas
|
||||
-Wno-strict-overflow
|
||||
-Wno-strict-aliasing)
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
list(APPEND private_compile_options -Wredundant-move)
|
||||
endif()
|
||||
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
|
||||
list(APPEND private_compile_options -Wextra-semi -Wmove)
|
||||
else()
|
||||
list(APPEND private_compile_options
|
||||
list(
|
||||
APPEND
|
||||
private_compile_options
|
||||
# Considered to be flaky. See the discussion at
|
||||
# https://github.com/pytorch/pytorch/pull/9608
|
||||
-Wno-maybe-uninitialized)
|
||||
endif()
|
||||
|
||||
if(WERROR)
|
||||
list(APPEND private_compile_options
|
||||
list(
|
||||
APPEND
|
||||
private_compile_options
|
||||
-Werror
|
||||
-Werror=ignored-attributes
|
||||
-Werror=inconsistent-missing-override
|
||||
-Werror=inconsistent-missing-destructor-override
|
||||
-Werror=pedantic
|
||||
-Werror=unused
|
||||
-Wno-error=unused-parameter
|
||||
)
|
||||
-Wno-error=unused-parameter)
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
list(APPEND private_compile_options -Werror=unused-but-set-variable)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
||||
target_compile_options(${libname} PRIVATE
|
||||
$<$<COMPILE_LANGUAGE:CXX>:${private_compile_options}>)
|
||||
target_compile_options(
|
||||
${libname} PRIVATE $<$<COMPILE_LANGUAGE:CXX>:${private_compile_options}>)
|
||||
if(USE_CUDA)
|
||||
foreach(option IN LISTS private_compile_options)
|
||||
if(CMAKE_CUDA_HOST_COMPILER_ID STREQUAL "GNU")
|
||||
@ -431,7 +462,8 @@ function(torch_compile_options libname)
|
||||
continue()
|
||||
endif()
|
||||
endif()
|
||||
target_compile_options(${libname} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler ${option}>)
|
||||
target_compile_options(
|
||||
${libname} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler ${option}>)
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
@ -443,73 +475,90 @@ function(torch_compile_options libname)
|
||||
# Unfortunately, hidden visibility messes up some ubsan warnings because
|
||||
# templated classes crossing library boundary get duplicated (but identical)
|
||||
# definitions. It's easier to just disable it.
|
||||
target_compile_options(${libname} PRIVATE
|
||||
$<$<COMPILE_LANGUAGE:CXX>: -fvisibility=hidden>)
|
||||
target_compile_options(${libname} PRIVATE $<$<COMPILE_LANGUAGE:CXX>:
|
||||
-fvisibility=hidden>)
|
||||
endif()
|
||||
|
||||
# Use -O2 for release builds (-O3 doesn't improve perf, and -Os results in perf regression)
|
||||
target_compile_options(${libname} PRIVATE
|
||||
$<$<AND:$<COMPILE_LANGUAGE:CXX>,$<OR:$<CONFIG:Release>,$<CONFIG:RelWithDebInfo>>>:-O2>)
|
||||
# Use -O2 for release builds (-O3 doesn't improve perf, and -Os results in
|
||||
# perf regression)
|
||||
target_compile_options(
|
||||
${libname}
|
||||
PRIVATE
|
||||
$<$<AND:$<COMPILE_LANGUAGE:CXX>,$<OR:$<CONFIG:Release>,$<CONFIG:RelWithDebInfo>>>:-O2>
|
||||
)
|
||||
|
||||
endfunction()
|
||||
|
||||
##############################################################################
|
||||
# ##############################################################################
|
||||
# Set old-style FindCuda.cmake compile flags from modern CMake cuda flags.
|
||||
# Usage:
|
||||
# torch_update_find_cuda_flags()
|
||||
# Usage: torch_update_find_cuda_flags()
|
||||
function(torch_update_find_cuda_flags)
|
||||
# Convert -O2 -Xcompiler="-O2 -Wall" to "-O2;-Xcompiler=-O2,-Wall"
|
||||
if(USE_CUDA)
|
||||
separate_arguments(FLAGS UNIX_COMMAND "${CMAKE_CUDA_FLAGS}")
|
||||
string(REPLACE " " "," FLAGS "${FLAGS}")
|
||||
set(CUDA_NVCC_FLAGS ${FLAGS} PARENT_SCOPE)
|
||||
set(CUDA_NVCC_FLAGS
|
||||
${FLAGS}
|
||||
PARENT_SCOPE)
|
||||
|
||||
separate_arguments(FLAGS_DEBUG UNIX_COMMAND "${CMAKE_CUDA_FLAGS_DEBUG}")
|
||||
string(REPLACE " " "," FLAGS_DEBUG "${FLAGS_DEBUG}")
|
||||
set(CUDA_NVCC_FLAGS_DEBUG "${FLAGS_DEBUG}" PARENT_SCOPE)
|
||||
set(CUDA_NVCC_FLAGS_DEBUG
|
||||
"${FLAGS_DEBUG}"
|
||||
PARENT_SCOPE)
|
||||
|
||||
separate_arguments(FLAGS_RELEASE UNIX_COMMAND "${CMAKE_CUDA_FLAGS_RELEASE}")
|
||||
string(REPLACE " " "," FLAGS_RELEASE "${FLAGS_RELEASE}")
|
||||
set(CUDA_NVCC_FLAGS_RELEASE "${FLAGS_RELEASE}" PARENT_SCOPE)
|
||||
set(CUDA_NVCC_FLAGS_RELEASE
|
||||
"${FLAGS_RELEASE}"
|
||||
PARENT_SCOPE)
|
||||
|
||||
separate_arguments(FLAGS_MINSIZEREL UNIX_COMMAND "${CMAKE_CUDA_FLAGS_MINSIZEREL}")
|
||||
separate_arguments(FLAGS_MINSIZEREL UNIX_COMMAND
|
||||
"${CMAKE_CUDA_FLAGS_MINSIZEREL}")
|
||||
string(REPLACE " " "," FLAGS_MINSIZEREL "${FLAGS_MINSIZEREL}")
|
||||
set(CUDA_NVCC_FLAGS_MINSIZEREL "${FLAGS_MINSIZEREL}" PARENT_SCOPE)
|
||||
set(CUDA_NVCC_FLAGS_MINSIZEREL
|
||||
"${FLAGS_MINSIZEREL}"
|
||||
PARENT_SCOPE)
|
||||
|
||||
separate_arguments(FLAGS_RELWITHDEBINFO UNIX_COMMAND "${CMAKE_CUDA_FLAGS_RELWITHDEBINFO}")
|
||||
separate_arguments(FLAGS_RELWITHDEBINFO UNIX_COMMAND
|
||||
"${CMAKE_CUDA_FLAGS_RELWITHDEBINFO}")
|
||||
string(REPLACE " " "," FLAGS_RELWITHDEBINFO "${FLAGS_RELWITHDEBINFO}")
|
||||
set(CUDA_NVCC_FLAGS_RELWITHDEBINFO "${FLAGS_RELWITHDEBINFO}" PARENT_SCOPE)
|
||||
set(CUDA_NVCC_FLAGS_RELWITHDEBINFO
|
||||
"${FLAGS_RELWITHDEBINFO}"
|
||||
PARENT_SCOPE)
|
||||
|
||||
message(STATUS "Converting CMAKE_CUDA_FLAGS to CUDA_NVCC_FLAGS:\n"
|
||||
" CUDA_NVCC_FLAGS = ${FLAGS}\n"
|
||||
" CUDA_NVCC_FLAGS_DEBUG = ${FLAGS_DEBUG}\n"
|
||||
" CUDA_NVCC_FLAGS_RELEASE = ${FLAGS_RELEASE}\n"
|
||||
" CUDA_NVCC_FLAGS_RELWITHDEBINFO = ${FLAGS_RELWITHDEBINFO}\n"
|
||||
" CUDA_NVCC_FLAGS_MINSIZEREL = ${FLAGS_MINSIZEREL}")
|
||||
message(
|
||||
STATUS "Converting CMAKE_CUDA_FLAGS to CUDA_NVCC_FLAGS:\n"
|
||||
" CUDA_NVCC_FLAGS = ${FLAGS}\n"
|
||||
" CUDA_NVCC_FLAGS_DEBUG = ${FLAGS_DEBUG}\n"
|
||||
" CUDA_NVCC_FLAGS_RELEASE = ${FLAGS_RELEASE}\n"
|
||||
" CUDA_NVCC_FLAGS_RELWITHDEBINFO = ${FLAGS_RELWITHDEBINFO}\n"
|
||||
" CUDA_NVCC_FLAGS_MINSIZEREL = ${FLAGS_MINSIZEREL}")
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
include(CheckCXXCompilerFlag)
|
||||
|
||||
##############################################################################
|
||||
# Check if the given flag is supported and append it to the provided outputvar.
|
||||
# Also define HAS_UPPER_CASE_FLAG_NAME variable
|
||||
# Usage:
|
||||
# append_cxx_flag_if_supported("-Werror" CMAKE_CXX_FLAGS)
|
||||
# ##############################################################################
|
||||
# Check if the given flag is supported and append it to the provided outputvar. Also
|
||||
# define the HAS_UPPER_CASE_FLAG_NAME variable. Usage:
|
||||
# append_cxx_flag_if_supported("-Werror" CMAKE_CXX_FLAGS)
|
||||
function(append_cxx_flag_if_supported flag outputvar)
|
||||
string(TOUPPER "HAS${flag}" _FLAG_NAME)
|
||||
string(REGEX REPLACE "[=-]" "_" _FLAG_NAME "${_FLAG_NAME}")
|
||||
# GCC silently accepts unknown -Wno-XXX flags, so we detect the corresponding -WXXX.
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
string(REGEX REPLACE "Wno-" "W" new_flag "${flag}")
|
||||
else()
|
||||
set(new_flag ${flag})
|
||||
endif()
|
||||
check_cxx_compiler_flag("${new_flag}" ${_FLAG_NAME})
|
||||
if(${_FLAG_NAME})
|
||||
string(APPEND ${outputvar} " ${flag}")
|
||||
set(${outputvar} "${${outputvar}}" PARENT_SCOPE)
|
||||
endif()
|
||||
string(TOUPPER "HAS${flag}" _FLAG_NAME)
|
||||
string(REGEX REPLACE "[=-]" "_" _FLAG_NAME "${_FLAG_NAME}")
|
||||
# GCC silently accepts unknown -Wno-XXX flags, so we detect the corresponding -WXXX.
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
string(REGEX REPLACE "Wno-" "W" new_flag "${flag}")
|
||||
else()
|
||||
set(new_flag ${flag})
|
||||
endif()
|
||||
check_cxx_compiler_flag("${new_flag}" ${_FLAG_NAME})
|
||||
if(${_FLAG_NAME})
|
||||
string(APPEND ${outputvar} " ${flag}")
|
||||
set(${outputvar}
|
||||
"${${outputvar}}"
|
||||
PARENT_SCOPE)
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
function(target_compile_options_if_supported target flag)
|
||||
|
||||
@ -19,18 +19,14 @@ set(PYTORCH_FOUND_XPU TRUE)
|
||||
# SYCL library interface
|
||||
add_library(torch::sycl INTERFACE IMPORTED)
|
||||
|
||||
set_property(
|
||||
TARGET torch::sycl PROPERTY INTERFACE_INCLUDE_DIRECTORIES
|
||||
${SYCL_INCLUDE_DIR})
|
||||
set_property(
|
||||
TARGET torch::sycl PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
${SYCL_LIBRARY})
|
||||
set_property(TARGET torch::sycl PROPERTY INTERFACE_INCLUDE_DIRECTORIES
|
||||
${SYCL_INCLUDE_DIR})
|
||||
set_property(TARGET torch::sycl PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
${SYCL_LIBRARY})
|
||||
|
||||
# xpurt
|
||||
add_library(torch::xpurt INTERFACE IMPORTED)
|
||||
set_property(
|
||||
TARGET torch::xpurt PROPERTY INTERFACE_LINK_LIBRARIES
|
||||
torch::sycl)
|
||||
set_property(TARGET torch::xpurt PROPERTY INTERFACE_LINK_LIBRARIES torch::sycl)
|
||||
|
||||
# setting xpu arch flags
|
||||
torch_xpu_get_arch_list(XPU_ARCH_FLAGS)
|
||||
@ -39,7 +35,8 @@ set(TORCH_XPU_ARCH_LIST ${XPU_ARCH_FLAGS})
|
||||
|
||||
# Ensure USE_XPU is enabled.
|
||||
string(APPEND XPU_HOST_CXX_FLAGS " -DUSE_XPU")
|
||||
string(APPEND XPU_HOST_CXX_FLAGS " -DSYCL_COMPILER_VERSION=${SYCL_COMPILER_VERSION}")
|
||||
string(APPEND XPU_HOST_CXX_FLAGS
|
||||
" -DSYCL_COMPILER_VERSION=${SYCL_COMPILER_VERSION}")
|
||||
|
||||
if(DEFINED ENV{XPU_ENABLE_KINETO})
|
||||
set(XPU_ENABLE_KINETO TRUE)
|
||||
@ -53,4 +50,4 @@ if(WIN32)
|
||||
endif()
|
||||
else()
|
||||
set(XPU_ENABLE_KINETO TRUE)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@ -12,10 +12,12 @@ add_library(${PROJECT_NAME} MODULE ${FT_SOURCES})
|
||||
target_include_directories(${PROJECT_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
target_compile_definitions(${PROJECT_NAME} PRIVATE FUNCTORCH_BUILD_MAIN_LIB)
|
||||
target_compile_definitions(${PROJECT_NAME} PRIVATE TORCH_EXTENSION_NAME=_C)
|
||||
target_compile_definitions(${PROJECT_NAME} PRIVATE TORCH_API_INCLUDE_EXTENSION_H)
|
||||
target_compile_definitions(${PROJECT_NAME}
|
||||
PRIVATE TORCH_API_INCLUDE_EXTENSION_H)
|
||||
target_compile_options(${PROJECT_NAME} PRIVATE ${TORCH_PYTHON_COMPILE_OPTIONS})
|
||||
target_compile_options_if_supported(${PROJECT_NAME} "-Wmissing-prototypes")
|
||||
target_compile_options_if_supported(${PROJECT_NAME} "-Werror=missing-prototypes")
|
||||
target_compile_options_if_supported(${PROJECT_NAME}
|
||||
"-Werror=missing-prototypes")
|
||||
if(BUILD_LIBTORCHLESS)
|
||||
target_link_libraries(${PROJECT_NAME} PRIVATE ${TORCH_LIB} torch_python)
|
||||
else()
|
||||
@ -25,13 +27,16 @@ endif()
|
||||
target_link_libraries(${PROJECT_NAME} PRIVATE pybind::pybind11)
|
||||
|
||||
set_target_properties(${PROJECT_NAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY
|
||||
${CMAKE_BINARY_DIR}/functorch)
|
||||
set_target_properties(${PROJECT_NAME} PROPERTIES INSTALL_RPATH "${_rpath_portable_origin}/../torch/lib")
|
||||
${CMAKE_BINARY_DIR}/functorch)
|
||||
set_target_properties(
|
||||
${PROJECT_NAME} PROPERTIES INSTALL_RPATH
|
||||
"${_rpath_portable_origin}/../torch/lib")
|
||||
|
||||
# Copy-pasted prefix/suffix logic for Python extensions from
|
||||
# https://github.com/pytorch/pytorch/blob/33bb8ae350611760139457b85842b1d7edf9aa11/caffe2/CMakeLists.txt#L1975
|
||||
# https://github.com/pytorch/pytorch/blob/33bb8ae350611760139457b85842b1d7edf9aa11/caffe2/CMakeLists.txt#L2022
|
||||
# TODO: It would be good to be able to use Python3_add_library target, but it does not work in many cases
|
||||
# TODO: It would be good to be able to use Python3_add_library target, but it
|
||||
# does not work in many cases
|
||||
set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" DEBUG_POSTFIX "")
|
||||
if(WIN32)
|
||||
set_target_properties(${PROJECT_NAME} PROPERTIES SUFFIX ".pyd")
|
||||
@ -40,6 +45,7 @@ else()
|
||||
endif()
|
||||
# Needed to link functorch on MacOS
|
||||
if(NOT ${TORCH_PYTHON_LINK_FLAGS} STREQUAL "")
|
||||
set_target_properties(${PROJECT_NAME} PROPERTIES LINK_FLAGS ${TORCH_PYTHON_LINK_FLAGS})
|
||||
set_target_properties(${PROJECT_NAME} PROPERTIES LINK_FLAGS
|
||||
${TORCH_PYTHON_LINK_FLAGS})
|
||||
endif()
|
||||
install(TARGETS ${PROJECT_NAME} DESTINATION "${CMAKE_CURRENT_SOURCE_DIR}")
|
||||
|
||||
@ -2,24 +2,21 @@ set(AOTI_ABI_CHECK_TEST_ROOT ${TORCH_ROOT}/test/cpp/aoti_abi_check)
|
||||
|
||||
# Build the cpp gtest binary containing the cpp-only tests.
|
||||
set(AOTI_ABI_CHECK_TEST_SRCS
|
||||
${AOTI_ABI_CHECK_TEST_ROOT}/main.cpp
|
||||
${AOTI_ABI_CHECK_TEST_ROOT}/test_cast.cpp
|
||||
${AOTI_ABI_CHECK_TEST_ROOT}/test_dtype.cpp
|
||||
${AOTI_ABI_CHECK_TEST_ROOT}/test_macros.cpp
|
||||
${AOTI_ABI_CHECK_TEST_ROOT}/test_math.cpp
|
||||
${AOTI_ABI_CHECK_TEST_ROOT}/test_rand.cpp
|
||||
${AOTI_ABI_CHECK_TEST_ROOT}/test_vec.cpp
|
||||
)
|
||||
${AOTI_ABI_CHECK_TEST_ROOT}/main.cpp
|
||||
${AOTI_ABI_CHECK_TEST_ROOT}/test_cast.cpp
|
||||
${AOTI_ABI_CHECK_TEST_ROOT}/test_dtype.cpp
|
||||
${AOTI_ABI_CHECK_TEST_ROOT}/test_macros.cpp
|
||||
${AOTI_ABI_CHECK_TEST_ROOT}/test_math.cpp
|
||||
${AOTI_ABI_CHECK_TEST_ROOT}/test_rand.cpp
|
||||
${AOTI_ABI_CHECK_TEST_ROOT}/test_vec.cpp)
|
||||
|
||||
add_executable(test_aoti_abi_check
|
||||
${AOTI_ABI_CHECK_TEST_SRCS}
|
||||
)
|
||||
add_executable(test_aoti_abi_check ${AOTI_ABI_CHECK_TEST_SRCS})
|
||||
|
||||
# TODO temporary until we can delete the old gtest polyfills.
|
||||
target_compile_definitions(test_aoti_abi_check PRIVATE USE_GTEST)
|
||||
|
||||
# WARNING: DO NOT LINK torch!!!
|
||||
# The purpose is to check if the used aten/c10 headers are written in a header-only way
|
||||
# WARNING: DO NOT LINK torch!!! The purpose is to check if the used aten/c10
|
||||
# headers are written in a header-only way
|
||||
target_link_libraries(test_aoti_abi_check PRIVATE gtest_main)
|
||||
target_include_directories(test_aoti_abi_check PRIVATE ${ATen_CPU_INCLUDE})
|
||||
|
||||
@ -27,6 +24,9 @@ if(INSTALL_TEST)
|
||||
install(TARGETS test_aoti_abi_check DESTINATION bin)
|
||||
# Install PDB files for MSVC builds
|
||||
if(MSVC AND BUILD_SHARED_LIBS)
|
||||
install(FILES $<TARGET_PDB_FILE:test_aoti_abi_check> DESTINATION bin OPTIONAL)
|
||||
install(
|
||||
FILES $<TARGET_PDB_FILE:test_aoti_abi_check>
|
||||
DESTINATION bin
|
||||
OPTIONAL)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@ -1,47 +1,40 @@
|
||||
|
||||
set(AOT_INDUCTOR_TEST_ROOT ${TORCH_ROOT}/test/cpp/aoti_inference)
|
||||
|
||||
# Build custom TorchScript op for AOTInductor
|
||||
add_library(aoti_custom_class SHARED aoti_custom_class.cpp)
|
||||
set_target_properties(aoti_custom_class PROPERTIES
|
||||
LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
|
||||
set_target_properties(aoti_custom_class PROPERTIES LIBRARY_OUTPUT_DIRECTORY
|
||||
${CMAKE_CURRENT_BINARY_DIR})
|
||||
if(USE_CUDA)
|
||||
target_compile_definitions(aoti_custom_class PRIVATE USE_CUDA)
|
||||
elseif(USE_ROCM)
|
||||
target_compile_definitions(aoti_custom_class PRIVATE USE_ROCM)
|
||||
target_compile_definitions(aoti_custom_class PRIVATE USE_ROCM)
|
||||
endif()
|
||||
# Link against LibTorch
|
||||
target_link_libraries(aoti_custom_class torch)
|
||||
|
||||
# the custom command that generates the TorchScript module
|
||||
add_custom_command(
|
||||
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/script_data.pt
|
||||
${CMAKE_CURRENT_BINARY_DIR}/script_model_cpu.pt
|
||||
${CMAKE_CURRENT_BINARY_DIR}/script_model_cuda.pt
|
||||
# This script requires the torch package to be installed.
|
||||
COMMAND python ${AOT_INDUCTOR_TEST_ROOT}/compile_model.py
|
||||
DEPENDS torch torch_python aoti_custom_class ${AOT_INDUCTOR_TEST_ROOT}/compile_model.py
|
||||
)
|
||||
add_custom_target(aoti_script_model ALL
|
||||
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/script_data.pt
|
||||
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/script_model_cpu.pt
|
||||
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/script_model_cuda.pt
|
||||
)
|
||||
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/script_data.pt
|
||||
${CMAKE_CURRENT_BINARY_DIR}/script_model_cpu.pt
|
||||
${CMAKE_CURRENT_BINARY_DIR}/script_model_cuda.pt
|
||||
# This script requires the torch package to be installed.
|
||||
COMMAND python ${AOT_INDUCTOR_TEST_ROOT}/compile_model.py
|
||||
DEPENDS torch torch_python aoti_custom_class
|
||||
${AOT_INDUCTOR_TEST_ROOT}/compile_model.py)
|
||||
add_custom_target(
|
||||
aoti_script_model ALL
|
||||
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/script_data.pt
|
||||
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/script_model_cpu.pt
|
||||
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/script_model_cuda.pt)
|
||||
add_dependencies(aoti_script_model aoti_custom_class)
|
||||
|
||||
# Build the cpp gtest binary containing the cpp-only tests.
|
||||
set(INDUCTOR_TEST_SRCS
|
||||
${AOT_INDUCTOR_TEST_ROOT}/test.cpp
|
||||
)
|
||||
set(INDUCTOR_TEST_SRCS ${AOT_INDUCTOR_TEST_ROOT}/test.cpp)
|
||||
|
||||
add_executable(test_aoti_inference
|
||||
${TORCH_ROOT}/test/cpp/common/main.cpp
|
||||
${INDUCTOR_TEST_SRCS}
|
||||
data.pt
|
||||
script_data.pt
|
||||
script_model_cpu.pt
|
||||
script_model_cuda.pt
|
||||
)
|
||||
add_executable(
|
||||
test_aoti_inference
|
||||
${TORCH_ROOT}/test/cpp/common/main.cpp ${INDUCTOR_TEST_SRCS} data.pt
|
||||
script_data.pt script_model_cpu.pt script_model_cuda.pt)
|
||||
add_dependencies(test_aoti_inference aoti_custom_class aoti_script_model)
|
||||
|
||||
# TODO temporary until we can delete the old gtest polyfills.
|
||||
@ -49,32 +42,32 @@ target_compile_definitions(test_aoti_inference PRIVATE USE_GTEST)
|
||||
|
||||
# Define a custom command to generate the library
|
||||
add_custom_command(
|
||||
OUTPUT data.pt
|
||||
COMMAND python ${AOT_INDUCTOR_TEST_ROOT}/test.py
|
||||
DEPENDS ${AOT_INDUCTOR_TEST_ROOT}/test.py
|
||||
)
|
||||
OUTPUT data.pt
|
||||
COMMAND python ${AOT_INDUCTOR_TEST_ROOT}/test.py
|
||||
DEPENDS ${AOT_INDUCTOR_TEST_ROOT}/test.py)
|
||||
|
||||
target_link_libraries(test_aoti_inference PRIVATE
|
||||
torch
|
||||
gtest_main
|
||||
-Wl,--no-as-needed aoti_custom_class
|
||||
)
|
||||
target_link_libraries(
|
||||
test_aoti_inference PRIVATE torch gtest_main -Wl,--no-as-needed
|
||||
aoti_custom_class)
|
||||
|
||||
if(USE_CUDA)
|
||||
target_include_directories(test_aoti_inference PRIVATE ${ATen_CUDA_INCLUDE})
|
||||
target_compile_definitions(test_aoti_inference PRIVATE USE_CUDA)
|
||||
elseif(USE_ROCM)
|
||||
target_include_directories(test_aoti_inference PRIVATE ${ATen_HIP_INCLUDE})
|
||||
target_compile_definitions(test_aoti_inference PRIVATE USE_ROCM)
|
||||
target_include_directories(test_aoti_inference PRIVATE ${ATen_HIP_INCLUDE})
|
||||
target_compile_definitions(test_aoti_inference PRIVATE USE_ROCM)
|
||||
endif()
|
||||
target_compile_definitions(test_aoti_inference PRIVATE
|
||||
CMAKE_CURRENT_BINARY_DIR=${CMAKE_CURRENT_BINARY_DIR}
|
||||
)
|
||||
target_compile_definitions(
|
||||
test_aoti_inference
|
||||
PRIVATE CMAKE_CURRENT_BINARY_DIR=${CMAKE_CURRENT_BINARY_DIR})
|
||||
|
||||
if(INSTALL_TEST)
|
||||
install(TARGETS test_aoti_inference DESTINATION bin)
|
||||
# Install PDB files for MSVC builds
|
||||
if(MSVC AND BUILD_SHARED_LIBS)
|
||||
install(FILES $<TARGET_PDB_FILE:test_aoti_inference> DESTINATION bin OPTIONAL)
|
||||
install(
|
||||
FILES $<TARGET_PDB_FILE:test_aoti_inference>
|
||||
DESTINATION bin
|
||||
OPTIONAL)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@ -1,49 +1,48 @@
|
||||
set(TORCH_API_TEST_DIR "${TORCH_ROOT}/test/cpp/api")
|
||||
set(TORCH_API_TEST_SOURCES
|
||||
${TORCH_ROOT}/test/cpp/common/main.cpp
|
||||
${TORCH_API_TEST_DIR}/autograd.cpp
|
||||
${TORCH_API_TEST_DIR}/any.cpp
|
||||
${TORCH_API_TEST_DIR}/dataloader.cpp
|
||||
${TORCH_API_TEST_DIR}/enum.cpp
|
||||
${TORCH_API_TEST_DIR}/expanding-array.cpp
|
||||
${TORCH_API_TEST_DIR}/fft.cpp
|
||||
${TORCH_API_TEST_DIR}/functional.cpp
|
||||
${TORCH_API_TEST_DIR}/init.cpp
|
||||
${TORCH_API_TEST_DIR}/integration.cpp
|
||||
${TORCH_API_TEST_DIR}/ivalue.cpp
|
||||
${TORCH_API_TEST_DIR}/jit.cpp
|
||||
${TORCH_API_TEST_DIR}/memory.cpp
|
||||
${TORCH_API_TEST_DIR}/meta_tensor.cpp
|
||||
${TORCH_API_TEST_DIR}/misc.cpp
|
||||
${TORCH_API_TEST_DIR}/module.cpp
|
||||
${TORCH_API_TEST_DIR}/moduledict.cpp
|
||||
${TORCH_API_TEST_DIR}/modulelist.cpp
|
||||
${TORCH_API_TEST_DIR}/modules.cpp
|
||||
${TORCH_API_TEST_DIR}/nested.cpp
|
||||
${TORCH_API_TEST_DIR}/parameterdict.cpp
|
||||
${TORCH_API_TEST_DIR}/parameterlist.cpp
|
||||
${TORCH_API_TEST_DIR}/namespace.cpp
|
||||
${TORCH_API_TEST_DIR}/nn_utils.cpp
|
||||
${TORCH_API_TEST_DIR}/optim.cpp
|
||||
${TORCH_API_TEST_DIR}/ordered_dict.cpp
|
||||
${TORCH_API_TEST_DIR}/rnn.cpp
|
||||
${TORCH_API_TEST_DIR}/sequential.cpp
|
||||
${TORCH_API_TEST_DIR}/transformer.cpp
|
||||
${TORCH_API_TEST_DIR}/serialize.cpp
|
||||
${TORCH_API_TEST_DIR}/special.cpp
|
||||
${TORCH_API_TEST_DIR}/static.cpp
|
||||
${TORCH_API_TEST_DIR}/support.cpp
|
||||
${TORCH_API_TEST_DIR}/tensor_cuda.cpp
|
||||
${TORCH_API_TEST_DIR}/tensor_indexing.cpp
|
||||
${TORCH_API_TEST_DIR}/tensor_options_cuda.cpp
|
||||
${TORCH_API_TEST_DIR}/tensor_options.cpp
|
||||
${TORCH_API_TEST_DIR}/tensor.cpp
|
||||
${TORCH_API_TEST_DIR}/torch_include.cpp
|
||||
${TORCH_API_TEST_DIR}/inference_mode.cpp
|
||||
${TORCH_API_TEST_DIR}/grad_mode.cpp
|
||||
${TORCH_API_TEST_DIR}/operations.cpp
|
||||
${TORCH_API_TEST_DIR}/nested_int.cpp
|
||||
)
|
||||
${TORCH_ROOT}/test/cpp/common/main.cpp
|
||||
${TORCH_API_TEST_DIR}/autograd.cpp
|
||||
${TORCH_API_TEST_DIR}/any.cpp
|
||||
${TORCH_API_TEST_DIR}/dataloader.cpp
|
||||
${TORCH_API_TEST_DIR}/enum.cpp
|
||||
${TORCH_API_TEST_DIR}/expanding-array.cpp
|
||||
${TORCH_API_TEST_DIR}/fft.cpp
|
||||
${TORCH_API_TEST_DIR}/functional.cpp
|
||||
${TORCH_API_TEST_DIR}/init.cpp
|
||||
${TORCH_API_TEST_DIR}/integration.cpp
|
||||
${TORCH_API_TEST_DIR}/ivalue.cpp
|
||||
${TORCH_API_TEST_DIR}/jit.cpp
|
||||
${TORCH_API_TEST_DIR}/memory.cpp
|
||||
${TORCH_API_TEST_DIR}/meta_tensor.cpp
|
||||
${TORCH_API_TEST_DIR}/misc.cpp
|
||||
${TORCH_API_TEST_DIR}/module.cpp
|
||||
${TORCH_API_TEST_DIR}/moduledict.cpp
|
||||
${TORCH_API_TEST_DIR}/modulelist.cpp
|
||||
${TORCH_API_TEST_DIR}/modules.cpp
|
||||
${TORCH_API_TEST_DIR}/nested.cpp
|
||||
${TORCH_API_TEST_DIR}/parameterdict.cpp
|
||||
${TORCH_API_TEST_DIR}/parameterlist.cpp
|
||||
${TORCH_API_TEST_DIR}/namespace.cpp
|
||||
${TORCH_API_TEST_DIR}/nn_utils.cpp
|
||||
${TORCH_API_TEST_DIR}/optim.cpp
|
||||
${TORCH_API_TEST_DIR}/ordered_dict.cpp
|
||||
${TORCH_API_TEST_DIR}/rnn.cpp
|
||||
${TORCH_API_TEST_DIR}/sequential.cpp
|
||||
${TORCH_API_TEST_DIR}/transformer.cpp
|
||||
${TORCH_API_TEST_DIR}/serialize.cpp
|
||||
${TORCH_API_TEST_DIR}/special.cpp
|
||||
${TORCH_API_TEST_DIR}/static.cpp
|
||||
${TORCH_API_TEST_DIR}/support.cpp
|
||||
${TORCH_API_TEST_DIR}/tensor_cuda.cpp
|
||||
${TORCH_API_TEST_DIR}/tensor_indexing.cpp
|
||||
${TORCH_API_TEST_DIR}/tensor_options_cuda.cpp
|
||||
${TORCH_API_TEST_DIR}/tensor_options.cpp
|
||||
${TORCH_API_TEST_DIR}/tensor.cpp
|
||||
${TORCH_API_TEST_DIR}/torch_include.cpp
|
||||
${TORCH_API_TEST_DIR}/inference_mode.cpp
|
||||
${TORCH_API_TEST_DIR}/grad_mode.cpp
|
||||
${TORCH_API_TEST_DIR}/operations.cpp
|
||||
${TORCH_API_TEST_DIR}/nested_int.cpp)
|
||||
if(USE_CUDA OR USE_ROCM)
|
||||
list(APPEND TORCH_API_TEST_SOURCES ${TORCH_API_TEST_DIR}/parallel.cpp)
|
||||
endif()
|
||||
@ -57,8 +56,8 @@ if(USE_CUDA)
|
||||
endif()
|
||||
|
||||
if(NOT MSVC)
|
||||
# Clang has an unfixed bug leading to spurious missing braces
|
||||
# warnings, see https://bugs.llvm.org/show_bug.cgi?id=21629
|
||||
# Clang has an unfixed bug leading to spurious missing braces warnings, see
|
||||
# https://bugs.llvm.org/show_bug.cgi?id=21629
|
||||
target_compile_options_if_supported(test_api "-Wno-missing-braces")
|
||||
# Considered to be flaky. See the discussion at
|
||||
# https://github.com/pytorch/pytorch/pull/9608
|
||||
@ -67,17 +66,24 @@ if(NOT MSVC)
|
||||
target_compile_options_if_supported(test_api "-Wno-unused-but-set-parameter")
|
||||
|
||||
# Add -Wno-error=nonnull for GCC 12+
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12)
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION
|
||||
VERSION_GREATER_EQUAL 12)
|
||||
target_compile_options_if_supported(test_api "-Wno-error=nonnull")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(INSTALL_TEST)
|
||||
set_target_properties(test_api PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
|
||||
set_target_properties(
|
||||
test_api
|
||||
PROPERTIES INSTALL_RPATH
|
||||
"${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
|
||||
install(TARGETS test_api DESTINATION bin)
|
||||
# Install PDB files for MSVC builds
|
||||
if(MSVC AND BUILD_SHARED_LIBS)
|
||||
install(FILES $<TARGET_PDB_FILE:test_api> DESTINATION bin OPTIONAL)
|
||||
install(
|
||||
FILES $<TARGET_PDB_FILE:test_api>
|
||||
DESTINATION bin
|
||||
OPTIONAL)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
if(USE_CUDA)
|
||||
add_library(c10d_cuda_test CUDATest.cu)
|
||||
target_include_directories(c10d_cuda_test PRIVATE $<BUILD_INTERFACE:${TORCH_SRC_DIR}/csrc/distributed>)
|
||||
target_include_directories(
|
||||
c10d_cuda_test PRIVATE $<BUILD_INTERFACE:${TORCH_SRC_DIR}/csrc/distributed>)
|
||||
target_link_libraries(c10d_cuda_test torch_cuda)
|
||||
add_dependencies(c10d_cuda_test torch_cuda)
|
||||
endif()
|
||||
@ -12,49 +13,82 @@ function(c10d_add_test test_src)
|
||||
set(multiValues LINK_LIBRARIES)
|
||||
|
||||
include(CMakeParseArguments)
|
||||
cmake_parse_arguments(${prefix} "${noValues}" "${singleValues}" "${multiValues}" ${ARGN})
|
||||
cmake_parse_arguments(${prefix} "${noValues}" "${singleValues}"
|
||||
"${multiValues}" ${ARGN})
|
||||
|
||||
get_filename_component(test_name ${test_src} NAME_WE)
|
||||
add_executable(${test_name} "${test_src}")
|
||||
target_include_directories(${test_name} PRIVATE
|
||||
target_include_directories(
|
||||
${test_name}
|
||||
PRIVATE
|
||||
$<BUILD_INTERFACE:${TORCH_SRC_DIR}/csrc/distributed>
|
||||
$<TARGET_PROPERTY:fmt::fmt-header-only,INTERFACE_INCLUDE_DIRECTORIES>
|
||||
)
|
||||
target_link_libraries(${test_name} PRIVATE
|
||||
fmt::fmt-header-only
|
||||
${ARG_LINK_LIBRARIES}
|
||||
)
|
||||
$<TARGET_PROPERTY:fmt::fmt-header-only,INTERFACE_INCLUDE_DIRECTORIES>)
|
||||
target_link_libraries(${test_name} PRIVATE fmt::fmt-header-only
|
||||
${ARG_LINK_LIBRARIES})
|
||||
add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
|
||||
|
||||
if(ARG_INSTALL_TEST)
|
||||
set_target_properties(${test_name} PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
|
||||
set_target_properties(
|
||||
${test_name}
|
||||
PROPERTIES INSTALL_RPATH
|
||||
"${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
|
||||
install(TARGETS ${test_name} DESTINATION bin)
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
c10d_add_test(BackoffTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST OFF)
|
||||
c10d_add_test(FileStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST ${INSTALL_TEST})
|
||||
c10d_add_test(TCPStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST ${INSTALL_TEST})
|
||||
c10d_add_test(BackoffTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST
|
||||
OFF)
|
||||
c10d_add_test(FileStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main
|
||||
INSTALL_TEST ${INSTALL_TEST})
|
||||
c10d_add_test(TCPStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST
|
||||
${INSTALL_TEST})
|
||||
if(NOT WIN32)
|
||||
c10d_add_test(HashStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST ${INSTALL_TEST})
|
||||
c10d_add_test(HashStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main
|
||||
INSTALL_TEST ${INSTALL_TEST})
|
||||
endif()
|
||||
|
||||
if(USE_CUDA)
|
||||
if(USE_GLOO AND USE_C10D_GLOO)
|
||||
c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main INSTALL_TEST ${INSTALL_TEST})
|
||||
c10d_add_test(ProcessGroupGlooAsyncTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main INSTALL_TEST ${INSTALL_TEST})
|
||||
c10d_add_test(
|
||||
ProcessGroupGlooTest.cpp
|
||||
LINK_LIBRARIES
|
||||
torch_cpu
|
||||
c10d_cuda_test
|
||||
gtest_main
|
||||
INSTALL_TEST
|
||||
${INSTALL_TEST})
|
||||
c10d_add_test(
|
||||
ProcessGroupGlooAsyncTest.cpp
|
||||
LINK_LIBRARIES
|
||||
torch_cpu
|
||||
c10d_cuda_test
|
||||
gtest_main
|
||||
INSTALL_TEST
|
||||
${INSTALL_TEST})
|
||||
endif()
|
||||
if(USE_NCCL AND USE_C10D_NCCL)
|
||||
# NCCL is a private dependency of libtorch, but the tests include some
|
||||
# private headers of libtorch, which in turn include NCCL. As a hacky
|
||||
# alternative to making NCCL a public dependency of libtorch, we make it
|
||||
# a private dependency of the tests as well.
|
||||
# alternative to making NCCL a public dependency of libtorch, we make it a
|
||||
# private dependency of the tests as well.
|
||||
c10d_add_test(
|
||||
ProcessGroupNCCLTest.cpp
|
||||
LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_nccl INSTALL_TEST ${INSTALL_TEST})
|
||||
LINK_LIBRARIES
|
||||
torch_cpu
|
||||
c10d_cuda_test
|
||||
gtest_main
|
||||
__caffe2_nccl
|
||||
INSTALL_TEST
|
||||
${INSTALL_TEST})
|
||||
c10d_add_test(
|
||||
ProcessGroupNCCLErrorsTest.cpp
|
||||
LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_nccl INSTALL_TEST ${INSTALL_TEST})
|
||||
LINK_LIBRARIES
|
||||
torch_cpu
|
||||
c10d_cuda_test
|
||||
gtest_main
|
||||
__caffe2_nccl
|
||||
INSTALL_TEST
|
||||
${INSTALL_TEST})
|
||||
if(INSTALL_TEST)
|
||||
install(TARGETS c10d_cuda_test DESTINATION lib)
|
||||
endif()
|
||||
@ -62,33 +96,45 @@ if(USE_CUDA)
|
||||
if(USE_UCC AND USE_C10D_UCC)
|
||||
# UCC is a private dependency of libtorch, but the tests include some
|
||||
# private headers of libtorch, which in turn include UCC. As a hacky
|
||||
# alternative to making UCC a public dependency of libtorch, we make it
|
||||
# a private dependency of the tests as well.
|
||||
# alternative to making UCC a public dependency of libtorch, we make it a
|
||||
# private dependency of the tests as well.
|
||||
c10d_add_test(
|
||||
ProcessGroupUCCTest.cpp
|
||||
LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_ucc INSTALL_TEST ${INSTALL_TEST})
|
||||
LINK_LIBRARIES
|
||||
torch_cpu
|
||||
c10d_cuda_test
|
||||
gtest_main
|
||||
__caffe2_ucc
|
||||
INSTALL_TEST
|
||||
${INSTALL_TEST})
|
||||
if(INSTALL_TEST)
|
||||
install(TARGETS c10d_cuda_test DESTINATION lib)
|
||||
endif()
|
||||
endif()
|
||||
else()
|
||||
if(USE_GLOO AND USE_C10D_GLOO)
|
||||
c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST OFF)
|
||||
c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu gtest_main
|
||||
INSTALL_TEST OFF)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(USE_MPI AND USE_C10D_MPI)
|
||||
add_definitions(-DMPIEXEC=${MPIEXEC})
|
||||
# MPI is a private dependency of libtorch, but the tests include some
|
||||
# private headers of libtorch, which in turn include MPI. As a hacky
|
||||
# alternative to making MPI a public dependency of libtorch, we make it
|
||||
# a private dependency of the tests as well.
|
||||
c10d_add_test(ProcessGroupMPITest.cpp LINK_LIBRARIES torch_cpu MPI::MPI_CXX INSTALL_TEST ${INSTALL_TEST})
|
||||
# MPI is a private dependency of libtorch, but the tests include some private
|
||||
# headers of libtorch, which in turn include MPI. As a hacky alternative to
|
||||
# making MPI a public dependency of libtorch, we make it a private dependency
|
||||
# of the tests as well.
|
||||
c10d_add_test(ProcessGroupMPITest.cpp LINK_LIBRARIES torch_cpu MPI::MPI_CXX
|
||||
INSTALL_TEST ${INSTALL_TEST})
|
||||
endif()
|
||||
|
||||
if(LINUX AND USE_GLOO AND USE_C10D_GLOO)
|
||||
if(LINUX
|
||||
AND USE_GLOO
|
||||
AND USE_C10D_GLOO)
|
||||
add_executable(example_allreduce example/allreduce.cpp)
|
||||
target_include_directories(example_allreduce PRIVATE $<BUILD_INTERFACE:${TORCH_SRC_DIR}/csrc/distributed>)
|
||||
target_include_directories(
|
||||
example_allreduce
|
||||
PRIVATE $<BUILD_INTERFACE:${TORCH_SRC_DIR}/csrc/distributed>)
|
||||
target_link_libraries(example_allreduce torch_cpu)
|
||||
if(USE_CUDA)
|
||||
target_link_libraries(example_allreduce torch_cuda)
|
||||
|
||||
@ -1,9 +1,8 @@
|
||||
if(USE_DISTRIBUTED AND NOT WIN32)
|
||||
set(DIST_AUTOGRAD_TEST_DIR "${TORCH_ROOT}/test/cpp/dist_autograd")
|
||||
set(DIST_AUTOGRAD_TEST_SOURCES
|
||||
${TORCH_ROOT}/test/cpp/common/main.cpp
|
||||
${DIST_AUTOGRAD_TEST_DIR}/test_dist_autograd.cpp
|
||||
)
|
||||
${TORCH_ROOT}/test/cpp/common/main.cpp
|
||||
${DIST_AUTOGRAD_TEST_DIR}/test_dist_autograd.cpp)
|
||||
|
||||
add_executable(test_dist_autograd ${DIST_AUTOGRAD_TEST_SOURCES})
|
||||
target_include_directories(test_dist_autograd PRIVATE ${ATen_CPU_INCLUDE})
|
||||
@ -14,11 +13,17 @@ if(USE_DISTRIBUTED AND NOT WIN32)
|
||||
endif()
|
||||
|
||||
if(INSTALL_TEST)
|
||||
set_target_properties(test_dist_autograd PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
|
||||
set_target_properties(
|
||||
test_dist_autograd
|
||||
PROPERTIES INSTALL_RPATH
|
||||
"${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
|
||||
install(TARGETS test_dist_autograd DESTINATION bin)
|
||||
# Install PDB files for MSVC builds
|
||||
if(MSVC AND BUILD_SHARED_LIBS)
|
||||
install(FILES $<TARGET_PDB_FILE:test_dist_autograd> DESTINATION bin OPTIONAL)
|
||||
install(
|
||||
FILES $<TARGET_PDB_FILE:test_dist_autograd>
|
||||
DESTINATION bin
|
||||
OPTIONAL)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@ -1,33 +1,30 @@
|
||||
set(JIT_TEST_ROOT ${TORCH_ROOT}/test/cpp/jit)
|
||||
|
||||
# Build separate libraries the define custom classes/operators used from our Python tests.
|
||||
# These are intended to be used with torch.ops.load_library() in our Python test suite.
|
||||
add_library(torchbind_test SHARED
|
||||
${JIT_TEST_ROOT}/test_custom_class_registrations.h
|
||||
${JIT_TEST_ROOT}/test_custom_class_registrations.cpp
|
||||
)
|
||||
# Build separate libraries that define custom classes/operators used from our
|
||||
# Python tests. These are intended to be used with torch.ops.load_library() in
|
||||
# our Python test suite.
|
||||
add_library(
|
||||
torchbind_test SHARED ${JIT_TEST_ROOT}/test_custom_class_registrations.h
|
||||
${JIT_TEST_ROOT}/test_custom_class_registrations.cpp)
|
||||
target_link_libraries(torchbind_test torch)
|
||||
|
||||
add_library(jitbackend_test SHARED ${JIT_TEST_ROOT}/test_backend_lib.cpp)
|
||||
target_link_libraries(jitbackend_test torch)
|
||||
|
||||
set(BACKEND_WITH_COMPILER_SRCS
|
||||
${JIT_TEST_ROOT}/test_backend_compiler_lib.cpp
|
||||
${JIT_TEST_ROOT}/test_backend_compiler_preprocess.cpp
|
||||
)
|
||||
${JIT_TEST_ROOT}/test_backend_compiler_lib.cpp
|
||||
${JIT_TEST_ROOT}/test_backend_compiler_preprocess.cpp)
|
||||
if(USE_KINETO)
|
||||
# Testing edge profiler for backend use
|
||||
# profiler_edge should only be added when USE_KINETO flag is on
|
||||
# Testing edge profiler for backend use. profiler_edge should only be added
|
||||
# when USE_KINETO flag is on
|
||||
list(APPEND BACKEND_WITH_COMPILER_SRCS
|
||||
${TORCH_SRC_DIR}/csrc/jit/mobile/profiler_edge.cpp)
|
||||
${TORCH_SRC_DIR}/csrc/jit/mobile/profiler_edge.cpp)
|
||||
endif()
|
||||
|
||||
add_library(backend_with_compiler SHARED
|
||||
${BACKEND_WITH_COMPILER_SRCS}
|
||||
)
|
||||
add_library(backend_with_compiler SHARED ${BACKEND_WITH_COMPILER_SRCS})
|
||||
if(USE_KINETO)
|
||||
set_target_properties(backend_with_compiler PROPERTIES COMPILE_FLAGS
|
||||
"-DUSE_KINETO")
|
||||
"-DUSE_KINETO")
|
||||
endif()
|
||||
target_link_libraries(backend_with_compiler torch)
|
||||
|
||||
@ -39,68 +36,64 @@ endif()
|
||||
|
||||
# Build the cpp gtest binary containing the cpp-only tests.
|
||||
set(JIT_TEST_SRCS
|
||||
${JIT_TEST_ROOT}/source_range_test.cpp
|
||||
${JIT_TEST_ROOT}/test_add_if_then_else.cpp
|
||||
${JIT_TEST_ROOT}/test_alias_analysis.cpp
|
||||
${JIT_TEST_ROOT}/test_argument_spec.cpp
|
||||
${JIT_TEST_ROOT}/test_autodiff.cpp
|
||||
${JIT_TEST_ROOT}/test_load_upgraders.cpp
|
||||
${JIT_TEST_ROOT}/test_op_replacement.cpp
|
||||
${JIT_TEST_ROOT}/test_upgrader_utils.cpp
|
||||
${JIT_TEST_ROOT}/test_backend.cpp
|
||||
${JIT_TEST_ROOT}/test_class_import.cpp
|
||||
${JIT_TEST_ROOT}/test_class_parser.cpp
|
||||
${JIT_TEST_ROOT}/test_class_type.cpp
|
||||
${JIT_TEST_ROOT}/test_code_template.cpp
|
||||
${JIT_TEST_ROOT}/test_concat_opt.cpp
|
||||
${JIT_TEST_ROOT}/test_constant_pooling.cpp
|
||||
${JIT_TEST_ROOT}/test_cleanup_passes.cpp
|
||||
${JIT_TEST_ROOT}/test_create_autodiff_subgraphs.cpp
|
||||
${JIT_TEST_ROOT}/test_custom_class.cpp
|
||||
${JIT_TEST_ROOT}/test_custom_class_registrations.h
|
||||
${JIT_TEST_ROOT}/test_custom_class_registrations.cpp
|
||||
${JIT_TEST_ROOT}/test_custom_operators.cpp
|
||||
${JIT_TEST_ROOT}/test_dce.cpp
|
||||
${JIT_TEST_ROOT}/test_fuser.cpp
|
||||
${JIT_TEST_ROOT}/test_graph_executor.cpp
|
||||
${JIT_TEST_ROOT}/test_graph_iterator.cpp
|
||||
${JIT_TEST_ROOT}/test_cs_debug_info_serialization.cpp
|
||||
${JIT_TEST_ROOT}/test_inliner.cpp
|
||||
${JIT_TEST_ROOT}/test_interface.cpp
|
||||
${JIT_TEST_ROOT}/test_interpreter.cpp
|
||||
${JIT_TEST_ROOT}/test_ir.cpp
|
||||
${JIT_TEST_ROOT}/test_irparser.cpp
|
||||
${JIT_TEST_ROOT}/test_jit_type.cpp
|
||||
${JIT_TEST_ROOT}/test_lexer.cpp
|
||||
${JIT_TEST_ROOT}/test_lite_interpreter.cpp
|
||||
${JIT_TEST_ROOT}/test_lite_interpreter_direct.cpp
|
||||
${JIT_TEST_ROOT}/test_lite_trainer.cpp
|
||||
${JIT_TEST_ROOT}/test_memory_dag.cpp
|
||||
${JIT_TEST_ROOT}/test_misc.cpp
|
||||
${JIT_TEST_ROOT}/test_mobile_type_parser.cpp
|
||||
${JIT_TEST_ROOT}/test_module_api.cpp
|
||||
${JIT_TEST_ROOT}/test_peephole_optimize.cpp
|
||||
${JIT_TEST_ROOT}/test_qualified_name.cpp
|
||||
${JIT_TEST_ROOT}/test_save_load.cpp
|
||||
${JIT_TEST_ROOT}/test_schema_info.cpp
|
||||
${JIT_TEST_ROOT}/test_schema_matching.cpp
|
||||
${JIT_TEST_ROOT}/test_stack_opt.cpp
|
||||
${JIT_TEST_ROOT}/test_subgraph_matcher.cpp
|
||||
${JIT_TEST_ROOT}/test_subgraph_rewriter.cpp
|
||||
${JIT_TEST_ROOT}/test_subgraph_utils.cpp
|
||||
${JIT_TEST_ROOT}/test_union.cpp
|
||||
${JIT_TEST_ROOT}/test_utils.cpp
|
||||
${JIT_TEST_ROOT}/test_script_profile.cpp
|
||||
${JIT_TEST_ROOT}/test_shape_analysis.cpp
|
||||
${JIT_TEST_ROOT}/test_jit_logging_levels.cpp
|
||||
${JIT_TEST_ROOT}/test_file_format.cpp
|
||||
${JIT_TEST_ROOT}/test_flatbuffer.cpp
|
||||
)
|
||||
${JIT_TEST_ROOT}/source_range_test.cpp
|
||||
${JIT_TEST_ROOT}/test_add_if_then_else.cpp
|
||||
${JIT_TEST_ROOT}/test_alias_analysis.cpp
|
||||
${JIT_TEST_ROOT}/test_argument_spec.cpp
|
||||
${JIT_TEST_ROOT}/test_autodiff.cpp
|
||||
${JIT_TEST_ROOT}/test_load_upgraders.cpp
|
||||
${JIT_TEST_ROOT}/test_op_replacement.cpp
|
||||
${JIT_TEST_ROOT}/test_upgrader_utils.cpp
|
||||
${JIT_TEST_ROOT}/test_backend.cpp
|
||||
${JIT_TEST_ROOT}/test_class_import.cpp
|
||||
${JIT_TEST_ROOT}/test_class_parser.cpp
|
||||
${JIT_TEST_ROOT}/test_class_type.cpp
|
||||
${JIT_TEST_ROOT}/test_code_template.cpp
|
||||
${JIT_TEST_ROOT}/test_concat_opt.cpp
|
||||
${JIT_TEST_ROOT}/test_constant_pooling.cpp
|
||||
${JIT_TEST_ROOT}/test_cleanup_passes.cpp
|
||||
${JIT_TEST_ROOT}/test_create_autodiff_subgraphs.cpp
|
||||
${JIT_TEST_ROOT}/test_custom_class.cpp
|
||||
${JIT_TEST_ROOT}/test_custom_class_registrations.h
|
||||
${JIT_TEST_ROOT}/test_custom_class_registrations.cpp
|
||||
${JIT_TEST_ROOT}/test_custom_operators.cpp
|
||||
${JIT_TEST_ROOT}/test_dce.cpp
|
||||
${JIT_TEST_ROOT}/test_fuser.cpp
|
||||
${JIT_TEST_ROOT}/test_graph_executor.cpp
|
||||
${JIT_TEST_ROOT}/test_graph_iterator.cpp
|
||||
${JIT_TEST_ROOT}/test_cs_debug_info_serialization.cpp
|
||||
${JIT_TEST_ROOT}/test_inliner.cpp
|
||||
${JIT_TEST_ROOT}/test_interface.cpp
|
||||
${JIT_TEST_ROOT}/test_interpreter.cpp
|
||||
${JIT_TEST_ROOT}/test_ir.cpp
|
||||
${JIT_TEST_ROOT}/test_irparser.cpp
|
||||
${JIT_TEST_ROOT}/test_jit_type.cpp
|
||||
${JIT_TEST_ROOT}/test_lexer.cpp
|
||||
${JIT_TEST_ROOT}/test_lite_interpreter.cpp
|
||||
${JIT_TEST_ROOT}/test_lite_interpreter_direct.cpp
|
||||
${JIT_TEST_ROOT}/test_lite_trainer.cpp
|
||||
${JIT_TEST_ROOT}/test_memory_dag.cpp
|
||||
${JIT_TEST_ROOT}/test_misc.cpp
|
||||
${JIT_TEST_ROOT}/test_mobile_type_parser.cpp
|
||||
${JIT_TEST_ROOT}/test_module_api.cpp
|
||||
${JIT_TEST_ROOT}/test_peephole_optimize.cpp
|
||||
${JIT_TEST_ROOT}/test_qualified_name.cpp
|
||||
${JIT_TEST_ROOT}/test_save_load.cpp
|
||||
${JIT_TEST_ROOT}/test_schema_info.cpp
|
||||
${JIT_TEST_ROOT}/test_schema_matching.cpp
|
||||
${JIT_TEST_ROOT}/test_stack_opt.cpp
|
||||
${JIT_TEST_ROOT}/test_subgraph_matcher.cpp
|
||||
${JIT_TEST_ROOT}/test_subgraph_rewriter.cpp
|
||||
${JIT_TEST_ROOT}/test_subgraph_utils.cpp
|
||||
${JIT_TEST_ROOT}/test_union.cpp
|
||||
${JIT_TEST_ROOT}/test_utils.cpp
|
||||
${JIT_TEST_ROOT}/test_script_profile.cpp
|
||||
${JIT_TEST_ROOT}/test_shape_analysis.cpp
|
||||
${JIT_TEST_ROOT}/test_jit_logging_levels.cpp
|
||||
${JIT_TEST_ROOT}/test_file_format.cpp
|
||||
${JIT_TEST_ROOT}/test_flatbuffer.cpp)
|
||||
|
||||
add_executable(test_jit
|
||||
${TORCH_ROOT}/test/cpp/common/main.cpp
|
||||
${JIT_TEST_SRCS}
|
||||
)
|
||||
add_executable(test_jit ${TORCH_ROOT}/test/cpp/common/main.cpp ${JIT_TEST_SRCS})
|
||||
|
||||
# We also build with UBSAN flag in build_asan.h
|
||||
if(USE_ASAN)
|
||||
@ -108,9 +101,7 @@ if(USE_ASAN)
|
||||
target_link_libraries(test_jit PRIVATE "-fsanitize=undefined")
|
||||
endif()
|
||||
|
||||
target_link_libraries(
|
||||
test_jit PRIVATE flatbuffers)
|
||||
|
||||
target_link_libraries(test_jit PRIVATE flatbuffers)
|
||||
|
||||
# TODO temporary until we can delete the old gtest polyfills.
|
||||
target_compile_definitions(test_jit PRIVATE USE_GTEST)
|
||||
@ -125,7 +116,8 @@ if(USE_MKLDNN)
|
||||
target_link_libraries(test_jit PRIVATE caffe2::mkldnn)
|
||||
endif()
|
||||
|
||||
set(JIT_TEST_DEPENDENCIES torch gtest_main jitbackend_test backend_with_compiler gmock)
|
||||
set(JIT_TEST_DEPENDENCIES torch gtest_main jitbackend_test
|
||||
backend_with_compiler gmock)
|
||||
|
||||
if(MSVC)
|
||||
list(APPEND JIT_TEST_DEPENDENCIES onnx_library)
|
||||
@ -135,30 +127,39 @@ target_link_libraries(test_jit PRIVATE ${JIT_TEST_DEPENDENCIES})
|
||||
target_include_directories(test_jit PRIVATE ${ATen_CPU_INCLUDE})
|
||||
|
||||
if(LINUX)
|
||||
#Update to target_link_options when CMake version can be upgraded
|
||||
target_link_libraries(test_jit PRIVATE "-Wl,--no-as-needed,$<TARGET_FILE:jitbackend_test>,$<TARGET_FILE:backend_with_compiler>,--as-needed")
|
||||
# Update to target_link_options when CMake version can be upgraded
|
||||
target_link_libraries(
|
||||
test_jit
|
||||
PRIVATE
|
||||
"-Wl,--no-as-needed,$<TARGET_FILE:jitbackend_test>,$<TARGET_FILE:backend_with_compiler>,--as-needed"
|
||||
)
|
||||
endif()
|
||||
|
||||
if(USE_CUDA)
|
||||
target_compile_definitions(test_jit PRIVATE USE_CUDA)
|
||||
# Suppress sign compare checks for NVFUSER JIT tests
|
||||
if(NOT MSVC)
|
||||
target_compile_options(test_jit PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-Wno-sign-compare>)
|
||||
target_compile_options(test_jit
|
||||
PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-Wno-sign-compare>)
|
||||
endif()
|
||||
elseif(USE_ROCM)
|
||||
target_link_libraries(test_jit PRIVATE
|
||||
hiprtc::hiprtc
|
||||
hip::amdhip64
|
||||
${TORCH_CUDA_LIBRARIES})
|
||||
target_link_libraries(test_jit PRIVATE hiprtc::hiprtc hip::amdhip64
|
||||
${TORCH_CUDA_LIBRARIES})
|
||||
|
||||
target_compile_definitions(test_jit PRIVATE USE_ROCM)
|
||||
endif()
|
||||
|
||||
if(INSTALL_TEST)
|
||||
set_target_properties(test_jit PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
|
||||
set_target_properties(
|
||||
test_jit
|
||||
PROPERTIES INSTALL_RPATH
|
||||
"${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
|
||||
install(TARGETS test_jit DESTINATION bin)
|
||||
# Install PDB files for MSVC builds
|
||||
if(MSVC AND BUILD_SHARED_LIBS)
|
||||
install(FILES $<TARGET_PDB_FILE:test_jit> DESTINATION bin OPTIONAL)
|
||||
install(
|
||||
FILES $<TARGET_PDB_FILE:test_jit>
|
||||
DESTINATION bin
|
||||
OPTIONAL)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@ -2,28 +2,23 @@ set(LAZY_TEST_ROOT ${TORCH_ROOT}/test/cpp/lazy)
|
||||
|
||||
# Build the cpp gtest binary containing the cpp-only tests.
|
||||
set(LAZY_TEST_SRCS
|
||||
${LAZY_TEST_ROOT}/test_backend_device.cpp
|
||||
${LAZY_TEST_ROOT}/test_cache.cpp
|
||||
${LAZY_TEST_ROOT}/test_ir.cpp
|
||||
${LAZY_TEST_ROOT}/test_ir_util.cpp
|
||||
${LAZY_TEST_ROOT}/test_misc.cpp
|
||||
${LAZY_TEST_ROOT}/test_permutation_util.cpp
|
||||
${LAZY_TEST_ROOT}/test_shape.cpp
|
||||
${LAZY_TEST_ROOT}/test_trie_cache.cpp
|
||||
${LAZY_TEST_ROOT}/test_util.cpp
|
||||
${LAZY_TEST_ROOT}/test_lazy_graph_executor.cpp
|
||||
)
|
||||
${LAZY_TEST_ROOT}/test_backend_device.cpp
|
||||
${LAZY_TEST_ROOT}/test_cache.cpp
|
||||
${LAZY_TEST_ROOT}/test_ir.cpp
|
||||
${LAZY_TEST_ROOT}/test_ir_util.cpp
|
||||
${LAZY_TEST_ROOT}/test_misc.cpp
|
||||
${LAZY_TEST_ROOT}/test_permutation_util.cpp
|
||||
${LAZY_TEST_ROOT}/test_shape.cpp
|
||||
${LAZY_TEST_ROOT}/test_trie_cache.cpp
|
||||
${LAZY_TEST_ROOT}/test_util.cpp
|
||||
${LAZY_TEST_ROOT}/test_lazy_graph_executor.cpp)
|
||||
if(BUILD_LAZY_TS_BACKEND)
|
||||
list(APPEND LAZY_TEST_SRCS
|
||||
${LAZY_TEST_ROOT}/test_lazy_ops.cpp
|
||||
${LAZY_TEST_ROOT}/test_lazy_ops_util.cpp
|
||||
)
|
||||
list(APPEND LAZY_TEST_SRCS ${LAZY_TEST_ROOT}/test_lazy_ops.cpp
|
||||
${LAZY_TEST_ROOT}/test_lazy_ops_util.cpp)
|
||||
endif()
|
||||
|
||||
add_executable(test_lazy
|
||||
${TORCH_ROOT}/test/cpp/common/main.cpp
|
||||
${LAZY_TEST_SRCS}
|
||||
)
|
||||
add_executable(test_lazy ${TORCH_ROOT}/test/cpp/common/main.cpp
|
||||
${LAZY_TEST_SRCS})
|
||||
|
||||
# TODO temporary until we can delete the old gtest polyfills.
|
||||
target_compile_definitions(test_lazy PRIVATE USE_GTEST)
|
||||
@ -36,19 +31,23 @@ target_include_directories(test_lazy PRIVATE ${ATen_CPU_INCLUDE})
|
||||
if(USE_CUDA)
|
||||
target_compile_definitions(test_lazy PRIVATE USE_CUDA)
|
||||
elseif(USE_ROCM)
|
||||
target_link_libraries(test_lazy PRIVATE
|
||||
hiprtc::hiprtc
|
||||
hip::amdhip64
|
||||
${TORCH_CUDA_LIBRARIES})
|
||||
target_link_libraries(test_lazy PRIVATE hiprtc::hiprtc hip::amdhip64
|
||||
${TORCH_CUDA_LIBRARIES})
|
||||
|
||||
target_compile_definitions(test_lazy PRIVATE USE_ROCM)
|
||||
endif()
|
||||
|
||||
if(INSTALL_TEST)
|
||||
set_target_properties(test_lazy PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
|
||||
set_target_properties(
|
||||
test_lazy
|
||||
PROPERTIES INSTALL_RPATH
|
||||
"${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
|
||||
install(TARGETS test_lazy DESTINATION bin)
|
||||
# Install PDB files for MSVC builds
|
||||
if(MSVC AND BUILD_SHARED_LIBS)
|
||||
install(FILES $<TARGET_PDB_FILE:test_lazy> DESTINATION bin OPTIONAL)
|
||||
install(
|
||||
FILES $<TARGET_PDB_FILE:test_lazy>
|
||||
DESTINATION bin
|
||||
OPTIONAL)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@ -1,30 +1,30 @@
|
||||
set(
|
||||
LITE_INTERPRETER_RUNTIME_TEST_DIR
|
||||
"${TORCH_ROOT}/test/cpp/lite_interpreter_runtime")
|
||||
set(LITE_INTERPRETER_RUNTIME_TEST_DIR
|
||||
${TORCH_ROOT}/test/cpp/lite_interpreter_runtime/main.cpp
|
||||
${TORCH_ROOT}/test/cpp/lite_interpreter_runtime/test_lite_interpreter_runtime.cpp
|
||||
${TORCH_ROOT}/test/cpp/lite_interpreter_runtime/test_mobile_profiler.cpp
|
||||
)
|
||||
"${TORCH_ROOT}/test/cpp/lite_interpreter_runtime")
|
||||
set(LITE_INTERPRETER_RUNTIME_TEST_DIR
|
||||
${TORCH_ROOT}/test/cpp/lite_interpreter_runtime/main.cpp
|
||||
${TORCH_ROOT}/test/cpp/lite_interpreter_runtime/test_lite_interpreter_runtime.cpp
|
||||
${TORCH_ROOT}/test/cpp/lite_interpreter_runtime/test_mobile_profiler.cpp)
|
||||
|
||||
add_library(backend_with_compiler_runtime SHARED
|
||||
${TORCH_ROOT}/test/cpp/jit/test_backend_compiler_lib.cpp
|
||||
${TORCH_ROOT}/torch/csrc/jit/backends/backend_interface.cpp
|
||||
)
|
||||
add_library(
|
||||
backend_with_compiler_runtime SHARED
|
||||
${TORCH_ROOT}/test/cpp/jit/test_backend_compiler_lib.cpp
|
||||
${TORCH_ROOT}/torch/csrc/jit/backends/backend_interface.cpp)
|
||||
target_link_libraries(backend_with_compiler_runtime PRIVATE torch)
|
||||
|
||||
add_executable(
|
||||
test_lite_interpreter_runtime
|
||||
${LITE_INTERPRETER_RUNTIME_TEST_DIR})
|
||||
target_include_directories(
|
||||
test_lite_interpreter_runtime PRIVATE
|
||||
${ATen_CPU_INCLUDE}
|
||||
)
|
||||
add_executable(test_lite_interpreter_runtime
|
||||
${LITE_INTERPRETER_RUNTIME_TEST_DIR})
|
||||
target_include_directories(test_lite_interpreter_runtime
|
||||
PRIVATE ${ATen_CPU_INCLUDE})
|
||||
|
||||
target_link_libraries(test_lite_interpreter_runtime PRIVATE torch gtest_main backend_with_compiler_runtime)
|
||||
target_link_libraries(test_lite_interpreter_runtime
|
||||
PRIVATE torch gtest_main backend_with_compiler_runtime)
|
||||
|
||||
if(LINUX)
|
||||
target_link_libraries(test_lite_interpreter_runtime PRIVATE "-Wl,--no-as-needed,$<TARGET_FILE:backend_with_compiler_runtime>,--as-needed")
|
||||
target_link_libraries(
|
||||
test_lite_interpreter_runtime
|
||||
PRIVATE
|
||||
"-Wl,--no-as-needed,$<TARGET_FILE:backend_with_compiler_runtime>,--as-needed"
|
||||
)
|
||||
endif()
|
||||
|
||||
if(INSTALL_TEST)
|
||||
@ -33,6 +33,7 @@ if(INSTALL_TEST)
|
||||
if(MSVC AND BUILD_SHARED_LIBS)
|
||||
install(
|
||||
FILES $<TARGET_PDB_FILE:test_lite_interpreter_runtime>
|
||||
DESTINATION bin OPTIONAL)
|
||||
DESTINATION bin
|
||||
OPTIONAL)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@ -4,41 +4,38 @@ file(GLOB_RECURSE NATIVERT_ALL_TEST_FILES "${NATIVERT_TEST_ROOT}/test_*.cpp")
|
||||
|
||||
# Build the cpp gtest binary containing the cpp-only tests.
|
||||
set(NATIVERT_TEST_SRCS
|
||||
${NATIVERT_ALL_TEST_FILES}
|
||||
${TORCH_ROOT}/torch/nativert/graph/TensorMeta.cpp
|
||||
${TORCH_ROOT}/torch/nativert/graph/Graph.cpp
|
||||
${TORCH_ROOT}/torch/nativert/graph/GraphSignature.cpp
|
||||
${TORCH_ROOT}/torch/nativert/graph/Serialization.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/OpKernel.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/PlacementUtils.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/Weights.cpp
|
||||
${TORCH_ROOT}/torch/nativert/common/FileUtil.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/memory/FunctionSchema.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/ExecutionPlanner.cpp
|
||||
${TORCH_ROOT}/torch/nativert/detail/ITree.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/ExecutionFrame.cpp
|
||||
${TORCH_ROOT}/torch/nativert/kernels/C10Kernel.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/memory/GreedyBySize.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/memory/Bump.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/memory/DisjointStorageGroups.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/memory/LayoutPlanner.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/memory/LayoutManager.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/memory/AliasAnalyzer.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/Executor.cpp
|
||||
${TORCH_ROOT}/torch/nativert/kernels/KernelFactory.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/ConstantFolder.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/GraphExecutorBase.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/SerialGraphExecutor.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/ParallelGraphExecutor.cpp
|
||||
${TORCH_ROOT}/torch/nativert/kernels/AutoFunctionalizeKernel.cpp
|
||||
${TORCH_ROOT}/torch/nativert/kernels/CallTorchBindKernel.cpp
|
||||
${TORCH_ROOT}/torch/nativert/kernels/HigherOrderKernel.cpp
|
||||
)
|
||||
${NATIVERT_ALL_TEST_FILES}
|
||||
${TORCH_ROOT}/torch/nativert/graph/TensorMeta.cpp
|
||||
${TORCH_ROOT}/torch/nativert/graph/Graph.cpp
|
||||
${TORCH_ROOT}/torch/nativert/graph/GraphSignature.cpp
|
||||
${TORCH_ROOT}/torch/nativert/graph/Serialization.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/OpKernel.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/PlacementUtils.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/Weights.cpp
|
||||
${TORCH_ROOT}/torch/nativert/common/FileUtil.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/memory/FunctionSchema.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/ExecutionPlanner.cpp
|
||||
${TORCH_ROOT}/torch/nativert/detail/ITree.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/ExecutionFrame.cpp
|
||||
${TORCH_ROOT}/torch/nativert/kernels/C10Kernel.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/memory/GreedyBySize.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/memory/Bump.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/memory/DisjointStorageGroups.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/memory/LayoutPlanner.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/memory/LayoutManager.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/memory/AliasAnalyzer.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/Executor.cpp
|
||||
${TORCH_ROOT}/torch/nativert/kernels/KernelFactory.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/ConstantFolder.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/GraphExecutorBase.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/SerialGraphExecutor.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/ParallelGraphExecutor.cpp
|
||||
${TORCH_ROOT}/torch/nativert/kernels/AutoFunctionalizeKernel.cpp
|
||||
${TORCH_ROOT}/torch/nativert/kernels/CallTorchBindKernel.cpp
|
||||
${TORCH_ROOT}/torch/nativert/kernels/HigherOrderKernel.cpp)
|
||||
|
||||
add_executable(test_nativert
|
||||
${TORCH_ROOT}/test/cpp/common/main.cpp
|
||||
${NATIVERT_TEST_SRCS}
|
||||
)
|
||||
add_executable(test_nativert ${TORCH_ROOT}/test/cpp/common/main.cpp
|
||||
${NATIVERT_TEST_SRCS})
|
||||
|
||||
# TODO temporary until we can delete the old gtest polyfills.
|
||||
target_compile_definitions(test_nativert PRIVATE USE_GTEST)
|
||||
@ -52,19 +49,23 @@ target_include_directories(test_nativert PRIVATE ${ATen_CPU_INCLUDE})
|
||||
if(USE_CUDA)
|
||||
target_compile_definitions(test_nativert PRIVATE USE_CUDA)
|
||||
elseif(USE_ROCM)
|
||||
target_link_libraries(test_nativert PRIVATE
|
||||
hiprtc::hiprtc
|
||||
hip::amdhip64
|
||||
${TORCH_CUDA_LIBRARIES})
|
||||
target_link_libraries(test_nativert PRIVATE hiprtc::hiprtc hip::amdhip64
|
||||
${TORCH_CUDA_LIBRARIES})
|
||||
|
||||
target_compile_definitions(test_nativert PRIVATE USE_ROCM)
|
||||
endif()
|
||||
|
||||
if(INSTALL_TEST)
|
||||
set_target_properties(test_nativert PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
|
||||
set_target_properties(
|
||||
test_nativert
|
||||
PROPERTIES INSTALL_RPATH
|
||||
"${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
|
||||
install(TARGETS test_nativert DESTINATION bin)
|
||||
# Install PDB files for MSVC builds
|
||||
if(MSVC AND BUILD_SHARED_LIBS)
|
||||
install(FILES $<TARGET_PDB_FILE:test_nativert> DESTINATION bin OPTIONAL)
|
||||
install(
|
||||
FILES $<TARGET_PDB_FILE:test_nativert>
|
||||
DESTINATION bin
|
||||
OPTIONAL)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@ -1,35 +1,25 @@
|
||||
set(TORCH_RPC_TEST_DIR "${TORCH_ROOT}/test/cpp/rpc")
|
||||
set(TORCH_RPC_TEST_SOURCES
|
||||
${TORCH_ROOT}/test/cpp/common/main.cpp
|
||||
${TORCH_RPC_TEST_DIR}/e2e_test_base.cpp
|
||||
${TORCH_RPC_TEST_DIR}/test_wire_serialization.cpp
|
||||
)
|
||||
set(TORCH_RPC_TEST_DEPENDENCY_LIBS
|
||||
torch gtest_main
|
||||
)
|
||||
${TORCH_ROOT}/test/cpp/common/main.cpp
|
||||
${TORCH_RPC_TEST_DIR}/e2e_test_base.cpp
|
||||
${TORCH_RPC_TEST_DIR}/test_wire_serialization.cpp)
|
||||
set(TORCH_RPC_TEST_DEPENDENCY_LIBS torch gtest_main)
|
||||
|
||||
if(USE_GLOO)
|
||||
list(APPEND TORCH_RPC_TEST_SOURCES
|
||||
${TORCH_RPC_TEST_DIR}/test_e2e_tensorpipe.cpp
|
||||
)
|
||||
${TORCH_RPC_TEST_DIR}/test_e2e_tensorpipe.cpp)
|
||||
endif()
|
||||
|
||||
if(USE_TENSORPIPE)
|
||||
list(APPEND TORCH_RPC_TEST_SOURCES
|
||||
${TORCH_RPC_TEST_DIR}/test_tensorpipe_serialization.cpp
|
||||
)
|
||||
list(APPEND TORCH_RPC_TEST_DEPENDENCY_LIBS
|
||||
tensorpipe
|
||||
)
|
||||
${TORCH_RPC_TEST_DIR}/test_tensorpipe_serialization.cpp)
|
||||
list(APPEND TORCH_RPC_TEST_DEPENDENCY_LIBS tensorpipe)
|
||||
endif()
|
||||
|
||||
add_executable(test_cpp_rpc ${TORCH_RPC_TEST_SOURCES})
|
||||
target_include_directories(test_cpp_rpc PRIVATE ${ATen_CPU_INCLUDE})
|
||||
target_include_directories(
|
||||
test_cpp_rpc PRIVATE
|
||||
${ATen_CPU_INCLUDE})
|
||||
target_include_directories(
|
||||
test_cpp_rpc PRIVATE
|
||||
$<BUILD_INTERFACE:${TORCH_SRC_DIR}/csrc/distributed>)
|
||||
test_cpp_rpc PRIVATE $<BUILD_INTERFACE:${TORCH_SRC_DIR}/csrc/distributed>)
|
||||
target_link_libraries(test_cpp_rpc PRIVATE ${TORCH_RPC_TEST_DEPENDENCY_LIBS})
|
||||
|
||||
if(USE_CUDA)
|
||||
@ -37,10 +27,16 @@ if(USE_CUDA)
|
||||
endif()
|
||||
|
||||
if(INSTALL_TEST)
|
||||
set_target_properties(test_cpp_rpc PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
|
||||
set_target_properties(
|
||||
test_cpp_rpc
|
||||
PROPERTIES INSTALL_RPATH
|
||||
"${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
|
||||
install(TARGETS test_cpp_rpc DESTINATION bin)
|
||||
# Install PDB files for MSVC builds
|
||||
if(MSVC AND BUILD_SHARED_LIBS)
|
||||
install(FILES $<TARGET_PDB_FILE:test_cpp_rpc> DESTINATION bin OPTIONAL)
|
||||
install(
|
||||
FILES $<TARGET_PDB_FILE:test_cpp_rpc>
|
||||
DESTINATION bin
|
||||
OPTIONAL)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@ -1,30 +1,29 @@
|
||||
set(TENSOREXPR_TEST_ROOT ${TORCH_ROOT}/test/cpp/tensorexpr)
|
||||
|
||||
set(TENSOREXPR_TEST_SRCS
|
||||
${TENSOREXPR_TEST_ROOT}/test_approx.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_aten.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_boundsinference.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_conv.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_cpp_codegen.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_dynamic_shapes.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_expr.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_external_calls.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_graph_opt.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_ir_printer.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_ir_verifier.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_kernel.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_loopnest.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_memdependency.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_ops.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_quantization.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_memplanning.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_reductions.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_registerizer.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_simplify.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_te_fuser_pass.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_type.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_type_specializations.cpp
|
||||
)
|
||||
${TENSOREXPR_TEST_ROOT}/test_approx.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_aten.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_boundsinference.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_conv.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_cpp_codegen.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_dynamic_shapes.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_expr.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_external_calls.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_graph_opt.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_ir_printer.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_ir_verifier.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_kernel.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_loopnest.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_memdependency.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_ops.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_quantization.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_memplanning.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_reductions.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_registerizer.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_simplify.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_te_fuser_pass.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_type.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/test_type_specializations.cpp)
|
||||
|
||||
if(USE_CUDA)
|
||||
list(APPEND TENSOREXPR_TEST_SRCS ${TENSOREXPR_TEST_ROOT}/test_cuda.cpp)
|
||||
@ -34,10 +33,10 @@ if(USE_LLVM AND LLVM_FOUND)
|
||||
list(APPEND TENSOREXPR_TEST_SRCS ${TENSOREXPR_TEST_ROOT}/test_llvm.cpp)
|
||||
endif()
|
||||
|
||||
add_executable(test_tensorexpr
|
||||
add_executable(
|
||||
test_tensorexpr
|
||||
${TORCH_ROOT}/test/cpp/common/main.cpp
|
||||
${TENSOREXPR_TEST_ROOT}/padded_buffer.cpp
|
||||
${TENSOREXPR_TEST_SRCS})
|
||||
${TENSOREXPR_TEST_ROOT}/padded_buffer.cpp ${TENSOREXPR_TEST_SRCS})
|
||||
|
||||
target_link_libraries(test_tensorexpr PRIVATE torch gtest_main)
|
||||
target_include_directories(test_tensorexpr PRIVATE ${ATen_CPU_INCLUDE})
|
||||
@ -57,27 +56,36 @@ if(USE_CUDA)
|
||||
target_compile_definitions(test_tensorexpr PRIVATE USE_CUDA)
|
||||
target_compile_definitions(tutorial_tensorexpr PRIVATE USE_CUDA)
|
||||
elseif(USE_ROCM)
|
||||
target_link_libraries(test_tensorexpr PRIVATE
|
||||
hiprtc::hiprtc
|
||||
hip::amdhip64
|
||||
${TORCH_CUDA_LIBRARIES})
|
||||
target_link_libraries(test_tensorexpr PRIVATE hiprtc::hiprtc hip::amdhip64
|
||||
${TORCH_CUDA_LIBRARIES})
|
||||
target_compile_definitions(test_tensorexpr PRIVATE USE_ROCM)
|
||||
|
||||
target_link_libraries(tutorial_tensorexpr PRIVATE
|
||||
hiprtc::hiprtc
|
||||
hip::amdhip64
|
||||
${TORCH_CUDA_LIBRARIES})
|
||||
target_link_libraries(
|
||||
tutorial_tensorexpr PRIVATE hiprtc::hiprtc hip::amdhip64
|
||||
${TORCH_CUDA_LIBRARIES})
|
||||
target_compile_definitions(tutorial_tensorexpr PRIVATE USE_ROCM)
|
||||
endif()
|
||||
|
||||
if(INSTALL_TEST)
|
||||
set_target_properties(test_tensorexpr PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
|
||||
set_target_properties(
|
||||
test_tensorexpr
|
||||
PROPERTIES INSTALL_RPATH
|
||||
"${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
|
||||
install(TARGETS test_tensorexpr DESTINATION bin)
|
||||
set_target_properties(tutorial_tensorexpr PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
|
||||
set_target_properties(
|
||||
tutorial_tensorexpr
|
||||
PROPERTIES INSTALL_RPATH
|
||||
"${CMAKE_INSTALL_RPATH}:${_rpath_portable_origin}/../lib")
|
||||
install(TARGETS tutorial_tensorexpr DESTINATION bin)
|
||||
# Install PDB files for MSVC builds
|
||||
if(MSVC AND BUILD_SHARED_LIBS)
|
||||
install(FILES $<TARGET_PDB_FILE:test_tensorexpr> DESTINATION bin OPTIONAL)
|
||||
install(FILES $<TARGET_PDB_FILE:tutorial_tensorexpr> DESTINATION bin OPTIONAL)
|
||||
install(
|
||||
FILES $<TARGET_PDB_FILE:test_tensorexpr>
|
||||
DESTINATION bin
|
||||
OPTIONAL)
|
||||
install(
|
||||
FILES $<TARGET_PDB_FILE:tutorial_tensorexpr>
|
||||
DESTINATION bin
|
||||
OPTIONAL)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@ -1,8 +1,6 @@
|
||||
set(LIBRARY_NAME torch_openreg)
|
||||
|
||||
file(GLOB_RECURSE SOURCE_FILES
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/*.cpp"
|
||||
)
|
||||
file(GLOB_RECURSE SOURCE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp")
|
||||
|
||||
add_library(${LIBRARY_NAME} SHARED ${SOURCE_FILES})
|
||||
|
||||
|
||||
@ -1,8 +1,6 @@
|
||||
set(LIBRARY_NAME openreg)
|
||||
|
||||
file(GLOB_RECURSE SOURCE_FILES
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/*.cpp"
|
||||
)
|
||||
file(GLOB_RECURSE SOURCE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp")
|
||||
|
||||
add_library(${LIBRARY_NAME} SHARED ${SOURCE_FILES})
|
||||
|
||||
|
||||
@ -1,8 +1,6 @@
|
||||
set(LIBRARY_NAME torch_bindings)
|
||||
|
||||
file(GLOB_RECURSE SOURCE_FILES
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/*.cpp"
|
||||
)
|
||||
file(GLOB_RECURSE SOURCE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp")
|
||||
|
||||
add_library(${LIBRARY_NAME} SHARED ${SOURCE_FILES})
|
||||
|
||||
|
||||
@ -3,8 +3,8 @@ cmake_minimum_required(VERSION 3.15 FATAL_ERROR)
|
||||
project(custom_backend)
|
||||
|
||||
if(USE_ROCM)
|
||||
include(utils)
|
||||
include(LoadHIP)
|
||||
include(utils)
|
||||
include(LoadHIP)
|
||||
endif()
|
||||
find_package(Torch REQUIRED)
|
||||
|
||||
|
||||
@ -3,8 +3,8 @@ cmake_minimum_required(VERSION 3.15 FATAL_ERROR)
|
||||
project(custom_ops)
|
||||
|
||||
if(USE_ROCM)
|
||||
include(utils)
|
||||
include(LoadHIP)
|
||||
include(utils)
|
||||
include(LoadHIP)
|
||||
endif()
|
||||
find_package(Torch REQUIRED)
|
||||
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
# Build separate libraries the define custom classes/operators used from our Python tests.
|
||||
# These are intended to be used with torch.ops.load_library() in our Python test suite.
|
||||
# Build separate libraries the define custom classes/operators used from our
|
||||
# Python tests. These are intended to be used with torch.ops.load_library() in
|
||||
# our Python test suite.
|
||||
add_library(aoti_custom_ops SHARED custom_ops.cpp)
|
||||
target_link_libraries(aoti_custom_ops torch)
|
||||
|
||||
|
||||
@ -5,9 +5,8 @@ set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/test/lib)
|
||||
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/test/lib)
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/test/bin)
|
||||
|
||||
# TODO(voz): Fix hack below
|
||||
# Start hack
|
||||
list(APPEND policies_new CMP0079)
|
||||
# TODO(voz): Fix hack below Start hack
|
||||
list(APPEND policies_new CMP0079)
|
||||
|
||||
foreach(policy ${policies_new})
|
||||
if(POLICY ${policy})
|
||||
@ -16,32 +15,37 @@ foreach(policy ${policies_new})
|
||||
endforeach()
|
||||
# End hack
|
||||
|
||||
################################
|
||||
# ##############################################################################
|
||||
# GTest
|
||||
################################
|
||||
# ##############################################################################
|
||||
project(googletest-git NONE)
|
||||
|
||||
include(FetchContent)
|
||||
FetchContent_Declare(
|
||||
googletest
|
||||
GIT_REPOSITORY https://github.com/google/googletest.git
|
||||
GIT_TAG release-1.12.1
|
||||
)
|
||||
GIT_TAG release-1.12.1)
|
||||
|
||||
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
|
||||
set(BUILD_GMOCK OFF CACHE BOOL "" FORCE)
|
||||
set(BUILD_GTEST ON CACHE BOOL "" FORCE)
|
||||
set(gtest_force_shared_crt
|
||||
ON
|
||||
CACHE BOOL "" FORCE)
|
||||
set(BUILD_GMOCK
|
||||
OFF
|
||||
CACHE BOOL "" FORCE)
|
||||
set(BUILD_GTEST
|
||||
ON
|
||||
CACHE BOOL "" FORCE)
|
||||
|
||||
FetchContent_MakeAvailable(googletest)
|
||||
|
||||
|
||||
|
||||
################################
|
||||
# ##############################################################################
|
||||
# Tests
|
||||
################################
|
||||
# ##############################################################################
|
||||
|
||||
# TODO(voz): This is a little assumptive of just this one test, rewrite with real dir includes
|
||||
# TODO(voz): This is a little assumptive of just this one test, rewrite with
|
||||
# real dir includes
|
||||
include_directories(${ATEN_INCLUDE})
|
||||
add_executable(test_cpp_prefix test_cpp_prefix.cpp ../../torchinductor/codegen/cpp_prefix.h)
|
||||
add_executable(test_cpp_prefix test_cpp_prefix.cpp
|
||||
../../torchinductor/codegen/cpp_prefix.h)
|
||||
target_link_libraries(test_cpp_prefix gtest_main)
|
||||
add_test(NAME test_cpp_prefix COMMAND test_cpp_prefix)
|
||||
|
||||
@ -3,8 +3,8 @@ cmake_minimum_required(VERSION 3.15 FATAL_ERROR)
|
||||
project(jit_hooks)
|
||||
|
||||
if(USE_ROCM)
|
||||
include(utils)
|
||||
include(LoadHIP)
|
||||
include(utils)
|
||||
include(LoadHIP)
|
||||
endif()
|
||||
find_package(Torch REQUIRED)
|
||||
|
||||
|
||||
@ -2,7 +2,10 @@ cmake_minimum_required(VERSION 3.15)
|
||||
|
||||
project(custom_build_project)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ standard whose features are requested to build this target.")
|
||||
set(CMAKE_CXX_STANDARD
|
||||
17
|
||||
CACHE STRING
|
||||
"The C++ standard whose features are requested to build this target.")
|
||||
|
||||
# Find torch library
|
||||
find_package(Torch REQUIRED)
|
||||
@ -13,12 +16,12 @@ target_include_directories(Predictor PUBLIC ${TORCH_INCLUDE_DIRS})
|
||||
|
||||
find_package(Threads REQUIRED)
|
||||
|
||||
target_link_libraries(Predictor
|
||||
target_link_libraries(
|
||||
Predictor
|
||||
-Wl,-s
|
||||
-Wl,--gc-sections
|
||||
-Wl,--whole-archive
|
||||
${TORCH_LIBRARIES}
|
||||
-Wl,--no-whole-archive
|
||||
Threads::Threads
|
||||
${CMAKE_DL_LIBS}
|
||||
)
|
||||
${CMAKE_DL_LIBS})
|
||||
|
||||
@ -3,10 +3,8 @@ cmake_minimum_required(VERSION 3.15)
|
||||
set(TORCH_ROOT ${CMAKE_CURRENT_LIST_DIR}/../../..)
|
||||
set(TEST_ROOT ${TORCH_ROOT}/test/mobile/lightweight_dispatch)
|
||||
|
||||
add_executable(test_codegen_unboxing
|
||||
${TEST_ROOT}/test_lightweight_dispatch.cpp
|
||||
${TEST_ROOT}/test_codegen_unboxing.cpp
|
||||
)
|
||||
add_executable(test_codegen_unboxing ${TEST_ROOT}/test_lightweight_dispatch.cpp
|
||||
${TEST_ROOT}/test_codegen_unboxing.cpp)
|
||||
|
||||
target_include_directories(test_codegen_unboxing PRIVATE ${ATen_CPU_INCLUDE})
|
||||
|
||||
@ -14,9 +12,8 @@ target_compile_definitions(test_codegen_unboxing PRIVATE USE_GTEST)
|
||||
|
||||
set(TEST_UNBOXING_DEPENDENCIES torch gtest_main)
|
||||
|
||||
target_link_libraries(test_codegen_unboxing PRIVATE
|
||||
${TEST_UNBOXING_DEPENDENCIES}
|
||||
)
|
||||
target_link_libraries(test_codegen_unboxing
|
||||
PRIVATE ${TEST_UNBOXING_DEPENDENCIES})
|
||||
|
||||
if(INSTALL_TEST)
|
||||
install(TARGETS test_codegen_unboxing DESTINATION bin)
|
||||
|
||||
@ -1,23 +1,20 @@
|
||||
set(MOBILE_NNC_TEST_ROOT ${TORCH_ROOT}/test/mobile/nnc)
|
||||
|
||||
set(MOBILE_NNC_TEST_SRCS
|
||||
${MOBILE_NNC_TEST_ROOT}/test_context.cpp
|
||||
${MOBILE_NNC_TEST_ROOT}/test_nnc_backend.cpp
|
||||
${MOBILE_NNC_TEST_ROOT}/test_registry.cpp
|
||||
)
|
||||
${MOBILE_NNC_TEST_ROOT}/test_context.cpp
|
||||
${MOBILE_NNC_TEST_ROOT}/test_nnc_backend.cpp
|
||||
${MOBILE_NNC_TEST_ROOT}/test_registry.cpp)
|
||||
|
||||
add_executable(test_mobile_nnc
|
||||
${TORCH_ROOT}/test/cpp/lite_interpreter_runtime/main.cpp
|
||||
${MOBILE_NNC_TEST_SRCS}
|
||||
)
|
||||
add_executable(
|
||||
test_mobile_nnc ${TORCH_ROOT}/test/cpp/lite_interpreter_runtime/main.cpp
|
||||
${MOBILE_NNC_TEST_SRCS})
|
||||
|
||||
target_link_libraries(test_mobile_nnc PRIVATE torch gtest_main)
|
||||
target_include_directories(test_mobile_nnc PRIVATE ${ATen_CPU_INCLUDE})
|
||||
target_compile_definitions(test_mobile_nnc PRIVATE USE_GTEST)
|
||||
|
||||
add_executable(aot_model_compiler_test
|
||||
${TORCH_ROOT}/binaries/aot_model_compiler.cc
|
||||
)
|
||||
${TORCH_ROOT}/binaries/aot_model_compiler.cc)
|
||||
|
||||
target_link_libraries(aot_model_compiler_test PRIVATE torch)
|
||||
target_include_directories(aot_model_compiler_test PRIVATE ${ATen_CPU_INCLUDE})
|
||||
@ -27,6 +24,9 @@ if(INSTALL_TEST)
|
||||
install(TARGETS aot_model_compiler_test DESTINATION bin)
|
||||
# Install PDB files for MSVC builds
|
||||
if(MSVC AND BUILD_SHARED_LIBS)
|
||||
install(FILES $<TARGET_PDB_FILE:test_mobile_nnc> DESTINATION bin OPTIONAL)
|
||||
install(
|
||||
FILES $<TARGET_PDB_FILE:test_mobile_nnc>
|
||||
DESTINATION bin
|
||||
OPTIONAL)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
# This file used to build libtorch.so.
|
||||
# Now it only builds the Torch python bindings.
|
||||
# This file used to build libtorch.so. Now it only builds the Torch python
|
||||
# bindings.
|
||||
|
||||
if(NOT CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO)
|
||||
cmake_minimum_required(VERSION 3.27 FATAL_ERROR)
|
||||
@ -21,9 +21,9 @@ if(NOT TORCH_INSTALL_LIB_DIR)
|
||||
endif()
|
||||
|
||||
if(MSVC)
|
||||
set(LIBSHM_SUBDIR libshm_windows)
|
||||
set(LIBSHM_SUBDIR libshm_windows)
|
||||
else()
|
||||
set(LIBSHM_SUBDIR libshm)
|
||||
set(LIBSHM_SUBDIR libshm)
|
||||
endif()
|
||||
|
||||
set(LIBSHM_SRCDIR ${TORCH_SRC_DIR}/lib/${LIBSHM_SUBDIR})
|
||||
@ -32,49 +32,38 @@ add_subdirectory(${LIBSHM_SRCDIR})
|
||||
# Generate files
|
||||
set(TOOLS_PATH "${TORCH_ROOT}/tools")
|
||||
|
||||
|
||||
set(TORCH_PYTHON_SRCS
|
||||
${GENERATED_THNN_CXX}
|
||||
${GENERATED_CXX_PYTHON}
|
||||
)
|
||||
set(TORCH_PYTHON_SRCS ${GENERATED_THNN_CXX} ${GENERATED_CXX_PYTHON})
|
||||
append_filelist("libtorch_python_core_sources" TORCH_PYTHON_SRCS)
|
||||
|
||||
# NB: This has to match the condition under which the JIT test directory
|
||||
# is included (at the time of writing that's in caffe2/CMakeLists.txt).
|
||||
# NB: This has to match the condition under which the JIT test directory is
|
||||
# included (at the time of writing that's in caffe2/CMakeLists.txt).
|
||||
if(BUILD_TEST)
|
||||
add_definitions(-DBUILDING_TESTS)
|
||||
list(APPEND TORCH_PYTHON_SRCS
|
||||
${TORCH_ROOT}/test/cpp/jit/torch_python_test.cpp
|
||||
)
|
||||
add_definitions(-DBUILDING_TESTS)
|
||||
list(APPEND TORCH_PYTHON_SRCS
|
||||
${TORCH_ROOT}/test/cpp/jit/torch_python_test.cpp)
|
||||
endif()
|
||||
|
||||
set(TORCH_PYTHON_INCLUDE_DIRECTORIES
|
||||
${PYTHON_INCLUDE_DIR}
|
||||
|
||||
${TORCH_ROOT}
|
||||
${TORCH_ROOT}/aten/src
|
||||
${TORCH_ROOT}/aten/src/TH
|
||||
|
||||
${CMAKE_BINARY_DIR}
|
||||
${CMAKE_BINARY_DIR}/aten/src
|
||||
${CMAKE_BINARY_DIR}/caffe2/aten/src
|
||||
${CMAKE_BINARY_DIR}/third_party
|
||||
${CMAKE_BINARY_DIR}/third_party/onnx
|
||||
|
||||
${TORCH_ROOT}/third_party/valgrind-headers
|
||||
|
||||
${TORCH_ROOT}/third_party/gloo
|
||||
${TORCH_ROOT}/third_party/onnx
|
||||
${TORCH_ROOT}/third_party/flatbuffers/include
|
||||
${TORCH_ROOT}/third_party/kineto/libkineto/include
|
||||
${TORCH_ROOT}/third_party/cpp-httplib
|
||||
${TORCH_ROOT}/third_party/nlohmann/include
|
||||
|
||||
${TORCH_SRC_DIR}/csrc
|
||||
${TORCH_SRC_DIR}/csrc/api/include
|
||||
${TORCH_SRC_DIR}/lib
|
||||
${TORCH_SRC_DIR}/standalone
|
||||
)
|
||||
${TORCH_SRC_DIR}/standalone)
|
||||
|
||||
list(APPEND TORCH_PYTHON_INCLUDE_DIRECTORIES ${LIBSHM_SRCDIR})
|
||||
|
||||
@ -106,231 +95,205 @@ set(TORCH_PYTHON_COMPILE_OPTIONS)
|
||||
set(TORCH_PYTHON_LINK_FLAGS "")
|
||||
|
||||
if(MSVC)
|
||||
string(APPEND TORCH_PYTHON_LINK_FLAGS " /NODEFAULTLIB:LIBCMT.LIB")
|
||||
list(APPEND TORCH_PYTHON_LINK_LIBRARIES ${PYTHON_LIBRARIES} onnx_library)
|
||||
if(NOT CMAKE_BUILD_TYPE MATCHES "Release")
|
||||
string(APPEND TORCH_PYTHON_LINK_FLAGS " /DEBUG:FULL")
|
||||
endif()
|
||||
string(APPEND TORCH_PYTHON_LINK_FLAGS " /NODEFAULTLIB:LIBCMT.LIB")
|
||||
list(APPEND TORCH_PYTHON_LINK_LIBRARIES ${PYTHON_LIBRARIES} onnx_library)
|
||||
if(NOT CMAKE_BUILD_TYPE MATCHES "Release")
|
||||
string(APPEND TORCH_PYTHON_LINK_FLAGS " /DEBUG:FULL")
|
||||
endif()
|
||||
elseif(APPLE)
|
||||
string(APPEND TORCH_PYTHON_LINK_FLAGS " -undefined dynamic_lookup")
|
||||
string(APPEND TORCH_PYTHON_LINK_FLAGS " -undefined dynamic_lookup")
|
||||
else()
|
||||
list(APPEND TORCH_PYTHON_COMPILE_OPTIONS
|
||||
-fno-strict-aliasing
|
||||
-Wno-strict-aliasing)
|
||||
list(APPEND TORCH_PYTHON_COMPILE_OPTIONS -fno-strict-aliasing
|
||||
-Wno-strict-aliasing)
|
||||
endif()
|
||||
|
||||
if(USE_ITT)
|
||||
list(APPEND TORCH_PYTHON_SRCS
|
||||
${TORCH_SRC_DIR}/csrc/itt.cpp
|
||||
)
|
||||
list(APPEND TORCH_PYTHON_SRCS ${TORCH_SRC_DIR}/csrc/itt.cpp)
|
||||
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_ITT)
|
||||
endif()
|
||||
|
||||
if(USE_CUDA)
|
||||
include(${TORCH_ROOT}/cmake/public/cuda.cmake)
|
||||
append_filelist("libtorch_python_cuda_core_sources" TORCH_PYTHON_SRCS)
|
||||
list(APPEND TORCH_PYTHON_SRCS ${GENERATED_THNN_CXX_CUDA})
|
||||
include(${TORCH_ROOT}/cmake/public/cuda.cmake)
|
||||
append_filelist("libtorch_python_cuda_core_sources" TORCH_PYTHON_SRCS)
|
||||
list(APPEND TORCH_PYTHON_SRCS ${GENERATED_THNN_CXX_CUDA})
|
||||
|
||||
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_CUDA)
|
||||
if(USE_CUDNN)
|
||||
list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::cudnn)
|
||||
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_CUDNN)
|
||||
endif()
|
||||
if(USE_CUSPARSELT)
|
||||
list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::cusparselt)
|
||||
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_CUSPARSELT)
|
||||
endif()
|
||||
if(USE_CUFILE)
|
||||
list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::cufile)
|
||||
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_CUFILE)
|
||||
endif()
|
||||
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_CUDA)
|
||||
if(USE_CUDNN)
|
||||
list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::cudnn)
|
||||
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_CUDNN)
|
||||
endif()
|
||||
if(USE_CUSPARSELT)
|
||||
list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::cusparselt)
|
||||
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_CUSPARSELT)
|
||||
endif()
|
||||
if(USE_CUFILE)
|
||||
list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::cufile)
|
||||
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_CUFILE)
|
||||
endif()
|
||||
|
||||
if(TARGET torch::nvtx3)
|
||||
list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::nvtx3)
|
||||
else()
|
||||
if(TARGET torch::nvtoolsext)
|
||||
list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::nvtoolsext)
|
||||
endif()
|
||||
if(TARGET torch::nvtx3)
|
||||
list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::nvtx3)
|
||||
else()
|
||||
if(TARGET torch::nvtoolsext)
|
||||
list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::nvtoolsext)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(USE_ROCM)
|
||||
append_filelist("libtorch_python_cuda_core_sources" TORCH_PYTHON_SRCS)
|
||||
list(APPEND TORCH_PYTHON_SRCS ${GENERATED_THNN_CXX_CUDA})
|
||||
append_filelist("libtorch_python_cuda_core_sources" TORCH_PYTHON_SRCS)
|
||||
list(APPEND TORCH_PYTHON_SRCS ${GENERATED_THNN_CXX_CUDA})
|
||||
|
||||
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS
|
||||
USE_ROCM
|
||||
__HIP_PLATFORM_AMD__
|
||||
)
|
||||
if(NOT WIN32)
|
||||
list(APPEND TORCH_PYTHON_LINK_LIBRARIES ${ROCM_ROCTX_LIB})
|
||||
endif()
|
||||
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_ROCM __HIP_PLATFORM_AMD__)
|
||||
if(NOT WIN32)
|
||||
list(APPEND TORCH_PYTHON_LINK_LIBRARIES ${ROCM_ROCTX_LIB})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(USE_XPU)
|
||||
include(${TORCH_ROOT}/cmake/public/xpu.cmake)
|
||||
append_filelist("libtorch_python_xpu_sources" TORCH_PYTHON_SRCS)
|
||||
include(${TORCH_ROOT}/cmake/public/xpu.cmake)
|
||||
append_filelist("libtorch_python_xpu_sources" TORCH_PYTHON_SRCS)
|
||||
|
||||
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_XPU)
|
||||
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_XPU)
|
||||
endif()
|
||||
|
||||
if(USE_CUDNN OR USE_ROCM)
|
||||
list(APPEND TORCH_PYTHON_SRCS
|
||||
${TORCH_SRC_DIR}/csrc/cuda/shared/cudnn.cpp
|
||||
)
|
||||
if(USE_STATIC_CUDNN)
|
||||
set_source_files_properties(
|
||||
${TORCH_SRC_DIR}/csrc/cuda/shared/cudnn.cpp
|
||||
PROPERTIES COMPILE_DEFINITIONS "USE_STATIC_CUDNN"
|
||||
)
|
||||
endif()
|
||||
list(APPEND TORCH_PYTHON_SRCS ${TORCH_SRC_DIR}/csrc/cuda/shared/cudnn.cpp)
|
||||
if(USE_STATIC_CUDNN)
|
||||
set_source_files_properties(
|
||||
${TORCH_SRC_DIR}/csrc/cuda/shared/cudnn.cpp PROPERTIES COMPILE_DEFINITIONS
|
||||
"USE_STATIC_CUDNN")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(USE_CUSPARSELT)
|
||||
list(APPEND TORCH_PYTHON_SRCS ${TORCH_SRC_DIR}/csrc/cuda/shared/cusparselt.cpp)
|
||||
list(APPEND TORCH_PYTHON_SRCS
|
||||
${TORCH_SRC_DIR}/csrc/cuda/shared/cusparselt.cpp)
|
||||
endif()
|
||||
|
||||
if(USE_MPS)
|
||||
list(APPEND TORCH_PYTHON_SRCS ${MPS_PYTHON_SRCS})
|
||||
list(APPEND TORCH_PYTHON_SRCS ${MPS_PYTHON_SRCS})
|
||||
endif()
|
||||
|
||||
if(USE_VALGRIND AND NOT WIN32)
|
||||
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_VALGRIND)
|
||||
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_VALGRIND)
|
||||
endif()
|
||||
|
||||
# In the most recent CMake versions, a new 'TRANSFORM' subcommand of 'list' allows much of the boilerplate of defining the lists
|
||||
# of type stub files to be omitted.
|
||||
# For compatibility with older CMake versions, we omit it for now, but leave it as a comment in case compatibility with the older
|
||||
# CMake versions is eventually dropped.
|
||||
# set(Modules
|
||||
# __init__
|
||||
# activation
|
||||
# adaptive
|
||||
# batchnorm
|
||||
# container
|
||||
# conv
|
||||
# distance
|
||||
# dropout
|
||||
# fold
|
||||
# instancenorm
|
||||
# linear
|
||||
# loss
|
||||
# module
|
||||
# normalization
|
||||
# padding
|
||||
# pixelshuffle
|
||||
# pooling
|
||||
# rnn
|
||||
# sparse
|
||||
# upsampling
|
||||
# )
|
||||
# In the most recent CMake versions, a new 'TRANSFORM' subcommand of 'list'
|
||||
# allows much of the boilerplate of defining the lists of type stub files to be
|
||||
# omitted. For compatibility with older CMake versions, we omit it for now, but
|
||||
# leave it as a comment in case compatibility with the older CMake versions is
|
||||
# eventually dropped. set(Modules __init__ activation adaptive batchnorm
|
||||
# container conv distance dropout fold instancenorm linear loss module
|
||||
# normalization padding pixelshuffle pooling rnn sparse upsampling )
|
||||
# list(TRANSFORM Modules PREPEND "${TORCH_SRC_DIR}/nn/modules/")
|
||||
add_custom_target(torch_python_stubs DEPENDS
|
||||
"${TORCH_SRC_DIR}/_C/__init__.pyi"
|
||||
"${TORCH_SRC_DIR}/_C/_VariableFunctions.pyi"
|
||||
"${TORCH_SRC_DIR}/nn/functional.pyi"
|
||||
"${TORCH_SRC_DIR}/utils/data/datapipes/datapipe.pyi"
|
||||
)
|
||||
add_custom_target(
|
||||
torch_python_stubs
|
||||
DEPENDS "${TORCH_SRC_DIR}/_C/__init__.pyi"
|
||||
"${TORCH_SRC_DIR}/_C/_VariableFunctions.pyi"
|
||||
"${TORCH_SRC_DIR}/nn/functional.pyi"
|
||||
"${TORCH_SRC_DIR}/utils/data/datapipes/datapipe.pyi")
|
||||
|
||||
file(GLOB_RECURSE torchgen_python "${PROJECT_SOURCE_DIR}/torchgen/*.py")
|
||||
file(GLOB_RECURSE autograd_python "${TOOLS_PATH}/autograd/*.py")
|
||||
file(GLOB_RECURSE pyi_python "${TOOLS_PATH}/pyi/*.py")
|
||||
add_custom_command(
|
||||
OUTPUT
|
||||
"${TORCH_SRC_DIR}/_C/__init__.pyi"
|
||||
"${TORCH_SRC_DIR}/_C/_VariableFunctions.pyi"
|
||||
"${TORCH_SRC_DIR}/nn/functional.pyi"
|
||||
COMMAND
|
||||
"${Python_EXECUTABLE}" -mtools.pyi.gen_pyi
|
||||
--native-functions-path "aten/src/ATen/native/native_functions.yaml"
|
||||
--tags-path "aten/src/ATen/native/tags.yaml"
|
||||
--deprecated-functions-path "tools/autograd/deprecated.yaml"
|
||||
DEPENDS
|
||||
"${TORCH_SRC_DIR}/_C/__init__.pyi.in"
|
||||
"${TORCH_SRC_DIR}/_C/_VariableFunctions.pyi.in"
|
||||
"${TORCH_SRC_DIR}/nn/functional.pyi.in"
|
||||
"${TORCH_ROOT}/aten/src/ATen/native/native_functions.yaml"
|
||||
"${TORCH_ROOT}/aten/src/ATen/native/tags.yaml"
|
||||
"${TORCH_ROOT}/tools/autograd/deprecated.yaml"
|
||||
"${TORCH_ROOT}/torch/_torch_docs.py"
|
||||
"${TORCH_ROOT}/torch/_tensor_docs.py"
|
||||
${pyi_python}
|
||||
${autograd_python}
|
||||
${torchgen_python}
|
||||
WORKING_DIRECTORY
|
||||
"${TORCH_ROOT}"
|
||||
)
|
||||
OUTPUT "${TORCH_SRC_DIR}/_C/__init__.pyi"
|
||||
"${TORCH_SRC_DIR}/_C/_VariableFunctions.pyi"
|
||||
"${TORCH_SRC_DIR}/nn/functional.pyi"
|
||||
COMMAND
|
||||
"${Python_EXECUTABLE}" -mtools.pyi.gen_pyi --native-functions-path
|
||||
"aten/src/ATen/native/native_functions.yaml" --tags-path
|
||||
"aten/src/ATen/native/tags.yaml" --deprecated-functions-path
|
||||
"tools/autograd/deprecated.yaml"
|
||||
DEPENDS "${TORCH_SRC_DIR}/_C/__init__.pyi.in"
|
||||
"${TORCH_SRC_DIR}/_C/_VariableFunctions.pyi.in"
|
||||
"${TORCH_SRC_DIR}/nn/functional.pyi.in"
|
||||
"${TORCH_ROOT}/aten/src/ATen/native/native_functions.yaml"
|
||||
"${TORCH_ROOT}/aten/src/ATen/native/tags.yaml"
|
||||
"${TORCH_ROOT}/tools/autograd/deprecated.yaml"
|
||||
"${TORCH_ROOT}/torch/_torch_docs.py"
|
||||
"${TORCH_ROOT}/torch/_tensor_docs.py"
|
||||
${pyi_python}
|
||||
${autograd_python}
|
||||
${torchgen_python}
|
||||
WORKING_DIRECTORY "${TORCH_ROOT}")
|
||||
file(GLOB_RECURSE datapipe_files "${TORCH_SRC_DIR}/utils/data/datapipes/*.py")
|
||||
add_custom_command(
|
||||
OUTPUT
|
||||
"${TORCH_SRC_DIR}/utils/data/datapipes/datapipe.pyi"
|
||||
COMMAND
|
||||
${CMAKE_COMMAND} -E env PYTHONPATH="${TORCH_ROOT}"
|
||||
"${Python_EXECUTABLE}" ${TORCH_SRC_DIR}/utils/data/datapipes/gen_pyi.py
|
||||
DEPENDS
|
||||
"${TORCH_SRC_DIR}/utils/data/datapipes/datapipe.pyi.in"
|
||||
${datapipe_files}
|
||||
WORKING_DIRECTORY
|
||||
"${TORCH_ROOT}"
|
||||
)
|
||||
OUTPUT "${TORCH_SRC_DIR}/utils/data/datapipes/datapipe.pyi"
|
||||
COMMAND
|
||||
${CMAKE_COMMAND} -E env PYTHONPATH="${TORCH_ROOT}" "${Python_EXECUTABLE}"
|
||||
${TORCH_SRC_DIR}/utils/data/datapipes/gen_pyi.py
|
||||
DEPENDS "${TORCH_SRC_DIR}/utils/data/datapipes/datapipe.pyi.in"
|
||||
${datapipe_files}
|
||||
WORKING_DIRECTORY "${TORCH_ROOT}")
|
||||
if(USE_DISTRIBUTED)
|
||||
if(WIN32)
|
||||
append_filelist("libtorch_python_distributed_core_sources" TORCH_PYTHON_SRCS)
|
||||
else()
|
||||
append_filelist("libtorch_python_distributed_sources" TORCH_PYTHON_SRCS)
|
||||
endif()
|
||||
# Disable certain warnings for GCC-9.X
|
||||
if(CMAKE_COMPILER_IS_GNUCXX)
|
||||
set_source_files_properties(${TORCH_SRC_DIR}/csrc/distributed/autograd/init.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type")
|
||||
set_source_files_properties(${TORCH_SRC_DIR}/csrc/distributed/rpc/testing/init.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type")
|
||||
set_source_files_properties(${TORCH_SRC_DIR}/csrc/distributed/c10d/init.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type")
|
||||
endif()
|
||||
# NCCL is a private dependency of libtorch, but libtorch_python includes
|
||||
# some private headers of libtorch, which in turn include NCCL. As a hacky
|
||||
# alternative to making NCCL a public dependency of libtorch, we make it
|
||||
# a private dependency of libtorch_python as well.
|
||||
if(USE_NCCL)
|
||||
list(APPEND TORCH_PYTHON_LINK_LIBRARIES __caffe2_nccl)
|
||||
endif()
|
||||
# Same for MPI.
|
||||
if(USE_MPI)
|
||||
list(APPEND TORCH_PYTHON_LINK_LIBRARIES MPI::MPI_CXX)
|
||||
endif()
|
||||
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_C10D)
|
||||
if(WIN32)
|
||||
append_filelist("libtorch_python_distributed_core_sources"
|
||||
TORCH_PYTHON_SRCS)
|
||||
else()
|
||||
append_filelist("libtorch_python_distributed_sources" TORCH_PYTHON_SRCS)
|
||||
endif()
|
||||
# Disable certain warnings for GCC-9.X
|
||||
if(CMAKE_COMPILER_IS_GNUCXX)
|
||||
set_source_files_properties(
|
||||
${TORCH_SRC_DIR}/csrc/distributed/autograd/init.cpp
|
||||
PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type")
|
||||
set_source_files_properties(
|
||||
${TORCH_SRC_DIR}/csrc/distributed/rpc/testing/init.cpp
|
||||
PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type")
|
||||
set_source_files_properties(
|
||||
${TORCH_SRC_DIR}/csrc/distributed/c10d/init.cpp
|
||||
PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type")
|
||||
endif()
|
||||
# NCCL is a private dependency of libtorch, but libtorch_python includes some
|
||||
# private headers of libtorch, which in turn include NCCL. As a hacky
|
||||
# alternative to making NCCL a public dependency of libtorch, we make it a
|
||||
# private dependency of libtorch_python as well.
|
||||
if(USE_NCCL)
|
||||
list(APPEND TORCH_PYTHON_LINK_LIBRARIES __caffe2_nccl)
|
||||
endif()
|
||||
# Same for MPI.
|
||||
if(USE_MPI)
|
||||
list(APPEND TORCH_PYTHON_LINK_LIBRARIES MPI::MPI_CXX)
|
||||
endif()
|
||||
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_C10D)
|
||||
|
||||
endif()
|
||||
|
||||
if(USE_NCCL AND NOT WIN32)
|
||||
list(APPEND TORCH_PYTHON_SRCS
|
||||
${TORCH_SRC_DIR}/csrc/cuda/python_nccl.cpp)
|
||||
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_NCCL)
|
||||
list(APPEND TORCH_PYTHON_SRCS ${TORCH_SRC_DIR}/csrc/cuda/python_nccl.cpp)
|
||||
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_NCCL)
|
||||
endif()
|
||||
|
||||
if(NOT MSVC)
|
||||
# cudaProfilerInitialize must go away
|
||||
set_source_files_properties(${TORCH_SRC_DIR}/csrc/cuda/shared/cudart.cpp PROPERTIES COMPILE_FLAGS "-Wno-deprecated-declarations")
|
||||
set_source_files_properties(
|
||||
${TORCH_SRC_DIR}/csrc/cuda/shared/cudart.cpp
|
||||
PROPERTIES COMPILE_FLAGS "-Wno-deprecated-declarations")
|
||||
endif()
|
||||
|
||||
# coreml
|
||||
if(USE_COREML_DELEGATE)
|
||||
list(APPEND TORCH_PYTHON_SRCS ${TORCH_SRC_DIR}/csrc/jit/backends/coreml/cpp/backend.cpp)
|
||||
list(APPEND TORCH_PYTHON_SRCS ${TORCH_SRC_DIR}/csrc/jit/backends/coreml/cpp/preprocess.cpp)
|
||||
list(APPEND TORCH_PYTHON_SRCS
|
||||
${TORCH_SRC_DIR}/csrc/jit/backends/coreml/cpp/backend.cpp)
|
||||
list(APPEND TORCH_PYTHON_SRCS
|
||||
${TORCH_SRC_DIR}/csrc/jit/backends/coreml/cpp/preprocess.cpp)
|
||||
endif()
|
||||
|
||||
|
||||
add_library(torch_python SHARED ${TORCH_PYTHON_SRCS})
|
||||
torch_compile_options(torch_python) # see cmake/public/utils.cmake
|
||||
torch_compile_options(torch_python) # see cmake/public/utils.cmake
|
||||
if(APPLE)
|
||||
target_compile_options(torch_python PRIVATE
|
||||
$<$<COMPILE_LANGUAGE:CXX>: -fvisibility=default>)
|
||||
target_compile_options(torch_python PRIVATE $<$<COMPILE_LANGUAGE:CXX>:
|
||||
-fvisibility=default>)
|
||||
endif()
|
||||
|
||||
if(CAFFE2_USE_MKL AND BUILD_LIBTORCHLESS)
|
||||
|
||||
# Use the RPATH of the linked libraries
|
||||
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
|
||||
# we need to explicitly link caffe2::mkl in order to have the
|
||||
# correct RPATH in torch_python for the split build
|
||||
# we need to explicitly link caffe2::mkl in order to have the correct RPATH in
|
||||
# torch_python for the split build
|
||||
target_link_libraries(torch_python PRIVATE caffe2::mkl)
|
||||
endif()
|
||||
|
||||
@ -346,8 +309,8 @@ if(USE_CUFILE AND NOT USE_ROCM)
|
||||
endif()
|
||||
|
||||
if(HAVE_SOVERSION)
|
||||
set_target_properties(torch_python PROPERTIES
|
||||
VERSION ${TORCH_VERSION} SOVERSION ${TORCH_SOVERSION})
|
||||
set_target_properties(torch_python PROPERTIES VERSION ${TORCH_VERSION}
|
||||
SOVERSION ${TORCH_SOVERSION})
|
||||
endif()
|
||||
|
||||
# in case of the split build we need to add compile definitions
|
||||
@ -386,10 +349,15 @@ if(BUILD_LIBTORCHLESS)
|
||||
target_compile_definitions(torch_python PRIVATE USE_TENSORPIPE)
|
||||
endif()
|
||||
|
||||
set(EXPERIMENTAL_SINGLE_THREAD_POOL "0" CACHE STRING
|
||||
"Experimental option to use a single thread pool for inter- and intra-op parallelism")
|
||||
set(EXPERIMENTAL_SINGLE_THREAD_POOL
|
||||
"0"
|
||||
CACHE
|
||||
STRING
|
||||
"Experimental option to use a single thread pool for inter- and intra-op parallelism"
|
||||
)
|
||||
if("${EXPERIMENTAL_SINGLE_THREAD_POOL}")
|
||||
target_compile_definitions(torch_python PRIVATE "-DAT_EXPERIMENTAL_SINGLE_THREAD_POOL=1")
|
||||
target_compile_definitions(torch_python
|
||||
PRIVATE "-DAT_EXPERIMENTAL_SINGLE_THREAD_POOL=1")
|
||||
endif()
|
||||
|
||||
endif()
|
||||
@ -397,54 +365,64 @@ endif()
|
||||
add_dependencies(torch_python torch_python_stubs)
|
||||
add_dependencies(torch_python flatbuffers)
|
||||
|
||||
|
||||
if(USE_PRECOMPILED_HEADERS)
|
||||
target_precompile_headers(torch_python PRIVATE
|
||||
"$<$<COMPILE_LANGUAGE:CXX>:ATen/ATen.h>")
|
||||
"$<$<COMPILE_LANGUAGE:CXX>:ATen/ATen.h>")
|
||||
endif()
|
||||
|
||||
# Required workaround for generated sources
|
||||
# See https://samthursfield.wordpress.com/2015/11/21/cmake-dependencies-between-targets-and-files-and-custom-commands/#custom-commands-in-different-directories
|
||||
# Required workaround for generated sources See
|
||||
# https://samthursfield.wordpress.com/2015/11/21/cmake-dependencies-between-targets-and-files-and-custom-commands/#custom-commands-in-different-directories
|
||||
add_dependencies(torch_python generate-torch-sources)
|
||||
set_source_files_properties(
|
||||
${GENERATED_THNN_SOURCES}
|
||||
${GENERATED_CXX_PYTHON}
|
||||
PROPERTIES GENERATED TRUE
|
||||
)
|
||||
set_source_files_properties(${GENERATED_THNN_SOURCES} ${GENERATED_CXX_PYTHON}
|
||||
PROPERTIES GENERATED TRUE)
|
||||
|
||||
# Disable certain warnings for GCC-9.X
|
||||
if(CMAKE_COMPILER_IS_GNUCXX)
|
||||
set_source_files_properties(${TORCH_SRC_DIR}/csrc/Module.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type")
|
||||
set_source_files_properties(${TORCH_SRC_DIR}/csrc/autograd/python_variable.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type")
|
||||
set_source_files_properties(
|
||||
${TORCH_SRC_DIR}/csrc/Module.cpp PROPERTIES COMPILE_FLAGS
|
||||
"-Wno-cast-function-type")
|
||||
set_source_files_properties(
|
||||
${TORCH_SRC_DIR}/csrc/autograd/python_variable.cpp
|
||||
PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type")
|
||||
endif()
|
||||
|
||||
# Preserve CUDA_GENCODE flags
|
||||
if(USE_CUDA)
|
||||
torch_cuda_get_nvcc_gencode_flag(_ARCH_FLAGS)
|
||||
set_source_files_properties(${TORCH_SRC_DIR}/csrc/cuda/Module.cpp PROPERTIES COMPILE_FLAGS "-DCUDA_ARCH_FLAGS=\"${_ARCH_FLAGS_readable}\"")
|
||||
set_source_files_properties(
|
||||
${TORCH_SRC_DIR}/csrc/cuda/Module.cpp
|
||||
PROPERTIES COMPILE_FLAGS "-DCUDA_ARCH_FLAGS=\"${_ARCH_FLAGS_readable}\"")
|
||||
endif()
|
||||
|
||||
# Preserve HIP arch flags
|
||||
if(USE_ROCM)
|
||||
string(REPLACE ";" " " PYTORCH_ROCM_ARCH_readable "${PYTORCH_ROCM_ARCH}")
|
||||
set_source_files_properties(${TORCH_SRC_DIR}/csrc/cuda/Module.cpp PROPERTIES COMPILE_FLAGS "-DCUDA_ARCH_FLAGS=\"${PYTORCH_ROCM_ARCH_readable}\"")
|
||||
set_source_files_properties(
|
||||
${TORCH_SRC_DIR}/csrc/cuda/Module.cpp
|
||||
PROPERTIES COMPILE_FLAGS
|
||||
"-DCUDA_ARCH_FLAGS=\"${PYTORCH_ROCM_ARCH_readable}\"")
|
||||
endif()
|
||||
|
||||
# Preserve XPU arch flags
|
||||
if(USE_XPU)
|
||||
string(REPLACE "," " " _ARCH_FLAGS_readable "${TORCH_XPU_ARCH_LIST}")
|
||||
set_source_files_properties(${TORCH_SRC_DIR}/csrc/xpu/Module.cpp PROPERTIES COMPILE_FLAGS "-DXPU_ARCH_FLAGS=\"${_ARCH_FLAGS_readable}\"")
|
||||
set_source_files_properties(
|
||||
${TORCH_SRC_DIR}/csrc/xpu/Module.cpp
|
||||
PROPERTIES COMPILE_FLAGS "-DXPU_ARCH_FLAGS=\"${_ARCH_FLAGS_readable}\"")
|
||||
endif()
|
||||
|
||||
target_compile_definitions(torch_python PRIVATE "-DTHP_BUILD_MAIN_LIB")
|
||||
|
||||
target_link_libraries(torch_python PRIVATE ${TORCH_LIB} ${TORCH_PYTHON_LINK_LIBRARIES})
|
||||
target_link_libraries(torch_python PRIVATE ${TORCH_LIB}
|
||||
${TORCH_PYTHON_LINK_LIBRARIES})
|
||||
|
||||
target_compile_definitions(torch_python PRIVATE ${TORCH_PYTHON_COMPILE_DEFINITIONS})
|
||||
target_compile_definitions(torch_python
|
||||
PRIVATE ${TORCH_PYTHON_COMPILE_DEFINITIONS})
|
||||
|
||||
target_compile_options(torch_python PRIVATE ${TORCH_PYTHON_COMPILE_OPTIONS})
|
||||
|
||||
target_include_directories(torch_python PUBLIC ${TORCH_PYTHON_INCLUDE_DIRECTORIES})
|
||||
target_include_directories(torch_python
|
||||
PUBLIC ${TORCH_PYTHON_INCLUDE_DIRECTORIES})
|
||||
|
||||
if(USE_UCC)
|
||||
target_link_libraries(torch_python PRIVATE __caffe2_ucc)
|
||||
@ -459,13 +437,15 @@ if(BUILD_ONEDNN_GRAPH)
|
||||
endif()
|
||||
|
||||
if(${CMAKE_BUILD_TYPE} STREQUAL "RelWithAssert")
|
||||
# Workaround numerous decret-without-a-gil warnings from JIT
|
||||
# see https://github.com/pytorch/pytorch/issues/130073
|
||||
target_compile_definitions(torch_python PRIVATE "-DPYBIND11_NO_ASSERT_GIL_HELD_INCREF_DECREF")
|
||||
# Workaround numerous decref-without-a-gil warnings from JIT; see
|
||||
# https://github.com/pytorch/pytorch/issues/130073
|
||||
target_compile_definitions(
|
||||
torch_python PRIVATE "-DPYBIND11_NO_ASSERT_GIL_HELD_INCREF_DECREF")
|
||||
endif()
|
||||
|
||||
if(NOT TORCH_PYTHON_LINK_FLAGS STREQUAL "")
|
||||
set_target_properties(torch_python PROPERTIES LINK_FLAGS ${TORCH_PYTHON_LINK_FLAGS})
|
||||
set_target_properties(torch_python PROPERTIES LINK_FLAGS
|
||||
${TORCH_PYTHON_LINK_FLAGS})
|
||||
endif()
|
||||
|
||||
install(TARGETS torch_python DESTINATION "${TORCH_INSTALL_LIB_DIR}")
|
||||
@ -485,32 +465,34 @@ endif()
|
||||
add_custom_target(
|
||||
gen_torch_version ALL
|
||||
"${Python_EXECUTABLE}" "${TOOLS_PATH}/generate_torch_version.py"
|
||||
--is-debug=${TORCH_VERSION_DEBUG}
|
||||
--cuda-version=${CUDA_VERSION}
|
||||
--hip-version=${HIP_VERSION}
|
||||
--xpu-version=${SYCL_COMPILER_VERSION}
|
||||
--is-debug=${TORCH_VERSION_DEBUG} --cuda-version=${CUDA_VERSION}
|
||||
--hip-version=${HIP_VERSION} --xpu-version=${SYCL_COMPILER_VERSION}
|
||||
BYPRODUCTS ${TORCH_SRC_DIR}/version.py
|
||||
COMMENT "Regenerating version file..."
|
||||
WORKING_DIRECTORY ${TORCH_ROOT}
|
||||
)
|
||||
WORKING_DIRECTORY ${TORCH_ROOT})
|
||||
add_dependencies(torch_python gen_torch_version)
|
||||
|
||||
# Skip building this library under MacOS, since it is currently failing to build on Mac
|
||||
# Github issue #61930
|
||||
# Skip building this library under MacOS, since it is currently failing to build
|
||||
# on Mac. See GitHub issue #61930
|
||||
if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
|
||||
# Add Android Nnapi delegate library
|
||||
add_library(nnapi_backend SHARED
|
||||
${TORCH_SRC_DIR}/csrc/jit/backends/nnapi/nnapi_backend_lib.cpp
|
||||
${TORCH_SRC_DIR}/csrc/jit/backends/nnapi/nnapi_backend_preprocess.cpp
|
||||
)
|
||||
add_library(
|
||||
nnapi_backend SHARED
|
||||
${TORCH_SRC_DIR}/csrc/jit/backends/nnapi/nnapi_backend_lib.cpp
|
||||
${TORCH_SRC_DIR}/csrc/jit/backends/nnapi/nnapi_backend_preprocess.cpp)
|
||||
# Pybind11 requires explicit linking of the torch_python library
|
||||
if(BUILD_LIBTORCHLESS)
|
||||
target_link_libraries(nnapi_backend PRIVATE ${TORCH_LIB})
|
||||
else()
|
||||
target_link_libraries(nnapi_backend PRIVATE torch)
|
||||
endif()
|
||||
target_link_libraries(nnapi_backend PRIVATE torch_python pybind::pybind11 fmt::fmt-header-only)
|
||||
target_link_libraries(nnapi_backend PRIVATE torch_python pybind::pybind11
|
||||
fmt::fmt-header-only)
|
||||
endif()
|
||||
|
||||
set(TORCH_PYTHON_COMPILE_OPTIONS ${TORCH_PYTHON_COMPILE_OPTIONS} PARENT_SCOPE)
|
||||
set(TORCH_PYTHON_LINK_FLAGS ${TORCH_PYTHON_LINK_FLAGS} PARENT_SCOPE)
|
||||
set(TORCH_PYTHON_COMPILE_OPTIONS
|
||||
${TORCH_PYTHON_COMPILE_OPTIONS}
|
||||
PARENT_SCOPE)
|
||||
set(TORCH_PYTHON_LINK_FLAGS
|
||||
${TORCH_PYTHON_LINK_FLAGS}
|
||||
PARENT_SCOPE)
|
||||
|
||||
@ -1,14 +1,10 @@
|
||||
set(
|
||||
MODEL_TRACER_DIR
|
||||
"${TORCH_ROOT}/torch/csrc/jit/mobile/model_tracer")
|
||||
set(MODEL_TRACER_DIR "${TORCH_ROOT}/torch/csrc/jit/mobile/model_tracer")
|
||||
|
||||
list(APPEND MODEL_TRACER_SOURCES "")
|
||||
|
||||
append_filelist("torch_mobile_tracer_sources" MODEL_TRACER_SOURCES)
|
||||
|
||||
add_executable(
|
||||
model_tracer
|
||||
${MODEL_TRACER_SOURCES})
|
||||
add_executable(model_tracer ${MODEL_TRACER_SOURCES})
|
||||
|
||||
target_link_libraries(model_tracer PRIVATE torch)
|
||||
|
||||
|
||||
@ -2,27 +2,28 @@ cmake_minimum_required(VERSION 3.27 FATAL_ERROR)
|
||||
|
||||
project(headeronly CXX)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ standard whose features are requested to build this target.")
|
||||
set(CMAKE_CXX_STANDARD
|
||||
17
|
||||
CACHE STRING
|
||||
"The C++ standard whose features are requested to build this target.")
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
|
||||
# Main build file for torch/headeronly, except there's no build cuz this lib is header-only!
|
||||
# Main build file for torch/headeronly, except there's no build cuz this lib is
|
||||
# header-only!
|
||||
|
||||
# ---[ Configure macro file.
|
||||
set(C10_USE_GFLAGS ${USE_GFLAGS}) # used in cmake_macros.h.in
|
||||
set(C10_USE_GLOG ${USE_GLOG}) # used in cmake_macros.h.in
|
||||
set(C10_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) # used in cmake_macros.h.in
|
||||
set(C10_USE_NUMA ${USE_NUMA}) # used in cmake_macros.h.in
|
||||
set(C10_USE_MSVC_STATIC_RUNTIME ${CAFFE2_USE_MSVC_STATIC_RUNTIME}) # used in cmake_macros.h.in
|
||||
set(C10_USE_ROCM_KERNEL_ASSERT ${USE_ROCM_KERNEL_ASSERT}) # used in cmake_macros.h.in
|
||||
configure_file(
|
||||
${CMAKE_CURRENT_LIST_DIR}/macros/cmake_macros.h.in
|
||||
${CMAKE_BINARY_DIR}/torch/headeronly/macros/cmake_macros.h)
|
||||
set(C10_USE_MSVC_STATIC_RUNTIME ${CAFFE2_USE_MSVC_STATIC_RUNTIME}
|
||||
)# used in cmake_macros.h.in
|
||||
set(C10_USE_ROCM_KERNEL_ASSERT ${USE_ROCM_KERNEL_ASSERT}) # used in
|
||||
# cmake_macros.h.in
|
||||
configure_file(${CMAKE_CURRENT_LIST_DIR}/macros/cmake_macros.h.in
|
||||
${CMAKE_BINARY_DIR}/torch/headeronly/macros/cmake_macros.h)
|
||||
|
||||
file(GLOB HEADERONLY_HEADERS
|
||||
*.h
|
||||
macros/*.h
|
||||
util/*.h
|
||||
)
|
||||
file(GLOB HEADERONLY_HEADERS *.h macros/*.h util/*.h)
|
||||
|
||||
add_library(headeronly INTERFACE ${HEADERONLY_HEADERS})
|
||||
|
||||
@ -31,5 +32,8 @@ install(FILES ${CMAKE_BINARY_DIR}/torch/headeronly/macros/cmake_macros.h
|
||||
|
||||
if(NOT BUILD_LIBTORCHLESS)
|
||||
# ---[ Installation copied from c10/CMakeLists.txt
|
||||
install(TARGETS headeronly EXPORT Caffe2Targets DESTINATION lib)
|
||||
install(
|
||||
TARGETS headeronly
|
||||
EXPORT Caffe2Targets
|
||||
DESTINATION lib)
|
||||
endif()
|
||||
|
||||
@ -4,24 +4,27 @@ cmake_minimum_required(VERSION 3.27 FATAL_ERROR)
|
||||
set(TORCH_ROOT ${CMAKE_CURRENT_LIST_DIR}/../../../)
|
||||
|
||||
if(NOT LIBSHM_INSTALL_LIB_SUBDIR)
|
||||
set(LIBSHM_INSTALL_LIB_SUBDIR "lib" CACHE PATH "libshm install library directory")
|
||||
set(LIBSHM_INSTALL_LIB_SUBDIR
|
||||
"lib"
|
||||
CACHE PATH "libshm install library directory")
|
||||
endif()
|
||||
|
||||
add_library(shm SHARED core.cpp)
|
||||
if(HAVE_SOVERSION)
|
||||
set_target_properties(shm PROPERTIES
|
||||
VERSION ${TORCH_VERSION} SOVERSION ${TORCH_SOVERSION})
|
||||
set_target_properties(shm PROPERTIES VERSION ${TORCH_VERSION}
|
||||
SOVERSION ${TORCH_SOVERSION})
|
||||
endif()
|
||||
|
||||
target_include_directories(shm PUBLIC
|
||||
${TORCH_ROOT}/torch/lib # provides "libshm/libshm.h"
|
||||
target_include_directories(
|
||||
shm PUBLIC ${TORCH_ROOT}/torch/lib # provides "libshm/libshm.h"
|
||||
)
|
||||
|
||||
### Torch packages supposes libraries prefix is "lib"
|
||||
set_target_properties(shm PROPERTIES
|
||||
PREFIX "lib"
|
||||
IMPORT_PREFIX "lib"
|
||||
CXX_STANDARD 17)
|
||||
# Torch packages assume the library prefix is "lib"
|
||||
set_target_properties(
|
||||
shm
|
||||
PROPERTIES PREFIX "lib"
|
||||
IMPORT_PREFIX "lib"
|
||||
CXX_STANDARD 17)
|
||||
target_link_libraries(shm PRIVATE ${TORCH_CPU_LIB})
|
||||
|
||||
if(UNIX AND NOT APPLE)
|
||||
@ -33,22 +36,20 @@ if(UNIX AND NOT APPLE)
|
||||
target_link_libraries(shm PUBLIC rt)
|
||||
else()
|
||||
message(STATUS "Checking if rt requires pthread")
|
||||
# Sometimes, rt won't be available unless you also link against
|
||||
# pthreads. In this case, the NEED_LIBRT test will fail, because
|
||||
# check_library_exists isn't going to build the C file with the
|
||||
# pthread file, and the build will fail, setting NEED_LIBRT to
|
||||
# false (this is TOTALLY BOGUS, this situation should be an error
|
||||
# situation, not a "oh, I guess rt is not supported", but it's
|
||||
# not too easy to distinguish between the two situations). So,
|
||||
# if it fails, we try again, but this time also with a dependency
|
||||
# on pthread. If it succeeds this time, we know we not only need
|
||||
# an rt dependency, but we also need pthread.
|
||||
# Sometimes, rt won't be available unless you also link against pthreads. In
|
||||
# this case, the NEED_LIBRT test will fail, because check_library_exists
|
||||
# isn't going to build the C file with the pthread file, and the build will
|
||||
# fail, setting NEED_LIBRT to false (this is TOTALLY BOGUS, this situation
|
||||
# should be an error situation, not a "oh, I guess rt is not supported", but
|
||||
# it's not too easy to distinguish between the two situations). So, if it
|
||||
# fails, we try again, but this time also with a dependency on pthread. If
|
||||
# it succeeds this time, we know we not only need an rt dependency, but we
|
||||
# also need pthread.
|
||||
#
|
||||
# BTW, this test looks for shm_open, because that's what we
|
||||
# really care about (not clock_gettime). I didn't change the
|
||||
# site above though in case there was a reason we were testing
|
||||
# against clock_gettime. In principle, the choice of symbol you
|
||||
# test for shouldn't matter.
|
||||
# BTW, this test looks for shm_open, because that's what we really care
|
||||
# about (not clock_gettime). I didn't change the site above though in case
|
||||
# there was a reason we were testing against clock_gettime. In principle,
|
||||
# the choice of symbol you test for shouldn't matter.
|
||||
set(CMAKE_REQUIRED_LIBRARIES Threads::Threads)
|
||||
check_library_exists(rt shm_open "sys/mman.h" NEED_RT_AND_PTHREAD)
|
||||
unset(CMAKE_REQUIRED_LIBRARIES)
|
||||
@ -66,8 +67,8 @@ else()
|
||||
# we need to link directly to c10 here otherwise we miss symbols
|
||||
target_link_libraries(torch_shm_manager PRIVATE shm c10)
|
||||
endif()
|
||||
set_target_properties(torch_shm_manager PROPERTIES
|
||||
INSTALL_RPATH "${_rpath_portable_origin}/../lib")
|
||||
set_target_properties(
|
||||
torch_shm_manager PROPERTIES INSTALL_RPATH "${_rpath_portable_origin}/../lib")
|
||||
|
||||
install(TARGETS shm LIBRARY DESTINATION ${LIBSHM_INSTALL_LIB_SUBDIR})
|
||||
install(FILES libshm.h DESTINATION "include")
|
||||
|
||||
@ -1,27 +1,32 @@
|
||||
if(NOT LIBSHM_INSTALL_LIB_SUBDIR)
|
||||
set(LIBSHM_INSTALL_BIN_SUBDIR "bin" CACHE PATH "libshm install binary directory")
|
||||
set(LIBSHM_INSTALL_LIB_SUBDIR "lib" CACHE PATH "libshm install library directory")
|
||||
set(LIBSHM_INSTALL_BIN_SUBDIR
|
||||
"bin"
|
||||
CACHE PATH "libshm install binary directory")
|
||||
set(LIBSHM_INSTALL_LIB_SUBDIR
|
||||
"lib"
|
||||
CACHE PATH "libshm install library directory")
|
||||
endif()
|
||||
|
||||
add_library(shm SHARED core.cpp)
|
||||
|
||||
target_compile_definitions(shm PRIVATE
|
||||
"_CRT_SECURE_NO_DEPRECATE=1"
|
||||
"SHM_EXPORTS"
|
||||
)
|
||||
target_compile_definitions(shm PRIVATE "_CRT_SECURE_NO_DEPRECATE=1"
|
||||
"SHM_EXPORTS")
|
||||
|
||||
target_include_directories(shm PRIVATE
|
||||
${CMAKE_BINARY_DIR}/aten/src # provides "ATen/TypeExtendedInterface.h" to ATen.h
|
||||
${TORCH_ROOT}/torch/lib # provides "libshm/libshm.h"
|
||||
${CMAKE_CURRENT_SOURCE_DIR}
|
||||
)
|
||||
target_include_directories(
|
||||
shm
|
||||
PRIVATE ${CMAKE_BINARY_DIR}/aten/src # provides "ATen/TypeExtendedInterface.h"
|
||||
# to ATen.h
|
||||
${TORCH_ROOT}/torch/lib # provides "libshm/libshm.h"
|
||||
${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
target_link_libraries(shm torch c10)
|
||||
|
||||
|
||||
install(TARGETS shm DESTINATION "${LIBSHM_INSTALL_LIB_SUBDIR}")
|
||||
install(FILES libshm.h DESTINATION "include")
|
||||
|
||||
if(MSVC AND BUILD_SHARED_LIBS)
|
||||
install(FILES $<TARGET_PDB_FILE:shm> DESTINATION "${LIBSHM_INSTALL_LIB_SUBDIR}" OPTIONAL)
|
||||
install(
|
||||
FILES $<TARGET_PDB_FILE:shm>
|
||||
DESTINATION "${LIBSHM_INSTALL_LIB_SUBDIR}"
|
||||
OPTIONAL)
|
||||
endif()
|
||||
|
||||
Reference in New Issue
Block a user