mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Revert "[Submodule] Remove deprecated USE_TBB option and TBB submodule (#127051)"
This reverts commit 4fdbaa794f9d5af2f171f772a51cb710c51c925f. Reverted https://github.com/pytorch/pytorch/pull/127051 on behalf of https://github.com/PaliC due to This PR needs to be synced using the import button as there is a bug in our diff train ([comment](https://github.com/pytorch/pytorch/pull/127051#issuecomment-2136428735))
This commit is contained in:
@ -44,7 +44,10 @@ if [[ "$BUILD_ENVIRONMENT" == *cuda11* ]]; then
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ ${BUILD_ENVIRONMENT} == *"parallelnative"* ]]; then
|
||||
if [[ ${BUILD_ENVIRONMENT} == *"paralleltbb"* ]]; then
|
||||
export ATEN_THREADING=TBB
|
||||
export USE_TBB=1
|
||||
elif [[ ${BUILD_ENVIRONMENT} == *"parallelnative"* ]]; then
|
||||
export ATEN_THREADING=NATIVE
|
||||
fi
|
||||
|
||||
|
@ -693,6 +693,7 @@ test_aten() {
|
||||
${SUDO} ln -sf "$TORCH_LIB_DIR"/libmkldnn* "$TEST_BASE_DIR"
|
||||
${SUDO} ln -sf "$TORCH_LIB_DIR"/libnccl* "$TEST_BASE_DIR"
|
||||
${SUDO} ln -sf "$TORCH_LIB_DIR"/libtorch* "$TEST_BASE_DIR"
|
||||
${SUDO} ln -sf "$TORCH_LIB_DIR"/libtbb* "$TEST_BASE_DIR"
|
||||
|
||||
ls "$TEST_BASE_DIR"
|
||||
aten/tools/run_tests.sh "$TEST_BASE_DIR"
|
||||
@ -717,6 +718,21 @@ test_without_numpy() {
|
||||
popd
|
||||
}
|
||||
|
||||
# pytorch extensions require including torch/extension.h which includes all.h
|
||||
# which includes utils.h which includes Parallel.h.
|
||||
# So you can call for instance parallel_for() from your extension,
|
||||
# but the compilation will fail because Parallel.h has only declarations
|
||||
# and the definitions are conditionally included in Parallel.h (see last lines of Parallel.h).
|
||||
# I tried to solve it in #39612 and #39881 by including Config.h into Parallel.h
|
||||
# But if Pytorch is built with TBB it provides Config.h
|
||||
# that has AT_PARALLEL_NATIVE_TBB=1 (see #39612 or #39881) and it means that if you include
|
||||
# torch/extension.h which transitively includes Parallel.h
|
||||
# which transitively includes tbb.h which is not available!
|
||||
if [[ "${BUILD_ENVIRONMENT}" == *tbb* ]]; then
|
||||
sudo mkdir -p /usr/include/tbb
|
||||
sudo cp -r "$PWD"/third_party/tbb/include/tbb/* /usr/include/tbb
|
||||
fi
|
||||
|
||||
test_libtorch() {
|
||||
local SHARD="$1"
|
||||
|
||||
@ -730,6 +746,7 @@ test_libtorch() {
|
||||
ln -sf "$TORCH_LIB_DIR"/libc10* "$TORCH_BIN_DIR"
|
||||
ln -sf "$TORCH_LIB_DIR"/libshm* "$TORCH_BIN_DIR"
|
||||
ln -sf "$TORCH_LIB_DIR"/libtorch* "$TORCH_BIN_DIR"
|
||||
ln -sf "$TORCH_LIB_DIR"/libtbb* "$TORCH_BIN_DIR"
|
||||
ln -sf "$TORCH_LIB_DIR"/libnvfuser* "$TORCH_BIN_DIR"
|
||||
|
||||
export CPP_TESTS_DIR="${TORCH_BIN_DIR}"
|
||||
@ -866,6 +883,7 @@ test_rpc() {
|
||||
# test reporting process to function as expected.
|
||||
ln -sf "$TORCH_LIB_DIR"/libtorch* "$TORCH_BIN_DIR"
|
||||
ln -sf "$TORCH_LIB_DIR"/libc10* "$TORCH_BIN_DIR"
|
||||
ln -sf "$TORCH_LIB_DIR"/libtbb* "$TORCH_BIN_DIR"
|
||||
|
||||
CPP_TESTS_DIR="${TORCH_BIN_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_cpp_rpc
|
||||
}
|
||||
|
4
.gitmodules
vendored
4
.gitmodules
vendored
@ -82,6 +82,10 @@
|
||||
ignore = dirty
|
||||
path = third_party/foxi
|
||||
url = https://github.com/houseroad/foxi.git
|
||||
[submodule "third_party/tbb"]
|
||||
path = third_party/tbb
|
||||
url = https://github.com/01org/tbb
|
||||
branch = tbb_2018
|
||||
[submodule "android/libs/fbjni"]
|
||||
ignore = dirty
|
||||
path = android/libs/fbjni
|
||||
|
14
BUILD.bazel
14
BUILD.bazel
@ -125,6 +125,10 @@ filegroup(
|
||||
data = [":generate-code"],
|
||||
)
|
||||
|
||||
exports_files(
|
||||
srcs = ["aten/src/ATen/cpu/tbb/extra/version_string.ver.in"],
|
||||
)
|
||||
|
||||
# ATen
|
||||
filegroup(
|
||||
name = "aten_base_cpp",
|
||||
@ -271,6 +275,7 @@ header_template_rule(
|
||||
"@AT_BUILD_WITH_LAPACK@": "1",
|
||||
"@AT_PARALLEL_OPENMP@": "0",
|
||||
"@AT_PARALLEL_NATIVE@": "1",
|
||||
"@AT_PARALLEL_NATIVE_TBB@": "0",
|
||||
"@AT_BLAS_F2C@": "0",
|
||||
"@AT_BLAS_USE_CBLAS_DOT@": "1",
|
||||
},
|
||||
@ -354,9 +359,6 @@ cc_library(
|
||||
":aten_src_ATen_config",
|
||||
] + generated_cpu_cpp + aten_ufunc_generated_cpu_sources("aten/src/ATen/{}"),
|
||||
copts = ATEN_COPTS,
|
||||
linkopts = [
|
||||
"-ldl",
|
||||
],
|
||||
data = if_cuda(
|
||||
[":libcaffe2_nvrtc.so"],
|
||||
[],
|
||||
@ -834,9 +836,6 @@ cc_library(
|
||||
],
|
||||
)) + torch_sources,
|
||||
copts = TORCH_COPTS,
|
||||
linkopts = [
|
||||
"-lrt",
|
||||
],
|
||||
defines = [
|
||||
"CAFFE2_NIGHTLY_VERSION=20200115",
|
||||
],
|
||||
@ -857,9 +856,6 @@ cc_library(
|
||||
cc_library(
|
||||
name = "shm",
|
||||
srcs = glob(["torch/lib/libshm/*.cpp"]),
|
||||
linkopts = [
|
||||
"-lrt",
|
||||
],
|
||||
deps = [
|
||||
":torch",
|
||||
],
|
||||
|
@ -329,6 +329,9 @@ cmake_dependent_option(
|
||||
cmake_dependent_option(
|
||||
USE_TENSORPIPE "Use TensorPipe. Only available if USE_DISTRIBUTED is on." ON
|
||||
"USE_DISTRIBUTED" OFF)
|
||||
option(USE_TBB "Use TBB (Deprecated)" OFF)
|
||||
cmake_dependent_option(
|
||||
USE_SYSTEM_TBB "Use system-provided Intel TBB." OFF "USE_TBB" OFF)
|
||||
option(ONNX_ML "Enable traditional ONNX ML API." ON)
|
||||
option(HAVE_SOVERSION "Whether to add SOVERSION to the shared objects" OFF)
|
||||
option(BUILD_LIBTORCH_CPU_WITH_DEBUG "Enable RelWithDebInfo for libtorch_cpu target only" OFF)
|
||||
@ -428,6 +431,9 @@ if(USE_SYSTEM_LIBS)
|
||||
if(USE_NCCL)
|
||||
set(USE_SYSTEM_NCCL ON)
|
||||
endif()
|
||||
if(USE_TBB)
|
||||
set(USE_SYSTEM_TBB ON)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Used when building Caffe2 through setup.py
|
||||
|
10
WORKSPACE
10
WORKSPACE
@ -168,6 +168,16 @@ new_local_repository(
|
||||
path = "third_party/opentelemetry-cpp",
|
||||
)
|
||||
|
||||
new_patched_local_repository(
|
||||
name = "tbb",
|
||||
build_file = "//third_party:tbb.BUILD",
|
||||
patch_strip = 1,
|
||||
patches = [
|
||||
"@//third_party:tbb.patch",
|
||||
],
|
||||
path = "third_party/tbb",
|
||||
)
|
||||
|
||||
new_local_repository(
|
||||
name = "tensorpipe",
|
||||
build_file = "//third_party:tensorpipe.BUILD",
|
||||
|
@ -349,6 +349,16 @@ endif()
|
||||
|
||||
list(APPEND ATen_CPU_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/..)
|
||||
|
||||
if(USE_TBB)
|
||||
if(USE_SYSTEM_TBB)
|
||||
message("ATen is compiled with system-provided Intel TBB.")
|
||||
else()
|
||||
message("ATen is compiled with Intel TBB (${TBB_ROOT_DIR}).")
|
||||
endif()
|
||||
list(APPEND ATen_CPU_INCLUDE ${TBB_INCLUDE_DIR})
|
||||
list(APPEND ATen_CPU_DEPENDENCY_LIBS TBB::tbb)
|
||||
endif()
|
||||
|
||||
if(BLAS_FOUND)
|
||||
if($ENV{TH_BINARY_BUILD})
|
||||
message(STATUS "TH_BINARY_BUILD detected. Enabling special linkage.")
|
||||
|
@ -17,5 +17,6 @@
|
||||
#define AT_BUILD_WITH_LAPACK() @AT_BUILD_WITH_LAPACK@
|
||||
#define AT_PARALLEL_OPENMP @AT_PARALLEL_OPENMP@
|
||||
#define AT_PARALLEL_NATIVE @AT_PARALLEL_NATIVE@
|
||||
#define AT_PARALLEL_NATIVE_TBB @AT_PARALLEL_NATIVE_TBB@
|
||||
#define AT_BLAS_F2C() @AT_BLAS_F2C@
|
||||
#define AT_BLAS_USE_CBLAS_DOT() @AT_BLAS_USE_CBLAS_DOT@
|
||||
|
@ -153,6 +153,8 @@ TORCH_API int intraop_default_num_threads();
|
||||
#include <ATen/ParallelOpenMP.h> // IWYU pragma: keep
|
||||
#elif AT_PARALLEL_NATIVE
|
||||
#include <ATen/ParallelNative.h> // IWYU pragma: keep
|
||||
#elif AT_PARALLEL_NATIVE_TBB
|
||||
#include <ATen/ParallelNativeTBB.h> // IWYU pragma: keep
|
||||
#endif
|
||||
|
||||
#include <ATen/Parallel-inl.h> // IWYU pragma: keep
|
||||
|
@ -80,6 +80,8 @@ std::string get_parallel_info() {
|
||||
ss << "OpenMP";
|
||||
#elif AT_PARALLEL_NATIVE
|
||||
ss << "native thread pool";
|
||||
#elif AT_PARALLEL_NATIVE_TBB
|
||||
ss << "native thread pool and TBB";
|
||||
#endif
|
||||
#ifdef C10_MOBILE
|
||||
ss << " [mobile]";
|
||||
|
115
aten/src/ATen/ParallelNativeTBB.cpp
Normal file
115
aten/src/ATen/ParallelNativeTBB.cpp
Normal file
@ -0,0 +1,115 @@
|
||||
#include <ATen/Config.h>
|
||||
#if AT_PARALLEL_NATIVE_TBB
|
||||
#include <ATen/Parallel.h>
|
||||
#include <ATen/ParallelFuture.h>
|
||||
#include <ATen/PTThreadPool.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <mutex>
|
||||
|
||||
#include <tbb/tbb.h>
|
||||
#define TBB_PREVIEW_GLOBAL_CONTROL 1
|
||||
#include <tbb/global_control.h>
|
||||
|
||||
#ifdef _OPENMP
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
#if AT_MKL_ENABLED()
|
||||
#include <mkl.h>
|
||||
#endif
|
||||
|
||||
namespace at {
|
||||
|
||||
namespace {
|
||||
static thread_local tbb::task_group tg_;
|
||||
thread_local int this_thread_id{0};
|
||||
|
||||
std::mutex global_thread_mutex_;
|
||||
std::shared_ptr<tbb::global_control> global_thread_limit_ = nullptr;
|
||||
std::atomic<int> num_intraop_threads_{-1};
|
||||
|
||||
void _internal_set_num_threads(int nthreads) {
|
||||
TORCH_INTERNAL_ASSERT(nthreads > 0);
|
||||
{
|
||||
std::unique_lock<std::mutex> lk(global_thread_mutex_);
|
||||
// This is an antipattern and we shouldn't be constraining the number of
|
||||
// threads in library code.
|
||||
// TODO: Think of a smarter way to leverage tbb::thread_arena to limit the
|
||||
// number of slots instead of the number of threads.
|
||||
global_thread_limit_ = std::make_shared<tbb::global_control>(
|
||||
tbb::global_control::max_allowed_parallelism, nthreads);
|
||||
num_intraop_threads_.store(nthreads);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void init_num_threads() {
|
||||
#ifdef _OPENMP
|
||||
omp_set_num_threads(1);
|
||||
#endif
|
||||
|
||||
#if AT_MKL_ENABLED()
|
||||
mkl_set_num_threads(1);
|
||||
#endif
|
||||
|
||||
int nthreads = num_intraop_threads_.load();
|
||||
if (nthreads < 0) {
|
||||
nthreads = intraop_default_num_threads();
|
||||
}
|
||||
_internal_set_num_threads(nthreads);
|
||||
}
|
||||
|
||||
// Sets the number of intra-op threads.
// Raises a c10::Error (via TORCH_CHECK) when `nthreads` is not positive.
void set_num_threads(int nthreads) {
  // Give the user an actionable message instead of the generic
  // "Expected nthreads > 0 to be true" text produced by a bare TORCH_CHECK.
  TORCH_CHECK(
      nthreads > 0,
      "Expected positive number of threads, but got ",
      nthreads);

  _internal_set_num_threads(nthreads);
}
|
||||
|
||||
int get_num_threads() {
|
||||
at::internal::lazy_init_num_threads();
|
||||
return tbb::global_control::active_value(
|
||||
tbb::global_control::max_allowed_parallelism);
|
||||
}
|
||||
|
||||
int get_thread_num() {
|
||||
return this_thread_id;
|
||||
}
|
||||
|
||||
namespace internal {
|
||||
void set_thread_num(int id) {
|
||||
this_thread_id = id;
|
||||
}
|
||||
}
|
||||
|
||||
bool in_parallel_region() {
|
||||
return tbb::this_task_arena::current_thread_index() >= 0;
|
||||
}
|
||||
|
||||
void intraop_launch(std::function<void()> func) {
|
||||
if (get_num_threads() > 1) {
|
||||
tg_.run(func);
|
||||
} else {
|
||||
func();
|
||||
}
|
||||
}
|
||||
|
||||
// Same dispatch rule as intraop_launch, but returns a Future that is marked
// completed once `func` has returned.
c10::intrusive_ptr<c10::ivalue::Future> intraop_launch_future(
    std::function<void()> func) {
  auto future = c10::make_intrusive<c10::ivalue::Future>(NoneType::get());

  // Single-threaded configuration: execute synchronously on the caller.
  if (get_num_threads() <= 1) {
    func();
    future->markCompleted();
    return future;
  }

  // The task owns copies of both the callable and the future handle.
  tg_.run([func, future]() {
    func();
    future->markCompleted();
  });
  return future;
}
|
||||
|
||||
} // namespace at
|
||||
#endif
|
52
aten/src/ATen/ParallelNativeTBB.h
Normal file
52
aten/src/ATen/ParallelNativeTBB.h
Normal file
@ -0,0 +1,52 @@
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <cstddef>
|
||||
#include <exception>
|
||||
|
||||
#include <c10/util/Exception.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#ifndef WIN32_LEAN_AND_MEAN
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#endif
|
||||
#endif
|
||||
#include <tbb/tbb.h>
|
||||
|
||||
#define INTRA_OP_PARALLEL
|
||||
|
||||
namespace at::internal {
|
||||
|
||||
template <typename F>
|
||||
inline void invoke_parallel(
|
||||
const int64_t begin,
|
||||
const int64_t end,
|
||||
const int64_t grain_size,
|
||||
const F& f) {
|
||||
// Choose number of tasks based on grain size and number of threads.
|
||||
int64_t chunk_size = divup((end - begin), get_num_threads());
|
||||
// Make sure each task is at least grain_size size.
|
||||
chunk_size = std::max(grain_size, chunk_size);
|
||||
|
||||
std::atomic_flag err_flag = ATOMIC_FLAG_INIT;
|
||||
std::exception_ptr eptr;
|
||||
tbb::parallel_for(
|
||||
tbb::blocked_range<int64_t>(begin, end, chunk_size),
|
||||
[&eptr, &err_flag, f](const tbb::blocked_range<int64_t>& r) {
|
||||
try {
|
||||
internal::ThreadIdGuard tid_guard(
|
||||
tbb::this_task_arena::current_thread_index());
|
||||
f(r.begin(), r.end());
|
||||
} catch (...) {
|
||||
if (!err_flag.test_and_set()) {
|
||||
eptr = std::current_exception();
|
||||
}
|
||||
}
|
||||
},
|
||||
tbb::static_partitioner{});
|
||||
if (eptr) {
|
||||
std::rethrow_exception(eptr);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace at::internal
|
@ -1,5 +1,5 @@
|
||||
#include <ATen/Config.h>
|
||||
#if AT_PARALLEL_OPENMP || AT_PARALLEL_NATIVE
|
||||
#if AT_PARALLEL_OPENMP || AT_PARALLEL_NATIVE || AT_PARALLEL_NATIVE_TBB
|
||||
#include <ATen/Parallel.h>
|
||||
#include <ATen/PTThreadPool.h>
|
||||
#include <ATen/ThreadLocalState.h>
|
||||
|
391
aten/src/ATen/cpu/tbb/CMakeLists.txt
Normal file
391
aten/src/ATen/cpu/tbb/CMakeLists.txt
Normal file
@ -0,0 +1,391 @@
|
||||
# Based on https://github.com/wjakob/tbb/blob/master/CMakeLists.txt
|
||||
# All credit goes to Wenzel Jakob!
|
||||
|
||||
cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR)
|
||||
project(tbb CXX)
|
||||
|
||||
include(CheckCXXCompilerFlag)
|
||||
include(CheckCXXSourceRuns)
|
||||
|
||||
if(POLICY CMP0058)
|
||||
cmake_policy(SET CMP0058 NEW)
|
||||
endif()
|
||||
|
||||
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
|
||||
message(STATUS "Setting build type to 'Release' as none was specified.")
|
||||
set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)
|
||||
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release"
|
||||
"MinSizeRel" "RelWithDebInfo")
|
||||
endif()
|
||||
|
||||
if(NOT TBB_ROOT_DIR)
|
||||
set(TBB_ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
|
||||
endif()
|
||||
if(NOT TBB_INSTALL_EXPORT_NAME)
|
||||
set(TBB_INSTALL_EXPORT_NAME "Caffe2Targets")
|
||||
endif()
|
||||
if(NOT TBB_INSTALL_EXPORT_DESTINATION)
|
||||
set(TBB_INSTALL_EXPORT_DESTINATION lib)
|
||||
endif()
|
||||
if(NOT TBB_INSTALL_RUNTIME_DIR)
|
||||
set(TBB_INSTALL_RUNTIME_DIR bin)
|
||||
endif()
|
||||
if(NOT TBB_INSTALL_LIBRARY_DIR)
|
||||
set(TBB_INSTALL_LIBRARY_DIR lib)
|
||||
endif()
|
||||
if(NOT TBB_INSTALL_ARCHIVE_DIR)
|
||||
set(TBB_INSTALL_ARCHIVE_DIR lib)
|
||||
endif()
|
||||
if(NOT TBB_INSTALL_INCLUDE_DIR)
|
||||
set(TBB_INSTALL_INCLUDE_DIR "${TBB_ROOT_DIR}/include")
|
||||
endif()
|
||||
|
||||
set(TBB_INCLUDES
|
||||
"${TBB_ROOT_DIR}/include"
|
||||
"${TBB_ROOT_DIR}/src"
|
||||
"${TBB_ROOT_DIR}/src/rml/include"
|
||||
${CMAKE_CURRENT_BINARY_DIR})
|
||||
|
||||
option(TBB_BUILD_SHARED "Build TBB shared library" ON)
|
||||
option(TBB_BUILD_STATIC "Build TBB static library" ON)
|
||||
option(TBB_BUILD_TBBMALLOC "Build TBB malloc library" ON)
|
||||
option(TBB_BUILD_TBBMALLOC_PROXY "Build TBB malloc proxy library" ON)
|
||||
option(TBB_BUILD_TESTS "Build TBB tests and enable testing infrastructure" ON)
|
||||
option(TBB_CI_BUILD "Is this a continuous integration build?" OFF)
|
||||
|
||||
if(APPLE)
|
||||
set(CMAKE_MACOSX_RPATH ON)
|
||||
endif()
|
||||
|
||||
file(GLOB tbb_src "${TBB_ROOT_DIR}/src/tbb/*.cpp" "${TBB_ROOT_DIR}/src/old/*.cpp")
|
||||
list(APPEND tbb_src ${TBB_ROOT_DIR}/src/rml/client/rml_tbb.cpp)
|
||||
file(GLOB to_remove "${TBB_ROOT_DIR}/src/old/test*.cpp")
|
||||
if(NOT "${to_remove}" STREQUAL "")
|
||||
list(REMOVE_ITEM tbb_src ${to_remove})
|
||||
endif()
|
||||
|
||||
set(tbbmalloc_static_src
|
||||
src/tbbmalloc/backend.cpp
|
||||
src/tbbmalloc/large_objects.cpp
|
||||
src/tbbmalloc/backref.cpp
|
||||
src/tbbmalloc/tbbmalloc.cpp
|
||||
src/tbbmalloc/frontend.cpp
|
||||
src/tbb/itt_notify.cpp)
|
||||
|
||||
set(tbbmalloc_src ${tbbmalloc_static_src})
|
||||
|
||||
set(tbbmalloc_proxy_src
|
||||
src/tbbmalloc/proxy.cpp
|
||||
src/tbbmalloc/tbb_function_replacement.cpp)
|
||||
|
||||
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(i386|x86_64)")
|
||||
if(NOT APPLE AND NOT MINGW)
|
||||
add_definitions(-DDO_ITT_NOTIFY)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(APPLE)
|
||||
# Disable annoying "has no symbols" warnings
|
||||
set(CMAKE_C_ARCHIVE_CREATE "<CMAKE_AR> Scr <TARGET> <LINK_FLAGS> <OBJECTS>")
|
||||
set(CMAKE_CXX_ARCHIVE_CREATE "<CMAKE_AR> Scr <TARGET> <LINK_FLAGS> <OBJECTS>")
|
||||
set(CMAKE_C_ARCHIVE_FINISH "<CMAKE_RANLIB> -no_warning_for_no_symbols -c <TARGET>")
|
||||
set(CMAKE_CXX_ARCHIVE_FINISH "<CMAKE_RANLIB> -no_warning_for_no_symbols -c <TARGET>")
|
||||
endif()
|
||||
|
||||
macro(CHECK_CXX_COMPILER_AND_LINKER_FLAGS _RESULT _CXX_FLAGS _LINKER_FLAGS)
|
||||
set(CMAKE_REQUIRED_FLAGS ${_CXX_FLAGS})
|
||||
set(CMAKE_REQUIRED_LIBRARIES ${_LINKER_FLAGS})
|
||||
set(CMAKE_REQUIRED_QUIET TRUE)
|
||||
check_cxx_source_runs("#include <iostream>\nint main(int argc, char **argv) { std::cout << \"test\"; return 0; }" ${_RESULT})
|
||||
set(CMAKE_REQUIRED_FLAGS "")
|
||||
set(CMAKE_REQUIRED_LIBRARIES "")
|
||||
endmacro()
|
||||
|
||||
# Prefer libc++ in conjunction with Clang
|
||||
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
|
||||
if(CMAKE_CXX_FLAGS MATCHES "-stdlib=libc\\+\\+")
|
||||
message(STATUS "TBB: using libc++.")
|
||||
else()
|
||||
CHECK_CXX_COMPILER_AND_LINKER_FLAGS(HAS_LIBCPP "-stdlib=libc++" "-stdlib=libc++")
|
||||
if(HAS_LIBCPP)
|
||||
string(APPEND CMAKE_CXX_FLAGS " -stdlib=libc++ -D_LIBCPP_VERSION")
|
||||
string(APPEND CMAKE_EXE_LINKER_FLAGS " -stdlib=libc++")
|
||||
string(APPEND CMAKE_SHARED_LINKER_FLAGS " -stdlib=libc++")
|
||||
message(STATUS "TBB: using libc++.")
|
||||
else()
|
||||
message(STATUS "TBB: NOT using libc++.")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(UNIX)
|
||||
add_definitions(-DUSE_PTHREAD)
|
||||
|
||||
check_cxx_compiler_flag("-std=c++17" SUPPORTS_STDCXX17)
|
||||
if(SUPPORTS_STDCXX17)
|
||||
set(CMAKE_CXX_FLAGS "-std=c++17 ${CMAKE_CXX_FLAGS}")
|
||||
endif()
|
||||
|
||||
check_cxx_compiler_flag("-mrtm -Werror" SUPPORTS_MRTM)
|
||||
if(SUPPORTS_MRTM)
|
||||
set(CMAKE_CXX_FLAGS "-mrtm ${CMAKE_CXX_FLAGS}")
|
||||
endif()
|
||||
|
||||
elseif(WIN32)
|
||||
if(MSVC)
|
||||
cmake_minimum_required(VERSION 3.1)
|
||||
enable_language(ASM_MASM)
|
||||
set(CMAKE_CXX_FLAGS "/GS- /Zc:wchar_t /Zc:forScope /DUSE_WINTHREAD ${CMAKE_CXX_FLAGS}")
|
||||
set(CMAKE_CXX_FLAGS "/D_CRT_SECURE_NO_DEPRECATE /D_WIN32_WINNT=0x0600 ${CMAKE_CXX_FLAGS}")
|
||||
check_cxx_compiler_flag("/volatile:iso" SUPPORTS_VOLATILE_FLAG)
|
||||
if(SUPPORTS_VOLATILE_FLAG)
|
||||
set(CMAKE_CXX_FLAGS "/volatile:iso ${CMAKE_CXX_FLAGS}")
|
||||
endif()
|
||||
set(CMAKE_CXX_FLAGS "/wd4267 /wd4800 /wd4146 /wd4244 /wd4577 /wd4018 ${CMAKE_CXX_FLAGS}")
|
||||
if(NOT CMAKE_SIZEOF_VOID_P)
|
||||
message(FATAL_ERROR "'CMAKE_SIZEOF_VOID_P' is undefined. Please delete your build directory and rerun CMake again!")
|
||||
endif()
|
||||
|
||||
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
list(APPEND tbb_src "${TBB_ROOT_DIR}/src/tbb/intel64-masm/atomic_support.asm")
|
||||
list(APPEND tbb_src "${TBB_ROOT_DIR}/src/tbb/intel64-masm/itsx.asm")
|
||||
list(APPEND tbb_src "${TBB_ROOT_DIR}/src/tbb/intel64-masm/intel64_misc.asm")
|
||||
list(APPEND tbbmalloc_src "${TBB_ROOT_DIR}/src/tbb/intel64-masm/atomic_support.asm")
|
||||
set(CMAKE_ASM_MASM_FLAGS "/DEM64T=1 ${CMAKE_ASM_MASM_FLAGS}")
|
||||
else()
|
||||
list(APPEND tbb_src "${TBB_ROOT_DIR}/src/tbb/ia32-masm/atomic_support.asm"
|
||||
"${TBB_ROOT_DIR}/src/tbb/ia32-masm/itsx.asm src/tbb/ia32-masm/lock_byte.asm")
|
||||
# Enable SAFESEH feature for assembly (x86 builds only).
|
||||
set(CMAKE_ASM_MASM_FLAGS "/safeseh ${CMAKE_ASM_MASM_FLAGS}")
|
||||
endif()
|
||||
elseif(MINGW)
|
||||
add_definitions(-DUSE_WINTHREAD)
|
||||
add_definitions(-D_WIN32_WINNT=0x0502)
|
||||
set(CMAKE_CXX_FLAGS "-mthreads ${CMAKE_CXX_FLAGS}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(MSVC)
|
||||
set(ENABLE_RTTI "/EHsc /GR ")
|
||||
set(DISABLE_RTTI "/EHs- /GR- ")
|
||||
elseif(UNIX)
|
||||
set(ENABLE_RTTI "-frtti -fexceptions ")
|
||||
set(DISABLE_RTTI "-fno-rtti -fno-exceptions ")
|
||||
endif()
|
||||
|
||||
##--------
|
||||
# - Added TBB_USE_GLIBCXX_VERSION macro to specify the version of GNU
|
||||
# libstdc++ when it cannot be properly recognized, e.g. when used
|
||||
# with Clang on Linux* OS. Inspired by a contribution from David A.
|
||||
if(NOT TBB_USE_GLIBCXX_VERSION AND UNIX AND NOT APPLE)
|
||||
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
|
||||
# using Clang
|
||||
string(REPLACE "." "0" TBB_USE_GLIBCXX_VERSION ${CMAKE_CXX_COMPILER_VERSION})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(TBB_USE_GLIBCXX_VERSION)
|
||||
add_definitions(-DTBB_USE_GLIBCXX_VERSION=${TBB_USE_GLIBCXX_VERSION})
|
||||
endif()
|
||||
|
||||
##-------
|
||||
|
||||
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
|
||||
check_cxx_compiler_flag("-flifetime-dse=1" SUPPORTS_FLIFETIME)
|
||||
if(SUPPORTS_FLIFETIME)
|
||||
add_definitions(-flifetime-dse=1)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Linker export definitions
|
||||
if(APPLE)
|
||||
set(ARCH_PREFIX "mac")
|
||||
elseif(WIN32)
|
||||
set(ARCH_PREFIX "win")
|
||||
else()
|
||||
set(ARCH_PREFIX "lin")
|
||||
endif()
|
||||
|
||||
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
set(ARCH_PREFIX "${ARCH_PREFIX}64")
|
||||
else()
|
||||
set(ARCH_PREFIX "${ARCH_PREFIX}32")
|
||||
endif()
|
||||
|
||||
if(MINGW)
|
||||
set(ARCH_PREFIX "${ARCH_PREFIX}-gcc")
|
||||
# there's no win32-gcc-tbb-export.def, use lin32-tbb-export.def
|
||||
execute_process(COMMAND ${CMAKE_COMMAND} -E copy ${TBB_ROOT_DIR}/src/tbb/lin32-tbb-export.def ${TBB_ROOT_DIR}/src/tbb/win32-gcc-tbb-export.def)
|
||||
endif()
|
||||
|
||||
if(MSVC)
|
||||
add_custom_command(OUTPUT tbb.def
|
||||
COMMAND ${CMAKE_CXX_COMPILER} /TC /EP ${TBB_ROOT_DIR}/src/tbb/${ARCH_PREFIX}-tbb-export.def -I ${TBB_ROOT_DIR}/include > tbb.def
|
||||
MAIN_DEPENDENCY ${TBB_ROOT_DIR}/src/tbb/${ARCH_PREFIX}-tbb-export.def
|
||||
COMMENT "Preprocessing tbb.def"
|
||||
)
|
||||
|
||||
add_custom_command(OUTPUT tbbmalloc.def
|
||||
COMMAND ${CMAKE_CXX_COMPILER} /TC /EP ${TBB_ROOT_DIR}/src/tbbmalloc/${ARCH_PREFIX}-tbbmalloc-export.def -I ${TBB_ROOT_DIR}/include > tbbmalloc.def
|
||||
MAIN_DEPENDENCY ${TBB_ROOT_DIR}/src/tbbmalloc/${ARCH_PREFIX}-tbbmalloc-export.def
|
||||
COMMENT "Preprocessing tbbmalloc.def"
|
||||
)
|
||||
else()
|
||||
add_custom_command(OUTPUT tbb.def
|
||||
COMMAND ${CMAKE_CXX_COMPILER} -xc++ -E ${TBB_ROOT_DIR}/src/tbb/${ARCH_PREFIX}-tbb-export.def -I ${TBB_ROOT_DIR}/include -o tbb.def
|
||||
MAIN_DEPENDENCY ${TBB_ROOT_DIR}/src/tbb/${ARCH_PREFIX}-tbb-export.def
|
||||
COMMENT "Preprocessing tbb.def"
|
||||
)
|
||||
|
||||
add_custom_command(OUTPUT tbbmalloc.def
|
||||
COMMAND ${CMAKE_CXX_COMPILER} -xc++ -E ${TBB_ROOT_DIR}/src/tbbmalloc/${ARCH_PREFIX}-tbbmalloc-export.def -I ${TBB_ROOT_DIR}/include -o tbbmalloc.def
|
||||
MAIN_DEPENDENCY ${TBB_ROOT_DIR}/src/tbbmalloc/${ARCH_PREFIX}-tbbmalloc-export.def
|
||||
COMMENT "Preprocessing tbbmalloc.def"
|
||||
)
|
||||
endif()
|
||||
|
||||
add_custom_target(tbb_def_files DEPENDS tbb.def tbbmalloc.def)
|
||||
|
||||
# TBB library
|
||||
if(TBB_BUILD_STATIC)
|
||||
add_library(tbb_static STATIC ${tbb_src})
|
||||
target_include_directories(tbb_static PRIVATE ${TBB_INCLUDES})
|
||||
set_property(TARGET tbb_static APPEND PROPERTY COMPILE_DEFINITIONS "__TBB_BUILD=1")
|
||||
set_property(TARGET tbb_static APPEND_STRING PROPERTY COMPILE_FLAGS ${ENABLE_RTTI})
|
||||
install(TARGETS tbb_static
|
||||
EXPORT ${TBB_INSTALL_EXPORT_NAME} DESTINATION ${TBB_INSTALL_EXPORT_DESTINATION}
|
||||
ARCHIVE DESTINATION ${TBB_INSTALL_ARCHIVE_DIR})
|
||||
if(MSVC)
|
||||
target_compile_definitions(tbb_static PUBLIC __TBB_NO_IMPLICIT_LINKAGE=1)
|
||||
endif()
|
||||
|
||||
if(UNIX AND NOT APPLE)
|
||||
target_link_libraries(tbb_static PUBLIC pthread dl)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(TBB_BUILD_SHARED)
|
||||
add_library(tbb SHARED ${tbb_src})
|
||||
target_include_directories(tbb PRIVATE ${TBB_INCLUDES})
|
||||
set_property(TARGET tbb APPEND PROPERTY COMPILE_DEFINITIONS "__TBB_BUILD=1")
|
||||
set_property(TARGET tbb APPEND_STRING PROPERTY COMPILE_FLAGS ${ENABLE_RTTI})
|
||||
add_dependencies(tbb tbb_def_files)
|
||||
|
||||
if(APPLE)
|
||||
set_property(TARGET tbb APPEND PROPERTY LINK_FLAGS "-Wl,-exported_symbols_list,\"${CMAKE_CURRENT_BINARY_DIR}/tbb.def\"")
|
||||
elseif(MSVC)
|
||||
set_property(TARGET tbb APPEND PROPERTY LINK_FLAGS "/DEF:\"${CMAKE_CURRENT_BINARY_DIR}/tbb.def\"")
|
||||
else()
|
||||
set_property(TARGET tbb APPEND PROPERTY LINK_FLAGS "-Wl,-version-script,\"${CMAKE_CURRENT_BINARY_DIR}/tbb.def\"")
|
||||
endif()
|
||||
|
||||
install(TARGETS tbb
|
||||
EXPORT ${TBB_INSTALL_EXPORT_NAME} DESTINATION ${TBB_INSTALL_EXPORT_DESTINATION}
|
||||
LIBRARY DESTINATION ${TBB_INSTALL_LIBRARY_DIR}
|
||||
ARCHIVE DESTINATION ${TBB_INSTALL_ARCHIVE_DIR}
|
||||
RUNTIME DESTINATION ${TBB_INSTALL_RUNTIME_DIR})
|
||||
if(UNIX AND NOT APPLE)
|
||||
target_link_libraries(tbb PUBLIC pthread dl)
|
||||
endif()
|
||||
if(MSVC)
|
||||
target_compile_definitions(tbb PUBLIC __TBB_NO_IMPLICIT_LINKAGE=1)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
||||
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
|
||||
# Quench a warning on GCC
|
||||
set_source_files_properties(${TBB_ROOT_DIR}/src/tbb/governor.cpp COMPILE_FLAGS "-Wno-missing-field-initializers ")
|
||||
elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
|
||||
# Quench a warning on Clang
|
||||
set_source_files_properties(${TBB_ROOT_DIR}/src/tbb/itt_notify.cpp COMPILE_FLAGS "-Wno-varargs ")
|
||||
elseif(MSVC)
|
||||
# Quench a warning on MSVC
|
||||
set_source_files_properties(${TBB_ROOT_DIR}/src/tbb/scheduler.cpp COMPILE_FLAGS "/wd4458 ")
|
||||
endif()
|
||||
|
||||
if(TBB_BUILD_TBBMALLOC)
|
||||
# TBB malloc library
|
||||
if(TBB_BUILD_STATIC)
|
||||
add_library(tbbmalloc_static STATIC ${tbbmalloc_static_src})
|
||||
target_include_directories(tbbmalloc_static PRIVATE ${TBB_INCLUDES})
|
||||
set_property(TARGET tbbmalloc_static APPEND PROPERTY COMPILE_DEFINITIONS "__TBBMALLOC_BUILD=1")
|
||||
set_property(TARGET tbbmalloc_static APPEND_STRING PROPERTY COMPILE_FLAGS ${DISABLE_RTTI})
|
||||
if(MSVC)
|
||||
target_compile_definitions(tbbmalloc_static PUBLIC __TBB_NO_IMPLICIT_LINKAGE=1 __TBBMALLOC_NO_IMPLICIT_LINKAGE=1)
|
||||
endif()
|
||||
install(TARGETS tbbmalloc_static
|
||||
EXPORT ${TBB_INSTALL_EXPORT_NAME} DESTINATION ${TBB_INSTALL_EXPORT_DESTINATION}
|
||||
ARCHIVE DESTINATION ${TBB_INSTALL_ARCHIVE_DIR})
|
||||
endif()
|
||||
|
||||
if(TBB_BUILD_SHARED)
|
||||
add_library(tbbmalloc SHARED ${tbbmalloc_src})
|
||||
target_include_directories(tbbmalloc PRIVATE ${TBB_INCLUDES})
|
||||
set_property(TARGET tbbmalloc APPEND PROPERTY COMPILE_DEFINITIONS "__TBBMALLOC_BUILD=1")
|
||||
set_property(TARGET tbbmalloc APPEND_STRING PROPERTY COMPILE_FLAGS ${DISABLE_RTTI})
|
||||
add_dependencies(tbbmalloc tbb_def_files)
|
||||
if(APPLE)
|
||||
set_property(TARGET tbbmalloc APPEND PROPERTY LINK_FLAGS "-Wl,-exported_symbols_list,\"${CMAKE_CURRENT_BINARY_DIR}/tbbmalloc.def\"")
|
||||
elseif(MSVC)
|
||||
set_property(TARGET tbbmalloc APPEND PROPERTY LINK_FLAGS "/DEF:\"${CMAKE_CURRENT_BINARY_DIR}/tbbmalloc.def\"")
|
||||
else()
|
||||
set_property(TARGET tbbmalloc APPEND PROPERTY LINK_FLAGS "-Wl,-version-script,\"${CMAKE_CURRENT_BINARY_DIR}/tbbmalloc.def\"")
|
||||
endif()
|
||||
if(MSVC)
|
||||
target_compile_definitions(tbbmalloc PUBLIC __TBB_NO_IMPLICIT_LINKAGE=1 __TBBMALLOC_NO_IMPLICIT_LINKAGE=1)
|
||||
endif()
|
||||
install(TARGETS tbbmalloc
|
||||
EXPORT ${TBB_INSTALL_EXPORT_NAME} DESTINATION ${TBB_INSTALL_EXPORT_DESTINATION}
|
||||
LIBRARY DESTINATION ${TBB_INSTALL_LIBRARY_DIR}
|
||||
ARCHIVE DESTINATION ${TBB_INSTALL_ARCHIVE_DIR}
|
||||
RUNTIME DESTINATION ${TBB_INSTALL_RUNTIME_DIR})
|
||||
if(UNIX AND NOT APPLE)
|
||||
target_link_libraries(tbbmalloc PUBLIC pthread dl)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(TBB_BUILD_TBBMALLOC_PROXY)
|
||||
# TBB malloc proxy library
|
||||
if(TBB_BUILD_STATIC)
|
||||
add_library(tbbmalloc_proxy_static STATIC ${tbbmalloc_proxy_src})
|
||||
set_property(TARGET tbbmalloc_proxy_static APPEND PROPERTY COMPILE_DEFINITIONS "__TBBMALLOC_BUILD=1")
|
||||
set_property(TARGET tbbmalloc_proxy_static APPEND_STRING PROPERTY COMPILE_FLAGS ${DISABLE_RTTI})
|
||||
install(TARGETS tbbmalloc_proxy_static
|
||||
EXPORT ${TBB_INSTALL_EXPORT_NAME} DESTINATION ${TBB_INSTALL_EXPORT_DESTINATION}
|
||||
ARCHIVE DESTINATION ${TBB_INSTALL_ARCHIVE_DIR})
|
||||
endif()
|
||||
|
||||
if(TBB_BUILD_SHARED)
|
||||
add_library(tbbmalloc_proxy SHARED ${tbbmalloc_proxy_src})
|
||||
set_property(TARGET tbbmalloc_proxy APPEND PROPERTY COMPILE_DEFINITIONS "__TBBMALLOC_BUILD=1")
|
||||
set_property(TARGET tbbmalloc_proxy APPEND_STRING PROPERTY COMPILE_FLAGS ${DISABLE_RTTI})
|
||||
target_link_libraries(tbbmalloc_proxy PUBLIC tbbmalloc)
|
||||
install(TARGETS tbbmalloc_proxy
|
||||
EXPORT ${TBB_INSTALL_EXPORT_NAME} DESTINATION ${TBB_INSTALL_EXPORT_DESTINATION}
|
||||
LIBRARY DESTINATION ${TBB_INSTALL_LIBRARY_DIR}
|
||||
ARCHIVE DESTINATION ${TBB_INSTALL_ARCHIVE_DIR}
|
||||
RUNTIME DESTINATION ${TBB_INSTALL_RUNTIME_DIR})
|
||||
if(UNIX AND NOT APPLE)
|
||||
target_link_libraries(tbbmalloc_proxy PUBLIC pthread dl)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
install(DIRECTORY "${TBB_ROOT_DIR}/include/tbb" DESTINATION ${TBB_INSTALL_INCLUDE_DIR})
|
||||
|
||||
# version_string.ver
|
||||
if(UNIX)
|
||||
execute_process(COMMAND date "+%a, %d %b %Y %H:%M:%S %z"
|
||||
OUTPUT_VARIABLE _configure_date
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
elseif(WIN32)
|
||||
execute_process(COMMAND cmd " /C date /T"
|
||||
OUTPUT_VARIABLE _configure_date
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
else()
|
||||
set(_configure_date "Unknown")
|
||||
endif()
|
||||
include_directories(${CMAKE_BINARY_DIR})
|
||||
configure_file(extra/version_string.ver.in version_string.ver @ONLY)
|
11
aten/src/ATen/cpu/tbb/extra/version_string.ver.in
Normal file
11
aten/src/ATen/cpu/tbb/extra/version_string.ver.in
Normal file
@ -0,0 +1,11 @@
|
||||
#define __TBB_VERSION_STRINGS(N) \
|
||||
#N": BUILD_HOST @CMAKE_SYSTEM_NAME@" ENDL \
|
||||
#N": BUILD_OS @CMAKE_SYSTEM@" ENDL \
|
||||
#N": BUILD_KERNEL @CMAKE_SYSTEM_VERSION@" ENDL \
|
||||
#N": BUILD_GCC @CMAKE_CXX_COMPILER_ID@" ENDL \
|
||||
#N": BUILD_LIBC Unknown" ENDL \
|
||||
#N": BUILD_LD Unknown" ENDL \
|
||||
#N": BUILD_TARGET Unknown" ENDL \
|
||||
#N": BUILD_COMMAND Unknown" ENDL
|
||||
|
||||
#define __TBB_DATETIME "@_configure_date@"
|
@ -261,6 +261,7 @@ def get_aten_preprocessor_flags():
|
||||
"-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION",
|
||||
"-DAT_PARALLEL_OPENMP_FBXPLAT=0",
|
||||
"-DAT_PARALLEL_NATIVE_FBXPLAT=1",
|
||||
"-DAT_PARALLEL_NATIVE_TBB_FBXPLAT=0",
|
||||
"-DUSE_LAPACK_FBXPLAT=0",
|
||||
"-DAT_BLAS_F2C_FBXPLAT=0",
|
||||
"-DAT_BLAS_USE_CBLAS_DOT_FBXPLAT=0",
|
||||
@ -1111,6 +1112,9 @@ def define_buck_targets(
|
||||
"@AT_PARALLEL_NATIVE@",
|
||||
"AT_PARALLEL_NATIVE_FBXPLAT",
|
||||
"--replace",
|
||||
"@AT_PARALLEL_NATIVE_TBB@",
|
||||
"AT_PARALLEL_NATIVE_TBB_FBXPLAT",
|
||||
"--replace",
|
||||
"@AT_BUILD_WITH_LAPACK@",
|
||||
"USE_LAPACK_FBXPLAT",
|
||||
"--replace",
|
||||
|
@ -999,6 +999,7 @@ aten_cpu_source_non_codegen_list = [
|
||||
"aten/src/ATen/NestedTensorImpl.cpp",
|
||||
"aten/src/ATen/ParallelCommon.cpp",
|
||||
"aten/src/ATen/ParallelNative.cpp",
|
||||
"aten/src/ATen/ParallelNativeTBB.cpp",
|
||||
"aten/src/ATen/ParallelOpenMP.cpp",
|
||||
"aten/src/ATen/ParallelThreadPoolNative.cpp",
|
||||
"aten/src/ATen/PythonTorchFunctionTLS.cpp",
|
||||
|
@ -16,11 +16,14 @@ endif()
|
||||
# ATen parallelism settings
|
||||
# OMP - OpenMP for intra-op, native thread pool for inter-op parallelism
|
||||
# NATIVE - using native thread pool for intra- and inter-op parallelism
|
||||
# TBB - using TBB for intra- and native thread pool for inter-op parallelism
|
||||
if(INTERN_BUILD_MOBILE)
|
||||
set(ATEN_THREADING "NATIVE" CACHE STRING "ATen parallel backend")
|
||||
else()
|
||||
if(USE_OPENMP)
|
||||
set(ATEN_THREADING "OMP" CACHE STRING "ATen parallel backend")
|
||||
elseif(USE_TBB)
|
||||
set(ATEN_THREADING "TBB" CACHE STRING "ATen parallel backend")
|
||||
else()
|
||||
set(ATEN_THREADING "NATIVE" CACHE STRING "ATen parallel backend")
|
||||
endif()
|
||||
@ -28,12 +31,19 @@ endif()
|
||||
|
||||
set(AT_PARALLEL_OPENMP 0)
|
||||
set(AT_PARALLEL_NATIVE 0)
|
||||
set(AT_PARALLEL_NATIVE_TBB 0)
|
||||
|
||||
message(STATUS "Using ATen parallel backend: ${ATEN_THREADING}")
|
||||
# Turn on the AT_PARALLEL_* flag that corresponds to the ATEN_THREADING value;
# any value other than OMP, NATIVE or TBB aborts configuration.
if("${ATEN_THREADING}" STREQUAL "OMP")
  set(AT_PARALLEL_OPENMP 1)
elseif("${ATEN_THREADING}" STREQUAL "NATIVE")
  set(AT_PARALLEL_NATIVE 1)
elseif("${ATEN_THREADING}" STREQUAL "TBB")
  # Refuse the TBB backend unless USE_TBB is enabled.
  if(NOT USE_TBB)
    message(FATAL_ERROR "Using TBB backend but USE_TBB is off")
  endif()
  # The backend still works, but warn on every configure that it is deprecated.
  message(WARNING "ATEN TBB Threading is deprecated.")
  set(AT_PARALLEL_NATIVE_TBB 1)
else()
  message(FATAL_ERROR "Unknown ATen parallel backend: ${ATEN_THREADING}")
endif()
|
||||
@ -1187,6 +1197,11 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU"
|
||||
set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/native/quantized/qlinear_unpack.cpp PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations)
|
||||
endif()
|
||||
|
||||
if(USE_TBB)
|
||||
list(APPEND ATen_CPU_INCLUDE ${TBB_INCLUDE_DIR})
|
||||
target_link_libraries(torch_cpu PUBLIC TBB::tbb)
|
||||
endif()
|
||||
|
||||
target_include_directories(torch_cpu PRIVATE ${ATen_CPU_INCLUDE})
|
||||
|
||||
target_include_directories(torch_cpu PRIVATE
|
||||
@ -1663,6 +1678,10 @@ if(BUILD_SHARED_LIBS)
|
||||
target_link_libraries(torch_global_deps ${Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS})
|
||||
target_link_libraries(torch_global_deps torch::cudart torch::nvtoolsext)
|
||||
endif()
|
||||
if(USE_TBB)
|
||||
target_link_libraries(torch_global_deps TBB::tbb)
|
||||
endif()
|
||||
|
||||
install(TARGETS torch_global_deps DESTINATION "${TORCH_INSTALL_LIB_DIR}")
|
||||
endif()
|
||||
|
||||
|
@ -134,6 +134,35 @@ else()
|
||||
"Cannot find threading library. PyTorch requires Threads to compile.")
|
||||
endif()
|
||||
|
||||
if(USE_TBB)
|
||||
if(USE_SYSTEM_TBB)
|
||||
find_package(TBB 2018.0 REQUIRED CONFIG COMPONENTS tbb)
|
||||
|
||||
get_target_property(TBB_INCLUDE_DIR TBB::tbb INTERFACE_INCLUDE_DIRECTORIES)
|
||||
else()
|
||||
message(STATUS "Compiling TBB from source")
|
||||
# Unset our restrictive C++ flags here and reset them later.
|
||||
# Remove this once we use proper target_compile_options.
|
||||
set(OLD_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
|
||||
set(CMAKE_CXX_FLAGS)
|
||||
|
||||
set(TBB_ROOT_DIR "${PROJECT_SOURCE_DIR}/third_party/tbb")
|
||||
set(TBB_BUILD_STATIC OFF CACHE BOOL " " FORCE)
|
||||
set(TBB_BUILD_SHARED ON CACHE BOOL " " FORCE)
|
||||
set(TBB_BUILD_TBBMALLOC OFF CACHE BOOL " " FORCE)
|
||||
set(TBB_BUILD_TBBMALLOC_PROXY OFF CACHE BOOL " " FORCE)
|
||||
set(TBB_BUILD_TESTS OFF CACHE BOOL " " FORCE)
|
||||
add_subdirectory(${PROJECT_SOURCE_DIR}/aten/src/ATen/cpu/tbb)
|
||||
set_property(TARGET tbb tbb_def_files PROPERTY FOLDER "dependencies")
|
||||
|
||||
set(CMAKE_CXX_FLAGS ${OLD_CMAKE_CXX_FLAGS})
|
||||
|
||||
set(TBB_INCLUDE_DIR "${TBB_ROOT_DIR}/include")
|
||||
|
||||
add_library(TBB::tbb ALIAS tbb)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# ---[ protobuf
|
||||
if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO)
|
||||
if(USE_LITE_PROTO)
|
||||
|
@ -71,8 +71,8 @@ IF (NOT "${MKL_THREADING}" STREQUAL "SEQ" AND
|
||||
MESSAGE(FATAL_ERROR "Invalid MKL_THREADING (${MKL_THREADING}), should be one of: SEQ, TBB, OMP")
|
||||
ENDIF()
|
||||
|
||||
IF ("${MKL_THREADING}" STREQUAL "TBB" AND NOT TARGET TBB::tbb)
|
||||
MESSAGE(FATAL_ERROR "MKL_THREADING is TBB but TBB is not found")
|
||||
IF ("${MKL_THREADING}" STREQUAL "TBB" AND NOT USE_TBB)
|
||||
MESSAGE(FATAL_ERROR "MKL_THREADING is TBB but USE_TBB is turned off")
|
||||
ENDIF()
|
||||
|
||||
MESSAGE(STATUS "MKL_THREADING = ${MKL_THREADING}")
|
||||
|
@ -101,7 +101,7 @@ IF(NOT MKLDNN_FOUND)
|
||||
IF(NOT MKLDNN_CPU_RUNTIME)
|
||||
SET(MKLDNN_CPU_RUNTIME "OMP" CACHE STRING "")
|
||||
ELSEIF(MKLDNN_CPU_RUNTIME STREQUAL "TBB")
|
||||
IF(TARGET TBB::tbb)
|
||||
IF(USE_TBB)
|
||||
MESSAGE(STATUS "MKL-DNN is using TBB")
|
||||
|
||||
SET(TBB_cmake_included TRUE)
|
||||
|
@ -152,6 +152,10 @@ function(caffe2_print_configuration_summary)
|
||||
message(STATUS " USE_OBSERVERS : ${USE_OBSERVERS}")
|
||||
message(STATUS " USE_OPENCL : ${USE_OPENCL}")
|
||||
message(STATUS " USE_OPENMP : ${USE_OPENMP}")
|
||||
message(STATUS " USE_TBB : ${USE_TBB}")
|
||||
if(${USE_TBB})
|
||||
message(STATUS " USE_SYSTEM_TBB : ${USE_SYSTEM_TBB}")
|
||||
endif()
|
||||
message(STATUS " USE_MIMALLOC : ${USE_MIMALLOC}")
|
||||
message(STATUS " USE_VULKAN : ${USE_VULKAN}")
|
||||
if(${USE_VULKAN})
|
||||
|
@ -317,6 +317,9 @@ function(caffe2_binary_target target_name_or_src)
|
||||
if(DEFINED Caffe2_MODULES)
|
||||
target_link_libraries(${__target} ${Caffe2_MODULES})
|
||||
endif()
|
||||
if(USE_TBB AND NOT USE_SYSTEM_TBB)
|
||||
target_include_directories(${__target} PUBLIC ${TBB_INCLUDE_DIR})
|
||||
endif()
|
||||
install(TARGETS ${__target} DESTINATION bin)
|
||||
endfunction()
|
||||
|
||||
|
2
defs.bzl
2
defs.bzl
@ -64,6 +64,8 @@ def get_cpu_parallel_backend_flags():
|
||||
defs = []
|
||||
if parallel_backend == "openmp":
|
||||
defs.append("-DAT_PARALLEL_OPENMP_FBCODE=1")
|
||||
elif parallel_backend == "tbb":
|
||||
defs.append("-DAT_PARALLEL_NATIVE_TBB_FBCODE=1")
|
||||
elif parallel_backend == "native":
|
||||
defs.append("-DAT_PARALLEL_NATIVE_FBCODE=1")
|
||||
else:
|
||||
|
8
setup.py
8
setup.py
@ -180,6 +180,13 @@
|
||||
# possible values:
|
||||
# OMP - use OpenMP for intra-op and native backend for inter-op tasks
|
||||
# NATIVE - use native thread pool for both intra- and inter-op tasks
|
||||
# TBB - use TBB for intra-op and native thread pool for inter-op tasks
|
||||
#
|
||||
# USE_TBB
|
||||
# enable TBB support
|
||||
#
|
||||
# USE_SYSTEM_TBB
|
||||
# Use system-provided Intel TBB.
|
||||
#
|
||||
# USE_SYSTEM_LIBS (work in progress)
|
||||
# Use system-provided libraries to satisfy the build dependencies.
|
||||
@ -322,6 +329,7 @@ def get_submodule_folders():
|
||||
for name in [
|
||||
"gloo",
|
||||
"cpuinfo",
|
||||
"tbb",
|
||||
"onnx",
|
||||
"foxi",
|
||||
"QNNPACK",
|
||||
|
5
third_party/mkl-dnn.BUILD
vendored
5
third_party/mkl-dnn.BUILD
vendored
@ -130,7 +130,10 @@ cc_library(
|
||||
],
|
||||
deps = [
|
||||
"@mkl",
|
||||
],
|
||||
] + select({
|
||||
"@pytorch//tools/config:thread_sanitizer": [],
|
||||
"//conditions:default": ["@tbb"],
|
||||
}),
|
||||
defines = [
|
||||
"DNNL_ENABLE_MAX_CPU_ISA",
|
||||
"DNNL_ENABLE_CONCURRENT_EXEC",
|
||||
|
5
third_party/mkl.BUILD
vendored
5
third_party/mkl.BUILD
vendored
@ -12,7 +12,10 @@ cc_library(
|
||||
"libmkl_vml_avx2.so",
|
||||
"libmkl_vml_avx512.so",
|
||||
"libmkl_vml_def.so",
|
||||
],
|
||||
] + select({
|
||||
"@pytorch//tools/config:thread_sanitizer": [],
|
||||
"//conditions:default": ["libmkl_tbb_thread.so"],
|
||||
}),
|
||||
visibility = ["//visibility:public"],
|
||||
deps = ["@mkl_headers"],
|
||||
)
|
||||
|
1
third_party/tbb
vendored
Submodule
1
third_party/tbb
vendored
Submodule
Submodule third_party/tbb added at a51a90bc60
75
third_party/tbb.BUILD
vendored
Normal file
75
third_party/tbb.BUILD
vendored
Normal file
@ -0,0 +1,75 @@
|
||||
load("@rules_cc//cc:defs.bzl", "cc_library")
|
||||
load("@pytorch//third_party:substitution.bzl", "template_rule")
|
||||
|
||||
licenses(["notice"]) # Apache 2.0
|
||||
|
||||
template_rule(
|
||||
name = "version_string",
|
||||
src = "@//:aten/src/ATen/cpu/tbb/extra/version_string.ver.in",
|
||||
out = "version_string.h",
|
||||
substitutions = {
|
||||
"@CMAKE_SYSTEM_NAME@": "Unknown",
|
||||
"@CMAKE_SYSTEM@": "Unknown",
|
||||
"@CMAKE_SYSTEM_VERSION@": "Unknown",
|
||||
"@CMAKE_CXX_COMPILER_ID@": "Unknown",
|
||||
"@_configure_date@": "Unknown",
|
||||
}
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "tbb",
|
||||
srcs = [":version_string"] + glob(
|
||||
[
|
||||
"src/old/*.h",
|
||||
"src/rml/client/*.h",
|
||||
"src/rml/include/*.h",
|
||||
"src/rml/server/*.h",
|
||||
"src/tbb/*.h",
|
||||
"src/tbb/tools_api/*.h",
|
||||
"src/tbb/tools_api/legacy/*.h",
|
||||
"src/old/*.cpp",
|
||||
"src/tbb/*.cpp",
|
||||
],
|
||||
exclude = ["src/old/test_*.cpp"],
|
||||
) + ["src/rml/client/rml_tbb.cpp"],
|
||||
hdrs = glob(
|
||||
[
|
||||
"include/tbb/*",
|
||||
"include/tbb/compat/*",
|
||||
"include/tbb/internal/*",
|
||||
"include/tbb/machine/*",
|
||||
],
|
||||
exclude = ["include/tbb/scalable_allocator.h"],
|
||||
),
|
||||
copts = [
|
||||
"-Iexternal/tbb/src/rml/include",
|
||||
"-Iexternal/tbb/src",
|
||||
"-pthread",
|
||||
"-DDO_ITT_NOTIFY=1",
|
||||
"-DUSE_PTHREAD=1",
|
||||
"-D__TBB_BUILD=1",
|
||||
"-D__TBB_DYNAMIC_LOAD_ENABLED=0",
|
||||
"-D__TBB_SOURCE_DIRECTLY_INCLUDED=1",
|
||||
"-fno-sanitize=vptr",
|
||||
"-fno-sanitize=thread",
|
||||
],
|
||||
defines = [
|
||||
# TBB Cannot detect the standard library version when using clang with libstdc++.
|
||||
# See https://github.com/01org/tbb/issues/22
|
||||
"TBB_USE_GLIBCXX_VERSION=(_GLIBCXX_RELEASE*10000)",
|
||||
"TBB_PREVIEW_GLOBAL_CONTROL=1",
|
||||
"TBB_PREVIEW_LOCAL_OBSERVER=1",
|
||||
"__TBB_ALLOW_MUTABLE_FUNCTORS=1",
|
||||
],
|
||||
includes = [
|
||||
"include",
|
||||
"src/tbb/tools_api",
|
||||
],
|
||||
linkopts = [
|
||||
"-ldl",
|
||||
"-lpthread",
|
||||
"-lrt",
|
||||
],
|
||||
textual_hdrs = ["src/tbb/tools_api/ittnotify_static.c"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
34
third_party/tbb.patch
vendored
Normal file
34
third_party/tbb.patch
vendored
Normal file
@ -0,0 +1,34 @@
|
||||
diff --git a/src/rml/server/rml_server.cpp b/src/rml/server/rml_server.cpp
|
||||
index 2508465..1e22ad2 100644
|
||||
--- a/src/rml/server/rml_server.cpp
|
||||
+++ b/src/rml/server/rml_server.cpp
|
||||
@@ -3279,10 +3279,10 @@ extern "C" void __KMP_call_with_my_server_info( ::rml::server_info_callback_t cb
|
||||
/*
|
||||
* RML server info
|
||||
*/
|
||||
-#include "version_string.ver"
|
||||
+#include "version_string.h"
|
||||
|
||||
#ifndef __TBB_VERSION_STRINGS
|
||||
-#pragma message("Warning: version_string.ver isn't generated properly by version_info.sh script!")
|
||||
+#pragma message("Warning: version_string.h isn't generated properly by version_info.sh script!")
|
||||
#endif
|
||||
|
||||
// We use the build time as the RML server info. TBB is required to build RML, so we make it the same as the TBB build time.
|
||||
diff --git a/src/tbb/tbb_version.h b/src/tbb/tbb_version.h
|
||||
index dcaa55b..4981a8a 100644
|
||||
--- a/src/tbb/tbb_version.h
|
||||
+++ b/src/tbb/tbb_version.h
|
||||
@@ -25,10 +25,10 @@
|
||||
#ifndef ENDL
|
||||
#define ENDL "\n"
|
||||
#endif
|
||||
-#include "version_string.ver"
|
||||
+#include "version_string.h"
|
||||
|
||||
#ifndef __TBB_VERSION_STRINGS
|
||||
-#pragma message("Warning: version_string.ver isn't generated properly by version_info.sh script!")
|
||||
+#pragma message("Warning: version_string.h isn't generated properly by version_info.sh script!")
|
||||
// here is an example of macros value:
|
||||
#define __TBB_VERSION_STRINGS \
|
||||
"TBB: BUILD_HOST\tUnknown\n" \
|
@ -24,7 +24,7 @@ from torch.testing._internal.common_nn import (
|
||||
marginrankingloss_reference, multimarginloss_reference, multilabelmarginloss_reference,
|
||||
nllloss_reference, nlllossNd_reference, smoothl1loss_reference, softmarginloss_reference, get_reduction)
|
||||
from torch.testing._internal.common_utils import (
|
||||
freeze_rng_state, skipIfMps, GRADCHECK_NONDET_TOL, TEST_WITH_ROCM, IS_WINDOWS,
|
||||
freeze_rng_state, set_single_threaded_if_parallel_tbb, skipIfMps, GRADCHECK_NONDET_TOL, TEST_WITH_ROCM, IS_WINDOWS,
|
||||
skipIfTorchDynamo)
|
||||
from types import ModuleType
|
||||
from typing import List, Tuple, Type, Set, Dict
|
||||
@ -235,7 +235,7 @@ class ModuleInfo:
|
||||
self.is_lazy = issubclass(module_cls, torch.nn.modules.lazy.LazyModuleMixin)
|
||||
|
||||
def get_decorators(self, test_class, test_name, device, dtype, param_kwargs):
|
||||
result = []
|
||||
result = [set_single_threaded_if_parallel_tbb]
|
||||
for decorator in self.decorators:
|
||||
if isinstance(decorator, DecorateInfo):
|
||||
if decorator.is_active(test_class, test_name, device, dtype, param_kwargs):
|
||||
|
@ -39,6 +39,7 @@ from torch.testing._internal.common_device_type import tol, toleranceOverride
|
||||
from torch.testing._internal.common_methods_invocations import DecorateInfo
|
||||
from torch.testing._internal.common_utils import (
|
||||
_TestParametrizer,
|
||||
set_single_threaded_if_parallel_tbb,
|
||||
skipIfMps,
|
||||
skipIfTorchDynamo,
|
||||
TEST_WITH_TORCHDYNAMO,
|
||||
@ -160,7 +161,7 @@ class OptimizerInfo:
|
||||
self.supports_fused_on = supports_fused_on
|
||||
|
||||
def get_decorators(self, test_class, test_name, device, dtype, param_kwargs):
|
||||
result = []
|
||||
result = [set_single_threaded_if_parallel_tbb]
|
||||
for decorator in self.decorators:
|
||||
if isinstance(decorator, DecorateInfo):
|
||||
if decorator.is_active(
|
||||
|
@ -1494,6 +1494,8 @@ def disable_translation_validation_if_dynamic_shapes(fn):
|
||||
# See: https://github.com/pytorch/pytorch/pull/59402#issuecomment-858811135
|
||||
TestEnvironment.def_flag("TEST_CUDA_MEM_LEAK_CHECK", env_var="PYTORCH_TEST_CUDA_MEM_LEAK_CHECK")
|
||||
|
||||
# True if CI is running a TBB-enabled PyTorch build
|
||||
IS_TBB = "tbb" in os.getenv("BUILD_ENVIRONMENT", "")
|
||||
|
||||
# Dict of NumPy dtype -> torch dtype (when the correspondence exists)
|
||||
numpy_to_torch_dtype_dict = {
|
||||
@ -1870,6 +1872,19 @@ def skipIfNoSciPy(fn):
|
||||
fn(*args, **kwargs)
|
||||
return wrapper
|
||||
|
||||
|
||||
def skipIfTBB(message="This test makes TBB sad"):
    """Return a decorator that skips the wrapped callable when ``IS_TBB`` is true.

    Args:
        message: Reason passed to ``unittest.SkipTest`` when the call is skipped.

    Returns:
        A decorator. The decorated callable raises ``unittest.SkipTest(message)``
        if ``IS_TBB`` is true when it is called; otherwise it forwards all
        arguments to the original callable and returns its result.
    """
    def dec_fn(fn):
        @wraps(fn)
        def wrapper(*args, **kwargs):
            # IS_TBB is consulted on every call, not once at decoration time.
            if IS_TBB:
                raise unittest.SkipTest(message)
            # Hand the result back so the decorator stays transparent to callers.
            return fn(*args, **kwargs)
        return wrapper
    return dec_fn
|
||||
|
||||
|
||||
def skip_if_pytest(fn):
|
||||
@wraps(fn)
|
||||
def wrapped(*args, **kwargs):
|
||||
@ -4732,6 +4747,24 @@ dtype_abbrs = {
|
||||
}
|
||||
|
||||
|
||||
def set_single_threaded_if_parallel_tbb(fn):
    """Decorate ``fn`` so it runs with torch limited to one thread under TBB.

    If ``IS_TBB`` is false at decoration time, ``fn`` is returned unchanged.
    Otherwise the returned wrapper records ``torch.get_num_threads()``, calls
    ``torch.set_num_threads(1)``, runs ``fn``, and restores the recorded
    thread count afterwards, even when ``fn`` raises.

    See https://github.com/pytorch/pytorch/issues/64571#issuecomment-914691883
    """
    if IS_TBB:
        @wraps(fn)
        def single_threaded(*args, **kwargs):
            previous = torch.get_num_threads()
            torch.set_num_threads(1)
            try:
                result = fn(*args, **kwargs)
            finally:
                # Always put the thread count back, whether fn returned or raised.
                torch.set_num_threads(previous)
            return result

        return single_threaded

    return fn
|
||||
|
||||
|
||||
@functools.lru_cache
|
||||
def get_cycles_per_ms() -> float:
|
||||
|
@ -1880,6 +1880,9 @@ def _prepare_ldflags(extra_ldflags, with_cuda, verbose, is_standalone):
|
||||
if not is_standalone:
|
||||
extra_ldflags.append('-ltorch_python')
|
||||
|
||||
if is_standalone and "TBB" in torch.__config__.parallel_info():
|
||||
extra_ldflags.append('-ltbb')
|
||||
|
||||
if is_standalone:
|
||||
extra_ldflags.append(f"-Wl,-rpath,{TORCH_LIB_PATH}")
|
||||
|
||||
|
Reference in New Issue
Block a user