Revert "[Submodule] Remove deprecated USE_TBB option and TBB submodule (#127051)"

This reverts commit 699db7988d84d163ebb6919f78885e4630182a7a.

Reverted https://github.com/pytorch/pytorch/pull/127051 on behalf of https://github.com/PaliC due to This PR needs to be synced using the import button as there is a bug in our diff train ([comment](https://github.com/pytorch/pytorch/pull/127051#issuecomment-2138496995))
This commit is contained in:
PyTorch MergeBot
2024-05-30 01:16:57 +00:00
parent 1abcac9dab
commit 67739d8c6f
34 changed files with 863 additions and 19 deletions

View File

@ -44,7 +44,10 @@ if [[ "$BUILD_ENVIRONMENT" == *cuda11* ]]; then
fi
fi
if [[ ${BUILD_ENVIRONMENT} == *"parallelnative"* ]]; then
if [[ ${BUILD_ENVIRONMENT} == *"paralleltbb"* ]]; then
export ATEN_THREADING=TBB
export USE_TBB=1
elif [[ ${BUILD_ENVIRONMENT} == *"parallelnative"* ]]; then
export ATEN_THREADING=NATIVE
fi

View File

@ -776,6 +776,7 @@ test_aten() {
${SUDO} ln -sf "$TORCH_LIB_DIR"/libmkldnn* "$TEST_BASE_DIR"
${SUDO} ln -sf "$TORCH_LIB_DIR"/libnccl* "$TEST_BASE_DIR"
${SUDO} ln -sf "$TORCH_LIB_DIR"/libtorch* "$TEST_BASE_DIR"
${SUDO} ln -sf "$TORCH_LIB_DIR"/libtbb* "$TEST_BASE_DIR"
ls "$TEST_BASE_DIR"
aten/tools/run_tests.sh "$TEST_BASE_DIR"
@ -800,6 +801,21 @@ test_without_numpy() {
popd
}
# PyTorch extensions must include torch/extension.h, which includes all.h,
# which includes utils.h, which includes Parallel.h.
# So you can call, for instance, parallel_for() from your extension,
# but the compilation will fail because Parallel.h has only declarations;
# the definitions are conditionally included (see the last lines of Parallel.h).
# I tried to solve this in #39612 and #39881 by including Config.h in Parallel.h.
# But if PyTorch is built with TBB, it provides a Config.h
# that has AT_PARALLEL_NATIVE_TBB=1 (see #39612 or #39881), which means that
# including torch/extension.h transitively includes Parallel.h,
# which transitively includes tbb.h, which is not available!
# For TBB build environments, copy the bundled TBB headers into
# /usr/include/tbb so that <tbb/...> includes resolve system-wide.
case "${BUILD_ENVIRONMENT}" in
  *tbb*)
    sudo mkdir -p /usr/include/tbb
    sudo cp -r "$PWD"/third_party/tbb/include/tbb/* /usr/include/tbb
    ;;
esac
test_libtorch() {
local SHARD="$1"
@ -813,6 +829,7 @@ test_libtorch() {
ln -sf "$TORCH_LIB_DIR"/libc10* "$TORCH_BIN_DIR"
ln -sf "$TORCH_LIB_DIR"/libshm* "$TORCH_BIN_DIR"
ln -sf "$TORCH_LIB_DIR"/libtorch* "$TORCH_BIN_DIR"
ln -sf "$TORCH_LIB_DIR"/libtbb* "$TORCH_BIN_DIR"
ln -sf "$TORCH_LIB_DIR"/libnvfuser* "$TORCH_BIN_DIR"
export CPP_TESTS_DIR="${TORCH_BIN_DIR}"
@ -949,6 +966,7 @@ test_rpc() {
# test reporting process to function as expected.
ln -sf "$TORCH_LIB_DIR"/libtorch* "$TORCH_BIN_DIR"
ln -sf "$TORCH_LIB_DIR"/libc10* "$TORCH_BIN_DIR"
ln -sf "$TORCH_LIB_DIR"/libtbb* "$TORCH_BIN_DIR"
CPP_TESTS_DIR="${TORCH_BIN_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_cpp_rpc
}

4
.gitmodules vendored
View File

@ -82,6 +82,10 @@
ignore = dirty
path = third_party/foxi
url = https://github.com/houseroad/foxi.git
[submodule "third_party/tbb"]
path = third_party/tbb
url = https://github.com/01org/tbb
branch = tbb_2018
[submodule "android/libs/fbjni"]
ignore = dirty
path = android/libs/fbjni

View File

@ -125,6 +125,10 @@ filegroup(
data = [":generate-code"],
)
exports_files(
srcs = ["aten/src/ATen/cpu/tbb/extra/version_string.ver.in"],
)
# ATen
filegroup(
name = "aten_base_cpp",
@ -271,6 +275,7 @@ header_template_rule(
"@AT_BUILD_WITH_LAPACK@": "1",
"@AT_PARALLEL_OPENMP@": "0",
"@AT_PARALLEL_NATIVE@": "1",
"@AT_PARALLEL_NATIVE_TBB@": "0",
"@AT_BLAS_F2C@": "0",
"@AT_BLAS_USE_CBLAS_DOT@": "1",
},
@ -354,9 +359,6 @@ cc_library(
":aten_src_ATen_config",
] + generated_cpu_cpp + aten_ufunc_generated_cpu_sources("aten/src/ATen/{}"),
copts = ATEN_COPTS,
linkopts = [
"-ldl",
],
data = if_cuda(
[":libcaffe2_nvrtc.so"],
[],
@ -770,9 +772,6 @@ cc_library(
],
)) + torch_sources,
copts = TORCH_COPTS,
linkopts = [
"-lrt",
],
defines = [
"CAFFE2_NIGHTLY_VERSION=20200115",
],
@ -792,9 +791,6 @@ cc_library(
cc_library(
name = "shm",
srcs = glob(["torch/lib/libshm/*.cpp"]),
linkopts = [
"-lrt",
],
deps = [
":torch",
],

View File

@ -362,6 +362,9 @@ cmake_dependent_option(
cmake_dependent_option(
USE_TENSORPIPE "Use TensorPipe. Only available if USE_DISTRIBUTED is on." ON
"USE_DISTRIBUTED" OFF)
# USE_TBB: opt-in switch for the (deprecated) TBB support; off by default.
option(USE_TBB "Use TBB (Deprecated)" OFF)
# USE_SYSTEM_TBB: only user-selectable while USE_TBB is on; forced OFF otherwise.
cmake_dependent_option(
USE_SYSTEM_TBB "Use system-provided Intel TBB." OFF "USE_TBB" OFF)
option(ONNX_ML "Enable traditional ONNX ML API." ON)
option(HAVE_SOVERSION "Whether to add SOVERSION to the shared objects" OFF)
option(BUILD_LIBTORCH_CPU_WITH_DEBUG
@ -480,6 +483,9 @@ if(USE_SYSTEM_LIBS)
if(USE_NCCL)
set(USE_SYSTEM_NCCL ON)
endif()
if(USE_TBB)
set(USE_SYSTEM_TBB ON)
endif()
endif()
# Used when building Caffe2 through setup.py

View File

@ -168,6 +168,16 @@ new_local_repository(
path = "third_party/opentelemetry-cpp",
)
# Exposes the sources in third_party/tbb as the external repository "tbb".
# The repository is built with //third_party:tbb.BUILD after applying
# //third_party:tbb.patch (patch_strip = 1).
new_patched_local_repository(
name = "tbb",
build_file = "//third_party:tbb.BUILD",
patch_strip = 1,
patches = [
"@//third_party:tbb.patch",
],
path = "third_party/tbb",
)
new_local_repository(
name = "tensorpipe",
build_file = "//third_party:tensorpipe.BUILD",

View File

@ -349,6 +349,16 @@ endif()
list(APPEND ATen_CPU_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/..)
# TBB support: report which TBB is in use, then add its include directory and
# imported target to the ATen CPU build lists.
if(USE_TBB)
  if(NOT USE_SYSTEM_TBB)
    message("ATen is compiled with Intel TBB (${TBB_ROOT_DIR}).")
  else()
    message("ATen is compiled with system-provided Intel TBB.")
  endif()
  list(APPEND ATen_CPU_DEPENDENCY_LIBS TBB::tbb)
  list(APPEND ATen_CPU_INCLUDE ${TBB_INCLUDE_DIR})
endif()
if(BLAS_FOUND)
if($ENV{TH_BINARY_BUILD})
message(STATUS "TH_BINARY_BUILD detected. Enabling special linkage.")

View File

@ -17,5 +17,6 @@
#define AT_BUILD_WITH_LAPACK() @AT_BUILD_WITH_LAPACK@
#define AT_PARALLEL_OPENMP @AT_PARALLEL_OPENMP@
#define AT_PARALLEL_NATIVE @AT_PARALLEL_NATIVE@
#define AT_PARALLEL_NATIVE_TBB @AT_PARALLEL_NATIVE_TBB@
#define AT_BLAS_F2C() @AT_BLAS_F2C@
#define AT_BLAS_USE_CBLAS_DOT() @AT_BLAS_USE_CBLAS_DOT@

View File

@ -153,6 +153,8 @@ TORCH_API int intraop_default_num_threads();
#include <ATen/ParallelOpenMP.h> // IWYU pragma: keep
#elif AT_PARALLEL_NATIVE
#include <ATen/ParallelNative.h> // IWYU pragma: keep
#elif AT_PARALLEL_NATIVE_TBB
#include <ATen/ParallelNativeTBB.h> // IWYU pragma: keep
#endif
#include <ATen/Parallel-inl.h> // IWYU pragma: keep

View File

@ -80,6 +80,8 @@ std::string get_parallel_info() {
ss << "OpenMP";
#elif AT_PARALLEL_NATIVE
ss << "native thread pool";
#elif AT_PARALLEL_NATIVE_TBB
ss << "native thread pool and TBB";
#endif
#ifdef C10_MOBILE
ss << " [mobile]";

View File

@ -0,0 +1,115 @@
#include <ATen/Config.h>
#if AT_PARALLEL_NATIVE_TBB
#include <ATen/Parallel.h>
#include <ATen/ParallelFuture.h>
#include <ATen/PTThreadPool.h>
#include <atomic>
#include <mutex>
#include <tbb/tbb.h>
#define TBB_PREVIEW_GLOBAL_CONTROL 1
#include <tbb/global_control.h>
#ifdef _OPENMP
#include <omp.h>
#endif
#if AT_MKL_ENABLED()
#include <mkl.h>
#endif
namespace at {
namespace {
// Per-thread task group that intraop_launch()/intraop_launch_future() submit to.
static thread_local tbb::task_group tg_;
// Per-thread id returned by get_thread_num(); written by internal::set_thread_num().
thread_local int this_thread_id{0};
// Guards the updates performed in _internal_set_num_threads().
std::mutex global_thread_mutex_;
// The tbb::global_control object currently holding the parallelism limit;
// replaced each time the thread count is (re)applied.
std::shared_ptr<tbb::global_control> global_thread_limit_ = nullptr;
// Thread count last applied via _internal_set_num_threads(); -1 until then.
std::atomic<int> num_intraop_threads_{-1};
// Applies `nthreads` as TBB's max_allowed_parallelism by installing a fresh
// tbb::global_control, and records the value in num_intraop_threads_.
// `nthreads` must be positive (checked with an internal assert).
void _internal_set_num_threads(int nthreads) {
  TORCH_INTERNAL_ASSERT(nthreads > 0);
  // This is an antipattern and we shouldn't be constraining the number of
  // threads in library code.
  // TODO: Think of a smarter way to leverage tbb::thread_arena to limit the
  // number of slots instead of the number of threads.
  const std::lock_guard<std::mutex> guard(global_thread_mutex_);
  global_thread_limit_ = std::make_shared<tbb::global_control>(
      tbb::global_control::max_allowed_parallelism, nthreads);
  num_intraop_threads_.store(nthreads);
}
}
// Pins OpenMP and MKL (when compiled in) to a single thread each, then applies
// the intra-op thread count: the previously recorded value if there is one,
// otherwise intraop_default_num_threads().
void init_num_threads() {
#ifdef _OPENMP
  omp_set_num_threads(1);
#endif

#if AT_MKL_ENABLED()
  mkl_set_num_threads(1);
#endif

  const int recorded = num_intraop_threads_.load();
  // A negative value means no thread count has been recorded yet.
  _internal_set_num_threads(
      recorded < 0 ? intraop_default_num_threads() : recorded);
}
// Sets the number of intra-op threads.
// Raises (via TORCH_CHECK) when `nthreads` is not positive; the message now
// states the requirement and the offending value instead of only echoing the
// failed expression.
void set_num_threads(int nthreads) {
  TORCH_CHECK(
      nthreads > 0,
      "Expected positive number of threads, but got ",
      nthreads);
  _internal_set_num_threads(nthreads);
}
// Returns TBB's currently active max_allowed_parallelism value, after making
// sure the lazy thread-count initialization has run.
int get_num_threads() {
  at::internal::lazy_init_num_threads();
  const auto active_limit = tbb::global_control::active_value(
      tbb::global_control::max_allowed_parallelism);
  return active_limit;
}
// Returns the calling thread's id as stored in the thread-local
// this_thread_id (0 unless changed through internal::set_thread_num()).
int get_thread_num() {
return this_thread_id;
}
namespace internal {
// Stores `id` in the calling thread's this_thread_id, which is the value
// get_thread_num() reports for this thread.
void set_thread_num(int id) {
this_thread_id = id;
}
}
// Reports whether the calling thread has a non-negative index in the current
// TBB task arena.
bool in_parallel_region() {
  const int arena_index = tbb::this_task_arena::current_thread_index();
  return arena_index >= 0;
}
// Runs `func` as an intra-op task: inline when at most one thread is
// configured, otherwise submitted to the calling thread's task group.
void intraop_launch(std::function<void()> func) {
  if (get_num_threads() <= 1) {
    func();
    return;
  }
  tg_.run(func);
}
// Like intraop_launch(), but returns a Future (of NoneType) that is marked
// completed once `func` has finished running.
c10::intrusive_ptr<c10::ivalue::Future> intraop_launch_future(
    std::function<void()> func) {
  auto future = c10::make_intrusive<c10::ivalue::Future>(NoneType::get());

  // Single-threaded configuration: execute synchronously.
  if (get_num_threads() <= 1) {
    func();
    future->markCompleted();
    return future;
  }

  // Otherwise hand the work to this thread's task group; the task completes
  // the future itself.
  tg_.run([func, future]() {
    func();
    future->markCompleted();
  });
  return future;
}
} // namespace at
#endif

View File

@ -0,0 +1,52 @@
#pragma once
#include <atomic>
#include <cstddef>
#include <exception>
#include <c10/util/Exception.h>
#ifdef _WIN32
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#endif
#include <tbb/tbb.h>
#define INTRA_OP_PARALLEL
namespace at::internal {
template <typename F>
inline void invoke_parallel(
const int64_t begin,
const int64_t end,
const int64_t grain_size,
const F& f) {
// Choose number of tasks based on grain size and number of threads.
int64_t chunk_size = divup((end - begin), get_num_threads());
// Make sure each task is at least grain_size size.
chunk_size = std::max(grain_size, chunk_size);
std::atomic_flag err_flag = ATOMIC_FLAG_INIT;
std::exception_ptr eptr;
tbb::parallel_for(
tbb::blocked_range<int64_t>(begin, end, chunk_size),
[&eptr, &err_flag, f](const tbb::blocked_range<int64_t>& r) {
try {
internal::ThreadIdGuard tid_guard(
tbb::this_task_arena::current_thread_index());
f(r.begin(), r.end());
} catch (...) {
if (!err_flag.test_and_set()) {
eptr = std::current_exception();
}
}
},
tbb::static_partitioner{});
if (eptr) {
std::rethrow_exception(eptr);
}
}
} // namespace at::internal

View File

@ -1,5 +1,5 @@
#include <ATen/Config.h>
#if AT_PARALLEL_OPENMP || AT_PARALLEL_NATIVE
#if AT_PARALLEL_OPENMP || AT_PARALLEL_NATIVE || AT_PARALLEL_NATIVE_TBB
#include <ATen/Parallel.h>
#include <ATen/PTThreadPool.h>
#include <ATen/ThreadLocalState.h>

View File

@ -0,0 +1,391 @@
# Based on https://github.com/wjakob/tbb/blob/master/CMakeLists.txt
# All credit goes to Wenzel Jakob!
# CMake 2.8.12 is still sufficient to process this file, but newer CMake
# releases deprecate (and CMake 4.0 rejects) a policy version below 3.5.
# The <min>...<max> form keeps the old minimum while opting in to policies up
# to 3.5 when a newer CMake is running; CMake older than 3.12 ignores the
# "...<max>" suffix.
cmake_minimum_required(VERSION 2.8.12...3.5 FATAL_ERROR)
project(tbb CXX)

# Provide check_cxx_compiler_flag() and check_cxx_source_runs(), used below.
include(CheckCXXCompilerFlag)
include(CheckCXXSourceRuns)

# Opt in to the NEW behaviour of CMP0058 wherever the running CMake knows it.
if(POLICY CMP0058)
  cmake_policy(SET CMP0058 NEW)
endif()
# Default to a Release build when neither CMAKE_BUILD_TYPE nor
# CMAKE_CONFIGURATION_TYPES has been set.
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
message(STATUS "Setting build type to 'Release' as none was specified.")
set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)
# Record the accepted values on the cache entry.
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release"
"MinSizeRel" "RelWithDebInfo")
endif()
# Fallback values for settings the including project may have provided already.
# TBB_ROOT_DIR: root of the TBB source tree (defaults to this directory).
if(NOT TBB_ROOT_DIR)
set(TBB_ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
endif()
# Export set that the install(TARGETS ...) calls below attach the targets to.
if(NOT TBB_INSTALL_EXPORT_NAME)
set(TBB_INSTALL_EXPORT_NAME "Caffe2Targets")
endif()
if(NOT TBB_INSTALL_EXPORT_DESTINATION)
set(TBB_INSTALL_EXPORT_DESTINATION lib)
endif()
# Install destinations for runtime, library and archive artifacts.
if(NOT TBB_INSTALL_RUNTIME_DIR)
set(TBB_INSTALL_RUNTIME_DIR bin)
endif()
if(NOT TBB_INSTALL_LIBRARY_DIR)
set(TBB_INSTALL_LIBRARY_DIR lib)
endif()
if(NOT TBB_INSTALL_ARCHIVE_DIR)
set(TBB_INSTALL_ARCHIVE_DIR lib)
endif()
# NOTE(review): the default header destination is inside the TBB source tree
# itself (an absolute path) - confirm this is intended.
if(NOT TBB_INSTALL_INCLUDE_DIR)
set(TBB_INSTALL_INCLUDE_DIR "${TBB_ROOT_DIR}/include")
endif()
# Private include directories used by the TBB library targets below.
set(TBB_INCLUDES
"${TBB_ROOT_DIR}/include"
"${TBB_ROOT_DIR}/src"
"${TBB_ROOT_DIR}/src/rml/include"
${CMAKE_CURRENT_BINARY_DIR})
# Build switches; every one defaults to ON except TBB_CI_BUILD.
option(TBB_BUILD_SHARED "Build TBB shared library" ON)
option(TBB_BUILD_STATIC "Build TBB static library" ON)
option(TBB_BUILD_TBBMALLOC "Build TBB malloc library" ON)
option(TBB_BUILD_TBBMALLOC_PROXY "Build TBB malloc proxy library" ON)
option(TBB_BUILD_TESTS "Build TBB tests and enable testing infrastructure" ON)
option(TBB_CI_BUILD "Is this a continuous integration build?" OFF)
# Turn on MACOSX_RPATH for the targets created below when building on Apple.
if(APPLE)
set(CMAKE_MACOSX_RPATH ON)
endif()
# Source lists.
#
# Every path is anchored at TBB_ROOT_DIR. The tbbmalloc and proxy sources used
# to be written relative to this CMakeLists.txt, which only resolves when
# TBB_ROOT_DIR happens to be the current source directory.

# Core TBB library: everything in src/tbb and src/old (minus the old tests),
# plus the RML client.
file(GLOB tbb_src "${TBB_ROOT_DIR}/src/tbb/*.cpp" "${TBB_ROOT_DIR}/src/old/*.cpp")
list(APPEND tbb_src "${TBB_ROOT_DIR}/src/rml/client/rml_tbb.cpp")
file(GLOB to_remove "${TBB_ROOT_DIR}/src/old/test*.cpp")
if(NOT "${to_remove}" STREQUAL "")
  list(REMOVE_ITEM tbb_src ${to_remove})
endif()

# tbbmalloc: the static list is the base; the shared list may gain
# platform-specific files further down.
set(tbbmalloc_static_src
  "${TBB_ROOT_DIR}/src/tbbmalloc/backend.cpp"
  "${TBB_ROOT_DIR}/src/tbbmalloc/large_objects.cpp"
  "${TBB_ROOT_DIR}/src/tbbmalloc/backref.cpp"
  "${TBB_ROOT_DIR}/src/tbbmalloc/tbbmalloc.cpp"
  "${TBB_ROOT_DIR}/src/tbbmalloc/frontend.cpp"
  "${TBB_ROOT_DIR}/src/tbb/itt_notify.cpp")
set(tbbmalloc_src ${tbbmalloc_static_src})

# tbbmalloc proxy library.
set(tbbmalloc_proxy_src
  "${TBB_ROOT_DIR}/src/tbbmalloc/proxy.cpp"
  "${TBB_ROOT_DIR}/src/tbbmalloc/tbb_function_replacement.cpp")
# Define DO_ITT_NOTIFY on i386/x86_64 processors, except on Apple and MinGW.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(i386|x86_64)")
if(NOT APPLE AND NOT MINGW)
add_definitions(-DDO_ITT_NOTIFY)
endif()
endif()
# On Apple, override the static-archive create/finish rules for C and C++.
if(APPLE)
# Disable annoying "has no symbols" warnings
set(CMAKE_C_ARCHIVE_CREATE "<CMAKE_AR> Scr <TARGET> <LINK_FLAGS> <OBJECTS>")
set(CMAKE_CXX_ARCHIVE_CREATE "<CMAKE_AR> Scr <TARGET> <LINK_FLAGS> <OBJECTS>")
set(CMAKE_C_ARCHIVE_FINISH "<CMAKE_RANLIB> -no_warning_for_no_symbols -c <TARGET>")
set(CMAKE_CXX_ARCHIVE_FINISH "<CMAKE_RANLIB> -no_warning_for_no_symbols -c <TARGET>")
endif()
# CHECK_CXX_COMPILER_AND_LINKER_FLAGS(<result> <cxx-flags> <linker-flags>)
#
# Uses check_cxx_source_runs() on a trivial C++ program with <cxx-flags> as
# CMAKE_REQUIRED_FLAGS and <linker-flags> as CMAKE_REQUIRED_LIBRARIES, storing
# the outcome in <result>.
#
# Side effects (this is a macro, so they apply in the caller's scope):
# CMAKE_REQUIRED_FLAGS and CMAKE_REQUIRED_LIBRARIES are set to "" afterwards
# rather than restored, and CMAKE_REQUIRED_QUIET is left set to TRUE.
macro(CHECK_CXX_COMPILER_AND_LINKER_FLAGS _RESULT _CXX_FLAGS _LINKER_FLAGS)
set(CMAKE_REQUIRED_FLAGS ${_CXX_FLAGS})
set(CMAKE_REQUIRED_LIBRARIES ${_LINKER_FLAGS})
set(CMAKE_REQUIRED_QUIET TRUE)
check_cxx_source_runs("#include <iostream>\nint main(int argc, char **argv) { std::cout << \"test\"; return 0; }" ${_RESULT})
set(CMAKE_REQUIRED_FLAGS "")
set(CMAKE_REQUIRED_LIBRARIES "")
endmacro()
# Prefer libc++ in conjunction with Clang
# If the flags already request libc++ nothing is changed; otherwise libc++ is
# probed with CHECK_CXX_COMPILER_AND_LINKER_FLAGS and, when usable, added to
# the compile flags and to the executable/shared-library link flags.
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
if(CMAKE_CXX_FLAGS MATCHES "-stdlib=libc\\+\\+")
message(STATUS "TBB: using libc++.")
else()
CHECK_CXX_COMPILER_AND_LINKER_FLAGS(HAS_LIBCPP "-stdlib=libc++" "-stdlib=libc++")
if(HAS_LIBCPP)
string(APPEND CMAKE_CXX_FLAGS " -stdlib=libc++ -D_LIBCPP_VERSION")
string(APPEND CMAKE_EXE_LINKER_FLAGS " -stdlib=libc++")
string(APPEND CMAKE_SHARED_LINKER_FLAGS " -stdlib=libc++")
message(STATUS "TBB: using libc++.")
else()
message(STATUS "TBB: NOT using libc++.")
endif()
endif()
endif()
# Platform-specific compiler setup: threading-model defines, optional compiler
# flags, and (for MSVC) the MASM sources matching the pointer size.
if(UNIX)
  add_definitions(-DUSE_PTHREAD)

  # Use C++17 and RTM instructions when the compiler accepts the flags.
  check_cxx_compiler_flag("-std=c++17" SUPPORTS_STDCXX17)
  if(SUPPORTS_STDCXX17)
    set(CMAKE_CXX_FLAGS "-std=c++17 ${CMAKE_CXX_FLAGS}")
  endif()

  check_cxx_compiler_flag("-mrtm -Werror" SUPPORTS_MRTM)
  if(SUPPORTS_MRTM)
    set(CMAKE_CXX_FLAGS "-mrtm ${CMAKE_CXX_FLAGS}")
  endif()

elseif(WIN32)
  if(MSVC)
    # The MSVC path needs CMake >= 3.1. The <min>...<max> form keeps that
    # minimum while remaining acceptable to CMake releases that reject a
    # policy version below 3.5.
    cmake_minimum_required(VERSION 3.1...3.5)
    enable_language(ASM_MASM)
    set(CMAKE_CXX_FLAGS "/GS- /Zc:wchar_t /Zc:forScope /DUSE_WINTHREAD ${CMAKE_CXX_FLAGS}")
    set(CMAKE_CXX_FLAGS "/D_CRT_SECURE_NO_DEPRECATE /D_WIN32_WINNT=0x0600 ${CMAKE_CXX_FLAGS}")
    check_cxx_compiler_flag("/volatile:iso" SUPPORTS_VOLATILE_FLAG)
    if(SUPPORTS_VOLATILE_FLAG)
      set(CMAKE_CXX_FLAGS "/volatile:iso ${CMAKE_CXX_FLAGS}")
    endif()
    set(CMAKE_CXX_FLAGS "/wd4267 /wd4800 /wd4146 /wd4244 /wd4577 /wd4018 ${CMAKE_CXX_FLAGS}")
    if(NOT CMAKE_SIZEOF_VOID_P)
      message(FATAL_ERROR "'CMAKE_SIZEOF_VOID_P' is undefined. Please delete your build directory and rerun CMake again!")
    endif()

    if(CMAKE_SIZEOF_VOID_P EQUAL 8)
      list(APPEND tbb_src "${TBB_ROOT_DIR}/src/tbb/intel64-masm/atomic_support.asm")
      list(APPEND tbb_src "${TBB_ROOT_DIR}/src/tbb/intel64-masm/itsx.asm")
      list(APPEND tbb_src "${TBB_ROOT_DIR}/src/tbb/intel64-masm/intel64_misc.asm")
      list(APPEND tbbmalloc_src "${TBB_ROOT_DIR}/src/tbb/intel64-masm/atomic_support.asm")
      set(CMAKE_ASM_MASM_FLAGS "/DEM64T=1 ${CMAKE_ASM_MASM_FLAGS}")
    else()
      # Each assembly file is its own list element, anchored at TBB_ROOT_DIR.
      # Previously itsx.asm and lock_byte.asm were fused into a single quoted
      # string, which names one file that does not exist.
      list(APPEND tbb_src
        "${TBB_ROOT_DIR}/src/tbb/ia32-masm/atomic_support.asm"
        "${TBB_ROOT_DIR}/src/tbb/ia32-masm/itsx.asm"
        "${TBB_ROOT_DIR}/src/tbb/ia32-masm/lock_byte.asm")
      # Enable SAFESEH feature for assembly (x86 builds only).
      set(CMAKE_ASM_MASM_FLAGS "/safeseh ${CMAKE_ASM_MASM_FLAGS}")
    endif()
  elseif(MINGW)
    add_definitions(-DUSE_WINTHREAD)
    add_definitions(-D_WIN32_WINNT=0x0502)
    set(CMAKE_CXX_FLAGS "-mthreads ${CMAKE_CXX_FLAGS}")
  endif()
endif()
# Flag sets that enable or disable RTTI and exceptions together. They are
# appended to the COMPILE_FLAGS of the library targets below. Neither variable
# is set when the platform is neither MSVC nor UNIX.
if(MSVC)
set(ENABLE_RTTI "/EHsc /GR ")
set(DISABLE_RTTI "/EHs- /GR- ")
elseif(UNIX)
set(ENABLE_RTTI "-frtti -fexceptions ")
set(DISABLE_RTTI "-fno-rtti -fno-exceptions ")
endif()
##--------
# - Added TBB_USE_GLIBCXX_VERSION macro to specify the version of GNU
# libstdc++ when it cannot be properly recognized, e.g. when used
# with Clang on Linux* OS. Inspired by a contribution from David A.
# When not provided by the caller, and only for Clang on non-Apple UNIX, the
# value is derived from CMAKE_CXX_COMPILER_VERSION by replacing "." with "0".
# NOTE(review): this uses the Clang version, not the libstdc++ version - confirm
# that is the intended value.
if(NOT TBB_USE_GLIBCXX_VERSION AND UNIX AND NOT APPLE)
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
# using Clang
string(REPLACE "." "0" TBB_USE_GLIBCXX_VERSION ${CMAKE_CXX_COMPILER_VERSION})
endif()
endif()
# Pass the value on as a preprocessor definition when one is available.
if(TBB_USE_GLIBCXX_VERSION)
add_definitions(-DTBB_USE_GLIBCXX_VERSION=${TBB_USE_GLIBCXX_VERSION})
endif()
##-------
# With GCC, add -flifetime-dse=1 when the compiler accepts it. The flag is
# injected through add_definitions(), so it applies directory-wide.
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
check_cxx_compiler_flag("-flifetime-dse=1" SUPPORTS_FLIFETIME)
if(SUPPORTS_FLIFETIME)
add_definitions(-flifetime-dse=1)
endif()
endif()
# Linker export definitions
# ARCH_PREFIX selects the <prefix>-tbb-export.def / <prefix>-tbbmalloc-export.def
# files used below: "mac", "win" or "lin", followed by the pointer width
# ("64" or "32"), and "-gcc" for MinGW.
if(APPLE)
set(ARCH_PREFIX "mac")
elseif(WIN32)
set(ARCH_PREFIX "win")
else()
set(ARCH_PREFIX "lin")
endif()
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
set(ARCH_PREFIX "${ARCH_PREFIX}64")
else()
set(ARCH_PREFIX "${ARCH_PREFIX}32")
endif()
if(MINGW)
set(ARCH_PREFIX "${ARCH_PREFIX}-gcc")
# there's no win32-gcc-tbb-export.def, use lin32-tbb-export.def
# NOTE(review): this copy runs at configure time and writes into the TBB
# source tree.
execute_process(COMMAND ${CMAKE_COMMAND} -E copy ${TBB_ROOT_DIR}/src/tbb/lin32-tbb-export.def ${TBB_ROOT_DIR}/src/tbb/win32-gcc-tbb-export.def)
endif()
# Generate tbb.def and tbbmalloc.def by running the C++ compiler's preprocessor
# over the platform's export definition files. MSVC uses /TC /EP with output
# redirection; other compilers use -xc++ -E with -o.
if(MSVC)
add_custom_command(OUTPUT tbb.def
COMMAND ${CMAKE_CXX_COMPILER} /TC /EP ${TBB_ROOT_DIR}/src/tbb/${ARCH_PREFIX}-tbb-export.def -I ${TBB_ROOT_DIR}/include > tbb.def
MAIN_DEPENDENCY ${TBB_ROOT_DIR}/src/tbb/${ARCH_PREFIX}-tbb-export.def
COMMENT "Preprocessing tbb.def"
)
add_custom_command(OUTPUT tbbmalloc.def
COMMAND ${CMAKE_CXX_COMPILER} /TC /EP ${TBB_ROOT_DIR}/src/tbbmalloc/${ARCH_PREFIX}-tbbmalloc-export.def -I ${TBB_ROOT_DIR}/include > tbbmalloc.def
MAIN_DEPENDENCY ${TBB_ROOT_DIR}/src/tbbmalloc/${ARCH_PREFIX}-tbbmalloc-export.def
COMMENT "Preprocessing tbbmalloc.def"
)
else()
add_custom_command(OUTPUT tbb.def
COMMAND ${CMAKE_CXX_COMPILER} -xc++ -E ${TBB_ROOT_DIR}/src/tbb/${ARCH_PREFIX}-tbb-export.def -I ${TBB_ROOT_DIR}/include -o tbb.def
MAIN_DEPENDENCY ${TBB_ROOT_DIR}/src/tbb/${ARCH_PREFIX}-tbb-export.def
COMMENT "Preprocessing tbb.def"
)
add_custom_command(OUTPUT tbbmalloc.def
COMMAND ${CMAKE_CXX_COMPILER} -xc++ -E ${TBB_ROOT_DIR}/src/tbbmalloc/${ARCH_PREFIX}-tbbmalloc-export.def -I ${TBB_ROOT_DIR}/include -o tbbmalloc.def
MAIN_DEPENDENCY ${TBB_ROOT_DIR}/src/tbbmalloc/${ARCH_PREFIX}-tbbmalloc-export.def
COMMENT "Preprocessing tbbmalloc.def"
)
endif()
# Target that the shared libraries depend on so both .def files get generated.
add_custom_target(tbb_def_files DEPENDS tbb.def tbbmalloc.def)
# TBB library
# Static variant: built from tbb_src with __TBB_BUILD=1 and the RTTI-enabling
# flags, and installed as part of the ${TBB_INSTALL_EXPORT_NAME} export set.
if(TBB_BUILD_STATIC)
add_library(tbb_static STATIC ${tbb_src})
target_include_directories(tbb_static PRIVATE ${TBB_INCLUDES})
set_property(TARGET tbb_static APPEND PROPERTY COMPILE_DEFINITIONS "__TBB_BUILD=1")
set_property(TARGET tbb_static APPEND_STRING PROPERTY COMPILE_FLAGS ${ENABLE_RTTI})
install(TARGETS tbb_static
EXPORT ${TBB_INSTALL_EXPORT_NAME} DESTINATION ${TBB_INSTALL_EXPORT_DESTINATION}
ARCHIVE DESTINATION ${TBB_INSTALL_ARCHIVE_DIR})
# Consumers on MSVC also get __TBB_NO_IMPLICIT_LINKAGE=1 (PUBLIC definition).
if(MSVC)
target_compile_definitions(tbb_static PUBLIC __TBB_NO_IMPLICIT_LINKAGE=1)
endif()
# On non-Apple UNIX, link against pthread and dl.
if(UNIX AND NOT APPLE)
target_link_libraries(tbb_static PUBLIC pthread dl)
endif()
endif()
# Shared variant of the TBB library. Its exported symbols are restricted with
# the generated tbb.def, passed to the linker in the form each platform expects.
if(TBB_BUILD_SHARED)
add_library(tbb SHARED ${tbb_src})
target_include_directories(tbb PRIVATE ${TBB_INCLUDES})
set_property(TARGET tbb APPEND PROPERTY COMPILE_DEFINITIONS "__TBB_BUILD=1")
set_property(TARGET tbb APPEND_STRING PROPERTY COMPILE_FLAGS ${ENABLE_RTTI})
# Make sure tbb.def has been generated before tbb is built.
add_dependencies(tbb tbb_def_files)
if(APPLE)
set_property(TARGET tbb APPEND PROPERTY LINK_FLAGS "-Wl,-exported_symbols_list,\"${CMAKE_CURRENT_BINARY_DIR}/tbb.def\"")
elseif(MSVC)
set_property(TARGET tbb APPEND PROPERTY LINK_FLAGS "/DEF:\"${CMAKE_CURRENT_BINARY_DIR}/tbb.def\"")
else()
set_property(TARGET tbb APPEND PROPERTY LINK_FLAGS "-Wl,-version-script,\"${CMAKE_CURRENT_BINARY_DIR}/tbb.def\"")
endif()
install(TARGETS tbb
EXPORT ${TBB_INSTALL_EXPORT_NAME} DESTINATION ${TBB_INSTALL_EXPORT_DESTINATION}
LIBRARY DESTINATION ${TBB_INSTALL_LIBRARY_DIR}
ARCHIVE DESTINATION ${TBB_INSTALL_ARCHIVE_DIR}
RUNTIME DESTINATION ${TBB_INSTALL_RUNTIME_DIR})
# On non-Apple UNIX, link against pthread and dl.
if(UNIX AND NOT APPLE)
target_link_libraries(tbb PUBLIC pthread dl)
endif()
if(MSVC)
target_compile_definitions(tbb PUBLIC __TBB_NO_IMPLICIT_LINKAGE=1)
endif()
endif()
# Per-source warning suppressions, one file per compiler family.
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
# Quench a warning on GCC
set_source_files_properties(${TBB_ROOT_DIR}/src/tbb/governor.cpp COMPILE_FLAGS "-Wno-missing-field-initializers ")
elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
# Quench a warning on Clang
set_source_files_properties(${TBB_ROOT_DIR}/src/tbb/itt_notify.cpp COMPILE_FLAGS "-Wno-varargs ")
elseif(MSVC)
# Quench a warning on MSVC
set_source_files_properties(${TBB_ROOT_DIR}/src/tbb/scheduler.cpp COMPILE_FLAGS "/wd4458 ")
endif()
# tbbmalloc: optional static and shared variants. Both are compiled with
# __TBBMALLOC_BUILD=1 and the RTTI/exception-disabling flags.
if(TBB_BUILD_TBBMALLOC)
# TBB malloc library
if(TBB_BUILD_STATIC)
add_library(tbbmalloc_static STATIC ${tbbmalloc_static_src})
target_include_directories(tbbmalloc_static PRIVATE ${TBB_INCLUDES})
set_property(TARGET tbbmalloc_static APPEND PROPERTY COMPILE_DEFINITIONS "__TBBMALLOC_BUILD=1")
set_property(TARGET tbbmalloc_static APPEND_STRING PROPERTY COMPILE_FLAGS ${DISABLE_RTTI})
if(MSVC)
target_compile_definitions(tbbmalloc_static PUBLIC __TBB_NO_IMPLICIT_LINKAGE=1 __TBBMALLOC_NO_IMPLICIT_LINKAGE=1)
endif()
install(TARGETS tbbmalloc_static
EXPORT ${TBB_INSTALL_EXPORT_NAME} DESTINATION ${TBB_INSTALL_EXPORT_DESTINATION}
ARCHIVE DESTINATION ${TBB_INSTALL_ARCHIVE_DIR})
endif()
if(TBB_BUILD_SHARED)
add_library(tbbmalloc SHARED ${tbbmalloc_src})
target_include_directories(tbbmalloc PRIVATE ${TBB_INCLUDES})
set_property(TARGET tbbmalloc APPEND PROPERTY COMPILE_DEFINITIONS "__TBBMALLOC_BUILD=1")
set_property(TARGET tbbmalloc APPEND_STRING PROPERTY COMPILE_FLAGS ${DISABLE_RTTI})
# Make sure tbbmalloc.def has been generated before tbbmalloc is built.
add_dependencies(tbbmalloc tbb_def_files)
# Restrict exported symbols with the generated tbbmalloc.def.
if(APPLE)
set_property(TARGET tbbmalloc APPEND PROPERTY LINK_FLAGS "-Wl,-exported_symbols_list,\"${CMAKE_CURRENT_BINARY_DIR}/tbbmalloc.def\"")
elseif(MSVC)
set_property(TARGET tbbmalloc APPEND PROPERTY LINK_FLAGS "/DEF:\"${CMAKE_CURRENT_BINARY_DIR}/tbbmalloc.def\"")
else()
set_property(TARGET tbbmalloc APPEND PROPERTY LINK_FLAGS "-Wl,-version-script,\"${CMAKE_CURRENT_BINARY_DIR}/tbbmalloc.def\"")
endif()
if(MSVC)
target_compile_definitions(tbbmalloc PUBLIC __TBB_NO_IMPLICIT_LINKAGE=1 __TBBMALLOC_NO_IMPLICIT_LINKAGE=1)
endif()
install(TARGETS tbbmalloc
EXPORT ${TBB_INSTALL_EXPORT_NAME} DESTINATION ${TBB_INSTALL_EXPORT_DESTINATION}
LIBRARY DESTINATION ${TBB_INSTALL_LIBRARY_DIR}
ARCHIVE DESTINATION ${TBB_INSTALL_ARCHIVE_DIR}
RUNTIME DESTINATION ${TBB_INSTALL_RUNTIME_DIR})
if(UNIX AND NOT APPLE)
target_link_libraries(tbbmalloc PUBLIC pthread dl)
endif()
endif()
endif()
# tbbmalloc proxy: optional static and shared variants, compiled with
# __TBBMALLOC_BUILD=1 and the RTTI/exception-disabling flags.
#
# Both targets receive the same private include directories as every other
# TBB library target in this file; they were previously the only ones without
# them.
if(TBB_BUILD_TBBMALLOC_PROXY)
  # TBB malloc proxy library
  if(TBB_BUILD_STATIC)
    add_library(tbbmalloc_proxy_static STATIC ${tbbmalloc_proxy_src})
    target_include_directories(tbbmalloc_proxy_static PRIVATE ${TBB_INCLUDES})
    set_property(TARGET tbbmalloc_proxy_static APPEND PROPERTY COMPILE_DEFINITIONS "__TBBMALLOC_BUILD=1")
    set_property(TARGET tbbmalloc_proxy_static APPEND_STRING PROPERTY COMPILE_FLAGS ${DISABLE_RTTI})
    install(TARGETS tbbmalloc_proxy_static
      EXPORT ${TBB_INSTALL_EXPORT_NAME} DESTINATION ${TBB_INSTALL_EXPORT_DESTINATION}
      ARCHIVE DESTINATION ${TBB_INSTALL_ARCHIVE_DIR})
  endif()

  if(TBB_BUILD_SHARED)
    add_library(tbbmalloc_proxy SHARED ${tbbmalloc_proxy_src})
    target_include_directories(tbbmalloc_proxy PRIVATE ${TBB_INCLUDES})
    set_property(TARGET tbbmalloc_proxy APPEND PROPERTY COMPILE_DEFINITIONS "__TBBMALLOC_BUILD=1")
    set_property(TARGET tbbmalloc_proxy APPEND_STRING PROPERTY COMPILE_FLAGS ${DISABLE_RTTI})
    # The shared proxy links against the shared tbbmalloc library.
    target_link_libraries(tbbmalloc_proxy PUBLIC tbbmalloc)
    install(TARGETS tbbmalloc_proxy
      EXPORT ${TBB_INSTALL_EXPORT_NAME} DESTINATION ${TBB_INSTALL_EXPORT_DESTINATION}
      LIBRARY DESTINATION ${TBB_INSTALL_LIBRARY_DIR}
      ARCHIVE DESTINATION ${TBB_INSTALL_ARCHIVE_DIR}
      RUNTIME DESTINATION ${TBB_INSTALL_RUNTIME_DIR})
    if(UNIX AND NOT APPLE)
      target_link_libraries(tbbmalloc_proxy PUBLIC pthread dl)
    endif()
  endif()
endif()
# Install the public TBB headers.
install(DIRECTORY "${TBB_ROOT_DIR}/include/tbb" DESTINATION ${TBB_INSTALL_INCLUDE_DIR})
# version_string.ver
# Capture a configure-time date string in _configure_date: `date` on UNIX,
# `cmd /C date /T` on Windows, "Unknown" elsewhere.
if(UNIX)
execute_process(COMMAND date "+%a, %d %b %Y %H:%M:%S %z"
OUTPUT_VARIABLE _configure_date
OUTPUT_STRIP_TRAILING_WHITESPACE)
elseif(WIN32)
execute_process(COMMAND cmd " /C date /T"
OUTPUT_VARIABLE _configure_date
OUTPUT_STRIP_TRAILING_WHITESPACE)
else()
set(_configure_date "Unknown")
endif()
# Add the top-level binary directory to the include path, then expand the
# template (only @VAR@ references, because of @ONLY) into version_string.ver.
include_directories(${CMAKE_BINARY_DIR})
configure_file(extra/version_string.ver.in version_string.ver @ONLY)

View File

@ -0,0 +1,11 @@
// Template for the TBB version strings. The @NAME@ placeholders are filled in
// by the build system (CMake configure_file with @ONLY, or the Bazel
// template_rule substitutions).
// __TBB_VERSION_STRINGS(N) expands to a sequence of "<N>: KEY value" string
// literals, each followed by ENDL.
#define __TBB_VERSION_STRINGS(N) \
#N": BUILD_HOST @CMAKE_SYSTEM_NAME@" ENDL \
#N": BUILD_OS @CMAKE_SYSTEM@" ENDL \
#N": BUILD_KERNEL @CMAKE_SYSTEM_VERSION@" ENDL \
#N": BUILD_GCC @CMAKE_CXX_COMPILER_ID@" ENDL \
#N": BUILD_LIBC Unknown" ENDL \
#N": BUILD_LD Unknown" ENDL \
#N": BUILD_TARGET Unknown" ENDL \
#N": BUILD_COMMAND Unknown" ENDL
// Date string captured when the build was configured.
#define __TBB_DATETIME "@_configure_date@"

View File

@ -261,6 +261,7 @@ def get_aten_preprocessor_flags():
"-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION",
"-DAT_PARALLEL_OPENMP_FBXPLAT=0",
"-DAT_PARALLEL_NATIVE_FBXPLAT=1",
"-DAT_PARALLEL_NATIVE_TBB_FBXPLAT=0",
"-DUSE_LAPACK_FBXPLAT=0",
"-DAT_BLAS_F2C_FBXPLAT=0",
"-DAT_BLAS_USE_CBLAS_DOT_FBXPLAT=0",
@ -1111,6 +1112,9 @@ def define_buck_targets(
"@AT_PARALLEL_NATIVE@",
"AT_PARALLEL_NATIVE_FBXPLAT",
"--replace",
"@AT_PARALLEL_NATIVE_TBB@",
"AT_PARALLEL_NATIVE_TBB_FBXPLAT",
"--replace",
"@AT_BUILD_WITH_LAPACK@",
"USE_LAPACK_FBXPLAT",
"--replace",

View File

@ -999,6 +999,7 @@ aten_cpu_source_non_codegen_list = [
"aten/src/ATen/NestedTensorImpl.cpp",
"aten/src/ATen/ParallelCommon.cpp",
"aten/src/ATen/ParallelNative.cpp",
"aten/src/ATen/ParallelNativeTBB.cpp",
"aten/src/ATen/ParallelOpenMP.cpp",
"aten/src/ATen/ParallelThreadPoolNative.cpp",
"aten/src/ATen/PythonTorchFunctionTLS.cpp",

View File

@ -16,11 +16,14 @@ endif()
# ATen parallelism settings
# OMP - OpenMP for intra-op, native thread pool for inter-op parallelism
# NATIVE - using native thread pool for intra- and inter-op parallelism
# TBB - using TBB for intra- and native thread pool for inter-op parallelism
if(INTERN_BUILD_MOBILE)
set(ATEN_THREADING "NATIVE" CACHE STRING "ATen parallel backend")
else()
if(USE_OPENMP)
set(ATEN_THREADING "OMP" CACHE STRING "ATen parallel backend")
elseif(USE_TBB)
set(ATEN_THREADING "TBB" CACHE STRING "ATen parallel backend")
else()
set(ATEN_THREADING "NATIVE" CACHE STRING "ATen parallel backend")
endif()
@ -28,12 +31,19 @@ endif()
# Translate ATEN_THREADING into the AT_PARALLEL_* switches. All three start at
# 0 and the branch matching the selected backend sets exactly one of them to 1;
# an unrecognised backend name is a fatal error.
set(AT_PARALLEL_OPENMP 0)
set(AT_PARALLEL_NATIVE 0)
set(AT_PARALLEL_NATIVE_TBB 0)

message(STATUS "Using ATen parallel backend: ${ATEN_THREADING}")
if("${ATEN_THREADING}" STREQUAL "OMP")
  set(AT_PARALLEL_OPENMP 1)
elseif("${ATEN_THREADING}" STREQUAL "NATIVE")
  set(AT_PARALLEL_NATIVE 1)
elseif("${ATEN_THREADING}" STREQUAL "TBB")
  # The TBB backend is only valid when TBB support itself is enabled.
  if(NOT USE_TBB)
    message(FATAL_ERROR "Using TBB backend but USE_TBB is off")
  endif()
  message(WARNING "ATEN TBB Threading is deprecated.")
  set(AT_PARALLEL_NATIVE_TBB 1)
else()
  message(FATAL_ERROR "Unknown ATen parallel backend: ${ATEN_THREADING}")
endif()
@ -1213,6 +1223,11 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU"
set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/../aten/src/ATen/native/quantized/qlinear_unpack.cpp PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations)
endif()
# With TBB enabled, add the TBB include directory to ATen_CPU_INCLUDE and link
# torch_cpu publicly against the TBB::tbb target.
if(USE_TBB)
list(APPEND ATen_CPU_INCLUDE ${TBB_INCLUDE_DIR})
target_link_libraries(torch_cpu PUBLIC TBB::tbb)
endif()
target_include_directories(torch_cpu PRIVATE ${ATen_CPU_INCLUDE})
target_include_directories(torch_cpu PRIVATE
@ -1690,6 +1705,10 @@ if(BUILD_SHARED_LIBS)
target_link_libraries(torch_global_deps ${Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS})
target_link_libraries(torch_global_deps torch::cudart torch::nvtoolsext)
endif()
if(USE_TBB)
target_link_libraries(torch_global_deps TBB::tbb)
endif()
install(TARGETS torch_global_deps DESTINATION "${TORCH_INSTALL_LIB_DIR}")
endif()

View File

@ -134,6 +134,35 @@ else()
"Cannot find threading library. PyTorch requires Threads to compile.")
endif()
# ---[ TBB
# Provides the TBB::tbb target and TBB_INCLUDE_DIR, either from a system
# installation (config-mode package, version 2018.0 or newer) or by building
# the bundled sources in third_party/tbb with the CMake project under
# aten/src/ATen/cpu/tbb.
if(USE_TBB)
if(USE_SYSTEM_TBB)
find_package(TBB 2018.0 REQUIRED CONFIG COMPONENTS tbb)
get_target_property(TBB_INCLUDE_DIR TBB::tbb INTERFACE_INCLUDE_DIRECTORIES)
else()
message(STATUS "Compiling TBB from source")
# Unset our restrictive C++ flags here and reset them later.
# Remove this once we use proper target_compile_options.
set(OLD_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
set(CMAKE_CXX_FLAGS)
set(TBB_ROOT_DIR "${PROJECT_SOURCE_DIR}/third_party/tbb")
# Force the sub-project to build only the shared TBB library: no static
# library, no tbbmalloc, no malloc proxy, no tests.
set(TBB_BUILD_STATIC OFF CACHE BOOL " " FORCE)
set(TBB_BUILD_SHARED ON CACHE BOOL " " FORCE)
set(TBB_BUILD_TBBMALLOC OFF CACHE BOOL " " FORCE)
set(TBB_BUILD_TBBMALLOC_PROXY OFF CACHE BOOL " " FORCE)
set(TBB_BUILD_TESTS OFF CACHE BOOL " " FORCE)
add_subdirectory(${PROJECT_SOURCE_DIR}/aten/src/ATen/cpu/tbb)
set_property(TARGET tbb tbb_def_files PROPERTY FOLDER "dependencies")
# Restore the C++ flags saved above.
set(CMAKE_CXX_FLAGS ${OLD_CMAKE_CXX_FLAGS})
set(TBB_INCLUDE_DIR "${TBB_ROOT_DIR}/include")
# Give the in-tree target the same name a system package provides.
add_library(TBB::tbb ALIAS tbb)
endif()
endif()
# ---[ protobuf
if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO)
if(USE_LITE_PROTO)

View File

@ -71,8 +71,8 @@ IF (NOT "${MKL_THREADING}" STREQUAL "SEQ" AND
MESSAGE(FATAL_ERROR "Invalid MKL_THREADING (${MKL_THREADING}), should be one of: SEQ, TBB, OMP")
ENDIF()
IF ("${MKL_THREADING}" STREQUAL "TBB" AND NOT TARGET TBB::tbb)
MESSAGE(FATAL_ERROR "MKL_THREADING is TBB but TBB is not found")
IF ("${MKL_THREADING}" STREQUAL "TBB" AND NOT USE_TBB)
MESSAGE(FATAL_ERROR "MKL_THREADING is TBB but USE_TBB is turned off")
ENDIF()
MESSAGE(STATUS "MKL_THREADING = ${MKL_THREADING}")

View File

@ -101,7 +101,7 @@ IF(NOT MKLDNN_FOUND)
IF(NOT MKLDNN_CPU_RUNTIME)
SET(MKLDNN_CPU_RUNTIME "OMP" CACHE STRING "")
ELSEIF(MKLDNN_CPU_RUNTIME STREQUAL "TBB")
IF(TARGET TBB::tbb)
IF(USE_TBB)
MESSAGE(STATUS "MKL-DNN is using TBB")
SET(TBB_cmake_included TRUE)

View File

@ -151,6 +151,10 @@ function(caffe2_print_configuration_summary)
message(STATUS " USE_OBSERVERS : ${USE_OBSERVERS}")
message(STATUS " USE_OPENCL : ${USE_OPENCL}")
message(STATUS " USE_OPENMP : ${USE_OPENMP}")
message(STATUS " USE_TBB : ${USE_TBB}")
if(${USE_TBB})
message(STATUS " USE_SYSTEM_TBB : ${USE_SYSTEM_TBB}")
endif()
message(STATUS " USE_MIMALLOC : ${USE_MIMALLOC}")
message(STATUS " USE_VULKAN : ${USE_VULKAN}")
if(${USE_VULKAN})

View File

@ -317,6 +317,9 @@ function(caffe2_binary_target target_name_or_src)
if(DEFINED Caffe2_MODULES)
target_link_libraries(${__target} ${Caffe2_MODULES})
endif()
if(USE_TBB AND NOT USE_SYSTEM_TBB)
target_include_directories(${__target} PUBLIC ${TBB_INCLUDE_DIR})
endif()
install(TARGETS ${__target} DESTINATION bin)
endfunction()

View File

@ -64,6 +64,8 @@ def get_cpu_parallel_backend_flags():
defs = []
if parallel_backend == "openmp":
defs.append("-DAT_PARALLEL_OPENMP_FBCODE=1")
elif parallel_backend == "tbb":
defs.append("-DAT_PARALLEL_NATIVE_TBB_FBCODE=1")
elif parallel_backend == "native":
defs.append("-DAT_PARALLEL_NATIVE_FBCODE=1")
else:

View File

@ -179,6 +179,13 @@
# possible values:
# OMP - use OpenMP for intra-op and native backend for inter-op tasks
# NATIVE - use native thread pool for both intra- and inter-op tasks
# TBB - using TBB for intra- and native thread pool for inter-op parallelism
#
# USE_TBB
# enable TBB support
#
# USE_SYSTEM_TBB
# Use system-provided Intel TBB.
#
# USE_SYSTEM_LIBS (work in progress)
# Use system-provided libraries to satisfy the build dependencies.
@ -364,6 +371,7 @@ def get_submodule_folders():
for name in [
"gloo",
"cpuinfo",
"tbb",
"onnx",
"foxi",
"QNNPACK",

View File

@ -130,7 +130,10 @@ cc_library(
],
deps = [
"@mkl",
],
] + select({
"@pytorch//tools/config:thread_sanitizer": [],
"//conditions:default": ["@tbb"],
}),
defines = [
"DNNL_ENABLE_MAX_CPU_ISA",
"DNNL_ENABLE_CONCURRENT_EXEC",

View File

@ -12,7 +12,10 @@ cc_library(
"libmkl_vml_avx2.so",
"libmkl_vml_avx512.so",
"libmkl_vml_def.so",
],
] + select({
"@pytorch//tools/config:thread_sanitizer": [],
"//conditions:default": ["libmkl_tbb_thread.so"],
}),
visibility = ["//visibility:public"],
deps = ["@mkl_headers"],
)

1
third_party/tbb vendored Submodule

Submodule third_party/tbb added at a51a90bc60

75
third_party/tbb.BUILD vendored Normal file
View File

@ -0,0 +1,75 @@
load("@rules_cc//cc:defs.bzl", "cc_library")
load("@pytorch//third_party:substitution.bzl", "template_rule")
licenses(["notice"]) # Apache 2.0
# Produces version_string.h from the version_string.ver.in template; every
# @...@ placeholder listed below is substituted with the literal "Unknown".
template_rule(
    name = "version_string",
    src = "@//:aten/src/ATen/cpu/tbb/extra/version_string.ver.in",
    out = "version_string.h",
    substitutions = {
        placeholder: "Unknown"
        for placeholder in [
            "@CMAKE_SYSTEM_NAME@",
            "@CMAKE_SYSTEM@",
            "@CMAKE_SYSTEM_VERSION@",
            "@CMAKE_CXX_COMPILER_ID@",
            "@_configure_date@",
        ]
    },
)
# Glob patterns for the private headers and translation units that are
# compiled into the library (test sources under src/old are excluded below).
_TBB_SRC_PATTERNS = [
    "src/old/*.h",
    "src/rml/client/*.h",
    "src/rml/include/*.h",
    "src/rml/server/*.h",
    "src/tbb/*.h",
    "src/tbb/tools_api/*.h",
    "src/tbb/tools_api/legacy/*.h",
    "src/old/*.cpp",
    "src/tbb/*.cpp",
]

# Glob patterns for the public header directories exposed through `hdrs`.
_TBB_HDR_PATTERNS = [
    "include/tbb/*",
    "include/tbb/compat/*",
    "include/tbb/internal/*",
    "include/tbb/machine/*",
]

# Compiler flags applied only while building this target.
_TBB_COPTS = [
    "-Iexternal/tbb/src/rml/include",
    "-Iexternal/tbb/src",
    "-pthread",
    "-DDO_ITT_NOTIFY=1",
    "-DUSE_PTHREAD=1",
    "-D__TBB_BUILD=1",
    "-D__TBB_DYNAMIC_LOAD_ENABLED=0",
    "-D__TBB_SOURCE_DIRECTLY_INCLUDED=1",
    "-fno-sanitize=vptr",
    "-fno-sanitize=thread",
]

# Preprocessor definitions listed under `defines`.
_TBB_DEFINES = [
    # TBB Cannot detect the standard library version when using clang with libstdc++.
    # See https://github.com/01org/tbb/issues/22
    "TBB_USE_GLIBCXX_VERSION=(_GLIBCXX_RELEASE*10000)",
    "TBB_PREVIEW_GLOBAL_CONTROL=1",
    "TBB_PREVIEW_LOCAL_OBSERVER=1",
    "__TBB_ALLOW_MUTABLE_FUNCTORS=1",
]

# TBB library target: the generated version header, the globbed sources and
# the RML client source are combined into a single publicly visible cc_library.
cc_library(
    name = "tbb",
    srcs = [":version_string"] + glob(
        _TBB_SRC_PATTERNS,
        exclude = ["src/old/test_*.cpp"],
    ) + ["src/rml/client/rml_tbb.cpp"],
    hdrs = glob(
        _TBB_HDR_PATTERNS,
        exclude = ["include/tbb/scalable_allocator.h"],
    ),
    copts = _TBB_COPTS,
    defines = _TBB_DEFINES,
    includes = [
        "include",
        "src/tbb/tools_api",
    ],
    linkopts = [
        "-ldl",
        "-lpthread",
        "-lrt",
    ],
    textual_hdrs = ["src/tbb/tools_api/ittnotify_static.c"],
    visibility = ["//visibility:public"],
)

34
third_party/tbb.patch vendored Normal file
View File

@ -0,0 +1,34 @@
diff --git a/src/rml/server/rml_server.cpp b/src/rml/server/rml_server.cpp
index 2508465..1e22ad2 100644
--- a/src/rml/server/rml_server.cpp
+++ b/src/rml/server/rml_server.cpp
@@ -3279,10 +3279,10 @@ extern "C" void __KMP_call_with_my_server_info( ::rml::server_info_callback_t cb
/*
* RML server info
*/
-#include "version_string.ver"
+#include "version_string.h"
#ifndef __TBB_VERSION_STRINGS
-#pragma message("Warning: version_string.ver isn't generated properly by version_info.sh script!")
+#pragma message("Warning: version_string.h isn't generated properly by version_info.sh script!")
#endif
// We use the build time as the RML server info. TBB is required to build RML, so we make it the same as the TBB build time.
diff --git a/src/tbb/tbb_version.h b/src/tbb/tbb_version.h
index dcaa55b..4981a8a 100644
--- a/src/tbb/tbb_version.h
+++ b/src/tbb/tbb_version.h
@@ -25,10 +25,10 @@
#ifndef ENDL
#define ENDL "\n"
#endif
-#include "version_string.ver"
+#include "version_string.h"
#ifndef __TBB_VERSION_STRINGS
-#pragma message("Warning: version_string.ver isn't generated properly by version_info.sh script!")
+#pragma message("Warning: version_string.h isn't generated properly by version_info.sh script!")
// here is an example of macros value:
#define __TBB_VERSION_STRINGS \
"TBB: BUILD_HOST\tUnknown\n" \

View File

@ -24,7 +24,7 @@ from torch.testing._internal.common_nn import (
marginrankingloss_reference, multimarginloss_reference, multilabelmarginloss_reference,
nllloss_reference, nlllossNd_reference, smoothl1loss_reference, softmarginloss_reference, get_reduction)
from torch.testing._internal.common_utils import (
freeze_rng_state, skipIfMps, GRADCHECK_NONDET_TOL, TEST_WITH_ROCM, IS_WINDOWS,
freeze_rng_state, set_single_threaded_if_parallel_tbb, skipIfMps, GRADCHECK_NONDET_TOL, TEST_WITH_ROCM, IS_WINDOWS,
skipIfTorchDynamo)
from types import ModuleType
from typing import List, Tuple, Type, Set, Dict
@ -235,7 +235,7 @@ class ModuleInfo:
self.is_lazy = issubclass(module_cls, torch.nn.modules.lazy.LazyModuleMixin)
def get_decorators(self, test_class, test_name, device, dtype, param_kwargs):
result = []
result = [set_single_threaded_if_parallel_tbb]
for decorator in self.decorators:
if isinstance(decorator, DecorateInfo):
if decorator.is_active(test_class, test_name, device, dtype, param_kwargs):

View File

@ -39,6 +39,7 @@ from torch.testing._internal.common_device_type import tol, toleranceOverride
from torch.testing._internal.common_methods_invocations import DecorateInfo
from torch.testing._internal.common_utils import (
_TestParametrizer,
set_single_threaded_if_parallel_tbb,
skipIfMps,
skipIfTorchDynamo,
TEST_WITH_TORCHDYNAMO,
@ -160,7 +161,7 @@ class OptimizerInfo:
self.supports_fused_on = supports_fused_on
def get_decorators(self, test_class, test_name, device, dtype, param_kwargs):
result = []
result = [set_single_threaded_if_parallel_tbb]
for decorator in self.decorators:
if isinstance(decorator, DecorateInfo):
if decorator.is_active(

View File

@ -1497,6 +1497,8 @@ def disable_translation_validation_if_dynamic_shapes(fn):
# See: https://github.com/pytorch/pytorch/pull/59402#issuecomment-858811135
TestEnvironment.def_flag("TEST_CUDA_MEM_LEAK_CHECK", env_var="PYTORCH_TEST_CUDA_MEM_LEAK_CHECK")
# True if CI is running TBB-enabled PyTorch
IS_TBB = "tbb" in os.getenv("BUILD_ENVIRONMENT", "")
# Dict of NumPy dtype -> torch dtype (when the correspondence exists)
numpy_to_torch_dtype_dict = {
@ -1873,6 +1875,19 @@ def skipIfNoSciPy(fn):
fn(*args, **kwargs)
return wrapper
def skipIfTBB(message="This test makes TBB sad"):
    """Build a decorator that skips the decorated callable when IS_TBB is set.

    Args:
        message: Reason passed to ``unittest.SkipTest`` when the call is skipped.

    Returns:
        A decorator. The wrapped callable raises ``unittest.SkipTest(message)``
        if ``IS_TBB`` is true; otherwise it forwards all arguments to the
        original callable and returns its result.

    Raises:
        unittest.SkipTest: from the wrapped callable, when ``IS_TBB`` is true.
    """
    def dec_fn(fn):
        @wraps(fn)
        def wrapper(*args, **kwargs):
            # IS_TBB is consulted on every call, not once at decoration time.
            if IS_TBB:
                raise unittest.SkipTest(message)
            # Propagate the wrapped callable's result rather than dropping it,
            # so the decorator stays transparent for callables that return a value.
            return fn(*args, **kwargs)
        return wrapper
    return dec_fn
def skip_if_pytest(fn):
@wraps(fn)
def wrapped(*args, **kwargs):
@ -4708,6 +4723,24 @@ dtype_abbrs = {
}
def set_single_threaded_if_parallel_tbb(fn):
    """Decorator that runs ``fn`` with a single torch thread when IS_TBB is set.

    When ``IS_TBB`` is false at decoration time, ``fn`` is returned unchanged.
    Otherwise the returned wrapper sets the torch thread count to 1 for the
    duration of the call and restores the previous count afterwards.

    See https://github.com/pytorch/pytorch/issues/64571#issuecomment-914691883
    """
    if IS_TBB:
        @wraps(fn)
        def single_threaded(*args, **kwargs):
            saved_threads = torch.get_num_threads()
            torch.set_num_threads(1)
            try:
                outcome = fn(*args, **kwargs)
            finally:
                # Put the previous thread count back even if fn raised.
                torch.set_num_threads(saved_threads)
            return outcome

        return single_threaded

    return fn
@functools.lru_cache
def get_cycles_per_ms() -> float:

View File

@ -1880,6 +1880,9 @@ def _prepare_ldflags(extra_ldflags, with_cuda, verbose, is_standalone):
if not is_standalone:
extra_ldflags.append('-ltorch_python')
if is_standalone and "TBB" in torch.__config__.parallel_info():
extra_ldflags.append('-ltbb')
if is_standalone:
extra_ldflags.append(f"-Wl,-rpath,{TORCH_LIB_PATH}")