mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 12:54:11 +08:00
Temporarily remove TBB (#8255)
This commit is contained in:
4
.gitmodules
vendored
4
.gitmodules
vendored
@ -1,7 +1,3 @@
|
||||
[submodule "third_party/tbb"]
|
||||
path = third_party/tbb
|
||||
url = https://github.com/01org/tbb
|
||||
branch = tbb_2018
|
||||
[submodule "third_party/catch"]
|
||||
path = third_party/catch
|
||||
url = https://github.com/catchorg/Catch2.git
|
||||
|
@ -89,24 +89,6 @@ IF(NOT AT_LINK_STYLE)
|
||||
SET(AT_LINK_STYLE SHARED)
|
||||
ENDIF()
|
||||
|
||||
# Unset our restrictive C++ flags here and reset them later.
|
||||
# Remove this once we use proper target_compile_options.
|
||||
set(OLD_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
|
||||
set(CMAKE_CXX_FLAGS)
|
||||
|
||||
set(TBB_ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/tbb")
|
||||
set(TBB_BUILD_STATIC ON CACHE BOOL " " FORCE)
|
||||
set(TBB_BUILD_SHARED OFF CACHE BOOL " " FORCE)
|
||||
set(TBB_BUILD_TBBMALLOC OFF CACHE BOOL " " FORCE)
|
||||
set(TBB_BUILD_TBBMALLOC_PROXY OFF CACHE BOOL " " FORCE)
|
||||
set(TBB_BUILD_TESTS OFF CACHE BOOL " " FORCE)
|
||||
add_subdirectory(cpu/tbb)
|
||||
set_property(TARGET tbb_static tbb_def_files PROPERTY FOLDER "dependencies")
|
||||
list(APPEND ATen_THIRD_PARTY_INCLUDE ${TBB_ROOT_DIR}/include)
|
||||
list(APPEND ATen_CPU_DEPENDENCY_LIBS tbb_static)
|
||||
|
||||
set(CMAKE_CXX_FLAGS ${OLD_CMAKE_CXX_FLAGS})
|
||||
|
||||
IF(BLAS_FOUND)
|
||||
IF ($ENV{TH_BINARY_BUILD})
|
||||
MESSAGE(STATUS "TH_BINARY_BUILD detected. Enabling special linkage.")
|
||||
|
@ -149,7 +149,6 @@ inline bool _apply_preamble(ArrayRef<Tensor> tensors) {
|
||||
for (auto& t : tensors)
|
||||
if (t.sizes().equals({0}))
|
||||
return false;
|
||||
internal::init_tbb_num_threads();
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -351,7 +350,7 @@ template <typename scalar1, typename Op>
|
||||
inline void CPU_tensor_parallel_apply1(
|
||||
Tensor tensor1,
|
||||
const Op op,
|
||||
int64_t grain_size = internal::TBB_GRAIN_SIZE) {
|
||||
int64_t grain_size = internal::GRAIN_SIZE) {
|
||||
if (!_apply_preamble({tensor1}))
|
||||
return;
|
||||
if (tensor1.ndimension() < 8) {
|
||||
@ -383,7 +382,7 @@ inline void CPU_tensor_parallel_apply2(
|
||||
Tensor tensor1,
|
||||
Tensor tensor2,
|
||||
const Op op,
|
||||
int64_t grain_size = internal::TBB_GRAIN_SIZE) {
|
||||
int64_t grain_size = internal::GRAIN_SIZE) {
|
||||
if (!_apply_preamble({tensor1, tensor2}))
|
||||
return;
|
||||
if (tensor1.ndimension() < 8 && tensor2.ndimension() < 8) {
|
||||
|
@ -1,56 +0,0 @@
|
||||
#include <ATen/CPUGeneral.h>
|
||||
#include <ATen/Parallel.h>
|
||||
#include <tbb/blocked_range.h>
|
||||
#include <tbb/parallel_reduce.h>
|
||||
#include <tbb/partitioner.h>
|
||||
#include <tbb/tbb.h>
|
||||
#include <cassert>
|
||||
#include <thread>
|
||||
|
||||
|
||||
namespace at { namespace internal {
|
||||
|
||||
// thread_local variable with internal linkage
|
||||
// requires no guarding as its storage duration is defined to be per thread
|
||||
static thread_local tbb::task_scheduler_init tbbinit(
|
||||
tbb::task_scheduler_init::deferred);
|
||||
// Tracks the number of threads in use, which TBB doesn't track.
|
||||
static thread_local int num_threads_ = -1;
|
||||
|
||||
// Negative number of threads means default value
|
||||
void init_tbb_num_threads() {
|
||||
static thread_local bool first_call = true;
|
||||
int num_threads = at::get_num_threads();
|
||||
// In order to have control over the number of threads this function
|
||||
// must be called first before any other tbb parallel construct is
|
||||
// excercised within a particular thread. Otherwise the default
|
||||
// scheduler will be created over which we do not have control.
|
||||
// The following code will and must throw an error if tbb has
|
||||
// already been initialized before this function was called.
|
||||
if (!tbbinit.is_active() && !first_call)
|
||||
throw std::runtime_error(
|
||||
"tbb initialization failed: scheduler not active after first call");
|
||||
if (first_call) {
|
||||
if (tbbinit.is_active())
|
||||
throw std::runtime_error(
|
||||
"tbb initialization failed: scheduler active on first call");
|
||||
if (num_threads < 0) {
|
||||
int max_threads = tbbinit.default_num_threads();
|
||||
tbbinit.initialize(max_threads);
|
||||
} else {
|
||||
tbbinit.initialize(num_threads);
|
||||
}
|
||||
first_call = false;
|
||||
}
|
||||
if (num_threads == 0) {
|
||||
// TODO: For PyTorch 0 means 1
|
||||
num_threads = 1;
|
||||
}
|
||||
if (num_threads > 0 && (num_threads_ != num_threads)) {
|
||||
tbbinit.terminate();
|
||||
tbbinit.initialize(num_threads);
|
||||
num_threads_ = num_threads;
|
||||
}
|
||||
}
|
||||
} // namespace internal
|
||||
} // namespace at
|
@ -1,54 +1,59 @@
|
||||
#pragma once
|
||||
#include <ATen/ATen.h>
|
||||
#include <cstddef>
|
||||
#include <tbb/tbb.h>
|
||||
|
||||
#ifdef _OPENMP
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
namespace at {
|
||||
namespace internal {
|
||||
// This needs to be called before the first use of any algorithm such as
|
||||
// parallel or it will have no effect and the default task scheduler is
|
||||
// created which uses all available cores.
|
||||
// See
|
||||
// https://www.threadingbuildingblocks.org/docs/help/reference/task_scheduler/task_scheduler_init_cls.html
|
||||
// This does not initialize the number of workers in the market (the overall number
|
||||
// of workers available to a process). It is merely a request to the market
|
||||
// for a certain number of workers. If there are multiple threads making
|
||||
// a request at the size of the maximum number of threads, they will
|
||||
// be allocated a number proportional to the other requests.
|
||||
AT_API void init_tbb_num_threads();
|
||||
// This parameter is heuristically chosen to determine the minimum number of
|
||||
// work that warrants parallelism. For example, when summing an array, it is
|
||||
// deemed inefficient to parallelise over arrays shorter than 32768. Further,
|
||||
// no parallel algorithm (such as parallel_reduce) should split work into
|
||||
// smaller than GRAIN_SIZE chunks.
|
||||
constexpr int64_t TBB_GRAIN_SIZE = 32768;
|
||||
constexpr int64_t GRAIN_SIZE = 32768;
|
||||
} // namespace internal
|
||||
|
||||
// Integer ceiling division: returns x / y rounded towards positive infinity.
//
// For the usual case (x >= 0, y > 0) this yields the same values as the
// classic (x + y - 1) / y, but it is computed from the quotient and remainder
// so the intermediate sum cannot overflow when x is close to INT64_MAX.
// Negative operands are also rounded up correctly.
//
// Precondition: y != 0 (and not x == INT64_MIN with y == -1).
inline int64_t divup(int64_t x, int64_t y) {
  const int64_t quotient = x / y;
  const int64_t remainder = x % y;
  // Truncating division already rounds up when the exact result is negative.
  // Only an inexact, positive result (remainder and divisor share a sign)
  // needs to be bumped by one.
  const bool round_up = remainder != 0 && ((remainder < 0) == (y < 0));
  return quotient + (round_up ? 1 : 0);
}
|
||||
|
||||
template <class F>
|
||||
inline void parallel_for(
|
||||
int64_t begin,
|
||||
int64_t end,
|
||||
int64_t grain_size,
|
||||
const F& f) {
|
||||
internal::init_tbb_num_threads();
|
||||
|
||||
#ifdef __PPC64__
|
||||
using default_partitioner_type = tbb::simple_partitioner;
|
||||
#else
|
||||
using default_partitioner_type = tbb::affinity_partitioner;
|
||||
#endif
|
||||
|
||||
thread_local static default_partitioner_type ap;
|
||||
|
||||
if ((end - begin) < grain_size || get_num_threads() == 1) {
|
||||
f(begin, end);
|
||||
} else {
|
||||
tbb::parallel_for(
|
||||
tbb::blocked_range<int64_t>(begin, end, grain_size),
|
||||
[f](const tbb::blocked_range<int64_t>& r) { f(r.begin(), r.end()); },
|
||||
ap);
|
||||
const int64_t begin,
|
||||
const int64_t end,
|
||||
const int64_t grain_size_,
|
||||
const F f) {
|
||||
const int64_t min_grain_size = divup((end - begin), get_num_threads());
|
||||
const int64_t grain_size = std::max(min_grain_size, grain_size_);
|
||||
#pragma omp parallel for if ((end - begin) >= grain_size && get_num_threads() > 1)
|
||||
for (int64_t i = begin; i < end; i += grain_size) {
|
||||
f(i, i + std::min(end - i, grain_size));
|
||||
}
|
||||
}
|
||||
|
||||
template <class scalar_t, class F, class SF>
|
||||
inline scalar_t parallel_reduce(
|
||||
const int64_t begin,
|
||||
const int64_t end,
|
||||
const int64_t grain_size_,
|
||||
const scalar_t ident,
|
||||
const F f,
|
||||
const SF sf) {
|
||||
const int64_t min_grain_size = divup((end - begin), get_num_threads());
|
||||
const int64_t grain_size = std::max(min_grain_size, grain_size_);
|
||||
const int64_t num_results = divup((end - begin), grain_size);
|
||||
std::vector<scalar_t> results(num_results);
|
||||
scalar_t* results_data = results.data();
|
||||
#pragma omp parallel for if ((end - begin) >= grain_size && get_num_threads() > 1)
|
||||
for (int64_t id = 0; id < num_results; id++) {
|
||||
int64_t i = begin + id * grain_size;
|
||||
results_data[id] = f(i, i + std::min(end - i, grain_size), ident);
|
||||
}
|
||||
return std::accumulate(
|
||||
results_data, results_data + results.size(), ident, sf);
|
||||
}
|
||||
|
||||
} // namespace at
|
||||
|
@ -1,376 +0,0 @@
|
||||
# Based on https://github.com/wjakob/tbb/blob/master/CMakeLists.txt
|
||||
# All credit goes to Wenzel Jakob!
|
||||
|
||||
cmake_minimum_required (VERSION 2.8.12 FATAL_ERROR)
|
||||
project (tbb CXX)
|
||||
|
||||
include(CheckCXXCompilerFlag)
|
||||
include(CheckCXXSourceRuns)
|
||||
|
||||
if(POLICY CMP0058)
|
||||
cmake_policy(SET CMP0058 NEW)
|
||||
endif()
|
||||
|
||||
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
|
||||
message(STATUS "Setting build type to 'Release' as none was specified.")
|
||||
set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)
|
||||
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release"
|
||||
"MinSizeRel" "RelWithDebInfo")
|
||||
endif()
|
||||
|
||||
if(NOT TBB_ROOT_DIR)
|
||||
set(TBB_ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
|
||||
endif()
|
||||
if(NOT TBB_INSTALL_RUNTIME_DIR)
|
||||
set(TBB_INSTALL_RUNTIME_DIR bin)
|
||||
endif()
|
||||
if(NOT TBB_INSTALL_LIBRARY_DIR)
|
||||
set(TBB_INSTALL_LIBRARY_DIR lib)
|
||||
endif()
|
||||
if(NOT TBB_INSTALL_ARCHIVE_DIR)
|
||||
set(TBB_INSTALL_ARCHIVE_DIR lib)
|
||||
endif()
|
||||
if(NOT TBB_INSTALL_INCLUDE_DIR)
|
||||
set(TBB_INSTALL_INCLUDE_DIR "${TBB_ROOT_DIR}/include")
|
||||
endif()
|
||||
|
||||
set(TBB_INCLUDES
|
||||
"${TBB_ROOT_DIR}/include"
|
||||
"${TBB_ROOT_DIR}/src"
|
||||
"${TBB_ROOT_DIR}/src/rml/include"
|
||||
${CMAKE_CURRENT_BINARY_DIR})
|
||||
|
||||
option(TBB_BUILD_SHARED "Build TBB shared library" ON)
|
||||
option(TBB_BUILD_STATIC "Build TBB static library" ON)
|
||||
option(TBB_BUILD_TBBMALLOC "Build TBB malloc library" ON)
|
||||
option(TBB_BUILD_TBBMALLOC_PROXY "Build TBB malloc proxy library" ON)
|
||||
option(TBB_BUILD_TESTS "Build TBB tests and enable testing infrastructure" ON)
|
||||
option(TBB_CI_BUILD "Is this a continuous integration build?" OFF)
|
||||
|
||||
if(APPLE)
|
||||
set(CMAKE_MACOSX_RPATH ON)
|
||||
endif()
|
||||
|
||||
file(GLOB tbb_src "${TBB_ROOT_DIR}/src/tbb/*.cpp" "${TBB_ROOT_DIR}/src/old/*.cpp")
|
||||
list(APPEND tbb_src ${TBB_ROOT_DIR}/src/rml/client/rml_tbb.cpp)
|
||||
file(GLOB to_remove "${TBB_ROOT_DIR}/src/old/test*.cpp")
|
||||
if (NOT "${to_remove}" STREQUAL "")
|
||||
list(REMOVE_ITEM tbb_src ${to_remove})
|
||||
endif()
|
||||
|
||||
set(tbbmalloc_static_src
|
||||
src/tbbmalloc/backend.cpp
|
||||
src/tbbmalloc/large_objects.cpp
|
||||
src/tbbmalloc/backref.cpp
|
||||
src/tbbmalloc/tbbmalloc.cpp
|
||||
src/tbbmalloc/frontend.cpp
|
||||
src/tbb/itt_notify.cpp)
|
||||
|
||||
set(tbbmalloc_src ${tbbmalloc_static_src})
|
||||
|
||||
set(tbbmalloc_proxy_src
|
||||
src/tbbmalloc/proxy.cpp
|
||||
src/tbbmalloc/tbb_function_replacement.cpp)
|
||||
|
||||
if (CMAKE_SYSTEM_PROCESSOR MATCHES "(i386|x86_64)")
|
||||
if (NOT APPLE AND NOT MINGW)
|
||||
add_definitions(-DDO_ITT_NOTIFY)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (APPLE)
|
||||
# Disable annoying "has no symbols" warnings
|
||||
set(CMAKE_C_ARCHIVE_CREATE "<CMAKE_AR> Scr <TARGET> <LINK_FLAGS> <OBJECTS>")
|
||||
set(CMAKE_CXX_ARCHIVE_CREATE "<CMAKE_AR> Scr <TARGET> <LINK_FLAGS> <OBJECTS>")
|
||||
set(CMAKE_C_ARCHIVE_FINISH "<CMAKE_RANLIB> -no_warning_for_no_symbols -c <TARGET>")
|
||||
set(CMAKE_CXX_ARCHIVE_FINISH "<CMAKE_RANLIB> -no_warning_for_no_symbols -c <TARGET>")
|
||||
endif()
|
||||
|
||||
macro(CHECK_CXX_COMPILER_AND_LINKER_FLAGS _RESULT _CXX_FLAGS _LINKER_FLAGS)
|
||||
set(CMAKE_REQUIRED_FLAGS ${_CXX_FLAGS})
|
||||
set(CMAKE_REQUIRED_LIBRARIES ${_LINKER_FLAGS})
|
||||
set(CMAKE_REQUIRED_QUIET TRUE)
|
||||
check_cxx_source_runs("#include <iostream>\nint main(int argc, char **argv) { std::cout << \"test\"; return 0; }" ${_RESULT})
|
||||
set(CMAKE_REQUIRED_FLAGS "")
|
||||
set(CMAKE_REQUIRED_LIBRARIES "")
|
||||
endmacro()
|
||||
|
||||
# Prefer libc++ in conjunction with Clang
|
||||
if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
|
||||
if (CMAKE_CXX_FLAGS MATCHES "-stdlib=libc\\+\\+")
|
||||
message(STATUS "TBB: using libc++.")
|
||||
else()
|
||||
CHECK_CXX_COMPILER_AND_LINKER_FLAGS(HAS_LIBCPP "-stdlib=libc++" "-stdlib=libc++")
|
||||
if (HAS_LIBCPP)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++ -D_LIBCPP_VERSION")
|
||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -stdlib=libc++")
|
||||
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -stdlib=libc++")
|
||||
message(STATUS "TBB: using libc++.")
|
||||
else()
|
||||
message(STATUS "TBB: NOT using libc++.")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (UNIX)
|
||||
add_definitions (-DUSE_PTHREAD)
|
||||
|
||||
check_cxx_compiler_flag ("-std=c++11" SUPPORTS_STDCXX11)
|
||||
if (SUPPORTS_STDCXX11)
|
||||
set (CMAKE_CXX_FLAGS "-std=c++11 ${CMAKE_CXX_FLAGS}")
|
||||
endif ()
|
||||
|
||||
check_cxx_compiler_flag ("-mrtm -Werror" SUPPORTS_MRTM)
|
||||
if (SUPPORTS_MRTM)
|
||||
set (CMAKE_CXX_FLAGS "-mrtm ${CMAKE_CXX_FLAGS}")
|
||||
endif ()
|
||||
|
||||
elseif(WIN32)
|
||||
if (MSVC)
|
||||
cmake_minimum_required (VERSION 3.1)
|
||||
enable_language(ASM_MASM)
|
||||
set(CMAKE_CXX_FLAGS "/GS- /Zc:wchar_t /Zc:forScope /DUSE_WINTHREAD ${CMAKE_CXX_FLAGS}")
|
||||
set(CMAKE_CXX_FLAGS "/D_CRT_SECURE_NO_DEPRECATE /D_WIN32_WINNT=0x0600 ${CMAKE_CXX_FLAGS}")
|
||||
check_cxx_compiler_flag ("/volatile:iso" SUPPORTS_VOLATILE_FLAG)
|
||||
if (SUPPORTS_VOLATILE_FLAG)
|
||||
set(CMAKE_CXX_FLAGS "/volatile:iso ${CMAKE_CXX_FLAGS}")
|
||||
endif ()
|
||||
set(CMAKE_CXX_FLAGS "/wd4267 /wd4800 /wd4146 /wd4244 /wd4577 /wd4018 ${CMAKE_CXX_FLAGS}")
|
||||
if (NOT CMAKE_SIZEOF_VOID_P)
|
||||
message(FATAL_ERROR "'CMAKE_SIZEOF_VOID_P' is undefined. Please delete your build directory and rerun CMake again!")
|
||||
endif()
|
||||
|
||||
# Pick the MASM assembly sources and assembler flags that match the target
# pointer width.
if (CMAKE_SIZEOF_VOID_P EQUAL 8)
  list(APPEND tbb_src "${TBB_ROOT_DIR}/src/tbb/intel64-masm/atomic_support.asm")
  list(APPEND tbb_src "${TBB_ROOT_DIR}/src/tbb/intel64-masm/itsx.asm")
  list(APPEND tbb_src "${TBB_ROOT_DIR}/src/tbb/intel64-masm/intel64_misc.asm")
  list(APPEND tbbmalloc_src "${TBB_ROOT_DIR}/src/tbb/intel64-masm/atomic_support.asm")
  set(CMAKE_ASM_MASM_FLAGS "/DEM64T=1 ${CMAKE_ASM_MASM_FLAGS}")
else()
  # Each source must be its own list element: two paths inside one quoted
  # string would be treated as a single (non-existent) file name. All paths
  # are rooted at TBB_ROOT_DIR, like the 64-bit branch above.
  list(APPEND tbb_src
    "${TBB_ROOT_DIR}/src/tbb/ia32-masm/atomic_support.asm"
    "${TBB_ROOT_DIR}/src/tbb/ia32-masm/itsx.asm"
    "${TBB_ROOT_DIR}/src/tbb/ia32-masm/lock_byte.asm")
  # Enable SAFESEH feature for assembly (x86 builds only).
  set(CMAKE_ASM_MASM_FLAGS "/safeseh ${CMAKE_ASM_MASM_FLAGS}")
endif()
|
||||
elseif (MINGW)
|
||||
add_definitions(-DUSE_WINTHREAD)
|
||||
add_definitions(-D_WIN32_WINNT=0x0502)
|
||||
set(CMAKE_CXX_FLAGS "-mthreads ${CMAKE_CXX_FLAGS}")
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
if (MSVC)
|
||||
set(ENABLE_RTTI "/EHsc /GR ")
|
||||
set(DISABLE_RTTI "/EHs- /GR- ")
|
||||
elseif (UNIX)
|
||||
set(ENABLE_RTTI "-frtti -fexceptions ")
|
||||
set(DISABLE_RTTI "-fno-rtti -fno-exceptions ")
|
||||
endif ()
|
||||
|
||||
##--------
|
||||
# - Added TBB_USE_GLIBCXX_VERSION macro to specify the version of GNU
|
||||
# libstdc++ when it cannot be properly recognized, e.g. when used
|
||||
# with Clang on Linux* OS. Inspired by a contribution from David A.
|
||||
if (NOT TBB_USE_GLIBCXX_VERSION AND UNIX AND NOT APPLE)
|
||||
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
|
||||
# using Clang
|
||||
string(REPLACE "." "0" TBB_USE_GLIBCXX_VERSION ${CMAKE_CXX_COMPILER_VERSION})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (TBB_USE_GLIBCXX_VERSION)
|
||||
add_definitions(-DTBB_USE_GLIBCXX_VERSION=${TBB_USE_GLIBCXX_VERSION})
|
||||
endif()
|
||||
|
||||
##-------
|
||||
|
||||
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
|
||||
check_cxx_compiler_flag ("-flifetime-dse=1" SUPPORTS_FLIFETIME)
|
||||
if (SUPPORTS_FLIFETIME)
|
||||
add_definitions(-flifetime-dse=1)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Linker export definitions
|
||||
if (APPLE)
|
||||
set (ARCH_PREFIX "mac")
|
||||
elseif(WIN32)
|
||||
set (ARCH_PREFIX "win")
|
||||
else()
|
||||
set (ARCH_PREFIX "lin")
|
||||
endif()
|
||||
|
||||
if (CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
set(ARCH_PREFIX "${ARCH_PREFIX}64")
|
||||
else()
|
||||
set(ARCH_PREFIX "${ARCH_PREFIX}32")
|
||||
endif()
|
||||
|
||||
if (MINGW)
|
||||
set (ARCH_PREFIX "${ARCH_PREFIX}-gcc")
|
||||
# there's no win32-gcc-tbb-export.def, use lin32-tbb-export.def
|
||||
execute_process (COMMAND ${CMAKE_COMMAND} -E copy ${TBB_ROOT_DIR}/src/tbb/lin32-tbb-export.def ${TBB_ROOT_DIR}/src/tbb/win32-gcc-tbb-export.def)
|
||||
endif()
|
||||
|
||||
if (MSVC)
|
||||
add_custom_command(OUTPUT tbb.def
|
||||
COMMAND ${CMAKE_CXX_COMPILER} /TC /EP ${TBB_ROOT_DIR}/src/tbb/${ARCH_PREFIX}-tbb-export.def -I ${TBB_ROOT_DIR}/include > tbb.def
|
||||
MAIN_DEPENDENCY ${TBB_ROOT_DIR}/src/tbb/${ARCH_PREFIX}-tbb-export.def
|
||||
COMMENT "Preprocessing tbb.def"
|
||||
)
|
||||
|
||||
add_custom_command(OUTPUT tbbmalloc.def
|
||||
COMMAND ${CMAKE_CXX_COMPILER} /TC /EP ${TBB_ROOT_DIR}/src/tbbmalloc/${ARCH_PREFIX}-tbbmalloc-export.def -I ${TBB_ROOT_DIR}/include > tbbmalloc.def
|
||||
MAIN_DEPENDENCY ${TBB_ROOT_DIR}/src/tbbmalloc/${ARCH_PREFIX}-tbbmalloc-export.def
|
||||
COMMENT "Preprocessing tbbmalloc.def"
|
||||
)
|
||||
else()
|
||||
add_custom_command(OUTPUT tbb.def
|
||||
COMMAND ${CMAKE_CXX_COMPILER} -xc++ -E ${TBB_ROOT_DIR}/src/tbb/${ARCH_PREFIX}-tbb-export.def -I ${TBB_ROOT_DIR}/include -o tbb.def
|
||||
MAIN_DEPENDENCY ${TBB_ROOT_DIR}/src/tbb/${ARCH_PREFIX}-tbb-export.def
|
||||
COMMENT "Preprocessing tbb.def"
|
||||
)
|
||||
|
||||
add_custom_command(OUTPUT tbbmalloc.def
|
||||
COMMAND ${CMAKE_CXX_COMPILER} -xc++ -E ${TBB_ROOT_DIR}/src/tbbmalloc/${ARCH_PREFIX}-tbbmalloc-export.def -I ${TBB_ROOT_DIR}/include -o tbbmalloc.def
|
||||
MAIN_DEPENDENCY ${TBB_ROOT_DIR}/src/tbbmalloc/${ARCH_PREFIX}-tbbmalloc-export.def
|
||||
COMMENT "Preprocessing tbbmalloc.def"
|
||||
)
|
||||
endif()
|
||||
|
||||
add_custom_target(tbb_def_files DEPENDS tbb.def tbbmalloc.def)
|
||||
|
||||
# TBB library
|
||||
if (TBB_BUILD_STATIC)
|
||||
add_library(tbb_static STATIC ${tbb_src})
|
||||
target_include_directories(tbb_static PRIVATE ${TBB_INCLUDES})
|
||||
set_property(TARGET tbb_static APPEND PROPERTY COMPILE_DEFINITIONS "__TBB_BUILD=1")
|
||||
set_property(TARGET tbb_static APPEND_STRING PROPERTY COMPILE_FLAGS ${ENABLE_RTTI})
|
||||
install(TARGETS tbb_static ARCHIVE DESTINATION ${TBB_INSTALL_ARCHIVE_DIR})
|
||||
if (MSVC)
|
||||
target_compile_definitions(tbb_static PUBLIC __TBB_NO_IMPLICIT_LINKAGE=1)
|
||||
endif()
|
||||
|
||||
if (UNIX AND NOT APPLE)
|
||||
target_link_libraries(tbb_static PUBLIC pthread dl)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (TBB_BUILD_SHARED)
|
||||
add_library(tbb SHARED ${tbb_src})
|
||||
target_include_directories(tbb PRIVATE ${TBB_INCLUDES})
|
||||
set_property(TARGET tbb APPEND PROPERTY COMPILE_DEFINITIONS "__TBB_BUILD=1")
|
||||
set_property(TARGET tbb APPEND_STRING PROPERTY COMPILE_FLAGS ${ENABLE_RTTI})
|
||||
add_dependencies(tbb tbb_def_files)
|
||||
|
||||
if (APPLE)
|
||||
set_property(TARGET tbb APPEND PROPERTY LINK_FLAGS "-Wl,-exported_symbols_list,\"${CMAKE_CURRENT_BINARY_DIR}/tbb.def\"")
|
||||
elseif (MSVC)
|
||||
set_property(TARGET tbb APPEND PROPERTY LINK_FLAGS "/DEF:\"${CMAKE_CURRENT_BINARY_DIR}/tbb.def\"")
|
||||
else ()
|
||||
set_property(TARGET tbb APPEND PROPERTY LINK_FLAGS "-Wl,-version-script,\"${CMAKE_CURRENT_BINARY_DIR}/tbb.def\"")
|
||||
endif()
|
||||
|
||||
install(TARGETS tbb
|
||||
LIBRARY DESTINATION ${TBB_INSTALL_LIBRARY_DIR}
|
||||
ARCHIVE DESTINATION ${TBB_INSTALL_ARCHIVE_DIR}
|
||||
RUNTIME DESTINATION ${TBB_INSTALL_RUNTIME_DIR})
|
||||
if (UNIX AND NOT APPLE)
|
||||
target_link_libraries(tbb PUBLIC pthread dl)
|
||||
endif()
|
||||
if (MSVC)
|
||||
target_compile_definitions(tbb PUBLIC __TBB_NO_IMPLICIT_LINKAGE=1)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
||||
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
|
||||
# Quench a warning on GCC
|
||||
set_source_files_properties(${TBB_ROOT_DIR}/src/tbb/governor.cpp COMPILE_FLAGS "-Wno-missing-field-initializers ")
|
||||
elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
|
||||
# Quench a warning on Clang
|
||||
set_source_files_properties(${TBB_ROOT_DIR}/src/tbb/itt_notify.cpp COMPILE_FLAGS "-Wno-varargs ")
|
||||
elseif(MSVC)
|
||||
# Quench a warning on MSVC
|
||||
set_source_files_properties(${TBB_ROOT_DIR}/src/tbb/scheduler.cpp COMPILE_FLAGS "/wd4458 ")
|
||||
endif()
|
||||
|
||||
if(TBB_BUILD_TBBMALLOC)
|
||||
# TBB malloc library
|
||||
if (TBB_BUILD_STATIC)
|
||||
add_library(tbbmalloc_static STATIC ${tbbmalloc_static_src})
|
||||
target_include_directories(tbbmalloc_static PRIVATE ${TBB_INCLUDES})
|
||||
set_property(TARGET tbbmalloc_static APPEND PROPERTY COMPILE_DEFINITIONS "__TBBMALLOC_BUILD=1")
|
||||
set_property(TARGET tbbmalloc_static APPEND_STRING PROPERTY COMPILE_FLAGS ${DISABLE_RTTI})
|
||||
if (MSVC)
|
||||
target_compile_definitions(tbbmalloc_static PUBLIC __TBB_NO_IMPLICIT_LINKAGE=1 __TBBMALLOC_NO_IMPLICIT_LINKAGE=1)
|
||||
endif()
|
||||
install(TARGETS tbbmalloc_static ARCHIVE DESTINATION ${TBB_INSTALL_ARCHIVE_DIR})
|
||||
endif()
|
||||
|
||||
if (TBB_BUILD_SHARED)
|
||||
add_library(tbbmalloc SHARED ${tbbmalloc_src})
|
||||
target_include_directories(tbbmalloc PRIVATE ${TBB_INCLUDES})
|
||||
set_property(TARGET tbbmalloc APPEND PROPERTY COMPILE_DEFINITIONS "__TBBMALLOC_BUILD=1")
|
||||
set_property(TARGET tbbmalloc APPEND_STRING PROPERTY COMPILE_FLAGS ${DISABLE_RTTI})
|
||||
add_dependencies(tbbmalloc tbb_def_files)
|
||||
if (APPLE)
|
||||
set_property(TARGET tbbmalloc APPEND PROPERTY LINK_FLAGS "-Wl,-exported_symbols_list,\"${CMAKE_CURRENT_BINARY_DIR}/tbbmalloc.def\"")
|
||||
elseif (MSVC)
|
||||
set_property(TARGET tbbmalloc APPEND PROPERTY LINK_FLAGS "/DEF:\"${CMAKE_CURRENT_BINARY_DIR}/tbbmalloc.def\"")
|
||||
else ()
|
||||
set_property(TARGET tbbmalloc APPEND PROPERTY LINK_FLAGS "-Wl,-version-script,\"${CMAKE_CURRENT_BINARY_DIR}/tbbmalloc.def\"")
|
||||
endif()
|
||||
if (MSVC)
|
||||
target_compile_definitions(tbbmalloc PUBLIC __TBB_NO_IMPLICIT_LINKAGE=1 __TBBMALLOC_NO_IMPLICIT_LINKAGE=1)
|
||||
endif()
|
||||
install(TARGETS tbbmalloc
|
||||
LIBRARY DESTINATION ${TBB_INSTALL_LIBRARY_DIR}
|
||||
ARCHIVE DESTINATION ${TBB_INSTALL_ARCHIVE_DIR}
|
||||
RUNTIME DESTINATION ${TBB_INSTALL_RUNTIME_DIR})
|
||||
if (UNIX AND NOT APPLE)
|
||||
target_link_libraries(tbbmalloc PUBLIC pthread dl)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(TBB_BUILD_TBBMALLOC_PROXY)
|
||||
# TBB malloc proxy library
|
||||
if (TBB_BUILD_STATIC)
|
||||
add_library(tbbmalloc_proxy_static STATIC ${tbbmalloc_proxy_src})
|
||||
set_property(TARGET tbbmalloc_proxy_static APPEND PROPERTY COMPILE_DEFINITIONS "__TBBMALLOC_BUILD=1")
|
||||
set_property(TARGET tbbmalloc_proxy_static APPEND_STRING PROPERTY COMPILE_FLAGS ${DISABLE_RTTI})
|
||||
install(TARGETS tbbmalloc_proxy_static ARCHIVE DESTINATION ${TBB_INSTALL_ARCHIVE_DIR})
|
||||
endif()
|
||||
|
||||
if (TBB_BUILD_SHARED)
|
||||
add_library(tbbmalloc_proxy SHARED ${tbbmalloc_proxy_src})
|
||||
set_property(TARGET tbbmalloc_proxy APPEND PROPERTY COMPILE_DEFINITIONS "__TBBMALLOC_BUILD=1")
|
||||
set_property(TARGET tbbmalloc_proxy APPEND_STRING PROPERTY COMPILE_FLAGS ${DISABLE_RTTI})
|
||||
target_link_libraries(tbbmalloc_proxy PUBLIC tbbmalloc)
|
||||
install(TARGETS tbbmalloc_proxy
|
||||
LIBRARY DESTINATION ${TBB_INSTALL_LIBRARY_DIR}
|
||||
ARCHIVE DESTINATION ${TBB_INSTALL_ARCHIVE_DIR}
|
||||
RUNTIME DESTINATION ${TBB_INSTALL_RUNTIME_DIR})
|
||||
if (UNIX AND NOT APPLE)
|
||||
target_link_libraries(tbbmalloc_proxy PUBLIC pthread dl)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
install(DIRECTORY "${TBB_ROOT_DIR}/include/tbb" DESTINATION ${TBB_INSTALL_INCLUDE_DIR})
|
||||
|
||||
# version_string.ver
|
||||
if (UNIX)
|
||||
execute_process (COMMAND date "+%a, %d %b %Y %H:%M:%S %z"
|
||||
OUTPUT_VARIABLE _configure_date
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
elseif (WIN32)
|
||||
execute_process (COMMAND cmd " /C date /T"
|
||||
OUTPUT_VARIABLE _configure_date
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
else ()
|
||||
set (_configure_date "Unknown")
|
||||
endif()
|
||||
include_directories (${CMAKE_BINARY_DIR})
|
||||
configure_file (extra/version_string.ver.in version_string.ver @ONLY)
|
@ -1,11 +0,0 @@
|
||||
#define __TBB_VERSION_STRINGS(N) \
|
||||
#N": BUILD_HOST @CMAKE_SYSTEM_NAME@" ENDL \
|
||||
#N": BUILD_OS @CMAKE_SYSTEM@" ENDL \
|
||||
#N": BUILD_KERNEL @CMAKE_SYSTEM_VERSION@" ENDL \
|
||||
#N": BUILD_GCC @CMAKE_CXX_COMPILER_ID@" ENDL \
|
||||
#N": BUILD_LIBC Unknown" ENDL \
|
||||
#N": BUILD_LD Unknown" ENDL \
|
||||
#N": BUILD_TARGET Unknown" ENDL \
|
||||
#N": BUILD_COMMAND Unknown" ENDL
|
||||
|
||||
#define __TBB_DATETIME "@_configure_date@"
|
@ -22,7 +22,7 @@ namespace {
|
||||
template <class T>
|
||||
struct Vec256 {
|
||||
static constexpr int size = 32 / sizeof(T);
|
||||
T values[32 / sizeof(T)];
|
||||
T values[32 / sizeof(T)] = {0};
|
||||
Vec256() {}
|
||||
Vec256(T val) {
|
||||
for (int i = 0; i != size; i++) {
|
||||
|
@ -23,7 +23,7 @@ void host_softmax(Tensor output, const Tensor& input, const int64_t dim) {
|
||||
int64_t outer_stride = dim_size * dim_stride;
|
||||
scalar_t* input_data_base = input.data<scalar_t>();
|
||||
scalar_t* output_data_base = output.data<scalar_t>();
|
||||
int64_t grain_size = std::min(internal::TBB_GRAIN_SIZE / dim_size, (int64_t)1);
|
||||
int64_t grain_size = std::min(internal::GRAIN_SIZE / dim_size, (int64_t)1);
|
||||
parallel_for(
|
||||
0, outer_size * inner_size, grain_size,
|
||||
[&](int64_t begin, int64_t end) {
|
||||
@ -80,7 +80,7 @@ void host_softmax_backward(
|
||||
scalar_t* gradInput_data_base = gI.data<scalar_t>();
|
||||
scalar_t* output_data_base = output.data<scalar_t>();
|
||||
scalar_t* gradOutput_data_base = grad.data<scalar_t>();
|
||||
int64_t grain_size = std::min(internal::TBB_GRAIN_SIZE / dim_size, (int64_t)1);
|
||||
int64_t grain_size = std::min(internal::GRAIN_SIZE / dim_size, (int64_t)1);
|
||||
parallel_for(
|
||||
0, outer_size * inner_size, grain_size, [&](int64_t begin, int64_t end) {
|
||||
for (int64_t i = begin; i < end; i++) {
|
||||
|
@ -9,12 +9,6 @@
|
||||
#include "ATen/cpu/vec256/vec256.h"
|
||||
#include "ATen/optional.h"
|
||||
|
||||
#ifdef __PPC64__
|
||||
using default_partitioner_type = tbb::simple_partitioner;
|
||||
#else
|
||||
using default_partitioner_type = tbb::affinity_partitioner;
|
||||
#endif
|
||||
|
||||
namespace at { namespace native { namespace {
|
||||
|
||||
using namespace vec256;
|
||||
@ -23,19 +17,22 @@ static inline int64_t round_down(int64_t a, int64_t m) {
|
||||
return a - (a % m);
|
||||
}
|
||||
|
||||
template<typename F>
|
||||
static void parallel_for(int64_t end, int64_t step, bool parallelize, F func) {
|
||||
template <typename F>
|
||||
static void _parallel_for(int64_t size, int64_t step, bool parallelize, F func) {
|
||||
if (parallelize) {
|
||||
tbb::parallel_for<int64_t>(0, end, step, func);
|
||||
parallel_for(0, size / step, 1, [func, step](int64_t begin, int64_t end) {
|
||||
int64_t k = begin * step;
|
||||
for (int64_t i = begin; i < end; i++, k += step) {
|
||||
func(k);
|
||||
}
|
||||
});
|
||||
} else {
|
||||
for (int64_t i = 0; i != end; i += step) {
|
||||
for (int64_t i = 0; i != size; i += step) {
|
||||
func(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static default_partitioner_type ap;
|
||||
|
||||
// Vectorized reduction defined by reduce operation `Op` with identity `ident`.
|
||||
// The reduction is built on top of reduce128, which reduces down a column
|
||||
// 128 bytes wide (WIDTH scalar elements). The width of 128 bytes is chosen
|
||||
@ -50,8 +47,6 @@ struct Reduction {
|
||||
using ReduceScalar = Op<scalar_t>;
|
||||
|
||||
static void apply(Tensor& res, const Tensor& self, at::optional<int64_t> dim) {
|
||||
internal::init_tbb_num_threads();
|
||||
|
||||
auto out = res.data<scalar_t>();
|
||||
auto data = self.data<scalar_t>();
|
||||
auto numel = self.numel();
|
||||
@ -71,8 +66,8 @@ struct Reduction {
|
||||
}
|
||||
}
|
||||
int64_t batch = numel / (n * stride);
|
||||
bool paralellize = batch * n > internal::TBB_GRAIN_SIZE;
|
||||
parallel_for(batch, 1, paralellize, [=](int64_t b) {
|
||||
bool paralellize = batch * n > internal::GRAIN_SIZE;
|
||||
_parallel_for(batch, 1, paralellize, [=](int64_t b) {
|
||||
if (stride == 1) {
|
||||
out[b] = reduce_all(&data[b * n], n);
|
||||
} else {
|
||||
@ -84,23 +79,17 @@ struct Reduction {
|
||||
static scalar_t reduce_all(const scalar_t* data, int64_t size) {
|
||||
int64_t k = size / WIDTH;
|
||||
|
||||
scalar_t sum;
|
||||
if (size > internal::TBB_GRAIN_SIZE) {
|
||||
sum = tbb::parallel_reduce(
|
||||
tbb::blocked_range<int64_t>(0, k, internal::TBB_GRAIN_SIZE / WIDTH),
|
||||
scalar_t(ident),
|
||||
[=](const tbb::blocked_range<int64_t>& r, scalar_t init) {
|
||||
scalar_t buf[WIDTH];
|
||||
reduce128(&data[r.begin() * WIDTH], buf, r.end() - r.begin(), WIDTH);
|
||||
return std::accumulate(buf, buf + WIDTH, init, ReduceScalar());
|
||||
},
|
||||
ReduceScalar(),
|
||||
ap);
|
||||
} else {
|
||||
scalar_t buf[WIDTH];
|
||||
reduce128(data, buf, k, WIDTH);
|
||||
sum = std::accumulate(buf, buf + WIDTH, scalar_t(ident), ReduceScalar());
|
||||
}
|
||||
scalar_t sum = parallel_reduce(
|
||||
0,
|
||||
k,
|
||||
internal::GRAIN_SIZE / WIDTH,
|
||||
(scalar_t)ident,
|
||||
[data](int64_t begin, int64_t end, scalar_t init) {
|
||||
scalar_t buf[WIDTH];
|
||||
reduce128(&data[begin * WIDTH], buf, end - begin, WIDTH);
|
||||
return std::accumulate(buf, buf + WIDTH, init, ReduceScalar());
|
||||
},
|
||||
ReduceScalar());
|
||||
|
||||
for (int64_t i = k * WIDTH; i != size; i++) {
|
||||
sum = ReduceScalar()(sum, data[i]);
|
||||
@ -127,8 +116,8 @@ struct Reduction {
|
||||
// Reduce a 2d matrix down each column. Stores the results in out[0 ... cols-1]
|
||||
static void reduce2d(const scalar_t* data, scalar_t* out, int64_t rows, int64_t cols, int64_t stride) {
|
||||
int64_t cols_rounded = round_down(cols, WIDTH);
|
||||
bool paralellize = cols * rows > internal::TBB_GRAIN_SIZE;
|
||||
parallel_for(cols_rounded, WIDTH, paralellize, [=](int64_t col) {
|
||||
bool paralellize = cols * rows > internal::GRAIN_SIZE;
|
||||
_parallel_for(cols_rounded, WIDTH, paralellize, [=](int64_t col) {
|
||||
reduce128(&data[col], &out[col], rows, stride);
|
||||
});
|
||||
|
||||
|
@ -14,7 +14,7 @@
|
||||
// compiled with AVX/AVX2 This is because of SSE-AVX transitions and a bug in
|
||||
// Glibc2.23 See https://bugs.launchpad.net/ubuntu/+source/glibc/+bug/1663280
|
||||
//
|
||||
// On grainsize: The grainsize is chosen to roughly get TBB_GRAIN_SIZE number of
|
||||
// On grainsize: The grainsize is chosen to roughly get GRAIN_SIZE number of
|
||||
// computations per task. Each task works across dim_size elements. 16 should be
|
||||
// a very rough approximation of the number of computations per dim_size element
|
||||
// by counting simple computations (*, +, -) as 1 and exp or log as 4.
|
||||
@ -30,7 +30,7 @@ inline void _vec_log_softmax_lastdim(
|
||||
int64_t dim_size) {
|
||||
using Vec = vec256::Vec256<scalar_t>;
|
||||
static constexpr int64_t CHUNK_SIZE = (128 / sizeof(scalar_t)) * Vec::size;
|
||||
int64_t grain_size = internal::TBB_GRAIN_SIZE / (16 * dim_size * CHUNK_SIZE);
|
||||
int64_t grain_size = internal::GRAIN_SIZE / (16 * dim_size * CHUNK_SIZE);
|
||||
if (grain_size < CHUNK_SIZE)
|
||||
grain_size = CHUNK_SIZE;
|
||||
|
||||
@ -93,7 +93,7 @@ inline void _vec_softmax_lastdim(
|
||||
int64_t outer_size,
|
||||
int64_t dim_size) {
|
||||
using Vec = vec256::Vec256<scalar_t>;
|
||||
int64_t grain_size = internal::TBB_GRAIN_SIZE / (16 * dim_size);
|
||||
int64_t grain_size = internal::GRAIN_SIZE / (16 * dim_size);
|
||||
if (grain_size < 1)
|
||||
grain_size = 1;
|
||||
|
||||
@ -134,7 +134,7 @@ inline void _vec_host_softmax_backward_lastdim(
|
||||
int64_t outer_size,
|
||||
int64_t dim_size) {
|
||||
using Vec = vec256::Vec256<scalar_t>;
|
||||
int64_t grain_size = internal::TBB_GRAIN_SIZE / (16 * dim_size);
|
||||
int64_t grain_size = internal::GRAIN_SIZE / (16 * dim_size);
|
||||
if (grain_size < 1)
|
||||
grain_size = 1;
|
||||
|
||||
|
1
setup.py
1
setup.py
@ -355,7 +355,6 @@ class build_deps(PytorchCommand):
|
||||
check_file(os.path.join(third_party_path, "nanopb", "CMakeLists.txt"))
|
||||
check_file(os.path.join(third_party_path, "pybind11", "CMakeLists.txt"))
|
||||
check_file(os.path.join(third_party_path, 'cpuinfo', 'CMakeLists.txt'))
|
||||
check_file(os.path.join(third_party_path, 'tbb', 'Makefile'))
|
||||
check_file(os.path.join(third_party_path, 'catch', 'CMakeLists.txt'))
|
||||
check_file(os.path.join(third_party_path, 'onnx', 'CMakeLists.txt'))
|
||||
|
||||
|
1
third_party/tbb
vendored
1
third_party/tbb
vendored
Submodule third_party/tbb deleted from 633b01ad27
@ -27,7 +27,7 @@ git fetch fullrepo
|
||||
git checkout -b temporary-split-branch fullrepo/master
|
||||
# Cribbed from https://stackoverflow.com/questions/2982055/detach-many-subdirectories-into-a-new-separate-git-repository
|
||||
# and https://stackoverflow.com/questions/42355621/git-filter-branch-moving-a-folder-with-index-filter-does-not-work
|
||||
git filter-branch -f --index-filter 'git rm --cached -qr --ignore-unmatch -- . && git reset -q $GIT_COMMIT -- aten cmake third_party/tbb third_party/catch third_party/cpuinfo && (git ls-files -s | sed "s-.travis.aten.yml-.travis.yml-" | sed "s-.gitmodules.aten-.gitmodules-" | git update-index --index-info)'
|
||||
git filter-branch -f --index-filter 'git rm --cached -qr --ignore-unmatch -- . && git reset -q $GIT_COMMIT -- aten cmake third_party/catch third_party/cpuinfo && (git ls-files -s | sed "s-.travis.aten.yml-.travis.yml-" | sed "s-.gitmodules.aten-.gitmodules-" | git update-index --index-info)'
|
||||
git checkout master
|
||||
git merge temporary-split-branch
|
||||
git push
|
||||
|
Reference in New Issue
Block a user