From 481b6d026848b6d94edc0d84f32fb59c50590625 Mon Sep 17 00:00:00 2001 From: Ilia Cherniavskii Date: Mon, 6 May 2019 19:25:55 -0700 Subject: [PATCH] Allow a non-OpenMP based build (#19749) Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/19749 ghimport-source-id: a6636c0acddbdc5fd5b0dcb20b9f80cbdb9159b9 Differential Revision: D15141993 Pulled By: ilia-cher fbshipit-source-id: 96085608398b2a4c97c68b2948f5184d07f9ad3d --- .jenkins/pytorch/test.sh | 1 + aten/src/ATen/Parallel.cpp | 39 ++++++++ aten/src/ATen/Parallel.h | 3 + aten/src/ATen/Version.cpp | 121 +++++++++++++++--------- aten/src/ATen/Version.h | 6 ++ aten/src/ATen/test/thread_init_test.cpp | 8 +- binaries/CMakeLists.txt | 5 + binaries/parallel_info.cc | 41 ++++++++ cmake/Modules/FindMKLDNN.cmake | 2 +- docs/source/__config__.rst | 1 + setup.py | 9 ++ test/test_torch.py | 3 + tools/build_pytorch_libs.py | 10 ++ torch/CMakeLists.txt | 2 +- torch/__config__.py | 4 + torch/csrc/Module.cpp | 8 ++ 16 files changed, 211 insertions(+), 52 deletions(-) create mode 100644 binaries/parallel_info.cc diff --git a/.jenkins/pytorch/test.sh b/.jenkins/pytorch/test.sh index 4143e923df15..03c96cb16e54 100755 --- a/.jenkins/pytorch/test.sh +++ b/.jenkins/pytorch/test.sh @@ -203,6 +203,7 @@ test_xla() { } (cd test && python -c "import torch; print(torch.__config__.show())") +(cd test && python -c "import torch; print(torch.__config__.parallel_info())") if [[ "${BUILD_ENVIRONMENT}" == *xla* ]]; then test_torchvision diff --git a/aten/src/ATen/Parallel.cpp b/aten/src/ATen/Parallel.cpp index b79bb99bb33b..0a965f7dba06 100644 --- a/aten/src/ATen/Parallel.cpp +++ b/aten/src/ATen/Parallel.cpp @@ -1,6 +1,10 @@ #include +#include +#include + #include +#include #ifdef TH_BLAS_MKL #include @@ -60,6 +64,41 @@ size_t get_num_threads() { #endif } +namespace { +const char* get_env_var(const char* var_name) { + const char* value = std::getenv(var_name); + return value ? 
value : "[not set]"; +} +} + +std::string get_parallel_info() { + std::ostringstream ss; + + ss << "ATen/Parallel:\n\tat::get_num_threads() : " + << at::get_num_threads() << std::endl; + + ss << at::get_openmp_version() << std::endl; +#ifdef _OPENMP + ss << "\tomp_get_max_threads() : " << omp_get_max_threads() << std::endl; +#endif + + ss << at::get_mkl_version() << std::endl; +#ifdef TH_BLAS_MKL + ss << "\tmkl_get_max_threads() : " << mkl_get_max_threads() << std::endl; +#endif + + ss << at::get_mkldnn_version() << std::endl; + + ss << "std::thread::hardware_concurrency() : " + << std::thread::hardware_concurrency() << std::endl; + + ss << "Environment variables:" << std::endl; + ss << "\tOMP_NUM_THREADS : " << get_env_var("OMP_NUM_THREADS") << std::endl; + ss << "\tMKL_NUM_THREADS : " << get_env_var("MKL_NUM_THREADS") << std::endl; + + return ss.str(); +} + PTThreadPool::PTThreadPool( std::size_t pool_size, int numa_node_id) diff --git a/aten/src/ATen/Parallel.h b/aten/src/ATen/Parallel.h index d7a3b893017d..087a042e8470 100644 --- a/aten/src/ATen/Parallel.h +++ b/aten/src/ATen/Parallel.h @@ -143,6 +143,9 @@ inline scalar_t parallel_reduce( } } +// Returns a detailed string describing parallelization settings +CAFFE2_API std::string get_parallel_info(); + class CAFFE2_API PTThreadPool : public c10::ThreadPool { public: explicit PTThreadPool( diff --git a/aten/src/ATen/Version.cpp b/aten/src/ATen/Version.cpp index 6a2d727ba78b..3c010874df70 100644 --- a/aten/src/ATen/Version.cpp +++ b/aten/src/ATen/Version.cpp @@ -16,6 +16,78 @@ namespace at { +std::string get_mkl_version() { + std::string version; + #if AT_MKL_ENABLED() + { + // Magic buffer number is from MKL documentation + // https://software.intel.com/en-us/mkl-developer-reference-c-mkl-get-version-string + char buf[198]; + mkl_get_version_string(buf, 198); + version = buf; + } + #else + version = "MKL not found"; + #endif + return version; +} + +std::string get_mkldnn_version() { + std::ostringstream ss; + 
#if AT_MKLDNN_ENABLED() + // Cribbed from mkl-dnn/src/common/verbose.cpp + // Too bad: can't get ISA info conveniently :( + // Apparently no way to get ideep version? + // https://github.com/intel/ideep/issues/29 + { + const mkldnn_version_t* ver = mkldnn_version(); + ss << "Intel(R) MKL-DNN v" << ver->major << "." << ver->minor << "." << ver->patch + << " (Git Hash " << ver->hash << ")"; + } + #else + ss << "MKLDNN not found"; + #endif + return ss.str(); +} + +std::string get_openmp_version() { + std::ostringstream ss; + #ifdef _OPENMP + { + ss << "OpenMP " << _OPENMP; + // Reference: + // https://stackoverflow.com/questions/1304363/how-to-check-the-version-of-openmp-on-linux + const char* ver_str = nullptr; + switch (_OPENMP) { + case 200505: + ver_str = "2.5"; + break; + case 200805: + ver_str = "3.0"; + break; + case 201107: + ver_str = "3.1"; + break; + case 201307: + ver_str = "4.0"; + break; + case 201511: + ver_str = "4.5"; + break; + default: + ver_str = nullptr; + break; + } + if (ver_str) { + ss << " (a.k.a. OpenMP " << ver_str << ")"; + } + } + #else + ss << "OpenMP not found"; + #endif + return ss.str(); +} + std::string show_config() { std::ostringstream ss; ss << "PyTorch built with:\n"; // TODO add the version of PyTorch @@ -42,58 +114,15 @@ std::string show_config() { #endif #if AT_MKL_ENABLED() - { - // Magic buffer number is from MKL documentation - // https://software.intel.com/en-us/mkl-developer-reference-c-mkl-get-version-string - char buf[198]; - mkl_get_version_string(buf, 198); - ss << " - " << buf << "\n"; - } + ss << " - " << get_mkl_version() << "\n"; #endif #if AT_MKLDNN_ENABLED() - // Cribbed from mkl-dnn/src/common/verbose.cpp - // Too bad: can't get ISA info conveniently :( - // Apparently no way to get ideep version? - // https://github.com/intel/ideep/issues/29 - { - const mkldnn_version_t* ver = mkldnn_version(); - ss << " - Intel(R) MKL-DNN v" << ver->major << "." << ver->minor << "." 
<< ver->patch - << " (Git Hash " << ver->hash << ")\n"; - } + ss << " - " << get_mkldnn_version() << "\n"; #endif #ifdef _OPENMP - { - ss << " - OpenMP " << _OPENMP; - // Reference: - // https://stackoverflow.com/questions/1304363/how-to-check-the-version-of-openmp-on-linux - const char* ver_str = nullptr; - switch (_OPENMP) { - case 200505: - ver_str = "2.5"; - break; - case 200805: - ver_str = "3.0"; - break; - case 201107: - ver_str = "3.1"; - break; - case 201307: - ver_str = "4.0"; - break; - case 201511: - ver_str = "4.5"; - break; - default: - ver_str = nullptr; - break; - } - if (ver_str) { - ss << " (a.k.a. OpenMP " << ver_str << ")"; - } - ss << "\n"; - } + ss << " - " << get_openmp_version() << "\n"; #endif #ifdef USE_LAPACK diff --git a/aten/src/ATen/Version.h b/aten/src/ATen/Version.h index 0385177a6373..18fd31d3ed87 100644 --- a/aten/src/ATen/Version.h +++ b/aten/src/ATen/Version.h @@ -5,4 +5,10 @@ namespace at { /// Returns a detailed string describing the configuration PyTorch. 
CAFFE2_API std::string show_config(); +CAFFE2_API std::string get_mkl_version(); + +CAFFE2_API std::string get_mkldnn_version(); + +CAFFE2_API std::string get_openmp_version(); + } // namespace at diff --git a/aten/src/ATen/test/thread_init_test.cpp b/aten/src/ATen/test/thread_init_test.cpp index adbb324592a7..1c0d8576e32d 100644 --- a/aten/src/ATen/test/thread_init_test.cpp +++ b/aten/src/ATen/test/thread_init_test.cpp @@ -28,15 +28,15 @@ int main() { t1.join(); at::set_num_threads(4); - std::thread t2(test, 4); - std::thread t3(test, 4); - std::thread t4(test, 4); + std::thread t2(test, at::get_num_threads()); + std::thread t3(test, at::get_num_threads()); + std::thread t4(test, at::get_num_threads()); t4.join(); t3.join(); t2.join(); at::set_num_threads(5); - test(5); + test(at::get_num_threads()); return 0; } diff --git a/binaries/CMakeLists.txt b/binaries/CMakeLists.txt index 81320d21fb4c..0f656ff898dd 100644 --- a/binaries/CMakeLists.txt +++ b/binaries/CMakeLists.txt @@ -2,6 +2,11 @@ caffe2_binary_target("convert_caffe_image_db.cc") caffe2_binary_target("convert_db.cc") caffe2_binary_target("make_cifar_db.cc") caffe2_binary_target("make_mnist_db.cc") +if (NOT ANDROID) + caffe2_binary_target("parallel_info.cc") + target_include_directories(parallel_info PUBLIC + ${CMAKE_BINARY_DIR}/aten/src) # provides "ATen/TypeExtendedInterface.h" to ATen.h +endif() caffe2_binary_target("predictor_verifier.cc") caffe2_binary_target("print_registered_core_operators.cc") caffe2_binary_target("run_plan.cc") diff --git a/binaries/parallel_info.cc b/binaries/parallel_info.cc new file mode 100644 index 000000000000..eb12278a29d9 --- /dev/null +++ b/binaries/parallel_info.cc @@ -0,0 +1,41 @@ +/** + * Copyright (c) 2016-present, Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ATen/Parallel.h" + +#include <iostream> +#include <sstream> + +#ifdef __linux__ +#include <sys/types.h> +#include <unistd.h> +#endif + +int main(int argc, char** argv) { + at::init_num_threads(); + + std::cout << at::get_parallel_info() << std::endl; + +# ifdef __linux__ + std::ostringstream cmd; + cmd << "lsof -p " << getpid() << " | grep .so"; + std::cout << "Loaded .so:" << std::endl; + std::cout << cmd.str() << std::endl; + std::system(cmd.str().c_str()); +# endif + + return 0; +} diff --git a/cmake/Modules/FindMKLDNN.cmake b/cmake/Modules/FindMKLDNN.cmake index 27a0ed3cd34c..393b64c5e20f 100644 --- a/cmake/Modules/FindMKLDNN.cmake +++ b/cmake/Modules/FindMKLDNN.cmake @@ -89,7 +89,7 @@ ENDIF(MKL_FOUND) IF(MKL_FOUND) SET(MKL_cmake_included TRUE) - SET(MKLDNN_THREADING "OMP:COMP" CACHE STRING "" FORCE) + SET(MKLDNN_THREADING "OMP:COMP" CACHE STRING "") ENDIF(MKL_FOUND) SET(WITH_TEST FALSE CACHE BOOL "" FORCE) SET(WITH_EXAMPLE FALSE CACHE BOOL "" FORCE) diff --git a/docs/source/__config__.rst b/docs/source/__config__.rst index fb89c33fcb0e..e4a6ac890493 100644 --- a/docs/source/__config__.rst +++ b/docs/source/__config__.rst @@ -4,3 +4,4 @@ torch.__config__ .. automodule:: torch.__config__ .. autofunction:: show +..
autofunction:: parallel_info diff --git a/setup.py b/setup.py index 6e97f61c08e5..441bc08d7082 100644 --- a/setup.py +++ b/setup.py @@ -42,6 +42,9 @@ # USE_MKLDNN=0 # disables use of MKLDNN # +# MKLDNN_THREADING +# MKL-DNN threading mode (https://github.com/intel/mkl-dnn/) +# # USE_NNPACK=0 # disables NNPACK build # @@ -64,6 +67,9 @@ # USE_OPENCV # enables use of OpenCV for additional operators # +# USE_OPENMP=0 +# disables use of OpenMP for parallelization +# # USE_FFMPEG # enables use of ffmpeg for additional operators # @@ -96,6 +102,9 @@ # then the build will fail if the requested BLAS is not found, otherwise # the BLAS will be chosen based on what is found on your system. # +# MKL_SEQ=1 +# chooses a sequential version of MKL library (in case of BLAS=MKL) +# # USE_FBGEMM # Enables use of FBGEMM # diff --git a/test/test_torch.py b/test/test_torch.py index 0226c8b87c37..fb94a8e66951 100644 --- a/test/test_torch.py +++ b/test/test_torch.py @@ -10996,6 +10996,9 @@ tensor([[[1., 1., 1., ..., 1., 1., 1.], # We can't usefully test the output; just make sure this doesn't crash torch.__config__.show() + def test_parallel_info(self): + torch.__config__.parallel_info() + @staticmethod def _test_bincount(self, device): # negative input throws diff --git a/tools/build_pytorch_libs.py b/tools/build_pytorch_libs.py index 432fed3bc809..7c4fd2e03751 100644 --- a/tools/build_pytorch_libs.py +++ b/tools/build_pytorch_libs.py @@ -208,6 +208,16 @@ def run_cmake(version, USE_GFLAGS=os.getenv('USE_GFLAGS'), WERROR=os.getenv('WERROR')) + if os.getenv('USE_OPENMP'): + cmake_defines(cmake_args, USE_OPENMP=check_env_flag('USE_OPENMP')) + + if os.getenv('MKL_SEQ'): + cmake_defines(cmake_args, INTEL_MKL_SEQUENTIAL=check_env_flag('MKL_SEQ')) + + mkldnn_threading = os.getenv('MKLDNN_THREADING') + if mkldnn_threading: + cmake_defines(cmake_args, MKLDNN_THREADING=mkldnn_threading) + if USE_GLOO_IBVERBS: cmake_defines(cmake_args, USE_IBVERBS="1", USE_GLOO_IBVERBS="1") diff --git 
a/torch/CMakeLists.txt b/torch/CMakeLists.txt index d3eb13ae240c..874a6e917ac2 100644 --- a/torch/CMakeLists.txt +++ b/torch/CMakeLists.txt @@ -337,7 +337,7 @@ endif() target_link_libraries(torch caffe2_library) find_package(OpenMP QUIET) -if(OPENMP_FOUND) +if(USE_OPENMP AND OPENMP_FOUND) message(STATUS "pytorch is compiling with OpenMP. \n" "OpenMP CXX_FLAGS: ${OpenMP_CXX_FLAGS}. \n" "OpenMP libraries: ${OpenMP_CXX_LIBRARIES}.") diff --git a/torch/__config__.py b/torch/__config__.py index cb482645bebb..e4c3fde9ec3c 100644 --- a/torch/__config__.py +++ b/torch/__config__.py @@ -11,3 +11,7 @@ def show(): # TODO: In principle, we could provide more structured version/config # information here. We're not for now; considering doing so if someone # asks for it. + +def parallel_info(): + r"""Returns a detailed string describing the parallelization settings""" + return torch._C._parallel_info() diff --git a/torch/csrc/Module.cpp b/torch/csrc/Module.cpp index fcd23f308d1b..b0658e824213 100644 --- a/torch/csrc/Module.cpp +++ b/torch/csrc/Module.cpp @@ -306,6 +306,13 @@ static PyObject *THPModule_showConfig(PyObject *module) END_HANDLE_TH_ERRORS } +static PyObject *THPModule_parallelInfo(PyObject *module) +{ + HANDLE_TH_ERRORS + return THPUtils_packString(at::get_parallel_info()); + END_HANDLE_TH_ERRORS +} + void DLPack_Capsule_Destructor(PyObject* data) { HANDLE_TH_ERRORS DLManagedTensor * dlMTensor = (DLManagedTensor *)PyCapsule_GetPointer(data, "dltensor"); @@ -441,6 +448,7 @@ static PyMethodDef TorchMethods[] = { {"_crash_if_csrc_ubsan", (PyCFunction)THPModule_crashIfCsrcUBSAN, METH_O, nullptr}, {"_crash_if_aten_asan", (PyCFunction)THPModule_crashIfATenASAN, METH_O, nullptr}, {"_show_config", (PyCFunction)THPModule_showConfig, METH_NOARGS, nullptr}, + {"_parallel_info", (PyCFunction)THPModule_parallelInfo, METH_NOARGS, nullptr}, {"_set_backcompat_broadcast_warn", (PyCFunction)THPModule_setBackcompatBroadcastWarn, METH_O, nullptr}, {"_get_backcompat_broadcast_warn",
(PyCFunction)THPModule_getBackcompatBroadcastWarn, METH_NOARGS, nullptr}, {"_set_backcompat_keepdim_warn", (PyCFunction)THPModule_setBackcompatKeepdimWarn, METH_O, nullptr},