Allow a non-OpenMP based build (#19749)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/19749
ghimport-source-id: a6636c0acddbdc5fd5b0dcb20b9f80cbdb9159b9

Differential Revision: D15141993

Pulled By: ilia-cher

fbshipit-source-id: 96085608398b2a4c97c68b2948f5184d07f9ad3d
Ilia Cherniavskii
2019-05-06 19:25:55 -07:00
committed by Facebook Github Bot
parent 8c97f0b19e
commit 481b6d0268
16 changed files with 211 additions and 52 deletions

View File

@@ -203,6 +203,7 @@ test_xla() {
}
(cd test && python -c "import torch; print(torch.__config__.show())")
(cd test && python -c "import torch; print(torch.__config__.parallel_info())")
if [[ "${BUILD_ENVIRONMENT}" == *xla* ]]; then
test_torchvision

View File

@@ -1,6 +1,10 @@
#include <ATen/Parallel.h>
#include <ATen/Config.h>
#include <ATen/Version.h>
#include <atomic>
#include <sstream>
#ifdef TH_BLAS_MKL
#include <mkl.h>
@@ -60,6 +64,41 @@ size_t get_num_threads() {
#endif
}
namespace {
const char* get_env_var(const char* var_name) {
const char* value = std::getenv(var_name);
return value ? value : "[not set]";
}
}
std::string get_parallel_info() {
std::ostringstream ss;
ss << "ATen/Parallel:\n\tat::get_num_threads() : "
<< at::get_num_threads() << std::endl;
ss << at::get_openmp_version() << std::endl;
#ifdef _OPENMP
ss << "\tomp_get_max_threads() : " << omp_get_max_threads() << std::endl;
#endif
ss << at::get_mkl_version() << std::endl;
#ifdef TH_BLAS_MKL
ss << "\tmkl_get_max_threads() : " << mkl_get_max_threads() << std::endl;
#endif
ss << at::get_mkldnn_version() << std::endl;
ss << "std::thread::hardware_concurrency() : "
<< std::thread::hardware_concurrency() << std::endl;
ss << "Environment variables:" << std::endl;
ss << "\tOMP_NUM_THREADS : " << get_env_var("OMP_NUM_THREADS") << std::endl;
ss << "\tMKL_NUM_THREADS : " << get_env_var("MKL_NUM_THREADS") << std::endl;
return ss.str();
}
PTThreadPool::PTThreadPool(
std::size_t pool_size,
int numa_node_id)

View File

@@ -143,6 +143,9 @@ inline scalar_t parallel_reduce(
}
}
// Returns a detailed string describing parallelization settings
CAFFE2_API std::string get_parallel_info();
class CAFFE2_API PTThreadPool : public c10::ThreadPool {
public:
explicit PTThreadPool(

View File

@@ -16,6 +16,78 @@
namespace at {
std::string get_mkl_version() {
std::string version;
#if AT_MKL_ENABLED()
{
// Magic buffer number is from MKL documentation
// https://software.intel.com/en-us/mkl-developer-reference-c-mkl-get-version-string
char buf[198];
mkl_get_version_string(buf, 198);
version = buf;
}
#else
version = "MKL not found";
#endif
return version;
}
std::string get_mkldnn_version() {
std::ostringstream ss;
#if AT_MKLDNN_ENABLED()
// Cribbed from mkl-dnn/src/common/verbose.cpp
// Too bad: can't get ISA info conveniently :(
// Apparently no way to get ideep version?
// https://github.com/intel/ideep/issues/29
{
const mkldnn_version_t* ver = mkldnn_version();
ss << "Intel(R) MKL-DNN v" << ver->major << "." << ver->minor << "." << ver->patch
<< " (Git Hash " << ver->hash << ")";
}
#else
ss << "MKLDNN not found";
#endif
return ss.str();
}
std::string get_openmp_version() {
std::ostringstream ss;
#ifdef _OPENMP
{
ss << "OpenMP " << _OPENMP;
// Reference:
// https://stackoverflow.com/questions/1304363/how-to-check-the-version-of-openmp-on-linux
const char* ver_str = nullptr;
switch (_OPENMP) {
case 200505:
ver_str = "2.5";
break;
case 200805:
ver_str = "3.0";
break;
case 201107:
ver_str = "3.1";
break;
case 201307:
ver_str = "4.0";
break;
case 201511:
ver_str = "4.5";
break;
default:
ver_str = nullptr;
break;
}
if (ver_str) {
ss << " (a.k.a. OpenMP " << ver_str << ")";
}
}
#else
ss << "OpenMP not found";
#endif
return ss.str();
}
std::string show_config() {
std::ostringstream ss;
ss << "PyTorch built with:\n"; // TODO add the version of PyTorch
@@ -42,58 +42,15 @@ std::string show_config() {
#endif
#if AT_MKL_ENABLED()
- {
- // Magic buffer number is from MKL documentation
- // https://software.intel.com/en-us/mkl-developer-reference-c-mkl-get-version-string
- char buf[198];
- mkl_get_version_string(buf, 198);
- ss << " - " << buf << "\n";
- }
+ ss << " - " << get_mkl_version() << "\n";
#endif
#if AT_MKLDNN_ENABLED()
- // Cribbed from mkl-dnn/src/common/verbose.cpp
- // Too bad: can't get ISA info conveniently :(
- // Apparently no way to get ideep version?
- // https://github.com/intel/ideep/issues/29
- {
- const mkldnn_version_t* ver = mkldnn_version();
- ss << " - Intel(R) MKL-DNN v" << ver->major << "." << ver->minor << "." << ver->patch
- << " (Git Hash " << ver->hash << ")\n";
- }
+ ss << " - " << get_mkldnn_version() << "\n";
#endif
#ifdef _OPENMP
- {
- ss << " - OpenMP " << _OPENMP;
- // Reference:
- // https://stackoverflow.com/questions/1304363/how-to-check-the-version-of-openmp-on-linux
- const char* ver_str = nullptr;
- switch (_OPENMP) {
- case 200505:
- ver_str = "2.5";
- break;
- case 200805:
- ver_str = "3.0";
- break;
- case 201107:
- ver_str = "3.1";
- break;
- case 201307:
- ver_str = "4.0";
- break;
- case 201511:
- ver_str = "4.5";
- break;
- default:
- ver_str = nullptr;
- break;
- }
- if (ver_str) {
- ss << " (a.k.a. OpenMP " << ver_str << ")";
- }
- ss << "\n";
- }
+ ss << " - " << get_openmp_version() << "\n";
#endif
#ifdef USE_LAPACK

View File

@@ -5,4 +5,10 @@ namespace at {
/// Returns a detailed string describing the configuration of PyTorch.
CAFFE2_API std::string show_config();
CAFFE2_API std::string get_mkl_version();
CAFFE2_API std::string get_mkldnn_version();
CAFFE2_API std::string get_openmp_version();
} // namespace at

View File

@@ -28,15 +28,15 @@ int main() {
t1.join();
at::set_num_threads(4);
- std::thread t2(test, 4);
- std::thread t3(test, 4);
- std::thread t4(test, 4);
+ std::thread t2(test, at::get_num_threads());
+ std::thread t3(test, at::get_num_threads());
+ std::thread t4(test, at::get_num_threads());
t4.join();
t3.join();
t2.join();
at::set_num_threads(5);
- test(5);
+ test(at::get_num_threads());
return 0;
}
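
Aside (not part of the diff): the hunk above replaces the hardcoded thread counts with at::get_num_threads() because a build without OpenMP may ignore the requested count. The same getter/setter pair is exposed in Python; a minimal sketch:

# Sketch: in a non-OpenMP build, set_num_threads may be a no-op,
# so read the effective value back instead of assuming it.
import torch
torch.set_num_threads(4)
print(torch.get_num_threads())  # not guaranteed to be 4 without OpenMP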

View File

@@ -2,6 +2,11 @@ caffe2_binary_target("convert_caffe_image_db.cc")
caffe2_binary_target("convert_db.cc")
caffe2_binary_target("make_cifar_db.cc")
caffe2_binary_target("make_mnist_db.cc")
if (NOT ANDROID)
caffe2_binary_target("parallel_info.cc")
target_include_directories(parallel_info PUBLIC
${CMAKE_BINARY_DIR}/aten/src) # provides "ATen/TypeExtendedInterface.h" to ATen.h
endif()
caffe2_binary_target("predictor_verifier.cc")
caffe2_binary_target("print_registered_core_operators.cc")
caffe2_binary_target("run_plan.cc")

binaries/parallel_info.cc Normal file
View File

@@ -0,0 +1,41 @@
/**
* Copyright (c) 2016-present, Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "ATen/Parallel.h"
#include <iostream>
#include <sstream>
#ifdef __linux__
#include <sys/types.h>
#include <unistd.h>
#endif
int main(int argc, char** argv) {
at::init_num_threads();
std::cout << at::get_parallel_info() << std::endl;
# ifdef __linux__
std::ostringstream cmd;
cmd << "lsof -p " << getpid() << " | grep .so";
std::cout << "Loaded .so:" << std::endl;
std::cout << cmd.str() << std::endl;
std::system(cmd.str().c_str());
# endif
return 0;
}
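
Aside (not part of the diff): a hedged sketch of driving this new diagnostic binary from Python. The build/bin location is an assumption about the default CMake output layout, not something this patch guarantees.

# Sketch: run the new parallel_info binary and let it print its report.
# The path is an assumption (typical CMake layout); adjust to your build tree.
import subprocess
subprocess.run(["./build/bin/parallel_info"], check=True)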

View File

@@ -89,7 +89,7 @@ ENDIF(MKL_FOUND)
IF(MKL_FOUND)
SET(MKL_cmake_included TRUE)
- SET(MKLDNN_THREADING "OMP:COMP" CACHE STRING "" FORCE)
+ SET(MKLDNN_THREADING "OMP:COMP" CACHE STRING "")
ENDIF(MKL_FOUND)
SET(WITH_TEST FALSE CACHE BOOL "" FORCE)
SET(WITH_EXAMPLE FALSE CACHE BOOL "" FORCE)

View File

@@ -4,3 +4,4 @@ torch.__config__
.. automodule:: torch.__config__
.. autofunction:: show
.. autofunction:: parallel_info

View File

@@ -42,6 +42,9 @@
# USE_MKLDNN=0
# disables use of MKLDNN
#
# MKLDNN_THREADING
# MKL-DNN threading mode (https://github.com/intel/mkl-dnn/)
#
# USE_NNPACK=0
# disables NNPACK build
#
@@ -64,6 +67,9 @@
# USE_OPENCV
# enables use of OpenCV for additional operators
#
# USE_OPENMP=0
# disables use of OpenMP for parallelization
#
# USE_FFMPEG
# enables use of ffmpeg for additional operators
#
@@ -96,6 +102,9 @@
# then the build will fail if the requested BLAS is not found, otherwise
# the BLAS will be chosen based on what is found on your system.
#
# MKL_SEQ=1
# chooses a sequential version of MKL library (in case of BLAS=MKL)
#
# USE_FBGEMM
# Enables use of FBGEMM
#
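
Aside (not part of the diff): a hedged sketch of combining the new build knobs when invoking the build from Python. USE_OPENMP=0 and MKL_SEQ=1 are documented above; the value "SEQ" for MKLDNN_THREADING is an assumption based on mkl-dnn's accepted threading modes, not something this patch defines.

# Sketch: build without OpenMP and with sequential MKL.
# "SEQ" as an MKLDNN_THREADING value is an assumption (see mkl-dnn docs).
import os
import subprocess
env = dict(os.environ, USE_OPENMP="0", MKL_SEQ="1", MKLDNN_THREADING="SEQ")
subprocess.check_call(["python", "setup.py", "build"], env=env)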

View File

@@ -10996,6 +10996,9 @@ tensor([[[1., 1., 1., ..., 1., 1., 1.],
# We can't usefully test the output; just make sure this doesn't crash
torch.__config__.show()
def test_parallel_info(self):
torch.__config__.parallel_info()
@staticmethod
def _test_bincount(self, device):
# negative input throws

View File

@@ -208,6 +208,16 @@ def run_cmake(version,
USE_GFLAGS=os.getenv('USE_GFLAGS'),
WERROR=os.getenv('WERROR'))
if os.getenv('USE_OPENMP'):
cmake_defines(cmake_args, USE_OPENMP=check_env_flag('USE_OPENMP'))
if os.getenv('MKL_SEQ'):
cmake_defines(cmake_args, INTEL_MKL_SEQUENTIAL=check_env_flag('MKL_SEQ'))
mkldnn_threading = os.getenv('MKLDNN_THREADING')
if mkldnn_threading:
cmake_defines(cmake_args, MKLDNN_THREADING=mkldnn_threading)
if USE_GLOO_IBVERBS:
cmake_defines(cmake_args, USE_IBVERBS="1", USE_GLOO_IBVERBS="1")

View File

@@ -337,7 +337,7 @@ endif()
target_link_libraries(torch caffe2_library)
find_package(OpenMP QUIET)
- if(OPENMP_FOUND)
+ if(USE_OPENMP AND OPENMP_FOUND)
message(STATUS "pytorch is compiling with OpenMP. \n"
"OpenMP CXX_FLAGS: ${OpenMP_CXX_FLAGS}. \n"
"OpenMP libraries: ${OpenMP_CXX_LIBRARIES}.")

View File

@@ -11,3 +11,7 @@ def show():
# TODO: In principle, we could provide more structured version/config
# information here. We're not for now; considering doing so if someone
# asks for it.
def parallel_info():
r"""Returns detailed string with parallelization settings"""
return torch._C._parallel_info()
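
Aside (not part of the diff): a minimal usage sketch of the new Python entry point. OMP_NUM_THREADS is one of the variables the report echoes back, per the Parallel.cpp hunk above.

# Sketch: print the parallelization report for the current build.
import os
os.environ["OMP_NUM_THREADS"] = "2"  # read at startup; set before importing torch
import torch
print(torch.__config__.parallel_info())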

View File

@@ -306,6 +306,13 @@ static PyObject *THPModule_showConfig(PyObject *module)
END_HANDLE_TH_ERRORS
}
static PyObject *THPModule_parallelInfo(PyObject *module)
{
HANDLE_TH_ERRORS
return THPUtils_packString(at::get_parallel_info());
END_HANDLE_TH_ERRORS
}
void DLPack_Capsule_Destructor(PyObject* data) {
HANDLE_TH_ERRORS
DLManagedTensor * dlMTensor = (DLManagedTensor *)PyCapsule_GetPointer(data, "dltensor");
@@ -441,6 +448,7 @@ static PyMethodDef TorchMethods[] = {
{"_crash_if_csrc_ubsan", (PyCFunction)THPModule_crashIfCsrcUBSAN, METH_O, nullptr},
{"_crash_if_aten_asan", (PyCFunction)THPModule_crashIfATenASAN, METH_O, nullptr},
{"_show_config", (PyCFunction)THPModule_showConfig, METH_NOARGS, nullptr},
{"_parallel_info", (PyCFunction)THPModule_parallelInfo, METH_NOARGS, nullptr},
{"_set_backcompat_broadcast_warn", (PyCFunction)THPModule_setBackcompatBroadcastWarn, METH_O, nullptr},
{"_get_backcompat_broadcast_warn", (PyCFunction)THPModule_getBackcompatBroadcastWarn, METH_NOARGS, nullptr},
{"_set_backcompat_keepdim_warn", (PyCFunction)THPModule_setBackcompatKeepdimWarn, METH_O, nullptr},