From 481b6d026848b6d94edc0d84f32fb59c50590625 Mon Sep 17 00:00:00 2001
From: Ilia Cherniavskii <iliacher@fb.com>
Date: Mon, 6 May 2019 19:25:55 -0700
Subject: [PATCH] Allow a non-OpenMP based build (#19749)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/19749
ghimport-source-id: a6636c0acddbdc5fd5b0dcb20b9f80cbdb9159b9

Differential Revision: D15141993

Pulled By: ilia-cher

fbshipit-source-id: 96085608398b2a4c97c68b2948f5184d07f9ad3d
---
 .jenkins/pytorch/test.sh                |   1 +
 aten/src/ATen/Parallel.cpp              |  39 ++++++++
 aten/src/ATen/Parallel.h                |   3 +
 aten/src/ATen/Version.cpp               | 121 +++++++++++++++---------
 aten/src/ATen/Version.h                 |   6 ++
 aten/src/ATen/test/thread_init_test.cpp |   8 +-
 binaries/CMakeLists.txt                 |   5 +
 binaries/parallel_info.cc               |  41 ++++++++
 cmake/Modules/FindMKLDNN.cmake          |   2 +-
 docs/source/__config__.rst              |   1 +
 setup.py                                |   9 ++
 test/test_torch.py                      |   3 +
 tools/build_pytorch_libs.py             |  10 ++
 torch/CMakeLists.txt                    |   2 +-
 torch/__config__.py                     |   4 +
 torch/csrc/Module.cpp                   |   8 ++
 16 files changed, 211 insertions(+), 52 deletions(-)
 create mode 100644 binaries/parallel_info.cc
diff --git a/.jenkins/pytorch/test.sh b/.jenkins/pytorch/test.sh
index 4143e923df15..03c96cb16e54 100755
--- a/.jenkins/pytorch/test.sh
+++ b/.jenkins/pytorch/test.sh
@@ -203,6 +203,7 @@ test_xla() {
 }
 
 (cd test && python -c "import torch; print(torch.__config__.show())")
+(cd test && python -c "import torch; print(torch.__config__.parallel_info())")
 
 if [[ "${BUILD_ENVIRONMENT}" == *xla* ]]; then
   test_torchvision
diff --git a/aten/src/ATen/Parallel.cpp b/aten/src/ATen/Parallel.cpp
index b79bb99bb33b..0a965f7dba06 100644
--- a/aten/src/ATen/Parallel.cpp
+++ b/aten/src/ATen/Parallel.cpp
@@ -1,6 +1,10 @@
 #include <ATen/Parallel.h>
 
+#include <ATen/Config.h>
+#include <ATen/Version.h>
+
 #include <atomic>
+#include <sstream>
 
 #ifdef TH_BLAS_MKL
 #include <mkl.h>
@@ -60,6 +64,41 @@ size_t get_num_threads() {
 #endif
 }
 
+namespace {  // helpers private to this translation unit
+const char* get_env_var(const char* var_name) {
+  if (const char* found = std::getenv(var_name)) { return found; }
+  return "[not set]";  // std::getenv gave nullptr, i.e. the variable is undefined
+}
+}  // namespace
+
+// Assembles a multi-line report of the threading setup: ATen's thread count, the
+// OpenMP / MKL / MKL-DNN version strings, hardware concurrency and thread env vars.
+std::string get_parallel_info() {
+  std::ostringstream report;
+
+  report << "ATen/Parallel:\n\tat::get_num_threads() : "
+         << at::get_num_threads() << '\n';
+
+  report << at::get_openmp_version() << '\n';
+#ifdef _OPENMP
+  report << "\tomp_get_max_threads() : " << omp_get_max_threads() << '\n';
+#endif
+
+  report << at::get_mkl_version() << '\n';
+#ifdef TH_BLAS_MKL
+  report << "\tmkl_get_max_threads() : " << mkl_get_max_threads() << '\n';
+#endif
+
+  report << at::get_mkldnn_version() << '\n'
+         << "std::thread::hardware_concurrency() : "
+         << std::thread::hardware_concurrency() << '\n';
+
+  report << "Environment variables:" << '\n'
+         << "\tOMP_NUM_THREADS : " << get_env_var("OMP_NUM_THREADS") << '\n'
+         << "\tMKL_NUM_THREADS : " << get_env_var("MKL_NUM_THREADS") << '\n';
+  return report.str();
+}
+
 PTThreadPool::PTThreadPool(
     std::size_t pool_size,
     int numa_node_id)
diff --git a/aten/src/ATen/Parallel.h b/aten/src/ATen/Parallel.h
index d7a3b893017d..087a042e8470 100644
--- a/aten/src/ATen/Parallel.h
+++ b/aten/src/ATen/Parallel.h
@@ -143,6 +143,9 @@ inline scalar_t parallel_reduce(
   }
 }
 
+// Returns a human-readable, multi-line report of the parallelization settings
+CAFFE2_API std::string get_parallel_info();
+
 class CAFFE2_API PTThreadPool : public c10::ThreadPool {
  public:
   explicit PTThreadPool(
diff --git a/aten/src/ATen/Version.cpp b/aten/src/ATen/Version.cpp
index 6a2d727ba78b..3c010874df70 100644
--- a/aten/src/ATen/Version.cpp
+++ b/aten/src/ATen/Version.cpp
@@ -16,6 +16,78 @@
 
 namespace at {
 
+/// Returns the MKL version string as reported by mkl_get_version_string(),
+/// or "MKL not found" when AT_MKL_ENABLED() is false (ATen built without
+/// MKL support).
+std::string get_mkl_version() {
+#if AT_MKL_ENABLED()
+  // Magic buffer number is from MKL documentation
+  // https://software.intel.com/en-us/mkl-developer-reference-c-mkl-get-version-string
+  constexpr int kVersionBufLen = 198;
+  char banner[kVersionBufLen];
+  mkl_get_version_string(banner, kVersionBufLen);
+  return std::string(banner);
+#else
+  return std::string("MKL not found");
+#endif
+}
+
+/// Returns "Intel(R) MKL-DNN v<major>.<minor>.<patch> (Git Hash <hash>)", or
+/// "MKLDNN not found" when AT_MKLDNN_ENABLED() is false.
+std::string get_mkldnn_version() {
+#if AT_MKLDNN_ENABLED()
+  // Cribbed from mkl-dnn/src/common/verbose.cpp
+  // Too bad: can't get ISA info conveniently :(
+  // Apparently no way to get ideep version?
+  // https://github.com/intel/ideep/issues/29
+  const mkldnn_version_t* info = mkldnn_version();
+  std::ostringstream out;
+  out << "Intel(R) MKL-DNN v" << info->major << "." << info->minor << "."
+      << info->patch << " (Git Hash " << info->hash << ")";
+  return out.str();
+#else
+  return std::string("MKLDNN not found");
+#endif
+}
+
+/// Returns "OpenMP <_OPENMP>", followed by " (a.k.a. OpenMP <X.Y>)" when the
+/// _OPENMP date macro matches a known specification release, or
+/// "OpenMP not found" when this translation unit is compiled without OpenMP.
+std::string get_openmp_version() {
+#ifdef _OPENMP
+  // _OPENMP expands to the yyyymm date of the supported OpenMP specification.
+  // Reference:
+  // https://stackoverflow.com/questions/1304363/how-to-check-the-version-of-openmp-on-linux
+  struct SpecRelease {
+    long date;
+    const char* version;
+  };
+  static const SpecRelease kKnownReleases[] = {
+      {200505, "2.5"},
+      {200805, "3.0"},
+      {201107, "3.1"},
+      {201307, "4.0"},
+      {201511, "4.5"},
+      {201811, "5.0"},
+      {202011, "5.1"},
+      {202111, "5.2"},
+  };
+
+  std::ostringstream ss;
+  ss << "OpenMP " << _OPENMP;
+  for (const SpecRelease& release : kKnownReleases) {
+    if (release.date == _OPENMP) {
+      ss << " (a.k.a. OpenMP " << release.version << ")";
+      break;
+    }
+  }
+  // Dates missing from the table are reported without the "a.k.a." suffix.
+  return ss.str();
+#else
+  return std::string("OpenMP not found");
+#endif
+}
+
 std::string show_config() {
   std::ostringstream ss;
   ss << "PyTorch built with:\n"; // TODO add the version of PyTorch
@@ -42,58 +114,15 @@ std::string show_config() {
 #endif
 
 #if AT_MKL_ENABLED()
-  {
-    // Magic buffer number is from MKL documentation
-    // https://software.intel.com/en-us/mkl-developer-reference-c-mkl-get-version-string
-    char buf[198];
-    mkl_get_version_string(buf, 198);
-    ss << "  - " << buf << "\n";
-  }
+  ss << "  - " << get_mkl_version() << "\n";
 #endif
 
 #if AT_MKLDNN_ENABLED()
-  // Cribbed from mkl-dnn/src/common/verbose.cpp
-  // Too bad: can't get ISA info conveniently :(
-  // Apparently no way to get ideep version?
-  // https://github.com/intel/ideep/issues/29
-  {
-    const mkldnn_version_t* ver = mkldnn_version();
-    ss << "  - Intel(R) MKL-DNN v" << ver->major << "." << ver->minor << "." << ver->patch
-       << " (Git Hash " << ver->hash << ")\n";
-  }
+  ss << "  - " << get_mkldnn_version() << "\n";
 #endif
 
 #ifdef _OPENMP
-  {
-    ss << "  - OpenMP " << _OPENMP;
-    // Reference:
-    // https://stackoverflow.com/questions/1304363/how-to-check-the-version-of-openmp-on-linux
-    const char* ver_str = nullptr;
-    switch (_OPENMP) {
-      case 200505:
-        ver_str = "2.5";
-        break;
-      case 200805:
-        ver_str = "3.0";
-        break;
-      case 201107:
-        ver_str = "3.1";
-        break;
-      case 201307:
-        ver_str = "4.0";
-        break;
-      case 201511:
-        ver_str = "4.5";
-        break;
-      default:
-        ver_str = nullptr;
-        break;
-    }
-    if (ver_str) {
-      ss << " (a.k.a. OpenMP " << ver_str << ")";
-    }
-    ss << "\n";
-  }
+  ss << "  - " << get_openmp_version() << "\n";
 #endif
 
 #ifdef USE_LAPACK
diff --git a/aten/src/ATen/Version.h b/aten/src/ATen/Version.h
index 0385177a6373..18fd31d3ed87 100644
--- a/aten/src/ATen/Version.h
+++ b/aten/src/ATen/Version.h
@@ -5,4 +5,10 @@ namespace at {
 /// Returns a detailed string describing the configuration PyTorch.
 CAFFE2_API std::string show_config();
 
+/// Version strings of the optional MKL, MKL-DNN and OpenMP backends; each
+/// returns a "<backend> not found" message when that backend is not compiled in.
+CAFFE2_API std::string get_mkl_version();
+CAFFE2_API std::string get_mkldnn_version();
+CAFFE2_API std::string get_openmp_version();
+
 }  // namespace at
diff --git a/aten/src/ATen/test/thread_init_test.cpp b/aten/src/ATen/test/thread_init_test.cpp
index adbb324592a7..1c0d8576e32d 100644
--- a/aten/src/ATen/test/thread_init_test.cpp
+++ b/aten/src/ATen/test/thread_init_test.cpp
@@ -28,15 +28,15 @@ int main() {
   t1.join();
 
   at::set_num_threads(4);
-  std::thread t2(test, 4);
-  std::thread t3(test, 4);
-  std::thread t4(test, 4);
+  std::thread t2(test, at::get_num_threads());
+  std::thread t3(test, at::get_num_threads());
+  std::thread t4(test, at::get_num_threads());
   t4.join();
   t3.join();
   t2.join();
 
   at::set_num_threads(5);
-  test(5);
+  test(at::get_num_threads());
 
   return 0;
 }
diff --git a/binaries/CMakeLists.txt b/binaries/CMakeLists.txt
index 81320d21fb4c..0f656ff898dd 100644
--- a/binaries/CMakeLists.txt
+++ b/binaries/CMakeLists.txt
@@ -2,6 +2,11 @@ caffe2_binary_target("convert_caffe_image_db.cc")
 caffe2_binary_target("convert_db.cc")
 caffe2_binary_target("make_cifar_db.cc")
 caffe2_binary_target("make_mnist_db.cc")
+if (NOT ANDROID)
+  caffe2_binary_target("parallel_info.cc")
+  target_include_directories(parallel_info PUBLIC
+    ${CMAKE_BINARY_DIR}/aten/src) # provides "ATen/TypeExtendedInterface.h" to ATen.h
+endif()
 caffe2_binary_target("predictor_verifier.cc")
 caffe2_binary_target("print_registered_core_operators.cc")
 caffe2_binary_target("run_plan.cc")
diff --git a/binaries/parallel_info.cc b/binaries/parallel_info.cc
new file mode 100644
index 000000000000..eb12278a29d9
--- /dev/null
+++ b/binaries/parallel_info.cc
@@ -0,0 +1,41 @@
+/**
+ * Copyright (c) 2016-present, Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ATen/Parallel.h"
+
+#include <iostream>
+#include <sstream>
+
+#ifdef __linux__
+#include <sys/types.h>
+#include <unistd.h>
+#endif
+
+// Prints ATen's parallelization report and, on Linux, the lsof entries of this
+// process whose line contains ".so" (i.e. the loaded shared objects).
+int main(int argc, char** argv) {
+  at::init_num_threads();
+  std::cout << at::get_parallel_info() << std::endl;
+
+# ifdef __linux__
+  std::ostringstream cmd;
+  // -F matches ".so" literally; as a regex, '.' would match any character.
+  cmd << "lsof -p " << getpid() << " | grep -F .so";
+  std::cout << "Loaded .so:" << std::endl << cmd.str() << std::endl;
+  std::system(cmd.str().c_str());
+# endif
+  return 0;
+}
diff --git a/cmake/Modules/FindMKLDNN.cmake b/cmake/Modules/FindMKLDNN.cmake
index 27a0ed3cd34c..393b64c5e20f 100644
--- a/cmake/Modules/FindMKLDNN.cmake
+++ b/cmake/Modules/FindMKLDNN.cmake
@@ -89,7 +89,7 @@ ENDIF(MKL_FOUND)
 
 IF(MKL_FOUND)
   SET(MKL_cmake_included TRUE)
-  SET(MKLDNN_THREADING "OMP:COMP" CACHE STRING "" FORCE)
+  SET(MKLDNN_THREADING "OMP:COMP" CACHE STRING "")
 ENDIF(MKL_FOUND)
 SET(WITH_TEST FALSE CACHE BOOL "" FORCE)
 SET(WITH_EXAMPLE FALSE CACHE BOOL "" FORCE)
diff --git a/docs/source/__config__.rst b/docs/source/__config__.rst
index fb89c33fcb0e..e4a6ac890493 100644
--- a/docs/source/__config__.rst
+++ b/docs/source/__config__.rst
@@ -4,3 +4,4 @@ torch.__config__
 .. automodule:: torch.__config__
 
 .. autofunction:: show
+.. autofunction:: parallel_info
diff --git a/setup.py b/setup.py
index 6e97f61c08e5..441bc08d7082 100644
--- a/setup.py
+++ b/setup.py
@@ -42,6 +42,9 @@
 #   USE_MKLDNN=0
 #     disables use of MKLDNN
 #
+#   MKLDNN_THREADING
+#     MKL-DNN threading mode (https://github.com/intel/mkl-dnn/)
+#
 #   USE_NNPACK=0
 #     disables NNPACK build
 #
@@ -64,6 +67,9 @@
 #   USE_OPENCV
 #     enables use of OpenCV for additional operators
 #
+#   USE_OPENMP=0
+#     disables use of OpenMP for parallelization
+#
 #   USE_FFMPEG
 #     enables use of ffmpeg for additional operators
 #
@@ -96,6 +102,9 @@
 #     then the build will fail if the requested BLAS is not found, otherwise
 #     the BLAS will be chosen based on what is found on your system.
 #
+#   MKL_SEQ=1
+#     chooses a sequential version of MKL library (in case of BLAS=MKL)
+#
 #   USE_FBGEMM
 #     Enables use of FBGEMM
 #
diff --git a/test/test_torch.py b/test/test_torch.py
index 0226c8b87c37..fb94a8e66951 100644
--- a/test/test_torch.py
+++ b/test/test_torch.py
@@ -10996,6 +10996,9 @@ tensor([[[1., 1., 1.,  ..., 1., 1., 1.],
         # We can't usefully test the output; just make sure this doesn't crash
         torch.__config__.show()
 
+    def test_parallel_info(self):
+        torch.__config__.parallel_info()  # smoke test only: the report depends on build and machine
+
     @staticmethod
     def _test_bincount(self, device):
         # negative input throws
diff --git a/tools/build_pytorch_libs.py b/tools/build_pytorch_libs.py
index 432fed3bc809..7c4fd2e03751 100644
--- a/tools/build_pytorch_libs.py
+++ b/tools/build_pytorch_libs.py
@@ -208,6 +208,16 @@ def run_cmake(version,
         USE_GFLAGS=os.getenv('USE_GFLAGS'),
         WERROR=os.getenv('WERROR'))
 
+    if os.getenv('USE_OPENMP'):
+        cmake_defines(cmake_args, USE_OPENMP=check_env_flag('USE_OPENMP'))
+
+    if os.getenv('MKL_SEQ'):
+        cmake_defines(cmake_args, INTEL_MKL_SEQUENTIAL=check_env_flag('MKL_SEQ'))
+
+    mkldnn_threading = os.getenv('MKLDNN_THREADING')
+    if mkldnn_threading:
+        cmake_defines(cmake_args, MKLDNN_THREADING=mkldnn_threading)
+
     if USE_GLOO_IBVERBS:
         cmake_defines(cmake_args, USE_IBVERBS="1", USE_GLOO_IBVERBS="1")
 
diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt
index d3eb13ae240c..874a6e917ac2 100644
--- a/torch/CMakeLists.txt
+++ b/torch/CMakeLists.txt
@@ -337,7 +337,7 @@ endif()
 target_link_libraries(torch caffe2_library)
 
 find_package(OpenMP QUIET)
-if(OPENMP_FOUND)
+if(USE_OPENMP AND OPENMP_FOUND)
   message(STATUS "pytorch is compiling with OpenMP. \n"
     "OpenMP CXX_FLAGS: ${OpenMP_CXX_FLAGS}. \n"
     "OpenMP libraries: ${OpenMP_CXX_LIBRARIES}.")
diff --git a/torch/__config__.py b/torch/__config__.py
index cb482645bebb..e4c3fde9ec3c 100644
--- a/torch/__config__.py
+++ b/torch/__config__.py
@@ -11,3 +11,7 @@ def show():
 # TODO: In principle, we could provide more structured version/config
 # information here.  We're not for now; considering doing so if someone
 # asks for it.
+
+def parallel_info():
+    r"""Return a detailed string describing the parallelization settings."""
+    return torch._C._parallel_info()
diff --git a/torch/csrc/Module.cpp b/torch/csrc/Module.cpp
index fcd23f308d1b..b0658e824213 100644
--- a/torch/csrc/Module.cpp
+++ b/torch/csrc/Module.cpp
@@ -306,6 +306,13 @@ static PyObject *THPModule_showConfig(PyObject *module)
   END_HANDLE_TH_ERRORS
 }
 
+static PyObject *THPModule_parallelInfo(PyObject *module)
+{  // Backs torch._C._parallel_info: hands at::get_parallel_info() to Python as a string object
+  HANDLE_TH_ERRORS  // NOTE(review): presumably maps C++ exceptions to Python errors — confirm
+  return THPUtils_packString(at::get_parallel_info());
+  END_HANDLE_TH_ERRORS
+}
+
 void DLPack_Capsule_Destructor(PyObject* data) {
   HANDLE_TH_ERRORS
   DLManagedTensor * dlMTensor = (DLManagedTensor *)PyCapsule_GetPointer(data, "dltensor");
@@ -441,6 +448,7 @@ static PyMethodDef TorchMethods[] = {
   {"_crash_if_csrc_ubsan", (PyCFunction)THPModule_crashIfCsrcUBSAN, METH_O, nullptr},
   {"_crash_if_aten_asan", (PyCFunction)THPModule_crashIfATenASAN, METH_O, nullptr},
   {"_show_config",    (PyCFunction)THPModule_showConfig, METH_NOARGS, nullptr},
+  {"_parallel_info",    (PyCFunction)THPModule_parallelInfo, METH_NOARGS, nullptr},
   {"_set_backcompat_broadcast_warn", (PyCFunction)THPModule_setBackcompatBroadcastWarn, METH_O, nullptr},
   {"_get_backcompat_broadcast_warn", (PyCFunction)THPModule_getBackcompatBroadcastWarn, METH_NOARGS, nullptr},
   {"_set_backcompat_keepdim_warn", (PyCFunction)THPModule_setBackcompatKeepdimWarn, METH_O, nullptr},