Allow a non-OpenMP based build (#19749)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/19749
ghimport-source-id: a6636c0acddbdc5fd5b0dcb20b9f80cbdb9159b9

Differential Revision: D15141993

Pulled By: ilia-cher

fbshipit-source-id: 96085608398b2a4c97c68b2948f5184d07f9ad3d
Ilia Cherniavskii
2019-05-06 19:25:55 -07:00
committed by Facebook Github Bot
parent 8c97f0b19e
commit 481b6d0268
16 changed files with 211 additions and 52 deletions

View File

@@ -203,6 +203,7 @@ test_xla() {
}
(cd test && python -c "import torch; print(torch.__config__.show())")
(cd test && python -c "import torch; print(torch.__config__.parallel_info())")
if [[ "${BUILD_ENVIRONMENT}" == *xla* ]]; then
test_torchvision

View File

@@ -1,6 +1,10 @@
#include <ATen/Parallel.h>
#include <ATen/Config.h>
#include <ATen/Version.h>
#include <atomic>
#include <sstream>
#ifdef TH_BLAS_MKL
#include <mkl.h>
@@ -60,6 +64,41 @@ size_t get_num_threads() {
#endif
}
namespace {
const char* get_env_var(const char* var_name) {
const char* value = std::getenv(var_name);
return value ? value : "[not set]";
}
}
std::string get_parallel_info() {
std::ostringstream ss;
ss << "ATen/Parallel:\n\tat::get_num_threads() : "
<< at::get_num_threads() << std::endl;
ss << at::get_openmp_version() << std::endl;
#ifdef _OPENMP
ss << "\tomp_get_max_threads() : " << omp_get_max_threads() << std::endl;
#endif
ss << at::get_mkl_version() << std::endl;
#ifdef TH_BLAS_MKL
ss << "\tmkl_get_max_threads() : " << mkl_get_max_threads() << std::endl;
#endif
ss << at::get_mkldnn_version() << std::endl;
ss << "std::thread::hardware_concurrency() : "
<< std::thread::hardware_concurrency() << std::endl;
ss << "Environment variables:" << std::endl;
ss << "\tOMP_NUM_THREADS : " << get_env_var("OMP_NUM_THREADS") << std::endl;
ss << "\tMKL_NUM_THREADS : " << get_env_var("MKL_NUM_THREADS") << std::endl;
return ss.str();
}
PTThreadPool::PTThreadPool(
std::size_t pool_size,
int numa_node_id)

View File

@@ -143,6 +143,9 @@ inline scalar_t parallel_reduce(
}
}
// Returns a detailed string describing parallelization settings
CAFFE2_API std::string get_parallel_info();
class CAFFE2_API PTThreadPool : public c10::ThreadPool {
public:
explicit PTThreadPool(

View File

@@ -16,6 +16,78 @@
namespace at {
std::string get_mkl_version() {
std::string version;
#if AT_MKL_ENABLED()
{
// Magic buffer number is from MKL documentation
// https://software.intel.com/en-us/mkl-developer-reference-c-mkl-get-version-string
char buf[198];
mkl_get_version_string(buf, 198);
version = buf;
}
#else
version = "MKL not found";
#endif
return version;
}
std::string get_mkldnn_version() {
std::ostringstream ss;
#if AT_MKLDNN_ENABLED()
// Cribbed from mkl-dnn/src/common/verbose.cpp
// Too bad: can't get ISA info conveniently :(
// Apparently no way to get ideep version?
// https://github.com/intel/ideep/issues/29
{
const mkldnn_version_t* ver = mkldnn_version();
ss << "Intel(R) MKL-DNN v" << ver->major << "." << ver->minor << "." << ver->patch
<< " (Git Hash " << ver->hash << ")";
}
#else
ss << "MKLDNN not found";
#endif
return ss.str();
}
std::string get_openmp_version() {
std::ostringstream ss;
#ifdef _OPENMP
{
ss << "OpenMP " << _OPENMP;
// Reference:
// https://stackoverflow.com/questions/1304363/how-to-check-the-version-of-openmp-on-linux
const char* ver_str = nullptr;
switch (_OPENMP) {
case 200505:
ver_str = "2.5";
break;
case 200805:
ver_str = "3.0";
break;
case 201107:
ver_str = "3.1";
break;
case 201307:
ver_str = "4.0";
break;
case 201511:
ver_str = "4.5";
break;
default:
ver_str = nullptr;
break;
}
if (ver_str) {
ss << " (a.k.a. OpenMP " << ver_str << ")";
}
}
#else
ss << "OpenMP not found";
#endif
return ss.str();
}
std::string show_config() {
std::ostringstream ss;
ss << "PyTorch built with:\n"; // TODO add the version of PyTorch
@@ -42,58 +42,15 @@ std::string show_config() {
#endif
#if AT_MKL_ENABLED()
- {
- // Magic buffer number is from MKL documentation
- // https://software.intel.com/en-us/mkl-developer-reference-c-mkl-get-version-string
- char buf[198];
- mkl_get_version_string(buf, 198);
- ss << " - " << buf << "\n";
- }
+ ss << " - " << get_mkl_version() << "\n";
#endif
#if AT_MKLDNN_ENABLED()
- // Cribbed from mkl-dnn/src/common/verbose.cpp
- // Too bad: can't get ISA info conveniently :(
- // Apparently no way to get ideep version?
- // https://github.com/intel/ideep/issues/29
- {
- const mkldnn_version_t* ver = mkldnn_version();
- ss << " - Intel(R) MKL-DNN v" << ver->major << "." << ver->minor << "." << ver->patch
- << " (Git Hash " << ver->hash << ")\n";
- }
+ ss << " - " << get_mkldnn_version() << "\n";
#endif
#ifdef _OPENMP
- {
- ss << " - OpenMP " << _OPENMP;
- // Reference:
- // https://stackoverflow.com/questions/1304363/how-to-check-the-version-of-openmp-on-linux
- const char* ver_str = nullptr;
- switch (_OPENMP) {
- case 200505:
- ver_str = "2.5";
- break;
- case 200805:
- ver_str = "3.0";
- break;
- case 201107:
- ver_str = "3.1";
- break;
- case 201307:
- ver_str = "4.0";
- break;
- case 201511:
- ver_str = "4.5";
- break;
- default:
- ver_str = nullptr;
- break;
- }
- if (ver_str) {
- ss << " (a.k.a. OpenMP " << ver_str << ")";
- }
- ss << "\n";
- }
+ ss << " - " << get_openmp_version() << "\n";
#endif
#ifdef USE_LAPACK

View File

@@ -5,4 +5,10 @@ namespace at {
/// Returns a detailed string describing the configuration of PyTorch.
CAFFE2_API std::string show_config();
CAFFE2_API std::string get_mkl_version();
CAFFE2_API std::string get_mkldnn_version();
CAFFE2_API std::string get_openmp_version();
} // namespace at

View File

@@ -28,15 +28,15 @@ int main() {
t1.join();
at::set_num_threads(4);
- std::thread t2(test, 4);
- std::thread t3(test, 4);
- std::thread t4(test, 4);
+ std::thread t2(test, at::get_num_threads());
+ std::thread t3(test, at::get_num_threads());
+ std::thread t4(test, at::get_num_threads());
t4.join();
t3.join();
t2.join();
at::set_num_threads(5);
- test(5);
+ test(at::get_num_threads());
return 0;
}
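
Aside (not part of the diff): the hunk above replaces the hardcoded thread counts with at::get_num_threads() because a build without OpenMP may ignore the requested count. The same getter/setter pair is exposed in Python; a minimal sketch:

# Sketch: in a non-OpenMP build, set_num_threads may be a no-op,
# so read the effective value back instead of assuming it.
import torch
torch.set_num_threads(4)
print(torch.get_num_threads())  # not guaranteed to be 4 without OpenMP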

View File

@@ -2,6 +2,11 @@ caffe2_binary_target("convert_caffe_image_db.cc")
caffe2_binary_target("convert_db.cc")
caffe2_binary_target("make_cifar_db.cc")
caffe2_binary_target("make_mnist_db.cc")
if (NOT ANDROID)
caffe2_binary_target("parallel_info.cc")
target_include_directories(parallel_info PUBLIC
${CMAKE_BINARY_DIR}/aten/src) # provides "ATen/TypeExtendedInterface.h" to ATen.h
endif()
caffe2_binary_target("predictor_verifier.cc")
caffe2_binary_target("print_registered_core_operators.cc")
caffe2_binary_target("run_plan.cc")

binaries/parallel_info.cc Normal file
View File

@@ -0,0 +1,41 @@
/**
* Copyright (c) 2016-present, Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "ATen/Parallel.h"
#include <iostream>
#include <sstream>
#ifdef __linux__
#include <sys/types.h>
#include <unistd.h>
#endif
int main(int argc, char** argv) {
at::init_num_threads();
std::cout << at::get_parallel_info() << std::endl;
# ifdef __linux__
std::ostringstream cmd;
cmd << "lsof -p " << getpid() << " | grep .so";
std::cout << "Loaded .so:" << std::endl;
std::cout << cmd.str() << std::endl;
std::system(cmd.str().c_str());
# endif
return 0;
}
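
Aside (not part of the diff): a hedged sketch of driving this new diagnostic binary from Python. The build/bin location is an assumption about the default CMake output layout, not something this patch guarantees.

# Sketch: run the new parallel_info binary and let it print its report.
# The path is an assumption (typical CMake layout); adjust to your build tree.
import subprocess
subprocess.run(["./build/bin/parallel_info"], check=True)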

View File

@@ -89,7 +89,7 @@ ENDIF(MKL_FOUND)
IF(MKL_FOUND)
SET(MKL_cmake_included TRUE)
- SET(MKLDNN_THREADING "OMP:COMP" CACHE STRING "" FORCE)
+ SET(MKLDNN_THREADING "OMP:COMP" CACHE STRING "")
ENDIF(MKL_FOUND)
SET(WITH_TEST FALSE CACHE BOOL "" FORCE)
SET(WITH_EXAMPLE FALSE CACHE BOOL "" FORCE)

View File

@@ -4,3 +4,4 @@ torch.__config__
.. automodule:: torch.__config__
.. autofunction:: show
.. autofunction:: parallel_info

View File

@@ -42,6 +42,9 @@
# USE_MKLDNN=0
# disables use of MKLDNN
#
# MKLDNN_THREADING
# MKL-DNN threading mode (https://github.com/intel/mkl-dnn/)
#
# USE_NNPACK=0
# disables NNPACK build
#
@@ -64,6 +67,9 @@
# USE_OPENCV
# enables use of OpenCV for additional operators
#
# USE_OPENMP=0
# disables use of OpenMP for parallelization
#
# USE_FFMPEG
# enables use of ffmpeg for additional operators
#
@@ -96,6 +102,9 @@
# then the build will fail if the requested BLAS is not found, otherwise
# the BLAS will be chosen based on what is found on your system.
#
# MKL_SEQ=1
# chooses a sequential version of MKL library (in case of BLAS=MKL)
#
# USE_FBGEMM
# Enables use of FBGEMM
#
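
Aside (not part of the diff): a hedged sketch of combining the new build knobs when invoking the build from Python. USE_OPENMP=0 and MKL_SEQ=1 are documented above; the value "SEQ" for MKLDNN_THREADING is an assumption based on mkl-dnn's accepted threading modes, not something this patch defines.

# Sketch: build without OpenMP and with sequential MKL.
# "SEQ" as an MKLDNN_THREADING value is an assumption (see mkl-dnn docs).
import os
import subprocess
env = dict(os.environ, USE_OPENMP="0", MKL_SEQ="1", MKLDNN_THREADING="SEQ")
subprocess.check_call(["python", "setup.py", "build"], env=env)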

View File

@@ -10996,6 +10996,9 @@ tensor([[[1., 1., 1., ..., 1., 1., 1.],
# We can't usefully test the output; just make sure this doesn't crash
torch.__config__.show()
def test_parallel_info(self):
torch.__config__.parallel_info()
@staticmethod
def _test_bincount(self, device):
# negative input throws

View File

@@ -208,6 +208,16 @@ def run_cmake(version,
USE_GFLAGS=os.getenv('USE_GFLAGS'),
WERROR=os.getenv('WERROR'))
if os.getenv('USE_OPENMP'):
cmake_defines(cmake_args, USE_OPENMP=check_env_flag('USE_OPENMP'))
if os.getenv('MKL_SEQ'):
cmake_defines(cmake_args, INTEL_MKL_SEQUENTIAL=check_env_flag('MKL_SEQ'))
mkldnn_threading = os.getenv('MKLDNN_THREADING')
if mkldnn_threading:
cmake_defines(cmake_args, MKLDNN_THREADING=mkldnn_threading)
if USE_GLOO_IBVERBS:
cmake_defines(cmake_args, USE_IBVERBS="1", USE_GLOO_IBVERBS="1")

View File

@@ -337,7 +337,7 @@ endif()
target_link_libraries(torch caffe2_library)
find_package(OpenMP QUIET)
- if(OPENMP_FOUND)
+ if(USE_OPENMP AND OPENMP_FOUND)
message(STATUS "pytorch is compiling with OpenMP. \n"
"OpenMP CXX_FLAGS: ${OpenMP_CXX_FLAGS}. \n"
"OpenMP libraries: ${OpenMP_CXX_LIBRARIES}.")

View File

@@ -11,3 +11,7 @@ def show():
# TODO: In principle, we could provide more structured version/config
# information here. We're not for now; considering doing so if someone
# asks for it.
def parallel_info():
r"""Returns detailed string with parallelization settings"""
return torch._C._parallel_info()
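
Aside (not part of the diff): a minimal usage sketch of the new Python entry point. OMP_NUM_THREADS is one of the variables the report echoes back, per the Parallel.cpp hunk above.

# Sketch: print the parallelization report for the current build.
import os
os.environ["OMP_NUM_THREADS"] = "2"  # read at startup; set before importing torch
import torch
print(torch.__config__.parallel_info())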

View File

@@ -306,6 +306,13 @@ static PyObject *THPModule_showConfig(PyObject *module)
END_HANDLE_TH_ERRORS
}
static PyObject *THPModule_parallelInfo(PyObject *module)
{
HANDLE_TH_ERRORS
return THPUtils_packString(at::get_parallel_info());
END_HANDLE_TH_ERRORS
}
void DLPack_Capsule_Destructor(PyObject* data) {
HANDLE_TH_ERRORS
DLManagedTensor * dlMTensor = (DLManagedTensor *)PyCapsule_GetPointer(data, "dltensor");
@@ -441,6 +448,7 @@ static PyMethodDef TorchMethods[] = {
{"_crash_if_csrc_ubsan", (PyCFunction)THPModule_crashIfCsrcUBSAN, METH_O, nullptr},
{"_crash_if_aten_asan", (PyCFunction)THPModule_crashIfATenASAN, METH_O, nullptr},
{"_show_config", (PyCFunction)THPModule_showConfig, METH_NOARGS, nullptr},
{"_parallel_info", (PyCFunction)THPModule_parallelInfo, METH_NOARGS, nullptr},
{"_set_backcompat_broadcast_warn", (PyCFunction)THPModule_setBackcompatBroadcastWarn, METH_O, nullptr},
{"_get_backcompat_broadcast_warn", (PyCFunction)THPModule_getBackcompatBroadcastWarn, METH_NOARGS, nullptr},
{"_set_backcompat_keepdim_warn", (PyCFunction)THPModule_setBackcompatKeepdimWarn, METH_O, nullptr},