Mirror of https://github.com/pytorch/pytorch.git, synced 2025-10-20 21:14:14 +08:00

Commit dbab9b73b6 (parent bb96b6635c), committed by Facebook Github Bot

separate mkl, mklml, and mkldnn (#12170)

Summary:
1. Remove avx2 support in mkldnn
2. Separate mkl, mklml, and mkldnn
3. Fix convfusion test case

Pull Request resolved: https://github.com/pytorch/pytorch/pull/12170
Reviewed By: yinghai
Differential Revision: D10207126
Pulled By: orionr
fbshipit-source-id: 1e62eb47943f426a89d57e2d2606439f2b04fd51
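As the diff below shows, the user-visible effect of the change is a rename of the build and runtime switches: the preprocessor guard CAFFE2_USE_IDEEP becomes CAFFE2_USE_MKLDNN, the CMake options USE_IDEEP/USE_MKLML are folded under USE_MKLDNN/USE_MKL, and the pybind flag workspace.C.use_ideep becomes workspace.C.use_mkldnn. A minimal sketch of the new gating pattern used by the ideep Python tests follows; the test class name, blob name, and test body are illustrative and not taken from the PR:

import unittest

import numpy as np

from caffe2.proto import caffe2_pb2
from caffe2.python import core, workspace


@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class ExampleIdeepTest(unittest.TestCase):
    def test_feed_fetch_roundtrip(self):
        # Feed a blob to the IDEEP device and read it back as a numpy array.
        arr = np.random.randn(2, 3).astype(np.float32)
        workspace.FeedBlob("x", arr, core.DeviceOption(caffe2_pb2.IDEEP))
        np.testing.assert_array_equal(arr, workspace.FetchBlob("x"))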
@@ -109,6 +109,7 @@ test_aten() {
${SUDO} ln -s "$TORCH_LIB_PATH"/libc10* build/bin
${SUDO} ln -s "$TORCH_LIB_PATH"/libcaffe2* build/bin
${SUDO} ln -s "$TORCH_LIB_PATH"/libmkldnn* build/bin
${SUDO} ln -s "$TORCH_LIB_PATH"/libnccl* build/bin

ls build/bin
@@ -117,8 +117,6 @@ option(USE_TENSORRT "Using Nvidia TensorRT library" OFF)
option(USE_ZMQ "Use ZMQ" OFF)
option(USE_ZSTD "Use ZSTD" OFF)
option(USE_MKLDNN "Use MKLDNN" OFF)
option(USE_IDEEP "Use IDEEP interface in MKL BLAS" ON)
option(USE_MKLML "Use MKLML interface in MKL BLAS" ON)
option(USE_DISTRIBUTED "Use distributed" ON)
cmake_dependent_option(
USE_MPI "Use MPI for Caffe2. Only available if USE_DISTRIBUTED is on." ON

@@ -150,8 +148,7 @@ if (BUILD_ATEN_ONLY)
set(USE_NNPACK OFF)
set(USE_NUMPY OFF)
set(USE_OPENCV OFF)
set(USE_IDEEP OFF)
set(USE_MKLML OFF)
set(USE_MKLDNN OFF)
set(USE_DISTRIBUTED OFF)
set(USE_LMDB OFF)
endif()
@@ -1,20 +1,20 @@
if(USE_MKL AND USE_IDEEP AND CAFFE2_COMPILER_SUPPORTS_AVX2_EXTENSIONS)
if(CAFFE2_USE_MKLDNN)
message(STATUS "Including IDEEP operators")

# ---[ CPU files.
file(GLOB_RECURSE avx2_srcs *.cc)
file(GLOB_RECURSE tmp *.cc)
set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} ${tmp})
# exclude test files and gpu files
file(GLOB_RECURSE tmp *_test.cc)
exclude(avx2_srcs "${avx2_srcs}" ${tmp})
exclude(Caffe2_CPU_SRCS "${Caffe2_CPU_SRCS}" ${tmp})

add_library(Caffe2_ideep_operators OBJECT ${avx2_srcs})
add_dependencies(Caffe2_ideep_operators Caffe2_PROTO)
set_target_properties(Caffe2_ideep_operators PROPERTIES COMPILE_FLAGS "-mavx2")
# ---[ CPU test files - currently none but just to be safe
file(GLOB_RECURSE tmp *_test.cc)
set(Caffe2_CPU_TEST_SRCS ${Caffe2_CPU_TEST_SRCS} ${tmp})

# ---[ Send the lists to the parent scope.
set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS}
$<TARGET_OBJECTS:Caffe2_ideep_operators>)
set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} PARENT_SCOPE)
set(Caffe2_CPU_TEST_SRCS ${Caffe2_CPU_TEST_SRCS} PARENT_SCOPE)
else()
message(STATUS "Excluding ideep operators as we are not using ideep")
endif()
@@ -2,7 +2,7 @@
#include "caffe2/core/context_gpu.h"
#include "caffe2/core/operator.h"

#ifdef CAFFE2_USE_IDEEP
#ifdef CAFFE2_USE_MKLDNN
#include <caffe2/ideep/operators/operator_fallback_ideep.h>
#include <caffe2/ideep/utils/ideep_operator.h>
#endif

@@ -25,7 +25,7 @@ REGISTER_CPU_OPERATOR(CTC, CTCOp<float, CPUContext>);
OPERATOR_SCHEMA(CTC).NumInputs(3, 4).NumOutputs(2, 3);
// .EnforceInputOutputGradient({{0, 0}});

#ifdef CAFFE2_USE_IDEEP
#ifdef CAFFE2_USE_MKLDNN
REGISTER_IDEEP_OPERATOR(CTC, IDEEPFallbackOp<CTCOp<float, CPUContext>>);
#endif
@@ -40,7 +40,7 @@ static_assert(
#cmakedefine CAFFE2_USE_GOOGLE_GLOG
#cmakedefine CAFFE2_USE_LITE_PROTO
#cmakedefine CAFFE2_USE_MKL
#cmakedefine CAFFE2_USE_IDEEP
#cmakedefine CAFFE2_USE_MKLDNN
#cmakedefine CAFFE2_USE_NVTX
#cmakedefine CAFFE2_USE_TRT
#cmakedefine CAFFE2_DISABLE_NUMA

@@ -76,6 +76,7 @@ static_assert(
{"USE_EIGEN_FOR_BLAS", "${CAFFE2_USE_EIGEN_FOR_BLAS}"}, \
{"USE_LITE_PROTO", "${CAFFE2_USE_LITE_PROTO}"}, \
{"USE_MKL", "${CAFFE2_USE_MKL}"}, \
{"USE_MKLDNN", "${CAFFE2_USE_MKLDNN}"}, \
{"USE_NVTX", "${CAFFE2_USE_NVTX}"}, \
{"USE_TRT", "${CAFFE2_USE_TRT}"}, \
{"DISABLE_NUMA", "${CAFFE2_DISABLE_NUMA}"}, \
@@ -1,20 +1,20 @@
if(USE_MKL AND USE_IDEEP AND CAFFE2_COMPILER_SUPPORTS_AVX2_EXTENSIONS)
if(CAFFE2_USE_MKLDNN)
message(STATUS "Including IDEEP operators")

# ---[ CPU files.
file(GLOB_RECURSE avx2_srcs *.cc)
file(GLOB_RECURSE tmp *.cc)
set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} ${tmp})
# exclude test files and gpu files
file(GLOB_RECURSE tmp *_test.cc)
exclude(avx2_srcs "${avx2_srcs}" ${tmp})
exclude(Caffe2_CPU_SRCS "${Caffe2_CPU_SRCS}" ${tmp})

add_library(Caffe2_ideep_operators OBJECT ${avx2_srcs})
add_dependencies(Caffe2_ideep_operators Caffe2_PROTO)
set_target_properties(Caffe2_ideep_operators PROPERTIES COMPILE_FLAGS "-mavx2")
# ---[ CPU test files - currently none but just to be safe
file(GLOB_RECURSE tmp *_test.cc)
set(Caffe2_CPU_TEST_SRCS ${Caffe2_CPU_TEST_SRCS} ${tmp})

# ---[ Send the lists to the parent scope.
set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS}
$<TARGET_OBJECTS:Caffe2_ideep_operators>)
set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} PARENT_SCOPE)
set(Caffe2_CPU_TEST_SRCS ${Caffe2_CPU_TEST_SRCS} PARENT_SCOPE)
else()
message(STATUS "Excluding ideep operators as we are not using ideep")
endif()
@@ -2,7 +2,7 @@
#include "caffe2/core/context.h"
#include "caffe2/core/operator.h"

#ifdef CAFFE2_USE_IDEEP
#ifdef CAFFE2_USE_MKLDNN
#include <caffe2/ideep/operators/operator_fallback_ideep.h>
#include <caffe2/ideep/utils/ideep_operator.h>
#endif

@@ -90,7 +90,7 @@ class CheckAtomicBoolOp final : public Operator<CPUContext> {
REGISTER_CPU_OPERATOR(CreateMutex, CreateMutexOp);
REGISTER_CPU_OPERATOR(AtomicFetchAdd, AtomicFetchAddOp);

#ifdef CAFFE2_USE_IDEEP
#ifdef CAFFE2_USE_MKLDNN
REGISTER_IDEEP_OPERATOR(CreateMutex, IDEEPFallbackOp<CreateMutexOp, SkipIndices<0>>);
#endif
@@ -1,6 +1,6 @@
#include "caffe2/operators/distance_op.h"
#include "caffe2/utils/eigen_utils.h"
#ifdef CAFFE2_USE_IDEEP
#ifdef CAFFE2_USE_MKLDNN
#include <caffe2/ideep/operators/operator_fallback_ideep.h>
#include <caffe2/ideep/utils/ideep_operator.h>
#endif

@@ -400,7 +400,7 @@ REGISTER_CPU_OPERATOR(L1Distance, L1DistanceOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(
L1DistanceGradient,
L1DistanceGradientOp<float, CPUContext>);
#ifdef CAFFE2_USE_IDEEP
#ifdef CAFFE2_USE_MKLDNN
REGISTER_IDEEP_OPERATOR(
L1DistanceGradient,
IDEEPFallbackOp<L1DistanceGradientOp<float, CPUContext>>);
@@ -2,7 +2,7 @@
#include "caffe2/utils/cpu_neon.h"
#include "caffe2/utils/math.h"

#ifdef CAFFE2_USE_IDEEP
#ifdef CAFFE2_USE_MKLDNN
#include <caffe2/ideep/operators/operator_fallback_ideep.h>
#include <caffe2/ideep/utils/ideep_operator.h>
#endif

@@ -586,7 +586,7 @@ OPERATOR_SCHEMA(BRGNCHWCToPackedInt8BGRAStylizerDeprocess)
.NumInputs(2)
.NumOutputs(1);

#ifdef CAFFE2_USE_IDEEP
#ifdef CAFFE2_USE_MKLDNN
REGISTER_IDEEP_OPERATOR(
BRGNCHWCToPackedInt8BGRAStylizerDeprocess,
IDEEPFallbackOp<BRGNCHWCToPackedInt8BGRAStylizerDeprocessOp, SkipIndices<0>>);
@@ -2,7 +2,7 @@
#include "caffe2/opt/converter.h"
#include "caffe2/opt/fusion.h"

#ifdef CAFFE2_USE_IDEEP
#ifdef CAFFE2_USE_MKLDNN
#include "caffe2/ideep/ideep_utils.h"
#endif

@@ -11,7 +11,7 @@ namespace opt {

using namespace nom;

#ifndef CAFFE2_USE_IDEEP
#ifndef CAFFE2_USE_MKLDNN
void OptimizeForIdeep(
repr::NNModule* nn,
caffe2::Workspace* ws,

@@ -440,7 +440,7 @@ void OptimizeForIdeep(
setPoolingInferenceMode(nn);
}

#endif // CAFFE2_USE_IDEEP
#endif // CAFFE2_USE_MKLDNN

} // namespace opt
} // namespace caffe2
@@ -6,6 +6,13 @@ set(Caffe2_CPU_PYTHON_SRCS
"/pybind_state_registry.cc"
)

if(CAFFE2_USE_MKLDNN)
set(Caffe2_CPU_PYTHON_SRCS
${Caffe2_CPU_PYTHON_SRCS}
"/pybind_state_ideep.cc"
)
endif()

# ---[ GPU files
set(Caffe2_GPU_PYTHON_SRCS
${Caffe2_CPU_PYTHON_SRCS}

@@ -22,19 +29,6 @@ prepend(Caffe2_CPU_PYTHON_SRCS ${CMAKE_CURRENT_SOURCE_DIR} ${Caffe2_CPU_PYTHON_S
prepend(Caffe2_GPU_PYTHON_SRCS ${CMAKE_CURRENT_SOURCE_DIR} ${Caffe2_GPU_PYTHON_SRCS})
prepend(Caffe2_HIP_PYTHON_SRCS ${CMAKE_CURRENT_SOURCE_DIR} ${Caffe2_HIP_PYTHON_SRCS})

# --[ Some special handling for ideep binding as we need to build with "-mavx2"
if(USE_MKL AND USE_IDEEP AND CAFFE2_COMPILER_SUPPORTS_AVX2_EXTENSIONS)
file(GLOB_RECURSE ideep_srcs *_ideep.cc)
add_library(Caffe2_ideep_pybind OBJECT ${ideep_srcs})
add_dependencies(Caffe2_ideep_pybind Caffe2_PROTO)
set_target_properties(Caffe2_ideep_pybind PROPERTIES COMPILE_FLAGS "-mavx2")
set(Caffe2_CPU_PYTHON_SRCS
${Caffe2_CPU_PYTHON_SRCS}
$<TARGET_OBJECTS:Caffe2_ideep_pybind>
)
endif()

set(Caffe2_CPU_PYTHON_SRCS ${Caffe2_CPU_PYTHON_SRCS} PARENT_SCOPE)
set(Caffe2_GPU_PYTHON_SRCS ${Caffe2_GPU_PYTHON_SRCS} PARENT_SCOPE)
set(Caffe2_HIP_PYTHON_SRCS ${Caffe2_HIP_PYTHON_SRCS} PARENT_SCOPE)
@@ -12,7 +12,7 @@ import caffe2.python.hypothesis_test_util as hu
import caffe2.python.ideep_test_util as mu

@unittest.skipIf(not workspace.C.use_ideep, "No IDEEP support.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class LRNTest(hu.HypothesisTestCase):
@given(input_channels=st.integers(1, 3),
batch_size=st.integers(1, 3),

@@ -45,7 +45,7 @@ def _tensor_splits(draw, add_axis=False):
)

@unittest.skipIf(not workspace.C.use_ideep, "No IDEEP support.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class TestConcatSplitOps(hu.HypothesisTestCase):
@given(tensor_splits=_tensor_splits(),
**mu.gcs)

@@ -14,7 +14,7 @@ import caffe2.python.hypothesis_test_util as hu
import caffe2.python.ideep_test_util as mu

@unittest.skipIf(not workspace.C.use_ideep, "No IDEEP support.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class ConvTest(hu.HypothesisTestCase):
@given(stride=st.integers(1, 3),
pad=st.integers(0, 3),
@@ -15,7 +15,7 @@ import caffe2.python.hypothesis_test_util as hu
import caffe2.python.ideep_test_util as mu

@unittest.skipIf(not workspace.C.use_ideep, "No IDEEP support.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class ConvFusionTest(hu.HypothesisTestCase):
@given(stride=st.integers(1, 3),
pad=st.integers(0, 3),

@@ -129,10 +129,14 @@ class ConvFusionTest(hu.HypothesisTestCase):
def test_convolution_sum_fusion(self, stride, pad, kernel, size,
input_channels, output_channels,
batch_size, use_bias, group, gc, dc):
relu_S0 = core.CreateOperator(
"Relu",
["S0"],
conv_S0 = core.CreateOperator(
"Conv",
["SX0", "Sw0", "Sb0"] if use_bias else ["SX0", "Sw0"],
["S0"],
stride=stride,
pad=pad,
kernel=kernel,
group=group,
device_option=dc[0]
)
conv = core.CreateOperator(

@@ -153,10 +157,14 @@ class ConvFusionTest(hu.HypothesisTestCase):
)

# Manual fusion for Conv + Sum
relu_S1 = core.CreateOperator(
"Relu",
["S1"],
conv_S1 = core.CreateOperator(
"Conv",
["SX1", "Sw1", "Sb1"] if use_bias else ["SX1", "Sw1"],
["S1"],
stride=stride,
pad=pad,
kernel=kernel,
group=group,
device_option=dc[1]
)
conv_fusion = core.CreateOperator(

@@ -170,6 +178,12 @@ class ConvFusionTest(hu.HypothesisTestCase):
fusion_type = 2,
device_option=dc[1]
)
SX = np.random.rand(
batch_size, input_channels * group, size, size).astype(np.float32) - 0.5
Sw = np.random.rand(
output_channels * group, input_channels, kernel, kernel) \
.astype(np.float32) - 0.5
Sb = np.random.rand(output_channels * group).astype(np.float32) - 0.5
X = np.random.rand(
batch_size, input_channels * group, size, size).astype(np.float32) - 0.5
w = np.random.rand(
@@ -179,23 +193,25 @@ class ConvFusionTest(hu.HypothesisTestCase):

old_ws_name = workspace.CurrentWorkspace()
workspace.SwitchWorkspace("_device_check_", True)
workspace.FeedBlob('SX0', SX, dc[0])
workspace.FeedBlob('Sw0', Sw, dc[0])
workspace.FeedBlob('Sb0', Sb, dc[0])
workspace.FeedBlob('X0', X, dc[0])
workspace.FeedBlob('w0', w, dc[0])
workspace.FeedBlob('b0', b, dc[0])
workspace.RunOperatorOnce(conv_S0)
workspace.RunOperatorOnce(conv)
Y0 = workspace.FetchBlob('Y0')
S = np.random.rand(*Y0.shape).astype(np.float32) - 0.5
workspace.FeedBlob('S0', S, dc[0])
workspace.RunOperatorOnce(relu_S0)
workspace.RunOperatorOnce(sum)
S0 = workspace.FetchBlob('S0')

workspace.ResetWorkspace()
workspace.FeedBlob('SX1', SX, dc[1])
workspace.FeedBlob('Sw1', Sw, dc[1])
workspace.FeedBlob('Sb1', Sb, dc[1])
workspace.FeedBlob('X1', X, dc[1])
workspace.FeedBlob('w1', w, dc[1])
workspace.FeedBlob('b1', b, dc[1])
workspace.FeedBlob('S1', S, dc[1])
workspace.RunOperatorOnce(relu_S1)
workspace.RunOperatorOnce(conv_S1)
workspace.RunOperatorOnce(conv_fusion)
S1 = workspace.FetchBlob('S1')

@@ -208,20 +224,22 @@ class ConvFusionTest(hu.HypothesisTestCase):
# Auto fusion for Conv + Sum
workspace.ResetWorkspace()
old_net = caffe2_pb2.NetDef()
relu_S0_old = caffe2_pb2.OperatorDef()
relu_S0_old.CopyFrom(relu_S0)
relu_S0_old.device_option.CopyFrom(dc[1])
conv_S0_old = caffe2_pb2.OperatorDef()
conv_S0_old.CopyFrom(conv_S0)
conv_S0_old.device_option.CopyFrom(dc[1])
conv_old = caffe2_pb2.OperatorDef()
conv_old.CopyFrom(conv)
conv_old.device_option.CopyFrom(dc[1])
sum_old = caffe2_pb2.OperatorDef()
sum_old.CopyFrom(sum)
sum_old.device_option.CopyFrom(dc[1])
old_net.op.extend([relu_S0_old, conv_old, sum_old])
old_net.op.extend([conv_S0_old, conv_old, sum_old])
workspace.FeedBlob('SX0', SX, dc[1])
workspace.FeedBlob('Sw0', Sw, dc[1])
workspace.FeedBlob('Sb0', Sb, dc[1])
workspace.FeedBlob('X0', X, dc[1])
workspace.FeedBlob('w0', w, dc[1])
workspace.FeedBlob('b0', b, dc[1])
workspace.FeedBlob('S0', S, dc[1])
net = core.Net("net")
net.Proto().CopyFrom(old_net)
optimizeForIDEEP(net)
@@ -250,10 +268,14 @@ class ConvFusionTest(hu.HypothesisTestCase):
def test_convolution_sum_relu_fusion(self, stride, pad, kernel, size,
input_channels, output_channels,
batch_size, use_bias, group, gc, dc):
relu_S0 = core.CreateOperator(
"Relu",
["S0"],
conv_S0 = core.CreateOperator(
"Conv",
["SX0", "Sw0", "Sb0"] if use_bias else ["SX0", "Sw0"],
["S0"],
stride=stride,
pad=pad,
kernel=kernel,
group=group,
device_option=dc[0]
)
conv = core.CreateOperator(

@@ -280,10 +302,14 @@ class ConvFusionTest(hu.HypothesisTestCase):
)

# Manual fusion for Conv + Sum + ReLU
relu_S1 = core.CreateOperator(
"Relu",
["S1"],
conv_S1 = core.CreateOperator(
"Conv",
["SX1", "Sw1", "Sb1"] if use_bias else ["SX1", "Sw1"],
["S1"],
stride=stride,
pad=pad,
kernel=kernel,
group=group,
device_option=dc[1]
)
conv_fusion = core.CreateOperator(

@@ -297,6 +323,12 @@ class ConvFusionTest(hu.HypothesisTestCase):
fusion_type = 3,
device_option=dc[1]
)
SX = np.random.rand(
batch_size, input_channels * group, size, size).astype(np.float32) - 0.5
Sw = np.random.rand(
output_channels * group, input_channels, kernel, kernel) \
.astype(np.float32) - 0.5
Sb = np.random.rand(output_channels * group).astype(np.float32) - 0.5
X = np.random.rand(
batch_size, input_channels * group, size, size).astype(np.float32) - 0.5
w = np.random.rand(
@@ -306,24 +338,26 @@ class ConvFusionTest(hu.HypothesisTestCase):

old_ws_name = workspace.CurrentWorkspace()
workspace.SwitchWorkspace("_device_check_", True)
workspace.FeedBlob('SX0', SX, dc[0])
workspace.FeedBlob('Sw0', Sw, dc[0])
workspace.FeedBlob('Sb0', Sb, dc[0])
workspace.FeedBlob('X0', X, dc[0])
workspace.FeedBlob('w0', w, dc[0])
workspace.FeedBlob('b0', b, dc[0])
workspace.RunOperatorOnce(conv_S0)
workspace.RunOperatorOnce(conv)
Y0 = workspace.FetchBlob('Y0')
S = np.random.rand(*Y0.shape).astype(np.float32) - 0.5
workspace.FeedBlob('S0', S, dc[0])
workspace.RunOperatorOnce(relu_S0)
workspace.RunOperatorOnce(sum)
workspace.RunOperatorOnce(relu)
S0 = workspace.FetchBlob('S0')

workspace.ResetWorkspace()
workspace.FeedBlob('SX1', SX, dc[1])
workspace.FeedBlob('Sw1', Sw, dc[1])
workspace.FeedBlob('Sb1', Sb, dc[1])
workspace.FeedBlob('X1', X, dc[1])
workspace.FeedBlob('w1', w, dc[1])
workspace.FeedBlob('b1', b, dc[1])
workspace.FeedBlob('S1', S, dc[1])
workspace.RunOperatorOnce(relu_S1)
workspace.RunOperatorOnce(conv_S1)
workspace.RunOperatorOnce(conv_fusion)
S1 = workspace.FetchBlob('S1')

@@ -336,9 +370,9 @@ class ConvFusionTest(hu.HypothesisTestCase):
# Auto fusion for Conv + Sum + ReLU
workspace.ResetWorkspace()
old_net = caffe2_pb2.NetDef()
relu_S0_old = caffe2_pb2.OperatorDef()
relu_S0_old.CopyFrom(relu_S0)
relu_S0_old.device_option.CopyFrom(dc[1])
conv_S0_old = caffe2_pb2.OperatorDef()
conv_S0_old.CopyFrom(conv_S0)
conv_S0_old.device_option.CopyFrom(dc[1])
conv_old = caffe2_pb2.OperatorDef()
conv_old.CopyFrom(conv)
conv_old.device_option.CopyFrom(dc[1])

@@ -348,11 +382,13 @@ class ConvFusionTest(hu.HypothesisTestCase):
relu_old = caffe2_pb2.OperatorDef()
relu_old.CopyFrom(relu)
relu_old.device_option.CopyFrom(dc[1])
old_net.op.extend([relu_S0_old, conv_old, sum_old, relu_old])
old_net.op.extend([conv_S0_old, conv_old, sum_old, relu_old])
workspace.FeedBlob('SX0', SX, dc[1])
workspace.FeedBlob('Sw0', Sw, dc[1])
workspace.FeedBlob('Sb0', Sb, dc[1])
workspace.FeedBlob('X0', X, dc[1])
workspace.FeedBlob('w0', w, dc[1])
workspace.FeedBlob('b0', b, dc[1])
workspace.FeedBlob('S0', S, dc[1])
net = core.Net("net")
net.Proto().CopyFrom(old_net)
optimizeForIDEEP(net)
@@ -9,7 +9,7 @@ from random import randint
from caffe2.proto import caffe2_pb2
from caffe2.python import core, workspace

@unittest.skipIf(not workspace.C.use_ideep, "No IDEEP support.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class CopyTest(unittest.TestCase):
def _get_deep_device(self):
return caffe2_pb2.DeviceOption(device_type=caffe2_pb2.IDEEP)

@@ -14,7 +14,7 @@ import caffe2.python.hypothesis_test_util as hu
import caffe2.python.ideep_test_util as mu

@unittest.skipIf(not workspace.C.use_ideep, "No IDEEP support.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class DropoutTest(hu.HypothesisTestCase):

@given(X=hu.tensor(),

@@ -12,7 +12,7 @@ import caffe2.python.hypothesis_test_util as hu
import caffe2.python.ideep_test_util as mu

@unittest.skipIf(not workspace.C.use_ideep, "No IDEEP support.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class ElementwiseSumTest(hu.HypothesisTestCase):
@given(size=st.integers(7, 9),
input_channels=st.integers(1, 3),

@@ -11,7 +11,7 @@ from caffe2.python import core, workspace
import caffe2.python.hypothesis_test_util as hu
import caffe2.python.ideep_test_util as mu

@unittest.skipIf(not workspace.C.use_ideep, "No IDEEP support.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class FcTest(hu.HypothesisTestCase):
@given(n=st.integers(1, 5), m=st.integers(1, 5),
k=st.integers(1, 5), **mu.gcs)

@@ -12,7 +12,7 @@ from hypothesis import given
import caffe2.python.ideep_test_util as mu

@unittest.skipIf(not workspace.C.use_ideep, "No IDEEP support.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class TestMomentumSGDUpdateOps(hu.HypothesisTestCase):
@given(n=st.integers(4, 8), nesterov=st.booleans(),
**mu.gcs)

@@ -13,7 +13,7 @@ import caffe2.python.hypothesis_test_util as hu
import caffe2.python.ideep_test_util as mu

@unittest.skipIf(not workspace.C.use_ideep, "No IDEEP support.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class TestFallbackOps(hu.HypothesisTestCase):
@given(stride=st.integers(1, 3),
pad=st.integers(0, 3),

@@ -11,7 +11,7 @@ from caffe2.python import core, workspace
import caffe2.python.hypothesis_test_util as hu
import caffe2.python.ideep_test_util as mu

@unittest.skipIf(not workspace.C.use_ideep, "No IDEEP support.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class PoolTest(hu.HypothesisTestCase):
@given(stride=st.integers(1, 3),
pad=st.integers(0, 3),

@@ -11,7 +11,7 @@ from caffe2.python import core, workspace
import caffe2.python.hypothesis_test_util as hu
import caffe2.python.ideep_test_util as mu

@unittest.skipIf(not workspace.C.use_ideep, "No IDEEP support.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class ReluTest(hu.HypothesisTestCase):
@given(X=hu.tensor(),
inplace=st.booleans(),

@@ -11,7 +11,7 @@ from caffe2.python import core, workspace
import caffe2.python.hypothesis_test_util as hu
import caffe2.python.ideep_test_util as mu

@unittest.skipIf(not workspace.C.use_ideep, "No IDEEP support.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class SoftmaxTest(hu.HypothesisTestCase):
@given(size=st.integers(8, 20),
input_channels=st.integers(1, 3),

@@ -13,7 +13,7 @@ from caffe2.python.model_helper import ModelHelper
import caffe2.python.ideep_test_util as mu

@unittest.skipIf(not workspace.C.use_ideep, "No IDEEP support.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class TestSpatialBN(hu.HypothesisTestCase):
@given(size=st.integers(7, 10),
input_channels=st.integers(7, 10),

@@ -12,7 +12,7 @@ import caffe2.python.hypothesis_test_util as hu
import caffe2.python.ideep_test_util as mu

@unittest.skipIf(not workspace.C.use_ideep, "No IDEEP support.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class SqueezeTest(hu.HypothesisTestCase):
@given(
squeeze_dims=st.lists(st.integers(0, 3), min_size=1, max_size=3),

@@ -180,8 +180,7 @@ def complex_resnet():
return model, [(1, 1, 224, 224)]

@unittest.skipIf(not workspace.C.use_ideep,
"Skipping as we do not have IDEEP.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class MKLRewriteTest(hu.HypothesisTestCase):
@given(gen=st.sampled_from([simple_relu, simple_fc,
simple_mlp, simple_cnn]))
@@ -977,12 +977,12 @@ void addGlobalMethods(py::module& m) {
// keep this Python attribute for BC
m.attr("has_mkldnn") = py::bool_(false);

m.attr("use_ideep") = py::bool_(
#ifdef CAFFE2_USE_IDEEP
m.attr("use_mkldnn") = py::bool_(
#ifdef CAFFE2_USE_MKLDNN
true
#else // CAFFE2_USE_IDEEP
#else // CAFFE2_USE_MKLDNN
false
#endif // CAFFE2_USE_IDEEP
#endif // CAFFE2_USE_MKLDNN
);

m.attr("use_trt") = py::bool_(
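The pybind hunk above keeps has_mkldnn as a backwards-compatibility attribute pinned to false and switches the real capability flag from use_ideep to use_mkldnn, which reflects whether the build defined CAFFE2_USE_MKLDNN. A hedged sketch of how calling code can probe these flags after the change:

from caffe2.python import workspace

print(workspace.C.use_mkldnn)  # True only when Caffe2 was built with CAFFE2_USE_MKLDNN
print(workspace.C.has_mkldnn)  # kept for backward compatibility; hard-coded to False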
@@ -347,17 +347,7 @@ class TestWorkspaceGPU(test_util.TestCase):
self.assertEqual(pattern.shape[0], workspace.NumCudaDevices())

@unittest.skipIf(not workspace.C.has_mkldnn, "No MKLDNN support.")
class TestWorkspaceMKLDNN(test_util.TestCase):

def testFeedFetchBlobMKLDNN(self):
arr = np.random.randn(2, 3).astype(np.float32)
workspace.FeedBlob(
"testblob_mkldnn", arr, core.DeviceOption(caffe2_pb2.MKLDNN))
fetched = workspace.FetchBlob("testblob_mkldnn")
np.testing.assert_array_equal(arr, fetched)

@unittest.skipIf(not workspace.C.use_ideep, "No IDEEP support.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class TestWorkspaceIDEEP(test_util.TestCase):

def testFeedFetchBlobIDEEP(self):
@@ -1,6 +1,6 @@
#include "caffe2/sgd/iter_op.h"

#ifdef CAFFE2_USE_IDEEP
#ifdef CAFFE2_USE_MKLDNN
#include <caffe2/ideep/operators/operator_fallback_ideep.h>
#include <caffe2/ideep/utils/ideep_operator.h>
#endif

@@ -28,7 +28,7 @@ void MutexDeserializer::Deserialize(const BlobProto& /* unused */, Blob* blob) {
REGISTER_CPU_OPERATOR(Iter, IterOp<CPUContext>);
REGISTER_CPU_OPERATOR(AtomicIter, AtomicIterOp<CPUContext>);

#ifdef CAFFE2_USE_IDEEP
#ifdef CAFFE2_USE_MKLDNN
REGISTER_IDEEP_OPERATOR(AtomicIter, IDEEPFallbackOp<AtomicIterOp<CPUContext>>);
#endif
@@ -72,10 +72,12 @@ elseif(BLAS STREQUAL "MKL")
if(MKL_FOUND)
include_directories(SYSTEM ${MKL_INCLUDE_DIR})
list(APPEND Caffe2_PUBLIC_DEPENDENCY_LIBS caffe2::mkl)
set(CAFFE2_USE_MKL ON)
else()
message(WARNING "MKL could not be found. Defaulting to Eigen")
set(BLAS "Eigen" CACHE STRING "Selected BLAS library")
set(CAFFE2_USE_EIGEN_FOR_BLAS ON)
set(CAFFE2_USE_MKL OFF)
endif()
elseif(BLAS STREQUAL "vecLib")
find_package(vecLib REQUIRED)

@@ -1247,19 +1249,21 @@ if (NOT BUILD_ATEN_MOBILE)
set(AT_ROCM_ENABLED 1)
ENDIF()

if (NO_MKLDNN)
message("disabling MKLDNN because NO_MKLDNN is set")
set(AT_MKLDNN_ENABLED 0)
else()
find_package(MKLDNN)
if(NOT MKLDNN_FOUND)
message(STATUS "MKLDNN not found. Compiling without MKLDNN support")
set(AT_MKLDNN_ENABLED 0)
else()
include_directories(SYSTEM ${MKLDNN_INCLUDE_DIRS})
set(AT_MKLDNN_ENABLED 1)
endif()
endif()
SET(AT_MKLDNN_ENABLED 0)
SET(CAFFE2_USE_MKLDNN OFF)
IF (USE_MKLDNN)
FIND_PACKAGE(MKLDNN)
IF(MKLDNN_FOUND)
SET(AT_MKLDNN_ENABLED 1)
SET(CAFFE2_USE_MKLDNN ON)
INCLUDE_DIRECTORIES(SYSTEM ${MKLDNN_INCLUDE_DIR})
LIST(APPEND Caffe2_PUBLIC_DEPENDENCY_LIBS ${MKLDNN_LIBRARIES})
ELSE()
MESSAGE(WARNING "MKLDNN could not be found.")
ENDIF()
ELSE()
MESSAGE("disabling MKLDNN because USE_MKLDNN is not set")
ENDIF()

IF(UNIX AND NOT APPLE)
INCLUDE(CheckLibraryExists)
@@ -1,13 +1,5 @@
# - Find INTEL MKL library
#
# This module finds the Intel Mkl libraries.
#
# USE_IDEEP : use IDEEP interface
# USE_MKLML : use MKLML interface
# MKLML_USE_SINGLE_DYNAMIC_LIBRARY : use single dynamic library interface
# MKLML_USE_STATIC_LIBS : use static libraries
# MKLML_MULTI_THREADED : use multi-threading
#
# This module sets the following variables:
# MKL_FOUND - set to true if a library implementing the CBLAS interface is found
# MKL_VERSION - best guess

@@ -69,7 +61,6 @@ ENDIF (CMAKE_COMPILER_IS_GNUCC)
SET(mklkerlibs "mc" "mc3" "nc" "p4n" "p4m" "p4m3" "p4p" "def")
SET(mklseq)

# Paths
SET(saved_CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH})
SET(saved_CMAKE_INCLUDE_PATH ${CMAKE_INCLUDE_PATH})

@@ -142,7 +133,7 @@ MACRO(CHECK_ALL_LIBRARIES LIBRARIES _name _list _flags)
IF(${_library} STREQUAL "gomp")
FIND_PACKAGE(OpenMP)
IF(OPENMP_FOUND)
SET(${_prefix}_${_library}_LIBRARY ${OpenMP_C_FLAGS})
SET(${_prefix}_${_library}_LIBRARY ${OpenMP_C_FLAGS})
ENDIF(OPENMP_FOUND)
ELSE(${_library} STREQUAL "gomp")
FIND_LIBRARY(${_prefix}_${_library}_LIBRARY NAMES ${_library})

@@ -174,19 +165,19 @@ MACRO(CHECK_ALL_LIBRARIES LIBRARIES _name _list _flags)
ENDIF(_libraries_work)
ENDMACRO(CHECK_ALL_LIBRARIES)

if(WIN32)
set(mkl_m "")
set(mkl_pthread "")
else(WIN32)
set(mkl_m "m")
set(mkl_pthread "pthread")
endif(WIN32)
IF(WIN32)
SET(mkl_m "")
SET(mkl_pthread "")
ELSE(WIN32)
SET(mkl_m "m")
SET(mkl_pthread "pthread")
ENDIF(WIN32)

if(UNIX AND NOT APPLE)
set(mkl_dl "${CMAKE_DL_LIBS}")
else(UNIX AND NOT APPLE)
set(mkl_dl "")
endif(UNIX AND NOT APPLE)
IF(UNIX AND NOT APPLE)
SET(mkl_dl "${CMAKE_DL_LIBS}")
ELSE(UNIX AND NOT APPLE)
SET(mkl_dl "")
ENDIF(UNIX AND NOT APPLE)

# Check for version 10/11
IF (NOT MKL_LIBRARIES)
@@ -286,7 +277,7 @@ SET(CMAKE_LIBRARY_PATH ${saved_CMAKE_LIBRARY_PATH})
SET(CMAKE_INCLUDE_PATH ${saved_CMAKE_INCLUDE_PATH})
IF (MKL_LIBRARIES AND MKL_INCLUDE_DIR)
SET(MKL_FOUND TRUE)
set(MKL_cmake_included true)
SET(MKL_cmake_included true)
ELSE (MKL_LIBRARIES AND MKL_INCLUDE_DIR)
SET(MKL_FOUND FALSE)
SET(MKL_VERSION)

@@ -301,86 +292,8 @@ IF(NOT MKL_FIND_QUIETLY)
MESSAGE(STATUS "MKL library found")
ELSE(MKL_FOUND)
MESSAGE(STATUS "MKL library not found")
return()
ENDIF(MKL_FOUND)
ENDIF(NOT MKL_FIND_QUIETLY)

# MKLML is included in the MKL package
if (USE_MKL AND USE_MKLML)
set(CAFFE2_USE_MKL 1)
endif()

if (USE_MKL AND USE_IDEEP)
set(IDEEP_ROOT "${PROJECT_SOURCE_DIR}/third_party/ideep")
set(MKLDNN_ROOT "${IDEEP_ROOT}/mkl-dnn")
find_path(IDEEP_INCLUDE_DIR ideep.hpp PATHS ${IDEEP_ROOT} PATH_SUFFIXES include)
find_path(MKLDNN_INCLUDE_DIR_HACK mkldnn.hpp mkldnn.h PATHS ${MKLDNN_ROOT} PATH_SUFFIXES include)
if (NOT MKLDNN_INCLUDE_DIR_HACK)
execute_process(COMMAND git submodule update --init mkl-dnn WORKING_DIRECTORY ${IDEEP_ROOT})
find_path(MKLDNN_INCLUDE_DIR_HACK mkldnn.hpp mkldnn.h PATHS ${MKLDNN_ROOT} PATH_SUFFIXES include)
endif()

if (MKLDNN_INCLUDE_DIR_HACK)
list(APPEND IDEEP_INCLUDE_DIR ${MKLDNN_INCLUDE_DIR_HACK})
list(APPEND __ideep_looked_for MKLDNN_INCLUDE_DIR_HACK)
# to avoid adding conflicting submodels
set(ORIG_WITH_TEST ${WITH_TEST})
set(WITH_TEST OFF)
add_subdirectory(${IDEEP_ROOT})
set(WITH_TEST ${ORIG_WITH_TEST})

# If we cannot find MKL, we will use the Intel MKL Small library comes with ${MKLDNN_ROOT}/external
file(GLOB_RECURSE MKLML_INNER_INCLUDE_DIR ${MKLDNN_ROOT}/external/*/mkl_vsl.h)
if(NOT MKL_FOUND AND MKLML_INNER_INCLUDE_DIR)
# if user has multiple version under external/ then guess last
# one alphabetically is "latest" and warn
list(LENGTH MKLML_INNER_INCLUDE_DIR MKLINCLEN)
if(MKLINCLEN GREATER 1)
list(SORT MKLML_INNER_INCLUDE_DIR)
list(REVERSE MKLML_INNER_INCLUDE_DIR)
list(GET MKLML_INNER_INCLUDE_DIR 0 MKLINCLST)
set(MKLML_INNER_INCLUDE_DIR "${MKLINCLST}")
endif()
get_filename_component(MKLML_INNER_INCLUDE_DIR ${MKLML_INNER_INCLUDE_DIR} DIRECTORY)
list(APPEND IDEEP_INCLUDE_DIR ${MKLML_INNER_INCLUDE_DIR})
list(APPEND __ideep_looked_for MKLML_INNER_INCLUDE_DIR)

if(APPLE)
set(__mklml_inner_libs mklml iomp5)
else()
set(__mklml_inner_libs mklml_intel iomp5)
endif()

set(IDEEP_LIBRARIES "")
foreach (__mklml_inner_lib ${__mklml_inner_libs})
string(TOUPPER ${__mklml_inner_lib} __mklml_inner_lib_upper)
find_library(${__mklml_inner_lib_upper}_LIBRARY
NAMES ${__mklml_inner_lib}
PATHS "${MKLML_INNER_INCLUDE_DIR}/../lib"
DOC "The path to Intel(R) MKLML ${__mklml_inner_lib} library")
mark_as_advanced(${__mklml_inner_lib_upper}_LIBRARY)
list(APPEND IDEEP_LIBRARIES ${${__mklml_inner_lib_upper}_LIBRARY})
list(APPEND __ideep_looked_for ${__mklml_inner_lib_upper}_LIBRARY)
endforeach()
endif() # NOT MKL_FOUND AND MKLML_INNER_INCLUDE_DIR

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(IDEEP DEFAULT_MSG ${__ideep_looked_for})

if(IDEEP_FOUND)
set(MKLDNN_LIB "${CMAKE_SHARED_LIBRARY_PREFIX}mkldnn${CMAKE_SHARED_LIBRARY_SUFFIX}")
list(APPEND IDEEP_LIBRARIES "${PROJECT_BINARY_DIR}/lib/${MKLDNN_LIB}")
message(STATUS "Found IDEEP (include: ${IDEEP_INCLUDE_DIR}, lib: ${IDEEP_LIBRARIES})")
set(CAFFE2_USE_IDEEP 1)
list(APPEND MKL_INCLUDE_DIR ${IDEEP_INCLUDE_DIR})
list(APPEND MKL_LIBRARIES ${IDEEP_LIBRARIES})
else()
message(FATAL_ERROR "Did not find IDEEP files!")
endif()

caffe_clear_vars(__ideep_looked_for __mklml_inner_libs)
endif() # MKLDNN_INCLUDE_DIR_HACK
endif() # USE_IDEEP

# Do nothing if MKL_FOUND was set before!
ENDIF (NOT MKL_FOUND)
@@ -1,32 +1,115 @@
# - Try to find MKLDNN
#
# The following variables are optionally searched for defaults
# MKLDNN_ROOT_DIR: Base directory where all MKLDNN components are found
# MKL_FOUND : set to true if a library implementing the CBLAS interface is found
# USE_MKLDNN
#
# The following are set after configuration is done:
# MKLDNN_FOUND
# MKLDNN_INCLUDE_DIRS
# MKLDNN_LIBRARIES
# MKLDNN_LIBRARY_DIRS
# MKLDNN_FOUND : set to true if mkl-dnn is found.
# MKLDNN_INCLUDE_DIR : path to mkl-dnn include dir.
# MKLDNN_LIBRARIES : list of libraries for mkl-dnn

include(FindPackageHandleStandardArgs)
IF (NOT MKLDNN_FOUND)

set(MKLDNN_ROOT_DIR "" CACHE PATH "Folder contains Intel MKLDNN")
SET(MKLDNN_LIBRARIES)
SET(MKLDNN_INCLUDE_DIR)

find_path(MKLDNN_INCLUDE_DIR mkldnn.h
HINTS ${MKLDNN_ROOT_DIR}
PATH_SUFFIXES include)
IF (NOT USE_MKLDNN)
RETURN()
ENDIF(NOT USE_MKLDNN)

find_library(MKLDNN_LIBRARY mkldnn
HINTS ${MKLDNN_LIB_DIR} ${MKLDNN_ROOT_DIR}
PATH_SUFFIXES lib lib64)
IF(MSVC)
MESSAGE(STATUS "MKL-DNN needs omp 3+ which is not supported in MSVC so far")
RETURN()
ENDIF(MSVC)

find_package_handle_standard_args(
MKLDNN DEFAULT_MSG MKLDNN_INCLUDE_DIR MKLDNN_LIBRARY)
SET(IDEEP_ROOT "${PROJECT_SOURCE_DIR}/third_party/ideep")
SET(MKLDNN_ROOT "${IDEEP_ROOT}/mkl-dnn")

if(MKLDNN_FOUND)
set(MKLDNN_INCLUDE_DIRS ${MKLDNN_INCLUDE_DIR})
set(MKLDNN_LIBRARIES ${MKLDNN_LIBRARY})
message(STATUS "Found MKLDNN (include: ${MKLDNN_INCLUDE_DIR}, library: ${MKLDNN_LIBRARY})")
mark_as_advanced(MKLDNN_ROOT_DIR MKLDNN_LIBRARY MKLDNN_INCLUDE_DIR)
endif()
FIND_PACKAGE(BLAS)
FIND_PATH(IDEEP_INCLUDE_DIR ideep.hpp PATHS ${IDEEP_ROOT} PATH_SUFFIXES include)
FIND_PATH(MKLDNN_INCLUDE_DIR mkldnn.hpp mkldnn.h PATHS ${MKLDNN_ROOT} PATH_SUFFIXES include)
IF (NOT MKLDNN_INCLUDE_DIR)
EXECUTE_PROCESS(COMMAND git${CMAKE_EXECUTABLE_SUFFIX} submodule update --init mkl-dnn WORKING_DIRECTORY ${IDEEP_ROOT})
FIND_PATH(MKLDNN_INCLUDE_DIR mkldnn.hpp mkldnn.h PATHS ${MKLDNN_ROOT} PATH_SUFFIXES include)
ENDIF(NOT MKLDNN_INCLUDE_DIR)

IF (NOT IDEEP_INCLUDE_DIR OR NOT MKLDNN_INCLUDE_DIR)
MESSAGE(STATUS "MKLDNN source files not found!")
RETURN()
ENDIF(NOT IDEEP_INCLUDE_DIR OR NOT MKLDNN_INCLUDE_DIR)
LIST(APPEND MKLDNN_INCLUDE_DIR ${IDEEP_INCLUDE_DIR})

IF(MKL_FOUND)
LIST(APPEND MKLDNN_LIBRARIES ${MKL_LIBRARIES})
LIST(APPEND MKLDNN_INCLUDE_DIR ${MKL_INCLUDE_DIR})

ELSE(MKL_FOUND)
# If we cannot find MKL, we will use the Intel MKL Small library
# comes with ${MKLDNN_ROOT}/external
IF(NOT IS_DIRECTORY ${MKLDNN_ROOT}/external)
IF(UNIX)
EXECUTE_PROCESS(COMMAND "${MKLDNN_ROOT}/scripts/prepare_mkl.sh" RESULT_VARIABLE __result)
ELSE(UNIX)
EXECUTE_PROCESS(COMMAND "${MKLDNN_ROOT}/scripts/prepare_mkl.bat" RESULT_VARIABLE __result)
ENDIF(UNIX)
ENDIF(NOT IS_DIRECTORY ${MKLDNN_ROOT}/external)

FILE(GLOB_RECURSE MKLML_INNER_INCLUDE_DIR ${MKLDNN_ROOT}/external/*/mkl.h)
IF(MKLML_INNER_INCLUDE_DIR)
# if user has multiple version under external/ then guess last
# one alphabetically is "latest" and warn
LIST(LENGTH MKLML_INNER_INCLUDE_DIR MKLINCLEN)
IF(MKLINCLEN GREATER 1)
LIST(SORT MKLML_INNER_INCLUDE_DIR)
LIST(REVERSE MKLML_INNER_INCLUDE_DIR)
LIST(GET MKLML_INNER_INCLUDE_DIR 0 MKLINCLST)
SET(MKLML_INNER_INCLUDE_DIR "${MKLINCLST}")
ENDIF(MKLINCLEN GREATER 1)
GET_FILENAME_COMPONENT(MKLML_INNER_INCLUDE_DIR ${MKLML_INNER_INCLUDE_DIR} DIRECTORY)
LIST(APPEND MKLDNN_INCLUDE_DIR ${MKLML_INNER_INCLUDE_DIR})

IF(APPLE)
SET(__mklml_inner_libs mklml iomp5)
ELSE(APPLE)
SET(__mklml_inner_libs mklml_intel iomp5)
ENDIF(APPLE)

FOREACH(__mklml_inner_lib ${__mklml_inner_libs})
STRING(TOUPPER ${__mklml_inner_lib} __mklml_inner_lib_upper)
FIND_LIBRARY(${__mklml_inner_lib_upper}_LIBRARY
NAMES ${__mklml_inner_lib}
PATHS "${MKLML_INNER_INCLUDE_DIR}/../lib"
DOC "The path to Intel(R) MKLML ${__mklml_inner_lib} library")
MARK_AS_ADVANCED(${__mklml_inner_lib_upper}_LIBRARY)
LIST(APPEND MKLDNN_LIBRARIES ${${__mklml_inner_lib_upper}_LIBRARY})
ENDFOREACH(__mklml_inner_lib)
ENDIF(MKLML_INNER_INCLUDE_DIR)
ENDIF(MKL_FOUND)

LIST(APPEND __mkldnn_looked_for MKLDNN_LIBRARIES)
LIST(APPEND __mkldnn_looked_for MKLDNN_INCLUDE_DIR)
INCLUDE(FindPackageHandleStandardArgs)
find_package_handle_standard_args(MKLDNN DEFAULT_MSG ${__mkldnn_looked_for})

IF(MKLDNN_FOUND)
IF(NOT APPLE AND CMAKE_COMPILER_IS_GNUCC)
ADD_COMPILE_OPTIONS(-Wno-maybe-uninitialized)
ENDIF(NOT APPLE AND CMAKE_COMPILER_IS_GNUCC)
SET(WITH_TEST FALSE CACHE BOOL "build with mkl-dnn test" FORCE)
SET(WITH_EXAMPLE FALSE CACHE BOOL "build with mkl-dnn examples" FORCE)
ADD_SUBDIRECTORY(${MKLDNN_ROOT})
SET(MKLDNN_LIB "${CMAKE_SHARED_LIBRARY_PREFIX}mkldnn${CMAKE_SHARED_LIBRARY_SUFFIX}")
IF(MINGW OR MSVC)
LIST(APPEND MKLDNN_LIBRARIES "${PROJECT_BINARY_DIR}/bin/${MKLDNN_LIB}")
ELSE(MINGW OR MSVC)
LIST(APPEND MKLDNN_LIBRARIES "${PROJECT_BINARY_DIR}/lib/${MKLDNN_LIB}")
ENDIF(MINGW OR MSVC)
ELSE(MKLDNN_FOUND)
MESSAGE(STATUS "MKLDNN source files not found!")
ENDIF(MKLDNN_FOUND)

UNSET(__mklml_inner_libs)
UNSET(__mkldnn_looked_for)

ENDIF(NOT MKLDNN_FOUND)
@@ -95,11 +95,8 @@ function (caffe2_print_configuration_summary)
message(STATUS " LMDB version : ${LMDB_VERSION}")
endif()
message(STATUS " USE_METAL : ${USE_METAL}")
message(STATUS " USE_MKL : ${USE_MKL}")
if(${USE_MKL})
message(STATUS " USE_MKLML : ${USE_MKLML}")
message(STATUS " USE_IDEEP : ${USE_IDEEP}")
endif()
message(STATUS " USE_MKL : ${CAFFE2_USE_MKL}")
message(STATUS " USE_MKLDNN : ${CAFFE2_USE_MKLDNN}")
message(STATUS " USE_MOBILE_OPENGL : ${USE_MOBILE_OPENGL}")
message(STATUS " USE_NCCL : ${USE_NCCL}")
if(${USE_NCCL})
@@ -15,14 +15,14 @@
*/

#include "batch_permutation_op.h"
#ifdef CAFFE2_USE_IDEEP
#ifdef CAFFE2_USE_MKLDNN
#include <caffe2/ideep/operators/operator_fallback_ideep.h>
#include <caffe2/ideep/utils/ideep_operator.h>
#endif

namespace caffe2 {

#ifdef CAFFE2_USE_IDEEP
#ifdef CAFFE2_USE_MKLDNN
REGISTER_IDEEP_OPERATOR(
BatchPermutation,
IDEEPFallbackOp<BatchPermutationOp<float, CPUContext>>);

@@ -15,13 +15,13 @@
*/

#include "upsample_nearest_op.h"
#ifdef CAFFE2_USE_IDEEP
#ifdef CAFFE2_USE_MKLDNN
#include "caffe2/ideep/operators/operator_fallback_ideep.h"
#include "caffe2/ideep/utils/ideep_operator.h"
#endif

namespace caffe2 {
#ifdef CAFFE2_USE_IDEEP
#ifdef CAFFE2_USE_MKLDNN
REGISTER_IDEEP_OPERATOR(
UpsampleNearest,
IDEEPFallbackOp<UpsampleNearestOp<float, CPUContext>>);
setup.py (13 lines changed)

@@ -111,11 +111,6 @@
# NCCL_INCLUDE_DIR
# specify where nccl is installed
#
# MKLDNN_LIB_DIR
# MKLDNN_LIBRARY
# MKLDNN_INCLUDE_DIR
# specify where MKLDNN is installed
#
# NVTOOLSEXT_PATH (Windows only)
# specify where nvtoolsext is installed
#

@@ -177,8 +172,6 @@ from tools.setup_helpers.miopen import (USE_MIOPEN, MIOPEN_LIBRARY,
MIOPEN_LIB_DIR, MIOPEN_INCLUDE_DIR)
from tools.setup_helpers.nccl import USE_NCCL, USE_SYSTEM_NCCL, NCCL_LIB_DIR, \
NCCL_INCLUDE_DIR, NCCL_ROOT_DIR, NCCL_SYSTEM_LIB
from tools.setup_helpers.mkldnn import (USE_MKLDNN, MKLDNN_LIBRARY,
MKLDNN_LIB_DIR, MKLDNN_INCLUDE_DIR)
from tools.setup_helpers.nnpack import USE_NNPACK
from tools.setup_helpers.qnnpack import USE_QNNPACK
from tools.setup_helpers.nvtoolext import NVTOOLEXT_HOME

@@ -197,6 +190,7 @@ IS_DARWIN = (platform.system() == 'Darwin')
IS_LINUX = (platform.system() == 'Linux')

BUILD_PYTORCH = check_env_flag('BUILD_PYTORCH')
USE_MKLDNN = check_env_flag('USE_MKLDNN')
USE_CUDA_STATIC_LINK = check_env_flag('USE_CUDA_STATIC_LINK')
RERUN_CMAKE = True

@@ -396,9 +390,6 @@ def build_libs(libs):
my_env["MIOPEN_LIBRARY"] = MIOPEN_LIBRARY
my_env["MIOPEN_INCLUDE_DIR"] = MIOPEN_INCLUDE_DIR
if USE_MKLDNN:
my_env["MKLDNN_LIB_DIR"] = MKLDNN_LIB_DIR
my_env["MKLDNN_LIBRARY"] = MKLDNN_LIBRARY
my_env["MKLDNN_INCLUDE_DIR"] = MKLDNN_INCLUDE_DIR
build_libs_cmd += ['--use-mkldnn']
if USE_QNNPACK:
build_libs_cmd += ['--use-qnnpack']

@@ -610,7 +601,7 @@ class build_ext(build_ext_parent):
else:
print('-- Not using CUDA')
if USE_MKLDNN:
print('-- Detected MKLDNN at ' + MKLDNN_LIBRARY + ', ' + MKLDNN_INCLUDE_DIR)
print('-- Using MKLDNN')
else:
print('-- Not using MKLDNN')
if USE_NCCL and USE_SYSTEM_NCCL:
@@ -48,10 +48,10 @@ IF "%~1"=="--use-qnnpack" (
)

IF "%~1"=="--use-mkldnn" (
set /a NO_MKLDNN=0
set /a USE_MKLDNN=1
shift
) ELSE (
set /a NO_MKLDNN=1
set /a USE_MKLDNN=0
)

IF "%~1"=="--use-gloo-ibverbs" (

@@ -205,10 +205,7 @@ goto:eof
-DCUDNN_INCLUDE_DIR="%CUDNN_INCLUDE_DIR%" ^
-DCUDNN_LIB_DIR="%CUDNN_LIB_DIR%" ^
-DCUDNN_LIBRARY="%CUDNN_LIBRARY%" ^
-DNO_MKLDNN=%NO_MKLDNN% ^
-DMKLDNN_INCLUDE_DIR="%MKLDNN_INCLUDE_DIR%" ^
-DMKLDNN_LIB_DIR="%MKLDNN_LIB_DIR%" ^
-DMKLDNN_LIBRARY="%MKLDNN_LIBRARY%" ^
-DUSE_MKLDNN=%USE_MKLDNN% ^
-DATEN_NO_CONTRIB=1 ^
-DCMAKE_INSTALL_PREFIX="%INSTALL_DIR%" ^
-DCMAKE_C_FLAGS="%USER_CFLAGS%" ^
@@ -328,9 +328,6 @@ function build_caffe2() {
-DCUDNN_LIB_DIR=$CUDNN_LIB_DIR \
-DCUDNN_LIBRARY=$CUDNN_LIBRARY \
-DUSE_MKLDNN=$USE_MKLDNN \
-DMKLDNN_INCLUDE_DIR=$MKLDNN_INCLUDE_DIR \
-DMKLDNN_LIB_DIR=$MKLDNN_LIB_DIR \
-DMKLDNN_LIBRARY=$MKLDNN_LIBRARY \
-DCMAKE_INSTALL_PREFIX="$INSTALL_DIR" \
-DCMAKE_C_FLAGS="$USER_CFLAGS" \
-DCMAKE_CXX_FLAGS="$USER_CFLAGS" \
@@ -1,87 +0,0 @@
import platform
import glob
import os
import sys

from itertools import chain
from .env import check_env_flag, IS_LINUX, IS_WINDOWS, IS_CONDA, CONDA_DIR

def gather_paths(env_vars):
return list(chain(*(os.getenv(v, '').split(os.pathsep) for v in env_vars)))

MKLDNN_HOME = os.getenv('MKLDNN_HOME', '/usr/local/mkl-dnn')

USE_MKLDNN = False
MKLDNN_LIB_DIR = None
MKLDNN_INCLUDE_DIR = None
MKLDNN_LIBRARY = None
if not check_env_flag('NO_MKLDNN'):
lib_paths = list(filter(bool, [
os.getenv('MKLDNN_LIB_DIR'),
os.path.join(MKLDNN_HOME, 'lib'),
os.path.join(MKLDNN_HOME, 'lib64'),
os.path.join(MKLDNN_HOME, 'lib/x64'),
'/usr/lib/',
'/usr/lib64/',
] + gather_paths([
'LIBRARY_PATH',
]) + gather_paths([
'LD_LIBRARY_PATH',
]) + gather_paths([
'LIB'
])))
include_paths = list(filter(bool, [
os.getenv('MKLDNN_INCLUDE_DIR'),
os.path.join(MKLDNN_HOME, 'include'),
'/usr/include/',
] + gather_paths([
'CPATH',
'C_INCLUDE_PATH',
'CPLUS_INCLUDE_PATH',
'INCLUDE',
])))
if IS_WINDOWS:
mkldnn_regex = 'mkldnn*.lib'
mklml_regex = 'mklml*.lib'
else:
mkldnn_regex = 'libmkldnn*'
mklml_regex = 'libmklml_intel*'
if IS_CONDA:
lib_paths.append(os.path.join(CONDA_DIR, 'lib'))
include_paths.append(os.path.join(CONDA_DIR, 'include'))
for path in lib_paths:
if path is None or not os.path.exists(path):
continue
else:
libraries = sorted(glob.glob(os.path.join(path, mkldnn_regex)))
if libraries:
if not glob.glob(os.path.join(path, mklml_regex)):
print("WARNING: MKL-DNN is not compiled with Intel MKL small library")
print("Convolution performance might be suboptimal")
print("Refer https://github.com/01org/mkl-dnn for detail info")
MKLDNN_LIBRARY = libraries[0]
MKLDNN_LIB_DIR = path
break
for path in include_paths:
if path is None or not os.path.exists(path):
continue
else:
if os.path.exists(os.path.join(path, 'mkldnn.hpp')):
MKLDNN_INCLUDE_DIR = path
break

# Specifying the library directly will overwrite the lib directory
library = os.getenv('MKLDNN_LIBRARY')
if library is not None and os.path.exists(library):
MKLDNN_LIBRARY = library
MKLDNN_LIB_DIR = os.path.dirname(MKLDNN_LIBRARY)

if not all([MKLDNN_LIBRARY, MKLDNN_LIB_DIR, MKLDNN_INCLUDE_DIR]):
MKLDNN_LIBRARY = MKLDNN_LIB_DIR = MKLDNN_INCLUDE_DIR = None
else:
real_mkldnn_library = os.path.realpath(MKLDNN_LIBRARY)
real_mkldnn_lib_dir = os.path.realpath(MKLDNN_LIB_DIR)
assert os.path.dirname(real_mkldnn_library) == real_mkldnn_lib_dir, (
'mkldnn library and lib_dir must agree')
USE_MKLDNN = True
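With FindMKLDNN.cmake now building mkl-dnn from third_party/ideep, the tools/setup_helpers/mkldnn.py probe deleted above is no longer needed: setup.py reads the USE_MKLDNN environment flag directly instead of searching MKLDNN_HOME/MKLDNN_LIB_DIR/MKLDNN_LIBRARY/MKLDNN_INCLUDE_DIR, so opting in is just a matter of setting that flag when invoking the build (for example, USE_MKLDNN=1 python setup.py build is an illustrative invocation, not taken from the PR).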