separate mkl, mklml, and mkldnn (#12170)

Summary:
1. Remove avx2 support in mkldnn
2. Separate mkl, mklml, and mkldnn
3. Fix convfusion test case
Pull Request resolved: https://github.com/pytorch/pytorch/pull/12170

Reviewed By: yinghai

Differential Revision: D10207126

Pulled By: orionr

fbshipit-source-id: 1e62eb47943f426a89d57e2d2606439f2b04fd51
This commit is contained in:
Gu, Jinghui
2018-10-29 10:47:25 -07:00
committed by Facebook Github Bot
parent bb96b6635c
commit dbab9b73b6
40 changed files with 277 additions and 364 deletions

View File

@ -109,6 +109,7 @@ test_aten() {
${SUDO} ln -s "$TORCH_LIB_PATH"/libc10* build/bin
${SUDO} ln -s "$TORCH_LIB_PATH"/libcaffe2* build/bin
${SUDO} ln -s "$TORCH_LIB_PATH"/libmkldnn* build/bin
${SUDO} ln -s "$TORCH_LIB_PATH"/libnccl* build/bin
ls build/bin

View File

@ -117,8 +117,6 @@ option(USE_TENSORRT "Using Nvidia TensorRT library" OFF)
option(USE_ZMQ "Use ZMQ" OFF)
option(USE_ZSTD "Use ZSTD" OFF)
option(USE_MKLDNN "Use MKLDNN" OFF)
option(USE_IDEEP "Use IDEEP interface in MKL BLAS" ON)
option(USE_MKLML "Use MKLML interface in MKL BLAS" ON)
option(USE_DISTRIBUTED "Use distributed" ON)
cmake_dependent_option(
USE_MPI "Use MPI for Caffe2. Only available if USE_DISTRIBUTED is on." ON
@ -150,8 +148,7 @@ if (BUILD_ATEN_ONLY)
set(USE_NNPACK OFF)
set(USE_NUMPY OFF)
set(USE_OPENCV OFF)
set(USE_IDEEP OFF)
set(USE_MKLML OFF)
set(USE_MKLDNN OFF)
set(USE_DISTRIBUTED OFF)
set(USE_LMDB OFF)
endif()

View File

@ -1,20 +1,20 @@
if(USE_MKL AND USE_IDEEP AND CAFFE2_COMPILER_SUPPORTS_AVX2_EXTENSIONS)
if(CAFFE2_USE_MKLDNN)
message(STATUS "Including IDEEP operators")
# ---[ CPU files.
file(GLOB_RECURSE avx2_srcs *.cc)
file(GLOB_RECURSE tmp *.cc)
set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} ${tmp})
# exclude test files and gpu files
file(GLOB_RECURSE tmp *_test.cc)
exclude(avx2_srcs "${avx2_srcs}" ${tmp})
exclude(Caffe2_CPU_SRCS "${Caffe2_CPU_SRCS}" ${tmp})
add_library(Caffe2_ideep_operators OBJECT ${avx2_srcs})
add_dependencies(Caffe2_ideep_operators Caffe2_PROTO)
set_target_properties(Caffe2_ideep_operators PROPERTIES COMPILE_FLAGS "-mavx2")
# ---[ CPU test files - currently none but just to be safe
file(GLOB_RECURSE tmp *_test.cc)
set(Caffe2_CPU_TEST_SRCS ${Caffe2_CPU_TEST_SRCS} ${tmp})
# ---[ Send the lists to the parent scope.
set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS}
$<TARGET_OBJECTS:Caffe2_ideep_operators>)
set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} PARENT_SCOPE)
set(Caffe2_CPU_TEST_SRCS ${Caffe2_CPU_TEST_SRCS} PARENT_SCOPE)
else()
message(STATUS "Excluding ideep operators as we are not using ideep")
endif()

View File

@ -2,7 +2,7 @@
#include "caffe2/core/context_gpu.h"
#include "caffe2/core/operator.h"
#ifdef CAFFE2_USE_IDEEP
#ifdef CAFFE2_USE_MKLDNN
#include <caffe2/ideep/operators/operator_fallback_ideep.h>
#include <caffe2/ideep/utils/ideep_operator.h>
#endif
@ -25,7 +25,7 @@ REGISTER_CPU_OPERATOR(CTC, CTCOp<float, CPUContext>);
OPERATOR_SCHEMA(CTC).NumInputs(3, 4).NumOutputs(2, 3);
// .EnforceInputOutputGradient({{0, 0}});
#ifdef CAFFE2_USE_IDEEP
#ifdef CAFFE2_USE_MKLDNN
REGISTER_IDEEP_OPERATOR(CTC, IDEEPFallbackOp<CTCOp<float, CPUContext>>);
#endif

View File

@ -40,7 +40,7 @@ static_assert(
#cmakedefine CAFFE2_USE_GOOGLE_GLOG
#cmakedefine CAFFE2_USE_LITE_PROTO
#cmakedefine CAFFE2_USE_MKL
#cmakedefine CAFFE2_USE_IDEEP
#cmakedefine CAFFE2_USE_MKLDNN
#cmakedefine CAFFE2_USE_NVTX
#cmakedefine CAFFE2_USE_TRT
#cmakedefine CAFFE2_DISABLE_NUMA
@ -76,6 +76,7 @@ static_assert(
{"USE_EIGEN_FOR_BLAS", "${CAFFE2_USE_EIGEN_FOR_BLAS}"}, \
{"USE_LITE_PROTO", "${CAFFE2_USE_LITE_PROTO}"}, \
{"USE_MKL", "${CAFFE2_USE_MKL}"}, \
{"USE_MKLDNN", "${CAFFE2_USE_MKLDNN}"}, \
{"USE_NVTX", "${CAFFE2_USE_NVTX}"}, \
{"USE_TRT", "${CAFFE2_USE_TRT}"}, \
{"DISABLE_NUMA", "${CAFFE2_DISABLE_NUMA}"}, \

View File

@ -1,20 +1,20 @@
if(USE_MKL AND USE_IDEEP AND CAFFE2_COMPILER_SUPPORTS_AVX2_EXTENSIONS)
if(CAFFE2_USE_MKLDNN)
message(STATUS "Including IDEEP operators")
# ---[ CPU files.
file(GLOB_RECURSE avx2_srcs *.cc)
file(GLOB_RECURSE tmp *.cc)
set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} ${tmp})
# exclude test files and gpu files
file(GLOB_RECURSE tmp *_test.cc)
exclude(avx2_srcs "${avx2_srcs}" ${tmp})
exclude(Caffe2_CPU_SRCS "${Caffe2_CPU_SRCS}" ${tmp})
add_library(Caffe2_ideep_operators OBJECT ${avx2_srcs})
add_dependencies(Caffe2_ideep_operators Caffe2_PROTO)
set_target_properties(Caffe2_ideep_operators PROPERTIES COMPILE_FLAGS "-mavx2")
# ---[ CPU test files - currently none but just to be safe
file(GLOB_RECURSE tmp *_test.cc)
set(Caffe2_CPU_TEST_SRCS ${Caffe2_CPU_TEST_SRCS} ${tmp})
# ---[ Send the lists to the parent scope.
set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS}
$<TARGET_OBJECTS:Caffe2_ideep_operators>)
set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} PARENT_SCOPE)
set(Caffe2_CPU_TEST_SRCS ${Caffe2_CPU_TEST_SRCS} PARENT_SCOPE)
else()
message(STATUS "Excluding ideep operators as we are not using ideep")
endif()

View File

@ -2,7 +2,7 @@
#include "caffe2/core/context.h"
#include "caffe2/core/operator.h"
#ifdef CAFFE2_USE_IDEEP
#ifdef CAFFE2_USE_MKLDNN
#include <caffe2/ideep/operators/operator_fallback_ideep.h>
#include <caffe2/ideep/utils/ideep_operator.h>
#endif
@ -90,7 +90,7 @@ class CheckAtomicBoolOp final : public Operator<CPUContext> {
REGISTER_CPU_OPERATOR(CreateMutex, CreateMutexOp);
REGISTER_CPU_OPERATOR(AtomicFetchAdd, AtomicFetchAddOp);
#ifdef CAFFE2_USE_IDEEP
#ifdef CAFFE2_USE_MKLDNN
REGISTER_IDEEP_OPERATOR(CreateMutex, IDEEPFallbackOp<CreateMutexOp, SkipIndices<0>>);
#endif

View File

@ -1,6 +1,6 @@
#include "caffe2/operators/distance_op.h"
#include "caffe2/utils/eigen_utils.h"
#ifdef CAFFE2_USE_IDEEP
#ifdef CAFFE2_USE_MKLDNN
#include <caffe2/ideep/operators/operator_fallback_ideep.h>
#include <caffe2/ideep/utils/ideep_operator.h>
#endif
@ -400,7 +400,7 @@ REGISTER_CPU_OPERATOR(L1Distance, L1DistanceOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(
L1DistanceGradient,
L1DistanceGradientOp<float, CPUContext>);
#ifdef CAFFE2_USE_IDEEP
#ifdef CAFFE2_USE_MKLDNN
REGISTER_IDEEP_OPERATOR(
L1DistanceGradient,
IDEEPFallbackOp<L1DistanceGradientOp<float, CPUContext>>);

View File

@ -2,7 +2,7 @@
#include "caffe2/utils/cpu_neon.h"
#include "caffe2/utils/math.h"
#ifdef CAFFE2_USE_IDEEP
#ifdef CAFFE2_USE_MKLDNN
#include <caffe2/ideep/operators/operator_fallback_ideep.h>
#include <caffe2/ideep/utils/ideep_operator.h>
#endif
@ -586,7 +586,7 @@ OPERATOR_SCHEMA(BRGNCHWCToPackedInt8BGRAStylizerDeprocess)
.NumInputs(2)
.NumOutputs(1);
#ifdef CAFFE2_USE_IDEEP
#ifdef CAFFE2_USE_MKLDNN
REGISTER_IDEEP_OPERATOR(
BRGNCHWCToPackedInt8BGRAStylizerDeprocess,
IDEEPFallbackOp<BRGNCHWCToPackedInt8BGRAStylizerDeprocessOp, SkipIndices<0>>);

View File

@ -2,7 +2,7 @@
#include "caffe2/opt/converter.h"
#include "caffe2/opt/fusion.h"
#ifdef CAFFE2_USE_IDEEP
#ifdef CAFFE2_USE_MKLDNN
#include "caffe2/ideep/ideep_utils.h"
#endif
@ -11,7 +11,7 @@ namespace opt {
using namespace nom;
#ifndef CAFFE2_USE_IDEEP
#ifndef CAFFE2_USE_MKLDNN
void OptimizeForIdeep(
repr::NNModule* nn,
caffe2::Workspace* ws,
@ -440,7 +440,7 @@ void OptimizeForIdeep(
setPoolingInferenceMode(nn);
}
#endif // CAFFE2_USE_IDEEP
#endif // CAFFE2_USE_MKLDNN
} // namespace opt
} // namespace caffe2

View File

@ -6,6 +6,13 @@ set(Caffe2_CPU_PYTHON_SRCS
"/pybind_state_registry.cc"
)
if(CAFFE2_USE_MKLDNN)
set(Caffe2_CPU_PYTHON_SRCS
${Caffe2_CPU_PYTHON_SRCS}
"/pybind_state_ideep.cc"
)
endif()
# ---[ GPU files
set(Caffe2_GPU_PYTHON_SRCS
${Caffe2_CPU_PYTHON_SRCS}
@ -22,19 +29,6 @@ prepend(Caffe2_CPU_PYTHON_SRCS ${CMAKE_CURRENT_SOURCE_DIR} ${Caffe2_CPU_PYTHON_S
prepend(Caffe2_GPU_PYTHON_SRCS ${CMAKE_CURRENT_SOURCE_DIR} ${Caffe2_GPU_PYTHON_SRCS})
prepend(Caffe2_HIP_PYTHON_SRCS ${CMAKE_CURRENT_SOURCE_DIR} ${Caffe2_HIP_PYTHON_SRCS})
# --[ Some special handling for ideep binding as we need to build with "-mavx2"
if(USE_MKL AND USE_IDEEP AND CAFFE2_COMPILER_SUPPORTS_AVX2_EXTENSIONS)
file(GLOB_RECURSE ideep_srcs *_ideep.cc)
add_library(Caffe2_ideep_pybind OBJECT ${ideep_srcs})
add_dependencies(Caffe2_ideep_pybind Caffe2_PROTO)
set_target_properties(Caffe2_ideep_pybind PROPERTIES COMPILE_FLAGS "-mavx2")
set(Caffe2_CPU_PYTHON_SRCS
${Caffe2_CPU_PYTHON_SRCS}
$<TARGET_OBJECTS:Caffe2_ideep_pybind>
)
endif()
set(Caffe2_CPU_PYTHON_SRCS ${Caffe2_CPU_PYTHON_SRCS} PARENT_SCOPE)
set(Caffe2_GPU_PYTHON_SRCS ${Caffe2_GPU_PYTHON_SRCS} PARENT_SCOPE)
set(Caffe2_HIP_PYTHON_SRCS ${Caffe2_HIP_PYTHON_SRCS} PARENT_SCOPE)

View File

@ -12,7 +12,7 @@ import caffe2.python.hypothesis_test_util as hu
import caffe2.python.ideep_test_util as mu
@unittest.skipIf(not workspace.C.use_ideep, "No IDEEP support.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class LRNTest(hu.HypothesisTestCase):
@given(input_channels=st.integers(1, 3),
batch_size=st.integers(1, 3),

View File

@ -45,7 +45,7 @@ def _tensor_splits(draw, add_axis=False):
)
@unittest.skipIf(not workspace.C.use_ideep, "No IDEEP support.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class TestConcatSplitOps(hu.HypothesisTestCase):
@given(tensor_splits=_tensor_splits(),
**mu.gcs)

View File

@ -14,7 +14,7 @@ import caffe2.python.hypothesis_test_util as hu
import caffe2.python.ideep_test_util as mu
@unittest.skipIf(not workspace.C.use_ideep, "No IDEEP support.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class ConvTest(hu.HypothesisTestCase):
@given(stride=st.integers(1, 3),
pad=st.integers(0, 3),

View File

@ -15,7 +15,7 @@ import caffe2.python.hypothesis_test_util as hu
import caffe2.python.ideep_test_util as mu
@unittest.skipIf(not workspace.C.use_ideep, "No IDEEP support.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class ConvFusionTest(hu.HypothesisTestCase):
@given(stride=st.integers(1, 3),
pad=st.integers(0, 3),
@ -129,10 +129,14 @@ class ConvFusionTest(hu.HypothesisTestCase):
def test_convolution_sum_fusion(self, stride, pad, kernel, size,
input_channels, output_channels,
batch_size, use_bias, group, gc, dc):
relu_S0 = core.CreateOperator(
"Relu",
["S0"],
conv_S0 = core.CreateOperator(
"Conv",
["SX0", "Sw0", "Sb0"] if use_bias else ["SX0", "Sw0"],
["S0"],
stride=stride,
pad=pad,
kernel=kernel,
group=group,
device_option=dc[0]
)
conv = core.CreateOperator(
@ -153,10 +157,14 @@ class ConvFusionTest(hu.HypothesisTestCase):
)
# Manual fusion for Conv + Sum
relu_S1 = core.CreateOperator(
"Relu",
["S1"],
conv_S1 = core.CreateOperator(
"Conv",
["SX1", "Sw1", "Sb1"] if use_bias else ["SX1", "Sw1"],
["S1"],
stride=stride,
pad=pad,
kernel=kernel,
group=group,
device_option=dc[1]
)
conv_fusion = core.CreateOperator(
@ -170,6 +178,12 @@ class ConvFusionTest(hu.HypothesisTestCase):
fusion_type = 2,
device_option=dc[1]
)
SX = np.random.rand(
batch_size, input_channels * group, size, size).astype(np.float32) - 0.5
Sw = np.random.rand(
output_channels * group, input_channels, kernel, kernel) \
.astype(np.float32) - 0.5
Sb = np.random.rand(output_channels * group).astype(np.float32) - 0.5
X = np.random.rand(
batch_size, input_channels * group, size, size).astype(np.float32) - 0.5
w = np.random.rand(
@ -179,23 +193,25 @@ class ConvFusionTest(hu.HypothesisTestCase):
old_ws_name = workspace.CurrentWorkspace()
workspace.SwitchWorkspace("_device_check_", True)
workspace.FeedBlob('SX0', SX, dc[0])
workspace.FeedBlob('Sw0', Sw, dc[0])
workspace.FeedBlob('Sb0', Sb, dc[0])
workspace.FeedBlob('X0', X, dc[0])
workspace.FeedBlob('w0', w, dc[0])
workspace.FeedBlob('b0', b, dc[0])
workspace.RunOperatorOnce(conv_S0)
workspace.RunOperatorOnce(conv)
Y0 = workspace.FetchBlob('Y0')
S = np.random.rand(*Y0.shape).astype(np.float32) - 0.5
workspace.FeedBlob('S0', S, dc[0])
workspace.RunOperatorOnce(relu_S0)
workspace.RunOperatorOnce(sum)
S0 = workspace.FetchBlob('S0')
workspace.ResetWorkspace()
workspace.FeedBlob('SX1', SX, dc[1])
workspace.FeedBlob('Sw1', Sw, dc[1])
workspace.FeedBlob('Sb1', Sb, dc[1])
workspace.FeedBlob('X1', X, dc[1])
workspace.FeedBlob('w1', w, dc[1])
workspace.FeedBlob('b1', b, dc[1])
workspace.FeedBlob('S1', S, dc[1])
workspace.RunOperatorOnce(relu_S1)
workspace.RunOperatorOnce(conv_S1)
workspace.RunOperatorOnce(conv_fusion)
S1 = workspace.FetchBlob('S1')
@ -208,20 +224,22 @@ class ConvFusionTest(hu.HypothesisTestCase):
# Auto fusion for Conv + Sum
workspace.ResetWorkspace()
old_net = caffe2_pb2.NetDef()
relu_S0_old = caffe2_pb2.OperatorDef()
relu_S0_old.CopyFrom(relu_S0)
relu_S0_old.device_option.CopyFrom(dc[1])
conv_S0_old = caffe2_pb2.OperatorDef()
conv_S0_old.CopyFrom(conv_S0)
conv_S0_old.device_option.CopyFrom(dc[1])
conv_old = caffe2_pb2.OperatorDef()
conv_old.CopyFrom(conv)
conv_old.device_option.CopyFrom(dc[1])
sum_old = caffe2_pb2.OperatorDef()
sum_old.CopyFrom(sum)
sum_old.device_option.CopyFrom(dc[1])
old_net.op.extend([relu_S0_old, conv_old, sum_old])
old_net.op.extend([conv_S0_old, conv_old, sum_old])
workspace.FeedBlob('SX0', SX, dc[1])
workspace.FeedBlob('Sw0', Sw, dc[1])
workspace.FeedBlob('Sb0', Sb, dc[1])
workspace.FeedBlob('X0', X, dc[1])
workspace.FeedBlob('w0', w, dc[1])
workspace.FeedBlob('b0', b, dc[1])
workspace.FeedBlob('S0', S, dc[1])
net = core.Net("net")
net.Proto().CopyFrom(old_net)
optimizeForIDEEP(net)
@ -250,10 +268,14 @@ class ConvFusionTest(hu.HypothesisTestCase):
def test_convolution_sum_relu_fusion(self, stride, pad, kernel, size,
input_channels, output_channels,
batch_size, use_bias, group, gc, dc):
relu_S0 = core.CreateOperator(
"Relu",
["S0"],
conv_S0 = core.CreateOperator(
"Conv",
["SX0", "Sw0", "Sb0"] if use_bias else ["SX0", "Sw0"],
["S0"],
stride=stride,
pad=pad,
kernel=kernel,
group=group,
device_option=dc[0]
)
conv = core.CreateOperator(
@ -280,10 +302,14 @@ class ConvFusionTest(hu.HypothesisTestCase):
)
# Manual fusion for Conv + Sum + ReLU
relu_S1 = core.CreateOperator(
"Relu",
["S1"],
conv_S1 = core.CreateOperator(
"Conv",
["SX1", "Sw1", "Sb1"] if use_bias else ["SX1", "Sw1"],
["S1"],
stride=stride,
pad=pad,
kernel=kernel,
group=group,
device_option=dc[1]
)
conv_fusion = core.CreateOperator(
@ -297,6 +323,12 @@ class ConvFusionTest(hu.HypothesisTestCase):
fusion_type = 3,
device_option=dc[1]
)
SX = np.random.rand(
batch_size, input_channels * group, size, size).astype(np.float32) - 0.5
Sw = np.random.rand(
output_channels * group, input_channels, kernel, kernel) \
.astype(np.float32) - 0.5
Sb = np.random.rand(output_channels * group).astype(np.float32) - 0.5
X = np.random.rand(
batch_size, input_channels * group, size, size).astype(np.float32) - 0.5
w = np.random.rand(
@ -306,24 +338,26 @@ class ConvFusionTest(hu.HypothesisTestCase):
old_ws_name = workspace.CurrentWorkspace()
workspace.SwitchWorkspace("_device_check_", True)
workspace.FeedBlob('SX0', SX, dc[0])
workspace.FeedBlob('Sw0', Sw, dc[0])
workspace.FeedBlob('Sb0', Sb, dc[0])
workspace.FeedBlob('X0', X, dc[0])
workspace.FeedBlob('w0', w, dc[0])
workspace.FeedBlob('b0', b, dc[0])
workspace.RunOperatorOnce(conv_S0)
workspace.RunOperatorOnce(conv)
Y0 = workspace.FetchBlob('Y0')
S = np.random.rand(*Y0.shape).astype(np.float32) - 0.5
workspace.FeedBlob('S0', S, dc[0])
workspace.RunOperatorOnce(relu_S0)
workspace.RunOperatorOnce(sum)
workspace.RunOperatorOnce(relu)
S0 = workspace.FetchBlob('S0')
workspace.ResetWorkspace()
workspace.FeedBlob('SX1', SX, dc[1])
workspace.FeedBlob('Sw1', Sw, dc[1])
workspace.FeedBlob('Sb1', Sb, dc[1])
workspace.FeedBlob('X1', X, dc[1])
workspace.FeedBlob('w1', w, dc[1])
workspace.FeedBlob('b1', b, dc[1])
workspace.FeedBlob('S1', S, dc[1])
workspace.RunOperatorOnce(relu_S1)
workspace.RunOperatorOnce(conv_S1)
workspace.RunOperatorOnce(conv_fusion)
S1 = workspace.FetchBlob('S1')
@ -336,9 +370,9 @@ class ConvFusionTest(hu.HypothesisTestCase):
# Auto fusion for Conv + Sum + ReLU
workspace.ResetWorkspace()
old_net = caffe2_pb2.NetDef()
relu_S0_old = caffe2_pb2.OperatorDef()
relu_S0_old.CopyFrom(relu_S0)
relu_S0_old.device_option.CopyFrom(dc[1])
conv_S0_old = caffe2_pb2.OperatorDef()
conv_S0_old.CopyFrom(conv_S0)
conv_S0_old.device_option.CopyFrom(dc[1])
conv_old = caffe2_pb2.OperatorDef()
conv_old.CopyFrom(conv)
conv_old.device_option.CopyFrom(dc[1])
@ -348,11 +382,13 @@ class ConvFusionTest(hu.HypothesisTestCase):
relu_old = caffe2_pb2.OperatorDef()
relu_old.CopyFrom(relu)
relu_old.device_option.CopyFrom(dc[1])
old_net.op.extend([relu_S0_old, conv_old, sum_old, relu_old])
old_net.op.extend([conv_S0_old, conv_old, sum_old, relu_old])
workspace.FeedBlob('SX0', SX, dc[1])
workspace.FeedBlob('Sw0', Sw, dc[1])
workspace.FeedBlob('Sb0', Sb, dc[1])
workspace.FeedBlob('X0', X, dc[1])
workspace.FeedBlob('w0', w, dc[1])
workspace.FeedBlob('b0', b, dc[1])
workspace.FeedBlob('S0', S, dc[1])
net = core.Net("net")
net.Proto().CopyFrom(old_net)
optimizeForIDEEP(net)

View File

@ -9,7 +9,7 @@ from random import randint
from caffe2.proto import caffe2_pb2
from caffe2.python import core, workspace
@unittest.skipIf(not workspace.C.use_ideep, "No IDEEP support.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class CopyTest(unittest.TestCase):
def _get_deep_device(self):
return caffe2_pb2.DeviceOption(device_type=caffe2_pb2.IDEEP)

View File

@ -14,7 +14,7 @@ import caffe2.python.hypothesis_test_util as hu
import caffe2.python.ideep_test_util as mu
@unittest.skipIf(not workspace.C.use_ideep, "No IDEEP support.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class DropoutTest(hu.HypothesisTestCase):
@given(X=hu.tensor(),

View File

@ -12,7 +12,7 @@ import caffe2.python.hypothesis_test_util as hu
import caffe2.python.ideep_test_util as mu
@unittest.skipIf(not workspace.C.use_ideep, "No IDEEP support.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class ElementwiseSumTest(hu.HypothesisTestCase):
@given(size=st.integers(7, 9),
input_channels=st.integers(1, 3),

View File

@ -11,7 +11,7 @@ from caffe2.python import core, workspace
import caffe2.python.hypothesis_test_util as hu
import caffe2.python.ideep_test_util as mu
@unittest.skipIf(not workspace.C.use_ideep, "No IDEEP support.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class FcTest(hu.HypothesisTestCase):
@given(n=st.integers(1, 5), m=st.integers(1, 5),
k=st.integers(1, 5), **mu.gcs)

View File

@ -12,7 +12,7 @@ from hypothesis import given
import caffe2.python.ideep_test_util as mu
@unittest.skipIf(not workspace.C.use_ideep, "No IDEEP support.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class TestMomentumSGDUpdateOps(hu.HypothesisTestCase):
@given(n=st.integers(4, 8), nesterov=st.booleans(),
**mu.gcs)

View File

@ -13,7 +13,7 @@ import caffe2.python.hypothesis_test_util as hu
import caffe2.python.ideep_test_util as mu
@unittest.skipIf(not workspace.C.use_ideep, "No IDEEP support.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class TestFallbackOps(hu.HypothesisTestCase):
@given(stride=st.integers(1, 3),
pad=st.integers(0, 3),

View File

@ -11,7 +11,7 @@ from caffe2.python import core, workspace
import caffe2.python.hypothesis_test_util as hu
import caffe2.python.ideep_test_util as mu
@unittest.skipIf(not workspace.C.use_ideep, "No IDEEP support.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class PoolTest(hu.HypothesisTestCase):
@given(stride=st.integers(1, 3),
pad=st.integers(0, 3),

View File

@ -11,7 +11,7 @@ from caffe2.python import core, workspace
import caffe2.python.hypothesis_test_util as hu
import caffe2.python.ideep_test_util as mu
@unittest.skipIf(not workspace.C.use_ideep, "No IDEEP support.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class ReluTest(hu.HypothesisTestCase):
@given(X=hu.tensor(),
inplace=st.booleans(),

View File

@ -11,7 +11,7 @@ from caffe2.python import core, workspace
import caffe2.python.hypothesis_test_util as hu
import caffe2.python.ideep_test_util as mu
@unittest.skipIf(not workspace.C.use_ideep, "No IDEEP support.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class SoftmaxTest(hu.HypothesisTestCase):
@given(size=st.integers(8, 20),
input_channels=st.integers(1, 3),

View File

@ -13,7 +13,7 @@ from caffe2.python.model_helper import ModelHelper
import caffe2.python.ideep_test_util as mu
@unittest.skipIf(not workspace.C.use_ideep, "No IDEEP support.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class TestSpatialBN(hu.HypothesisTestCase):
@given(size=st.integers(7, 10),
input_channels=st.integers(7, 10),

View File

@ -12,7 +12,7 @@ import caffe2.python.hypothesis_test_util as hu
import caffe2.python.ideep_test_util as mu
@unittest.skipIf(not workspace.C.use_ideep, "No IDEEP support.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class SqueezeTest(hu.HypothesisTestCase):
@given(
squeeze_dims=st.lists(st.integers(0, 3), min_size=1, max_size=3),

View File

@ -180,8 +180,7 @@ def complex_resnet():
return model, [(1, 1, 224, 224)]
@unittest.skipIf(not workspace.C.use_ideep,
"Skipping as we do not have IDEEP.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class MKLRewriteTest(hu.HypothesisTestCase):
@given(gen=st.sampled_from([simple_relu, simple_fc,
simple_mlp, simple_cnn]))

View File

@ -977,12 +977,12 @@ void addGlobalMethods(py::module& m) {
// keep this Python attribute for BC
m.attr("has_mkldnn") = py::bool_(false);
m.attr("use_ideep") = py::bool_(
#ifdef CAFFE2_USE_IDEEP
m.attr("use_mkldnn") = py::bool_(
#ifdef CAFFE2_USE_MKLDNN
true
#else // CAFFE2_USE_IDEEP
#else // CAFFE2_USE_MKLDNN
false
#endif // CAFFE2_USE_IDEEP
#endif // CAFFE2_USE_MKLDNN
);
m.attr("use_trt") = py::bool_(

View File

@ -347,17 +347,7 @@ class TestWorkspaceGPU(test_util.TestCase):
self.assertEqual(pattern.shape[0], workspace.NumCudaDevices())
@unittest.skipIf(not workspace.C.has_mkldnn, "No MKLDNN support.")
class TestWorkspaceMKLDNN(test_util.TestCase):
def testFeedFetchBlobMKLDNN(self):
arr = np.random.randn(2, 3).astype(np.float32)
workspace.FeedBlob(
"testblob_mkldnn", arr, core.DeviceOption(caffe2_pb2.MKLDNN))
fetched = workspace.FetchBlob("testblob_mkldnn")
np.testing.assert_array_equal(arr, fetched)
@unittest.skipIf(not workspace.C.use_ideep, "No IDEEP support.")
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")
class TestWorkspaceIDEEP(test_util.TestCase):
def testFeedFetchBlobIDEEP(self):

View File

@ -1,6 +1,6 @@
#include "caffe2/sgd/iter_op.h"
#ifdef CAFFE2_USE_IDEEP
#ifdef CAFFE2_USE_MKLDNN
#include <caffe2/ideep/operators/operator_fallback_ideep.h>
#include <caffe2/ideep/utils/ideep_operator.h>
#endif
@ -28,7 +28,7 @@ void MutexDeserializer::Deserialize(const BlobProto& /* unused */, Blob* blob) {
REGISTER_CPU_OPERATOR(Iter, IterOp<CPUContext>);
REGISTER_CPU_OPERATOR(AtomicIter, AtomicIterOp<CPUContext>);
#ifdef CAFFE2_USE_IDEEP
#ifdef CAFFE2_USE_MKLDNN
REGISTER_IDEEP_OPERATOR(AtomicIter, IDEEPFallbackOp<AtomicIterOp<CPUContext>>);
#endif

View File

@ -72,10 +72,12 @@ elseif(BLAS STREQUAL "MKL")
if(MKL_FOUND)
include_directories(SYSTEM ${MKL_INCLUDE_DIR})
list(APPEND Caffe2_PUBLIC_DEPENDENCY_LIBS caffe2::mkl)
set(CAFFE2_USE_MKL ON)
else()
message(WARNING "MKL could not be found. Defaulting to Eigen")
set(BLAS "Eigen" CACHE STRING "Selected BLAS library")
set(CAFFE2_USE_EIGEN_FOR_BLAS ON)
set(CAFFE2_USE_MKL OFF)
endif()
elseif(BLAS STREQUAL "vecLib")
find_package(vecLib REQUIRED)
@ -1247,19 +1249,21 @@ if (NOT BUILD_ATEN_MOBILE)
set(AT_ROCM_ENABLED 1)
ENDIF()
if (NO_MKLDNN)
message("disabling MKLDNN because NO_MKLDNN is set")
set(AT_MKLDNN_ENABLED 0)
else()
find_package(MKLDNN)
if(NOT MKLDNN_FOUND)
message(STATUS "MKLDNN not found. Compiling without MKLDNN support")
set(AT_MKLDNN_ENABLED 0)
else()
include_directories(SYSTEM ${MKLDNN_INCLUDE_DIRS})
set(AT_MKLDNN_ENABLED 1)
endif()
endif()
SET(AT_MKLDNN_ENABLED 0)
SET(CAFFE2_USE_MKLDNN OFF)
IF (USE_MKLDNN)
FIND_PACKAGE(MKLDNN)
IF(MKLDNN_FOUND)
SET(AT_MKLDNN_ENABLED 1)
SET(CAFFE2_USE_MKLDNN ON)
INCLUDE_DIRECTORIES(SYSTEM ${MKLDNN_INCLUDE_DIR})
LIST(APPEND Caffe2_PUBLIC_DEPENDENCY_LIBS ${MKLDNN_LIBRARIES})
ELSE()
MESSAGE(WARNING "MKLDNN could not be found.")
ENDIF()
ELSE()
MESSAGE("disabling MKLDNN because USE_MKLDNN is not set")
ENDIF()
IF(UNIX AND NOT APPLE)
INCLUDE(CheckLibraryExists)

View File

@ -1,13 +1,5 @@
# - Find INTEL MKL library
#
# This module finds the Intel Mkl libraries.
#
# USE_IDEEP : use IDEEP interface
# USE_MKLML : use MKLML interface
# MKLML_USE_SINGLE_DYNAMIC_LIBRARY : use single dynamic library interface
# MKLML_USE_STATIC_LIBS : use static libraries
# MKLML_MULTI_THREADED : use multi-threading
#
# This module sets the following variables:
# MKL_FOUND - set to true if a library implementing the CBLAS interface is found
# MKL_VERSION - best guess
@ -69,7 +61,6 @@ ENDIF (CMAKE_COMPILER_IS_GNUCC)
SET(mklkerlibs "mc" "mc3" "nc" "p4n" "p4m" "p4m3" "p4p" "def")
SET(mklseq)
# Paths
SET(saved_CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH})
SET(saved_CMAKE_INCLUDE_PATH ${CMAKE_INCLUDE_PATH})
@ -142,7 +133,7 @@ MACRO(CHECK_ALL_LIBRARIES LIBRARIES _name _list _flags)
IF(${_library} STREQUAL "gomp")
FIND_PACKAGE(OpenMP)
IF(OPENMP_FOUND)
SET(${_prefix}_${_library}_LIBRARY ${OpenMP_C_FLAGS})
SET(${_prefix}_${_library}_LIBRARY ${OpenMP_C_FLAGS})
ENDIF(OPENMP_FOUND)
ELSE(${_library} STREQUAL "gomp")
FIND_LIBRARY(${_prefix}_${_library}_LIBRARY NAMES ${_library})
@ -174,19 +165,19 @@ MACRO(CHECK_ALL_LIBRARIES LIBRARIES _name _list _flags)
ENDIF(_libraries_work)
ENDMACRO(CHECK_ALL_LIBRARIES)
if(WIN32)
set(mkl_m "")
set(mkl_pthread "")
else(WIN32)
set(mkl_m "m")
set(mkl_pthread "pthread")
endif(WIN32)
IF(WIN32)
SET(mkl_m "")
SET(mkl_pthread "")
ELSE(WIN32)
SET(mkl_m "m")
SET(mkl_pthread "pthread")
ENDIF(WIN32)
if(UNIX AND NOT APPLE)
set(mkl_dl "${CMAKE_DL_LIBS}")
else(UNIX AND NOT APPLE)
set(mkl_dl "")
endif(UNIX AND NOT APPLE)
IF(UNIX AND NOT APPLE)
SET(mkl_dl "${CMAKE_DL_LIBS}")
ELSE(UNIX AND NOT APPLE)
SET(mkl_dl "")
ENDIF(UNIX AND NOT APPLE)
# Check for version 10/11
IF (NOT MKL_LIBRARIES)
@ -286,7 +277,7 @@ SET(CMAKE_LIBRARY_PATH ${saved_CMAKE_LIBRARY_PATH})
SET(CMAKE_INCLUDE_PATH ${saved_CMAKE_INCLUDE_PATH})
IF (MKL_LIBRARIES AND MKL_INCLUDE_DIR)
SET(MKL_FOUND TRUE)
set(MKL_cmake_included true)
SET(MKL_cmake_included true)
ELSE (MKL_LIBRARIES AND MKL_INCLUDE_DIR)
SET(MKL_FOUND FALSE)
SET(MKL_VERSION)
@ -301,86 +292,8 @@ IF(NOT MKL_FIND_QUIETLY)
MESSAGE(STATUS "MKL library found")
ELSE(MKL_FOUND)
MESSAGE(STATUS "MKL library not found")
return()
ENDIF(MKL_FOUND)
ENDIF(NOT MKL_FIND_QUIETLY)
# MKLML is included in the MKL package
if (USE_MKL AND USE_MKLML)
set(CAFFE2_USE_MKL 1)
endif()
if (USE_MKL AND USE_IDEEP)
set(IDEEP_ROOT "${PROJECT_SOURCE_DIR}/third_party/ideep")
set(MKLDNN_ROOT "${IDEEP_ROOT}/mkl-dnn")
find_path(IDEEP_INCLUDE_DIR ideep.hpp PATHS ${IDEEP_ROOT} PATH_SUFFIXES include)
find_path(MKLDNN_INCLUDE_DIR_HACK mkldnn.hpp mkldnn.h PATHS ${MKLDNN_ROOT} PATH_SUFFIXES include)
if (NOT MKLDNN_INCLUDE_DIR_HACK)
execute_process(COMMAND git submodule update --init mkl-dnn WORKING_DIRECTORY ${IDEEP_ROOT})
find_path(MKLDNN_INCLUDE_DIR_HACK mkldnn.hpp mkldnn.h PATHS ${MKLDNN_ROOT} PATH_SUFFIXES include)
endif()
if (MKLDNN_INCLUDE_DIR_HACK)
list(APPEND IDEEP_INCLUDE_DIR ${MKLDNN_INCLUDE_DIR_HACK})
list(APPEND __ideep_looked_for MKLDNN_INCLUDE_DIR_HACK)
# to avoid adding conflicting submodels
set(ORIG_WITH_TEST ${WITH_TEST})
set(WITH_TEST OFF)
add_subdirectory(${IDEEP_ROOT})
set(WITH_TEST ${ORIG_WITH_TEST})
# If we cannot find MKL, we will use the Intel MKL Small library comes with ${MKLDNN_ROOT}/external
file(GLOB_RECURSE MKLML_INNER_INCLUDE_DIR ${MKLDNN_ROOT}/external/*/mkl_vsl.h)
if(NOT MKL_FOUND AND MKLML_INNER_INCLUDE_DIR)
# if user has multiple version under external/ then guess last
# one alphabetically is "latest" and warn
list(LENGTH MKLML_INNER_INCLUDE_DIR MKLINCLEN)
if(MKLINCLEN GREATER 1)
list(SORT MKLML_INNER_INCLUDE_DIR)
list(REVERSE MKLML_INNER_INCLUDE_DIR)
list(GET MKLML_INNER_INCLUDE_DIR 0 MKLINCLST)
set(MKLML_INNER_INCLUDE_DIR "${MKLINCLST}")
endif()
get_filename_component(MKLML_INNER_INCLUDE_DIR ${MKLML_INNER_INCLUDE_DIR} DIRECTORY)
list(APPEND IDEEP_INCLUDE_DIR ${MKLML_INNER_INCLUDE_DIR})
list(APPEND __ideep_looked_for MKLML_INNER_INCLUDE_DIR)
if(APPLE)
set(__mklml_inner_libs mklml iomp5)
else()
set(__mklml_inner_libs mklml_intel iomp5)
endif()
set(IDEEP_LIBRARIES "")
foreach (__mklml_inner_lib ${__mklml_inner_libs})
string(TOUPPER ${__mklml_inner_lib} __mklml_inner_lib_upper)
find_library(${__mklml_inner_lib_upper}_LIBRARY
NAMES ${__mklml_inner_lib}
PATHS "${MKLML_INNER_INCLUDE_DIR}/../lib"
DOC "The path to Intel(R) MKLML ${__mklml_inner_lib} library")
mark_as_advanced(${__mklml_inner_lib_upper}_LIBRARY)
list(APPEND IDEEP_LIBRARIES ${${__mklml_inner_lib_upper}_LIBRARY})
list(APPEND __ideep_looked_for ${__mklml_inner_lib_upper}_LIBRARY)
endforeach()
endif() # NOT MKL_FOUND AND MKLML_INNER_INCLUDE_DIR
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(IDEEP DEFAULT_MSG ${__ideep_looked_for})
if(IDEEP_FOUND)
set(MKLDNN_LIB "${CMAKE_SHARED_LIBRARY_PREFIX}mkldnn${CMAKE_SHARED_LIBRARY_SUFFIX}")
list(APPEND IDEEP_LIBRARIES "${PROJECT_BINARY_DIR}/lib/${MKLDNN_LIB}")
message(STATUS "Found IDEEP (include: ${IDEEP_INCLUDE_DIR}, lib: ${IDEEP_LIBRARIES})")
set(CAFFE2_USE_IDEEP 1)
list(APPEND MKL_INCLUDE_DIR ${IDEEP_INCLUDE_DIR})
list(APPEND MKL_LIBRARIES ${IDEEP_LIBRARIES})
else()
message(FATAL_ERROR "Did not find IDEEP files!")
endif()
caffe_clear_vars(__ideep_looked_for __mklml_inner_libs)
endif() # MKLDNN_INCLUDE_DIR_HACK
endif() # USE_IDEEP
# Do nothing if MKL_FOUND was set before!
ENDIF (NOT MKL_FOUND)

View File

@ -1,32 +1,115 @@
# - Try to find MKLDNN
#
# The following variables are optionally searched for defaults
# MKLDNN_ROOT_DIR: Base directory where all MKLDNN components are found
# MKL_FOUND : set to true if a library implementing the CBLAS interface is found
# USE_MKLDNN
#
# The following are set after configuration is done:
# MKLDNN_FOUND
# MKLDNN_INCLUDE_DIRS
# MKLDNN_LIBRARIES
# MKLDNN_LIBRARY_DIRS
# MKLDNN_FOUND : set to true if mkl-dnn is found.
# MKLDNN_INCLUDE_DIR : path to mkl-dnn include dir.
# MKLDNN_LIBRARIES : list of libraries for mkl-dnn
include(FindPackageHandleStandardArgs)
IF (NOT MKLDNN_FOUND)
set(MKLDNN_ROOT_DIR "" CACHE PATH "Folder contains Intel MKLDNN")
SET(MKLDNN_LIBRARIES)
SET(MKLDNN_INCLUDE_DIR)
find_path(MKLDNN_INCLUDE_DIR mkldnn.h
HINTS ${MKLDNN_ROOT_DIR}
PATH_SUFFIXES include)
IF (NOT USE_MKLDNN)
RETURN()
ENDIF(NOT USE_MKLDNN)
find_library(MKLDNN_LIBRARY mkldnn
HINTS ${MKLDNN_LIB_DIR} ${MKLDNN_ROOT_DIR}
PATH_SUFFIXES lib lib64)
IF(MSVC)
MESSAGE(STATUS "MKL-DNN needs omp 3+ which is not supported in MSVC so far")
RETURN()
ENDIF(MSVC)
find_package_handle_standard_args(
MKLDNN DEFAULT_MSG MKLDNN_INCLUDE_DIR MKLDNN_LIBRARY)
SET(IDEEP_ROOT "${PROJECT_SOURCE_DIR}/third_party/ideep")
SET(MKLDNN_ROOT "${IDEEP_ROOT}/mkl-dnn")
if(MKLDNN_FOUND)
set(MKLDNN_INCLUDE_DIRS ${MKLDNN_INCLUDE_DIR})
set(MKLDNN_LIBRARIES ${MKLDNN_LIBRARY})
message(STATUS "Found MKLDNN (include: ${MKLDNN_INCLUDE_DIR}, library: ${MKLDNN_LIBRARY})")
mark_as_advanced(MKLDNN_ROOT_DIR MKLDNN_LIBRARY MKLDNN_INCLUDE_DIR)
endif()
FIND_PACKAGE(BLAS)
FIND_PATH(IDEEP_INCLUDE_DIR ideep.hpp PATHS ${IDEEP_ROOT} PATH_SUFFIXES include)
FIND_PATH(MKLDNN_INCLUDE_DIR mkldnn.hpp mkldnn.h PATHS ${MKLDNN_ROOT} PATH_SUFFIXES include)
IF (NOT MKLDNN_INCLUDE_DIR)
EXECUTE_PROCESS(COMMAND git${CMAKE_EXECUTABLE_SUFFIX} submodule update --init mkl-dnn WORKING_DIRECTORY ${IDEEP_ROOT})
FIND_PATH(MKLDNN_INCLUDE_DIR mkldnn.hpp mkldnn.h PATHS ${MKLDNN_ROOT} PATH_SUFFIXES include)
ENDIF(NOT MKLDNN_INCLUDE_DIR)
IF (NOT IDEEP_INCLUDE_DIR OR NOT MKLDNN_INCLUDE_DIR)
MESSAGE(STATUS "MKLDNN source files not found!")
RETURN()
ENDIF(NOT IDEEP_INCLUDE_DIR OR NOT MKLDNN_INCLUDE_DIR)
LIST(APPEND MKLDNN_INCLUDE_DIR ${IDEEP_INCLUDE_DIR})
IF(MKL_FOUND)
LIST(APPEND MKLDNN_LIBRARIES ${MKL_LIBRARIES})
LIST(APPEND MKLDNN_INCLUDE_DIR ${MKL_INCLUDE_DIR})
ELSE(MKL_FOUND)
# If we cannot find MKL, we will use the Intel MKL Small library
# comes with ${MKLDNN_ROOT}/external
IF(NOT IS_DIRECTORY ${MKLDNN_ROOT}/external)
IF(UNIX)
EXECUTE_PROCESS(COMMAND "${MKLDNN_ROOT}/scripts/prepare_mkl.sh" RESULT_VARIABLE __result)
ELSE(UNIX)
EXECUTE_PROCESS(COMMAND "${MKLDNN_ROOT}/scripts/prepare_mkl.bat" RESULT_VARIABLE __result)
ENDIF(UNIX)
ENDIF(NOT IS_DIRECTORY ${MKLDNN_ROOT}/external)
FILE(GLOB_RECURSE MKLML_INNER_INCLUDE_DIR ${MKLDNN_ROOT}/external/*/mkl.h)
IF(MKLML_INNER_INCLUDE_DIR)
# if user has multiple version under external/ then guess last
# one alphabetically is "latest" and warn
LIST(LENGTH MKLML_INNER_INCLUDE_DIR MKLINCLEN)
IF(MKLINCLEN GREATER 1)
LIST(SORT MKLML_INNER_INCLUDE_DIR)
LIST(REVERSE MKLML_INNER_INCLUDE_DIR)
LIST(GET MKLML_INNER_INCLUDE_DIR 0 MKLINCLST)
SET(MKLML_INNER_INCLUDE_DIR "${MKLINCLST}")
ENDIF(MKLINCLEN GREATER 1)
GET_FILENAME_COMPONENT(MKLML_INNER_INCLUDE_DIR ${MKLML_INNER_INCLUDE_DIR} DIRECTORY)
LIST(APPEND MKLDNN_INCLUDE_DIR ${MKLML_INNER_INCLUDE_DIR})
IF(APPLE)
SET(__mklml_inner_libs mklml iomp5)
ELSE(APPLE)
SET(__mklml_inner_libs mklml_intel iomp5)
ENDIF(APPLE)
FOREACH(__mklml_inner_lib ${__mklml_inner_libs})
STRING(TOUPPER ${__mklml_inner_lib} __mklml_inner_lib_upper)
FIND_LIBRARY(${__mklml_inner_lib_upper}_LIBRARY
NAMES ${__mklml_inner_lib}
PATHS "${MKLML_INNER_INCLUDE_DIR}/../lib"
DOC "The path to Intel(R) MKLML ${__mklml_inner_lib} library")
MARK_AS_ADVANCED(${__mklml_inner_lib_upper}_LIBRARY)
LIST(APPEND MKLDNN_LIBRARIES ${${__mklml_inner_lib_upper}_LIBRARY})
ENDFOREACH(__mklml_inner_lib)
ENDIF(MKLML_INNER_INCLUDE_DIR)
ENDIF(MKL_FOUND)
LIST(APPEND __mkldnn_looked_for MKLDNN_LIBRARIES)
LIST(APPEND __mkldnn_looked_for MKLDNN_INCLUDE_DIR)
INCLUDE(FindPackageHandleStandardArgs)
find_package_handle_standard_args(MKLDNN DEFAULT_MSG ${__mkldnn_looked_for})
IF(MKLDNN_FOUND)
IF(NOT APPLE AND CMAKE_COMPILER_IS_GNUCC)
ADD_COMPILE_OPTIONS(-Wno-maybe-uninitialized)
ENDIF(NOT APPLE AND CMAKE_COMPILER_IS_GNUCC)
SET(WITH_TEST FALSE CACHE BOOL "build with mkl-dnn test" FORCE)
SET(WITH_EXAMPLE FALSE CACHE BOOL "build with mkl-dnn examples" FORCE)
ADD_SUBDIRECTORY(${MKLDNN_ROOT})
SET(MKLDNN_LIB "${CMAKE_SHARED_LIBRARY_PREFIX}mkldnn${CMAKE_SHARED_LIBRARY_SUFFIX}")
IF(MINGW OR MSVC)
LIST(APPEND MKLDNN_LIBRARIES "${PROJECT_BINARY_DIR}/bin/${MKLDNN_LIB}")
ELSE(MINGW OR MSVC)
LIST(APPEND MKLDNN_LIBRARIES "${PROJECT_BINARY_DIR}/lib/${MKLDNN_LIB}")
ENDIF(MINGW OR MSVC)
ELSE(MKLDNN_FOUND)
MESSAGE(STATUS "MKLDNN source files not found!")
ENDIF(MKLDNN_FOUND)
UNSET(__mklml_inner_libs)
UNSET(__mkldnn_looked_for)
ENDIF(NOT MKLDNN_FOUND)

View File

@ -95,11 +95,8 @@ function (caffe2_print_configuration_summary)
message(STATUS " LMDB version : ${LMDB_VERSION}")
endif()
message(STATUS " USE_METAL : ${USE_METAL}")
message(STATUS " USE_MKL : ${USE_MKL}")
if(${USE_MKL})
message(STATUS " USE_MKLML : ${USE_MKLML}")
message(STATUS " USE_IDEEP : ${USE_IDEEP}")
endif()
message(STATUS " USE_MKL : ${CAFFE2_USE_MKL}")
message(STATUS " USE_MKLDNN : ${CAFFE2_USE_MKLDNN}")
message(STATUS " USE_MOBILE_OPENGL : ${USE_MOBILE_OPENGL}")
message(STATUS " USE_NCCL : ${USE_NCCL}")
if(${USE_NCCL})

View File

@ -15,14 +15,14 @@
*/
#include "batch_permutation_op.h"
#ifdef CAFFE2_USE_IDEEP
#ifdef CAFFE2_USE_MKLDNN
#include <caffe2/ideep/operators/operator_fallback_ideep.h>
#include <caffe2/ideep/utils/ideep_operator.h>
#endif
namespace caffe2 {
#ifdef CAFFE2_USE_IDEEP
#ifdef CAFFE2_USE_MKLDNN
REGISTER_IDEEP_OPERATOR(
BatchPermutation,
IDEEPFallbackOp<BatchPermutationOp<float, CPUContext>>);

View File

@ -15,13 +15,13 @@
*/
#include "upsample_nearest_op.h"
#ifdef CAFFE2_USE_IDEEP
#ifdef CAFFE2_USE_MKLDNN
#include "caffe2/ideep/operators/operator_fallback_ideep.h"
#include "caffe2/ideep/utils/ideep_operator.h"
#endif
namespace caffe2 {
#ifdef CAFFE2_USE_IDEEP
#ifdef CAFFE2_USE_MKLDNN
REGISTER_IDEEP_OPERATOR(
UpsampleNearest,
IDEEPFallbackOp<UpsampleNearestOp<float, CPUContext>>);

View File

@ -111,11 +111,6 @@
# NCCL_INCLUDE_DIR
# specify where nccl is installed
#
# MKLDNN_LIB_DIR
# MKLDNN_LIBRARY
# MKLDNN_INCLUDE_DIR
# specify where MKLDNN is installed
#
# NVTOOLSEXT_PATH (Windows only)
# specify where nvtoolsext is installed
#
@ -177,8 +172,6 @@ from tools.setup_helpers.miopen import (USE_MIOPEN, MIOPEN_LIBRARY,
MIOPEN_LIB_DIR, MIOPEN_INCLUDE_DIR)
from tools.setup_helpers.nccl import USE_NCCL, USE_SYSTEM_NCCL, NCCL_LIB_DIR, \
NCCL_INCLUDE_DIR, NCCL_ROOT_DIR, NCCL_SYSTEM_LIB
from tools.setup_helpers.mkldnn import (USE_MKLDNN, MKLDNN_LIBRARY,
MKLDNN_LIB_DIR, MKLDNN_INCLUDE_DIR)
from tools.setup_helpers.nnpack import USE_NNPACK
from tools.setup_helpers.qnnpack import USE_QNNPACK
from tools.setup_helpers.nvtoolext import NVTOOLEXT_HOME
@ -197,6 +190,7 @@ IS_DARWIN = (platform.system() == 'Darwin')
IS_LINUX = (platform.system() == 'Linux')
BUILD_PYTORCH = check_env_flag('BUILD_PYTORCH')
USE_MKLDNN = check_env_flag('USE_MKLDNN')
USE_CUDA_STATIC_LINK = check_env_flag('USE_CUDA_STATIC_LINK')
RERUN_CMAKE = True
@ -396,9 +390,6 @@ def build_libs(libs):
my_env["MIOPEN_LIBRARY"] = MIOPEN_LIBRARY
my_env["MIOPEN_INCLUDE_DIR"] = MIOPEN_INCLUDE_DIR
if USE_MKLDNN:
my_env["MKLDNN_LIB_DIR"] = MKLDNN_LIB_DIR
my_env["MKLDNN_LIBRARY"] = MKLDNN_LIBRARY
my_env["MKLDNN_INCLUDE_DIR"] = MKLDNN_INCLUDE_DIR
build_libs_cmd += ['--use-mkldnn']
if USE_QNNPACK:
build_libs_cmd += ['--use-qnnpack']
@ -610,7 +601,7 @@ class build_ext(build_ext_parent):
else:
print('-- Not using CUDA')
if USE_MKLDNN:
print('-- Detected MKLDNN at ' + MKLDNN_LIBRARY + ', ' + MKLDNN_INCLUDE_DIR)
print('-- Using MKLDNN')
else:
print('-- Not using MKLDNN')
if USE_NCCL and USE_SYSTEM_NCCL:

View File

@ -48,10 +48,10 @@ IF "%~1"=="--use-qnnpack" (
)
IF "%~1"=="--use-mkldnn" (
set /a NO_MKLDNN=0
set /a USE_MKLDNN=1
shift
) ELSE (
set /a NO_MKLDNN=1
set /a USE_MKLDNN=0
)
IF "%~1"=="--use-gloo-ibverbs" (
@ -205,10 +205,7 @@ goto:eof
-DCUDNN_INCLUDE_DIR="%CUDNN_INCLUDE_DIR%" ^
-DCUDNN_LIB_DIR="%CUDNN_LIB_DIR%" ^
-DCUDNN_LIBRARY="%CUDNN_LIBRARY%" ^
-DNO_MKLDNN=%NO_MKLDNN% ^
-DMKLDNN_INCLUDE_DIR="%MKLDNN_INCLUDE_DIR%" ^
-DMKLDNN_LIB_DIR="%MKLDNN_LIB_DIR%" ^
-DMKLDNN_LIBRARY="%MKLDNN_LIBRARY%" ^
-DUSE_MKLDNN=%USE_MKLDNN% ^
-DATEN_NO_CONTRIB=1 ^
-DCMAKE_INSTALL_PREFIX="%INSTALL_DIR%" ^
-DCMAKE_C_FLAGS="%USER_CFLAGS%" ^

View File

@ -328,9 +328,6 @@ function build_caffe2() {
-DCUDNN_LIB_DIR=$CUDNN_LIB_DIR \
-DCUDNN_LIBRARY=$CUDNN_LIBRARY \
-DUSE_MKLDNN=$USE_MKLDNN \
-DMKLDNN_INCLUDE_DIR=$MKLDNN_INCLUDE_DIR \
-DMKLDNN_LIB_DIR=$MKLDNN_LIB_DIR \
-DMKLDNN_LIBRARY=$MKLDNN_LIBRARY \
-DCMAKE_INSTALL_PREFIX="$INSTALL_DIR" \
-DCMAKE_C_FLAGS="$USER_CFLAGS" \
-DCMAKE_CXX_FLAGS="$USER_CFLAGS" \

View File

@ -1,87 +0,0 @@
import platform
import glob
import os
import sys
from itertools import chain
from .env import check_env_flag, IS_LINUX, IS_WINDOWS, IS_CONDA, CONDA_DIR
def gather_paths(env_vars):
    """Collect the path entries stored in the given environment variables.

    Each variable's value is split on ``os.pathsep``. Entries are returned
    in the order the variables are listed and, within one variable, in the
    order they appear in its value.

    Args:
        env_vars: iterable of environment variable names to read.

    Returns:
        list of str: every non-empty entry. Unset or empty variables
        contribute nothing, and empty entries produced by doubled
        separators are dropped.
    """
    # ''.split(os.pathsep) is [''], so an unset variable would otherwise
    # contribute a bogus empty path; filter those out here.
    per_variable = (os.getenv(name, '').split(os.pathsep) for name in env_vars)
    return [entry for entry in chain.from_iterable(per_variable) if entry]
# Root directory probed for MKL-DNN; overridable through MKLDNN_HOME.
MKLDNN_HOME = os.getenv('MKLDNN_HOME', '/usr/local/mkl-dnn')

# Detection results. They keep these defaults unless a library, its
# directory and a header directory are all located below.
USE_MKLDNN = False
MKLDNN_LIB_DIR = None
MKLDNN_INCLUDE_DIR = None
MKLDNN_LIBRARY = None

if not check_env_flag('NO_MKLDNN'):
    # Candidate library directories, highest priority first: the explicit
    # override, MKLDNN_HOME sub-directories, system directories, then the
    # entries of the linker-related environment variables.
    lib_candidates = [
        os.getenv('MKLDNN_LIB_DIR'),
        os.path.join(MKLDNN_HOME, 'lib'),
        os.path.join(MKLDNN_HOME, 'lib64'),
        os.path.join(MKLDNN_HOME, 'lib/x64'),
        '/usr/lib/',
        '/usr/lib64/',
    ]
    lib_candidates += gather_paths(['LIBRARY_PATH', 'LD_LIBRARY_PATH', 'LIB'])
    lib_paths = list(filter(bool, lib_candidates))

    # Candidate header directories, built the same way.
    include_candidates = [
        os.getenv('MKLDNN_INCLUDE_DIR'),
        os.path.join(MKLDNN_HOME, 'include'),
        '/usr/include/',
    ]
    include_candidates += gather_paths([
        'CPATH',
        'C_INCLUDE_PATH',
        'CPLUS_INCLUDE_PATH',
        'INCLUDE',
    ])
    include_paths = list(filter(bool, include_candidates))

    # Glob patterns for the MKL-DNN and MKLML library files.
    if IS_WINDOWS:
        mkldnn_regex, mklml_regex = 'mkldnn*.lib', 'mklml*.lib'
    else:
        mkldnn_regex, mklml_regex = 'libmkldnn*', 'libmklml_intel*'

    # A conda environment is searched last.
    if IS_CONDA:
        lib_paths.append(os.path.join(CONDA_DIR, 'lib'))
        include_paths.append(os.path.join(CONDA_DIR, 'include'))

    # The first existing directory that contains an MKL-DNN library wins;
    # the alphabetically first match in it is taken.
    for path in lib_paths:
        if path is None or not os.path.exists(path):
            continue
        libraries = sorted(glob.glob(os.path.join(path, mkldnn_regex)))
        if not libraries:
            continue
        # Warn when no MKLML library sits next to MKL-DNN.
        if not glob.glob(os.path.join(path, mklml_regex)):
            print("WARNING: MKL-DNN is not compiled with Intel MKL small library")
            print("Convolution performance might be suboptimal")
            print("Refer https://github.com/01org/mkl-dnn for detail info")
        MKLDNN_LIBRARY = libraries[0]
        MKLDNN_LIB_DIR = path
        break

    # The first existing directory that contains mkldnn.hpp wins.
    for path in include_paths:
        if path is None or not os.path.exists(path):
            continue
        if os.path.exists(os.path.join(path, 'mkldnn.hpp')):
            MKLDNN_INCLUDE_DIR = path
            break

    # Specifying the library directly will overwrite the lib directory
    library = os.getenv('MKLDNN_LIBRARY')
    if library is not None and os.path.exists(library):
        MKLDNN_LIBRARY = library
        MKLDNN_LIB_DIR = os.path.dirname(MKLDNN_LIBRARY)

    if all([MKLDNN_LIBRARY, MKLDNN_LIB_DIR, MKLDNN_INCLUDE_DIR]):
        # With symlinks resolved, the library must live in the reported
        # library directory.
        real_mkldnn_library = os.path.realpath(MKLDNN_LIBRARY)
        real_mkldnn_lib_dir = os.path.realpath(MKLDNN_LIB_DIR)
        assert os.path.dirname(real_mkldnn_library) == real_mkldnn_lib_dir, (
            'mkldnn library and lib_dir must agree')
        USE_MKLDNN = True
    else:
        # Partial detection is treated as "not found".
        MKLDNN_LIBRARY = MKLDNN_LIB_DIR = MKLDNN_INCLUDE_DIR = None