Mirror of https://github.com/pytorch/pytorch.git, synced 2025-11-15 23:04:54 +08:00

Commit: Update (base update)
[ghstack-poisoned]
@@ -68,9 +68,9 @@ if ! which conda; then
# intel cpu and later run tests on machines with amd cpu.
# Also leave out two builds to make sure non-mkldnn builds still work.
if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
export USE_MKLDNN=1
export USE_ONEDNN=1
else
export USE_MKLDNN=0
export USE_ONEDNN=0
fi
else
# CMAKE_PREFIX_PATH precedences

@@ -99,8 +99,8 @@ else
fi

if [[ "$BUILD_ENVIRONMENT" == *aarch64* ]]; then
export USE_MKLDNN=1
export USE_MKLDNN_ACL=1
export USE_ONEDNN=1
export USE_ONEDNN_ACL=1
export ACL_ROOT_DIR=/ComputeLibrary
fi
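For orientation, a minimal sketch of what the renamed switch means for a local build (illustrative only; it mirrors the `USE_ONEDNN` handling added above and the old `USE_MKLDNN` behavior):

```bash
# Assumed usage after this rename: oneDNN is toggled via USE_ONEDNN
# instead of USE_MKLDNN; 1 enables the backend, 0 disables it.
USE_ONEDNN=1 python setup.py develop   # build with oneDNN
USE_ONEDNN=0 python setup.py develop   # build without oneDNN
```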
@@ -37,7 +37,7 @@ cross_compile_arm64() {
# Cross compilation for arm64
# Explicitly set USE_DISTRIBUTED=0 to align with the default build config on mac. This also serves as the sole CI config that tests
# that building with USE_DISTRIBUTED=0 works at all. See https://github.com/pytorch/pytorch/issues/86448
USE_DISTRIBUTED=0 CMAKE_OSX_ARCHITECTURES=arm64 MACOSX_DEPLOYMENT_TARGET=11.0 USE_MKLDNN=OFF USE_QNNPACK=OFF WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python setup.py bdist_wheel
USE_DISTRIBUTED=0 CMAKE_OSX_ARCHITECTURES=arm64 MACOSX_DEPLOYMENT_TARGET=11.0 USE_ONEDNN=OFF USE_QNNPACK=OFF WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python setup.py bdist_wheel
}

compile_arm64() {
.github/labeler.yml (vendored): 6 changes

@@ -43,7 +43,7 @@
- aten/src/ATen/native/cpu/**
- aten/src/ATen/native/quantized/cpu/**
- aten/src/ATen/native/Convolution*.cpp
- aten/src/ATen/native/mkldnn/**
- aten/src/ATen/native/onednn/**
- torch/cpu/**
- torch/utils/mkldnn.py
- torch/utils/_sympy/**

@@ -53,7 +53,7 @@
- third_party/ideep
- caffe2/ideep/**
- caffe2/python/ideep/**
- cmake/Modules/FindMKLDNN.cmake
- cmake/Modules/FindONEDNN.cmake
- third_party/mkl-dnn.BUILD
- torch/csrc/jit/codegen/onednn/**
- test/test_jit_llga_fuser.py

@@ -63,7 +63,7 @@
- third_party/ideep
- caffe2/ideep/**
- caffe2/python/ideep/**
- cmake/Modules/FindMKLDNN.cmake
- cmake/Modules/FindONEDNN.cmake
- third_party/mkl-dnn.BUILD
- torch/csrc/jit/codegen/onednn/**
- test/test_jit_llga_fuser.py
.github/merge_rules.yaml (vendored): 10 changes

@@ -337,7 +337,7 @@
- third_party/ideep
- caffe2/ideep/**
- caffe2/python/ideep/**
- cmake/Modules/FindMKLDNN.cmake
- cmake/Modules/FindONEDNN.cmake
- third_party/mkl-dnn.BUILD
approved_by:
- XiaobingSuper

@@ -367,7 +367,7 @@
- aten/src/ATen/native/cpu/**
- aten/src/ATen/native/quantized/cpu/**
- aten/src/ATen/native/Convolution*.cpp
- aten/src/ATen/native/mkldnn/**
- aten/src/ATen/native/onednn/**
- test/test_mkl*.py
approved_by:
- mingfeima

@@ -395,9 +395,9 @@

- name: CPU inductor
  patterns:
- torch/_inductor/mkldnn_ir.py
- torch/_inductor/mkldnn_lowerings.py
- torch/_inductor/fx_passes/mkldnn_fusion.py
- torch/_inductor/onednn_ir.py
- torch/_inductor/onednn_lowerings.py
- torch/_inductor/fx_passes/onednn_fusion.py
- torch/_inductor/fx_passes/quantization.py
- torch/_inductor/codegen/cpp_prefix.h
- torch/_inductor/codegen/cpp.py

@@ -1641,7 +1641,7 @@ include_patterns = [
"aten/src/ATen/native/xnnpack/*.h",
"aten/src/ATen/native/metal/MetalPrepackOpContext.h",
"aten/src/ATen/native/mps/Copy.h",
"aten/src/ATen/native/mkldnn/**/*.h",
"aten/src/ATen/native/onednn/**/*.h",
]
exclude_patterns = [
"aten/src/ATen/Context.h",
BUILD.bazel: 10 changes

@@ -187,8 +187,8 @@ filegroup(
)

filegroup(
name = "aten_native_mkldnn_cpp",
srcs = glob(["aten/src/ATen/native/mkldnn/*.cpp"]),
name = "aten_native_onednn_cpp",
srcs = glob(["aten/src/ATen/native/onednn/*.cpp"]),
)

filegroup(

@@ -260,8 +260,8 @@ header_template_rule(
out = "aten/src/ATen/Config.h",
include = "aten/src",
substitutions = {
"@AT_MKLDNN_ENABLED@": "1",
"@AT_MKLDNN_ACL_ENABLED@": "0",
"@AT_ONEDNN_ENABLED@": "1",
"@AT_ONEDNN_ACL_ENABLED@": "0",
"@AT_MKL_ENABLED@": "1",
"@AT_MKL_SEQUENTIAL@": "0",
"@AT_POCKETFFT_ENABLED@": "0",

@@ -345,7 +345,7 @@ cc_library(
":aten_base_vulkan",
":aten_native_cpp",
":aten_native_mkl_cpp",
":aten_native_mkldnn_cpp",
":aten_native_onednn_cpp",
":aten_native_nested_cpp",
":aten_native_quantized_cpp",
":aten_native_sparse_cpp",
@@ -310,16 +310,16 @@ option(USE_ROCM_KERNEL_ASSERT "Use Kernel Assert for ROCm" OFF)
# Ensure that an ITT build is the default for x86 CPUs
cmake_dependent_option(USE_ITT "Use Intel(R) VTune Profiler ITT functionality"
ON "CPU_INTEL" OFF)
# Ensure that an MKLDNN build is the default for x86 CPUs but optional for
# AArch64 (dependent on -DUSE_MKLDNN).
# Ensure that an ONEDNN build is the default for x86 CPUs but optional for
# AArch64 (dependent on -DUSE_ONEDNN).
cmake_dependent_option(
USE_MKLDNN "Use MKLDNN. Only available on x86, x86_64, and AArch64."
USE_ONEDNN "Use ONEDNN. Only available on x86, x86_64, and AArch64."
"${CPU_INTEL}" "CPU_INTEL OR CPU_AARCH64" OFF)
cmake_dependent_option(
USE_MKLDNN_ACL "Use Compute Library for the Arm architecture." OFF
"USE_MKLDNN AND CPU_AARCH64" OFF)
set(MKLDNN_ENABLE_CONCURRENT_EXEC ${USE_MKLDNN})
cmake_dependent_option(USE_MKLDNN_CBLAS "Use CBLAS in MKLDNN" OFF "USE_MKLDNN"
USE_ONEDNN_ACL "Use Compute Library for the Arm architecture." OFF
"USE_ONEDNN AND CPU_AARCH64" OFF)
set(ONEDNN_ENABLE_CONCURRENT_EXEC ${USE_ONEDNN})
cmake_dependent_option(USE_ONEDNN_CBLAS "Use CBLAS in ONEDNN" OFF "USE_ONEDNN"
OFF)
option(USE_STATIC_MKL "Prefer to link with MKL statically (Unix only)" OFF)
option(USE_DISTRIBUTED "Use distributed" ON)

@@ -1271,7 +1271,7 @@ if(BUILD_SHARED_LIBS)
${PROJECT_SOURCE_DIR}/cmake/public/glog.cmake
${PROJECT_SOURCE_DIR}/cmake/public/gflags.cmake
${PROJECT_SOURCE_DIR}/cmake/public/mkl.cmake
${PROJECT_SOURCE_DIR}/cmake/public/mkldnn.cmake
${PROJECT_SOURCE_DIR}/cmake/public/onednn.cmake
${PROJECT_SOURCE_DIR}/cmake/public/protobuf.cmake
${PROJECT_SOURCE_DIR}/cmake/public/utils.cmake
${PROJECT_SOURCE_DIR}/cmake/public/LoadHIP.cmake
@@ -236,7 +236,7 @@ into the repo directory.
operators.
* [sparse](aten/src/ATen/native/sparse) - CPU and CUDA
implementations of COO sparse tensor operations
* [mkl](aten/src/ATen/native/mkl) [mkldnn](aten/src/ATen/native/mkldnn)
* [mkl](aten/src/ATen/native/mkl) [mkldnn](aten/src/ATen/native/onednn)
[miopen](aten/src/ATen/native/miopen) [cudnn](aten/src/ATen/native/cudnn)
- implementations of operators which simply bind to some
backend library.

@@ -693,12 +693,12 @@ only interested in a specific component.
`make` if you don't have ninja installed).

On the initial build, you can also speed things up with the environment
variables `DEBUG`, `USE_DISTRIBUTED`, `USE_MKLDNN`, `USE_CUDA`, `USE_FLASH_ATTENTION`, `USE_MEM_EFF_ATTENTION`, `BUILD_TEST`, `USE_FBGEMM`, `USE_NNPACK` and `USE_QNNPACK`.
variables `DEBUG`, `USE_DISTRIBUTED`, `USE_ONEDNN`, `USE_CUDA`, `USE_FLASH_ATTENTION`, `USE_MEM_EFF_ATTENTION`, `BUILD_TEST`, `USE_FBGEMM`, `USE_NNPACK` and `USE_QNNPACK`.

- `DEBUG=1` will enable debug builds (-g -O0)
- `REL_WITH_DEB_INFO=1` will enable debug symbols with optimizations (-g -O3)
- `USE_DISTRIBUTED=0` will disable distributed (c10d, gloo, mpi, etc.) build.
- `USE_MKLDNN=0` will disable using MKL-DNN.
- `USE_ONEDNN=0` will disable using MKL-DNN.
- `USE_CUDA=0` will disable compiling CUDA (in case you are developing on something not CUDA related), to save compile time.
- `BUILD_TEST=0` will disable building C++ test binaries.
- `USE_FBGEMM=0` will disable using FBGEMM (quantized 8-bit server operators).

@@ -710,7 +710,7 @@ variables `DEBUG`, `USE_DISTRIBUTED`, `USE_MKLDNN`, `USE_CUDA`, `USE_FLASH_ATTEN
For example:

```bash
DEBUG=1 USE_DISTRIBUTED=0 USE_MKLDNN=0 USE_CUDA=0 BUILD_TEST=0 USE_FBGEMM=0 USE_NNPACK=0 USE_QNNPACK=0 USE_XNNPACK=0 python setup.py develop
DEBUG=1 USE_DISTRIBUTED=0 USE_ONEDNN=0 USE_CUDA=0 BUILD_TEST=0 USE_FBGEMM=0 USE_NNPACK=0 USE_QNNPACK=0 USE_XNNPACK=0 python setup.py develop
```

For subsequent builds (i.e., when `build/CMakeCache.txt` exists), the build
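A quick, hedged way to confirm what a finished build picked up; note that the Python-facing backend flag is still exposed under the `mkldnn` name, which this commit does not rename:

```bash
# Check whether the oneDNN (mkldnn) backend was compiled into torch.
python -c "import torch; print(torch.backends.mkldnn.is_available())"
```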
@@ -78,13 +78,13 @@ file(GLOB miopen_h "miopen/*.h")
file(GLOB miopen_cpp "miopen/*.cpp")

file(GLOB mkl_cpp "mkl/*.cpp")
file(GLOB mkldnn_cpp "mkldnn/*.cpp")
file(GLOB onednn_cpp "onednn/*.cpp")

file(GLOB mkldnn_xpu_cpp "native/mkldnn/xpu/*.cpp" "native/mkldnn/xpu/detail/*.cpp")
file(GLOB onednn_xpu_cpp "native/onednn/xpu/*.cpp" "native/onednn/xpu/detail/*.cpp")

file(GLOB native_cpp "native/*.cpp")
file(GLOB native_mkl_cpp "native/mkl/*.cpp")
file(GLOB native_mkldnn_cpp "native/mkldnn/*.cpp")
file(GLOB native_onednn_cpp "native/onednn/*.cpp")
file(GLOB vulkan_cpp "vulkan/*.cpp")
file(GLOB native_vulkan_cpp "native/vulkan/*.cpp" "native/vulkan/api/*.cpp" "native/vulkan/impl/*.cpp" "native/vulkan/ops/*.cpp")

@@ -213,7 +213,7 @@ else()
set(
all_cpu_cpp ${base_cpp} ${ATen_CORE_SRCS} ${native_cpp}
${native_ao_sparse_cpp} ${native_sparse_cpp} ${native_nested_cpp}
${native_quantized_cpp} ${native_mkl_cpp} ${native_mkldnn_cpp}
${native_quantized_cpp} ${native_mkl_cpp} ${native_onednn_cpp}
${native_transformers_cpp}
${native_utils_cpp} ${native_xnnpack} ${generated_sources} ${core_generated_sources}
${ATen_CPU_SRCS} ${ATen_QUANTIZED_SRCS} ${ATen_NNAPI_SRCS} ${cpu_kernel_cpp}

@@ -226,8 +226,8 @@ endif()
if(AT_MKL_ENABLED)
set(all_cpu_cpp ${all_cpu_cpp} ${mkl_cpp})
endif()
if(AT_MKLDNN_ENABLED)
set(all_cpu_cpp ${all_cpu_cpp} ${mkldnn_cpp})
if(AT_ONEDNN_ENABLED)
set(all_cpu_cpp ${all_cpu_cpp} ${onednn_cpp})
endif()
if(USE_VULKAN)
set(all_cpu_cpp ${all_cpu_cpp} ${vulkan_cpp} ${native_vulkan_cpp} ${vulkan_generated_cpp})

@@ -236,14 +236,14 @@ else()
endif()

if(USE_XPU)
list(APPEND ATen_XPU_SRCS ${mkldnn_xpu_cpp})
list(APPEND ATen_XPU_DEPENDENCY_LIBS xpu_mkldnn)
list(APPEND ATen_XPU_SRCS ${onednn_xpu_cpp})
list(APPEND ATen_XPU_DEPENDENCY_LIBS xpu_onednn)

list(APPEND ATen_XPU_DEPENDENCY_LIBS ${OCL_LIBRARY})
list(APPEND ATen_XPU_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/native/mkldnn/xpu)
list(APPEND ATen_XPU_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/native/mkldnn/xpu/detail)
list(APPEND ATen_XPU_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/native/onednn/xpu)
list(APPEND ATen_XPU_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/native/onednn/xpu/detail)
list(APPEND ATen_XPU_INCLUDE ${PROJECT_SOURCE_DIR}/third_party/ideep/mkl-dnn/include)
list(APPEND ATen_XPU_INCLUDE ${XPU_MKLDNN_INCLUDE})
list(APPEND ATen_XPU_INCLUDE ${XPU_ONEDNN_INCLUDE})

list(APPEND ATen_XPU_INCLUDE ${SYCL_INCLUDE_DIR})
list(APPEND ATen_XPU_DEPENDENCY_LIBS ${SYCL_LIBRARY})

@@ -414,9 +414,9 @@ if(AT_NNPACK_ENABLED)
list(APPEND ATen_CPU_DEPENDENCY_LIBS nnpack) # cpuinfo is added below
endif()

if(MKLDNN_FOUND)
list(APPEND ATen_CPU_DEPENDENCY_LIBS ${MKLDNN_LIBRARIES})
endif(MKLDNN_FOUND)
if(ONEDNN_FOUND)
list(APPEND ATen_CPU_DEPENDENCY_LIBS ${ONEDNN_LIBRARIES})
endif(ONEDNN_FOUND)

if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "^(s390x|ppc64le)$")
list(APPEND ATen_CPU_DEPENDENCY_LIBS cpuinfo)
@@ -6,8 +6,8 @@
//
// DO NOT put the macros for CUDA libraries in this file; they belong in cuda/CUDAConfig.h

#define AT_MKLDNN_ENABLED() @AT_MKLDNN_ENABLED@
#define AT_MKLDNN_ACL_ENABLED() @AT_MKLDNN_ACL_ENABLED@
#define AT_ONEDNN_ENABLED() @AT_ONEDNN_ENABLED@
#define AT_ONEDNN_ACL_ENABLED() @AT_ONEDNN_ACL_ENABLED@
#define AT_MKL_ENABLED() @AT_MKL_ENABLED@
#define AT_MKL_SEQUENTIAL() @AT_MKL_SEQUENTIAL@
#define AT_POCKETFFT_ENABLED() @AT_POCKETFFT_ENABLED@
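As a sanity check (an assumption, not part of the patch: the path below is the default CMake build directory), the generated header should carry the renamed macros after a rebuild:

```bash
# Expect AT_ONEDNN_ENABLED / AT_ONEDNN_ACL_ENABLED in the generated Config.h.
grep -E "AT_ONEDNN(_ACL)?_ENABLED" build/aten/src/ATen/Config.h
```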
@@ -345,7 +345,7 @@ bool Context::hasMKL() {
}

bool Context::hasMKLDNN() {
#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
return true;
#else
return false;

@@ -375,7 +375,7 @@ at::QEngine Context::qEngine() const {
qengine = at::kQNNPACK;
#endif

#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
qengine = at::kONEDNN;
#endif

@@ -421,7 +421,7 @@ const std::vector<at::QEngine>& Context::supportedQEngines() {
engines.push_back(at::kNoQEngine);
#endif // C10_MOBILE

#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
engines.push_back(at::kONEDNN);
#endif
@@ -13,7 +13,7 @@
#include <caffe2/utils/threadpool/pthreadpool-cpp.h>

namespace at {
#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
namespace native { namespace mkldnn {
void clear_computation_cache();
}} // namespace native::mkldnn

@@ -65,7 +65,7 @@ void set_num_threads(int nthreads) {
TORCH_INTERNAL_ASSERT(pool, "Invalid thread pool!");
pool->set_thread_count(nthreads);
#endif
#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
at::native::mkldnn::clear_computation_cache();
#endif
}
@@ -5,7 +5,7 @@
#include <mkl.h>
#endif

#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
#include <dnnl.hpp>
#include <ideep.hpp>
#endif

@@ -36,7 +36,7 @@ std::string get_mkl_version() {

std::string get_mkldnn_version() {
std::ostringstream ss;
#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
// Cribbed from mkl-dnn/src/common/verbose.cpp
// Too bad: can't get ISA info conveniently :(
// Apparently no way to get ideep version?

@@ -162,7 +162,7 @@ std::string show_config() {
ss << " - " << get_mkl_version() << "\n";
#endif

#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
ss << " - " << get_mkldnn_version() << "\n";
#endif
@@ -15,9 +15,9 @@

#include <c10/util/irange.h>
#include <c10/core/ScalarType.h>
#if AT_MKLDNN_ENABLED()
#include <ATen/native/mkldnn/MKLDNNCommon.h>
#include <ATen/native/mkldnn/Utils.h>
#if AT_ONEDNN_ENABLED()
#include <ATen/native/onednn/ONEDNNCommon.h>
#include <ATen/native/onednn/Utils.h>
#endif

#ifndef AT_PER_OPERATOR_HEADERS

@@ -371,7 +371,7 @@ TORCH_IMPL_FUNC(softshrink_backward_out) (
shrink_backward_stub(device_type(), *this, lambd);
}

#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
static bool use_mkldnn(const Tensor& input) {
if (!at::globalContext().userEnabledMkldnn()) {
return false;

@@ -390,7 +390,7 @@ TORCH_IMPL_FUNC(gelu_out_cpu) (
const Tensor& self, c10::string_view approximate, const Tensor& result
) {
auto approximate_type = get_gelutype_enum(approximate);
#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
if (use_mkldnn(self) && (approximate_type == GeluType::None)) {
const ideep::tensor& x = itensor_from_tensor(self, /*from_const_data_ptr*/true);
ideep::tensor y = itensor_from_tensor(result);

@@ -415,7 +415,7 @@ TORCH_IMPL_FUNC(gelu_backward_out_cpu) (
const Tensor& grad, const Tensor& self, c10::string_view approximate, const Tensor& grad_input
) {
auto approximate_type = get_gelutype_enum(approximate);
#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
if (use_mkldnn(self) && (approximate_type == GeluType::None)) {
const ideep::tensor& x = itensor_from_tensor(self, /*from_const_data_ptr*/true);
ideep::tensor grady = itensor_from_tensor(grad, /*from_const_data_ptr*/true);
@@ -6,7 +6,7 @@
#include <ATen/NamedTensorUtils.h>
#include <ATen/Config.h>

#include <ATen/native/mkldnn/Matmul.h>
#include <ATen/native/onednn/Matmul.h>

#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/CPUFunctions.h>

@@ -1,7 +1,7 @@
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <ATen/native/CPUBlas.h>
#include <ATen/native/mkl/LinearAlgebra.h>
#include <ATen/native/mkldnn/Matmul.h>
#include <ATen/native/onednn/Matmul.h>
#include <ATen/Config.h>

#include <c10/util/SmallBuffer.h>

@@ -165,7 +165,7 @@ void gemm(
const float beta,
float *c, int64_t ldc) {
internal::normalize_last_dims(transa, transb, m, n, k, &lda, &ldb, &ldc);
#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
if (mkldnn_bf32_gemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)) {
return;
}

@@ -320,7 +320,7 @@ void gemm(
return;
}
#endif
#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
if (mkldnn_bf16_gemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)) {
return;
}

@@ -339,7 +339,7 @@ void gemm(
const float beta,
at::Half *c, int64_t ldc) {
internal::normalize_last_dims(transa, transb, m, n, k, &lda, &ldb, &ldc);
#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
if (mkldnn_fp16_gemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)) {
return;
}
@@ -26,8 +26,8 @@
#include <nnpack.h>
#endif

#if AT_MKLDNN_ENABLED()
#include <ATen/native/mkldnn/Utils.h>
#if AT_ONEDNN_ENABLED()
#include <ATen/native/onednn/Utils.h>
#endif

#ifndef AT_PER_OPERATOR_HEADERS

@@ -508,7 +508,7 @@ struct ConvParams {
;
}
bool use_mkldnn(const at::Tensor& input, const at::Tensor& weight) const {
#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
if (!at::globalContext().userEnabledMkldnn()) {
return false;
}

@@ -782,7 +782,7 @@ static void check_input_same_type_as_parameters(
check_input_same_type_as_parameters(input, weight, /*bias=*/ Tensor());
}

#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
static void check_input_same_type_as_parameters(
const Tensor& input,
const Tensor& weight,

@@ -1579,7 +1579,7 @@ at::Tensor _convolution(
params.stride, params.dilation, params.groups, params.benchmark, params.deterministic);
break;
case ConvBackend::Mkldnn:
#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
check_input_same_type_as_parameters(input, weight, bias, backend);
if (!input.is_mkldnn()) {
// need to ensure contiguous for non-mkldnn tensors

@@ -1594,7 +1594,7 @@ at::Tensor _convolution(
#endif
break;
case ConvBackend::MkldnnTranspose:
#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
check_input_same_type_as_parameters(input, weight, bias, backend);
if (!input.is_mkldnn()) {
// need to ensure contiguous for non-mkldnn tensors

@@ -1609,7 +1609,7 @@ at::Tensor _convolution(
#endif
break;
case ConvBackend::MkldnnEmpty:
#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
output = empty_mkldnn(
calc_output_size(input, weight, params), optTypeMetaToScalarType(input.options().dtype_opt()),
input.options().layout_opt(), input.options().device_opt(), input.options().pinned_memory_opt());

@@ -2123,7 +2123,7 @@ std::tuple<Tensor, Tensor, Tensor> convolution_backward(
}
break;
case ConvBackend::MkldnnEmpty:
#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
if (output_mask[0]) {
if (input.is_mkldnn()) {
backend_grad_input = empty_mkldnn(input.sizes(), optTypeMetaToScalarType(input.options().dtype_opt()),
@@ -18,7 +18,7 @@
#include <ATen/native/ReduceOps.h>
#include <ATen/native/ReduceOpsUtils.h>
#include <ATen/native/Resize.h>
#include <ATen/native/mkldnn/Matmul.h>
#include <ATen/native/onednn/Matmul.h>
#include <c10/core/GradMode.h>
#include <c10/util/accumulate.h>
#include <c10/util/irange.h>

@@ -1512,7 +1512,7 @@ static void addmm_impl_cpu_(
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!c.is_conj());

bool dispatched = false;
#if defined(__aarch64__) && AT_MKLDNN_ACL_ENABLED()
#if defined(__aarch64__) && AT_ONEDNN_ACL_ENABLED()
// On AArch64 if LHS matrix in BLAS routine is transposed but RHS is not then
// it is faster to call oneDNN matrix multiplication primitive with RHS*LHS
// that will call then into Arm® Compute Library (ACL) GEMM kernel and also

@@ -81,7 +81,7 @@ bool use_miopen(const at::Tensor& input, const double dropout_state) {
}

bool use_mkldnn(const Tensor& input, TensorList params, TensorList hx) {
#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
if (!at::globalContext().userEnabledMkldnn()) {
return false;
}
@@ -11,7 +11,7 @@
#include <ATen/ops/mul_native.h>
#endif

#if !AT_MKLDNN_ENABLED()
#if !AT_ONEDNN_ENABLED()

namespace at {
namespace native {

@@ -48,9 +48,9 @@ Tensor& mkldnn_mul_(Tensor& self, const Tensor& other) {
} // namespace native
} // namespace at

#else // AT_MKLDNN_ENABLED
#else // AT_ONEDNN_ENABLED

#include <ATen/native/mkldnn/MKLDNNCommon.h>
#include <ATen/native/onednn/ONEDNNCommon.h>

namespace at {
namespace native {

@@ -158,4 +158,4 @@ Tensor& mkldnn_mul_(Tensor& self, const Tensor& other) {
} // namespace native
} // namespace at

#endif // AT_MKLDNN_ENABLED
#endif // AT_ONEDNN_ENABLED

@@ -3,7 +3,7 @@
#include <ATen/ATen.h>
#include <ATen/Config.h>

#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()

#include <ideep/tensor.hpp>

@@ -43,4 +43,4 @@ struct ContextConv final {
} // namespace native
} // namespace at

#endif // AT_MKLDNN_ENABLED()
#endif // AT_ONEDNN_ENABLED()
@@ -17,7 +17,7 @@
#include <ATen/ops/mkldnn_convolution_native.h>
#endif

#if !AT_MKLDNN_ENABLED()
#if !AT_ONEDNN_ENABLED()

namespace at { namespace native {

@@ -33,10 +33,10 @@ REGISTER_NO_CPU_DISPATCH(mkldnn_convolution_transpose_backward_stub);

}}

#else // AT_MKLDNN_ENABLED
#else // AT_ONEDNN_ENABLED

#include <ATen/native/mkldnn/MKLDNNCommon.h>
#include <ATen/native/mkldnn/Utils.h>
#include <ATen/native/onednn/ONEDNNCommon.h>
#include <ATen/native/onednn/Utils.h>
#include <ATen/native/ConvUtils.h>
#include <c10/util/irange.h>

@@ -1,15 +1,15 @@
#include <vector>

#include <ATen/native/ConvUtils.h>
#include <ATen/native/mkldnn/Common.h>
#include <ATen/native/mkldnn/ConvPrepack.h>
#include <ATen/native/mkldnn/MKLDNNCommon.h>
#include <ATen/native/mkldnn/OpContext.h>
#include <ATen/native/onednn/Common.h>
#include <ATen/native/onednn/ConvPrepack.h>
#include <ATen/native/onednn/ONEDNNCommon.h>
#include <ATen/native/onednn/OpContext.h>
#include <ATen/native/utils/Factory.h>
#include <ATen/native/utils/ParamUtils.h>
#include <c10/util/irange.h>

#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()

namespace at {
namespace native {

@@ -286,4 +286,4 @@ Tensor conv_run(
} // namespace native
} // namespace at

#endif // AT_MKLDNN_ENABLED()
#endif // AT_ONEDNN_ENABLED()
@@ -1,10 +1,10 @@
#pragma once

#include <ATen/Tensor.h>
#include <ATen/native/mkldnn/Common.h>
#include <ATen/native/mkldnn/OpContext.h>
#include <ATen/native/onednn/Common.h>
#include <ATen/native/onednn/OpContext.h>

#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()

namespace at {
namespace native {

@@ -46,4 +46,4 @@ void run(ContextConv& context, const Tensor& input, void* output);
} // namespace native
} // namespace at

#endif // AT_MKLDNN_ENABLED()
#endif // AT_ONEDNN_ENABLED()

@@ -8,7 +8,7 @@
#include <ATen/ops/copy_native.h>
#endif

#if !AT_MKLDNN_ENABLED()
#if !AT_ONEDNN_ENABLED()

namespace at {
namespace native {

@@ -20,9 +20,9 @@ Tensor& copy_mkldnn_(Tensor& self, const Tensor& src, bool non_blocking) {
} // namespace native
} // namespace at

#else // AT_MKLDNN_ENABLED
#else // AT_ONEDNN_ENABLED

#include <ATen/native/mkldnn/MKLDNNCommon.h>
#include <ATen/native/onednn/ONEDNNCommon.h>

namespace at {
namespace native {

@@ -46,4 +46,4 @@ Tensor& copy_mkldnn_(Tensor& self, const Tensor& src, bool non_blocking) {
} // namespace native
} // namespace at

#endif // AT_MKLDNN_ENABLED
#endif // AT_ONEDNN_ENABLED
@@ -10,7 +10,7 @@
#include <ATen/ops/gelu_backward_native.h>
#endif

#if !AT_MKLDNN_ENABLED()
#if !AT_ONEDNN_ENABLED()

namespace at { namespace native {

@@ -24,10 +24,10 @@ Tensor mkldnn_gelu_backward(const Tensor& grad_output, const Tensor& input, c10:

}}

#else // AT_MKLDNN_ENABLED
#else // AT_ONEDNN_ENABLED

#include <ATen/native/mkldnn/MKLDNNCommon.h>
#include <ATen/native/mkldnn/Utils.h>
#include <ATen/native/onednn/ONEDNNCommon.h>
#include <ATen/native/onednn/Utils.h>

namespace at { namespace native {

@@ -61,4 +61,4 @@ Tensor mkldnn_gelu_backward(const Tensor& grad_output, const Tensor& input, c10:

}}

#endif // AT_MKLDNN_ENABLED
#endif // AT_ONEDNN_ENABLED

@@ -2,7 +2,7 @@
#include <ATen/Config.h>
#include <c10/core/Allocator.h>

#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()

// needs to be included only once in library.
#include <ideep_pin_singletons.hpp>

@@ -30,4 +30,4 @@ void clear_computation_cache() {

} // namespace at::native::mkldnn

#endif // AT_MKLDNN_ENABLED()
#endif // AT_ONEDNN_ENABLED()
@@ -19,7 +19,7 @@
#include <ATen/ops/mkldnn_linear_native.h>
#endif

#if !AT_MKLDNN_ENABLED()
#if !AT_ONEDNN_ENABLED()

namespace at {
namespace native {

@@ -48,10 +48,10 @@ std::tuple<Tensor, Tensor, Tensor> mkldnn_linear_backward(
} // namespace native
} // namespace at

#else // AT_MKLDNN_ENABLED
#else // AT_ONEDNN_ENABLED

#include <ATen/native/mkldnn/MKLDNNCommon.h>
#include <ATen/native/mkldnn/Utils.h>
#include <ATen/native/onednn/ONEDNNCommon.h>
#include <ATen/native/onednn/Utils.h>

namespace at {
namespace native {

@@ -447,4 +447,4 @@ TORCH_LIBRARY_IMPL(mkldnn, MkldnnCPU, m) {
} // namespace native
} // namespace at

#endif // AT_MKLDNN_ENABLED
#endif // AT_ONEDNN_ENABLED

@@ -2,9 +2,9 @@
#include <ATen/core/Tensor.h>
#include <ATen/Config.h>
#include <ATen/Context.h>
#include <ATen/native/mkldnn/Matmul.h>
#include <ATen/native/onednn/Matmul.h>

#if !AT_MKLDNN_ENABLED()
#if !AT_ONEDNN_ENABLED()

namespace at {
namespace native {

@@ -88,10 +88,10 @@ void mkldnn_matmul_i8i8i32(
} // namespace native
} // namespace at

#else // AT_MKLDNN_ENABLED
#else // AT_ONEDNN_ENABLED

#include <ATen/native/mkldnn/MKLDNNCommon.h>
#include <ATen/native/mkldnn/Utils.h>
#include <ATen/native/onednn/ONEDNNCommon.h>
#include <ATen/native/onednn/Utils.h>

namespace at {
namespace native {

@@ -515,4 +515,4 @@ void mkldnn_matmul_i8i8i32(
} // namespace native
} // namespace at

#endif // AT_MKLDNN_ENABLED
#endif // AT_ONEDNN_ENABLED
@@ -14,9 +14,9 @@
#include <ATen/ops/native_batch_norm_backward_native.h>
#include <ATen/ops/native_batch_norm_native.h>
#endif
#include <ATen/native/mkldnn/Utils.h>
#include <ATen/native/onednn/Utils.h>

#if !AT_MKLDNN_ENABLED()
#if !AT_ONEDNN_ENABLED()

namespace at {
namespace native {

@@ -79,9 +79,9 @@ std::tuple<Tensor, Tensor, Tensor> _new_batch_norm_backward_mkldnn(
} // namespace native
} // namespace at

#else // AT_MKLDNN_ENABLED
#else // AT_ONEDNN_ENABLED

#include <ATen/native/mkldnn/MKLDNNCommon.h>
#include <ATen/native/onednn/ONEDNNCommon.h>
#include <ATen/native/layer_norm.h>
#include <ideep/abstract_types.hpp>

@@ -282,4 +282,4 @@ std::tuple<Tensor, Tensor, Tensor> mkldnn_batch_norm_backward(const Tensor& grad
} // namespace native
} // namespace at

#endif // AT_MKLDNN_ENABLED
#endif // AT_ONEDNN_ENABLED

@@ -1,9 +1,9 @@
#include <ATen/native/mkldnn/MKLDNNCommon.h>
#include <ATen/native/onednn/ONEDNNCommon.h>
#include <ATen/OpaqueTensorImpl.h>
#include <c10/core/Allocator.h>
#include <torch/library.h>

#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()

#include <ideep.hpp>

@@ -211,4 +211,4 @@ TORCH_LIBRARY_IMPL(mkldnn, MkldnnCPU, m) {

}}

#endif // AT_MKLDNN_ENABLED()
#endif // AT_ONEDNN_ENABLED()
@@ -3,7 +3,7 @@
#include <ATen/core/Tensor.h>
#include <ATen/Config.h>

#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
#include <ideep.hpp>

#ifndef IDEEP_PREREQ

@@ -64,4 +64,4 @@ TORCH_API int set_verbose(int level);

}}

#endif // AT_MKLDNN_ENABLED
#endif // AT_ONEDNN_ENABLED

@@ -1,8 +1,8 @@
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <ATen/Config.h>
#include <ATen/core/Tensor.h>
#include <ATen/native/mkldnn/MKLDNNCommon.h>
#include <ATen/native/mkldnn/Utils.h>
#include <ATen/native/onednn/ONEDNNCommon.h>
#include <ATen/native/onednn/Utils.h>
#include <ATen/native/utils/ParamUtils.h>
#include <torch/library.h>

@@ -24,7 +24,7 @@

namespace at { namespace native {

#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()

Tensor mkldnn_to_dense(const Tensor& mkldnn_tensor, std::optional<ScalarType> dtype, std::optional<bool> masked_grad) {
TORCH_CHECK(mkldnn_tensor.scalar_type() == ScalarType::Float ||

@@ -589,9 +589,9 @@ Tensor mkldnn_reorder_conv3d_weight(
TORCH_CHECK(false, "mkldnn_reorder_conv3d_weight: MKL-DNN build is disabled");
}

#endif // AT_MKLDNN_ENABLED()
#endif // AT_ONEDNN_ENABLED()

#if AT_MKL_ENABLED() && AT_MKLDNN_ENABLED()
#if AT_MKL_ENABLED() && AT_ONEDNN_ENABLED()
#include <mkl.h>

static Tensor mkl_reorder_linear_weight(

@@ -635,6 +635,6 @@ TORCH_LIBRARY_IMPL(mkl, CPU, m) {
TORCH_FN(mkl_reorder_linear_weight));
}

#endif // AT_MKL_ENABLED && AT_MKLDNN_ENABLED
#endif // AT_MKL_ENABLED && AT_ONEDNN_ENABLED

}}
@@ -11,7 +11,7 @@
#include <ATen/ops/zero_native.h>
#endif

#if !AT_MKLDNN_ENABLED()
#if !AT_ONEDNN_ENABLED()

namespace at {
namespace native {

@@ -23,9 +23,9 @@ Tensor& mkldnn_zero_(Tensor& self) {
} // namespace native
} // namespace at

#else // AT_MKLDNN_ENABLED
#else // AT_ONEDNN_ENABLED

#include <ATen/native/mkldnn/MKLDNNCommon.h>
#include <ATen/native/onednn/ONEDNNCommon.h>

namespace at {
namespace native {

@@ -51,4 +51,4 @@ Tensor& mkldnn_zero_(Tensor& self) {
} // namespace native
} // namespace at

#endif // AT_MKLDNN_ENABLED
#endif // AT_ONEDNN_ENABLED

@@ -1,7 +1,7 @@
#include <ATen/native/mkldnn/OpContext.h>
#include <ATen/native/onednn/OpContext.h>

#if AT_MKLDNN_ENABLED()
#include <ATen/native/mkldnn/ConvPrepack.h>
#if AT_ONEDNN_ENABLED()
#include <ATen/native/onednn/ConvPrepack.h>

namespace at {
namespace native {

@@ -44,4 +44,4 @@ void MkldnnConvOpContext::run(const Tensor& input, void* output) {
} // namespace native
} // namespace at

#endif // AT_MKLDNN_ENABLED()
#endif // AT_ONEDNN_ENABLED()
@@ -2,9 +2,9 @@

#include <ATen/Tensor.h>
#include <ATen/core/ivalue.h>
#include <ATen/native/mkldnn/Common.h>
#include <ATen/native/onednn/Common.h>

#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()

namespace at {
namespace native {

@@ -96,4 +96,4 @@ class MkldnnConvOpContext final : public ConvOpContext {
} // namespace native
} // namespace at

#endif // AT_MKLDNN_ENABLED()
#endif // AT_ONEDNN_ENABLED()

@@ -24,7 +24,7 @@
#endif

#if !AT_MKLDNN_ENABLED()
#if !AT_ONEDNN_ENABLED()

namespace at {
namespace native {

@@ -190,10 +190,10 @@ Tensor mkldnn_adaptive_avg_pool2d_backward(
} // namespace native
} // namespace at

#else // AT_MKLDNN_ENABLED
#else // AT_ONEDNN_ENABLED

#include <ATen/native/mkldnn/MKLDNNCommon.h>
#include <ATen/native/mkldnn/Utils.h>
#include <ATen/native/onednn/ONEDNNCommon.h>
#include <ATen/native/onednn/Utils.h>

namespace at {
namespace native {

@@ -670,4 +670,4 @@ Tensor mkldnn_adaptive_avg_pool2d_backward(
} // namespace native
} // namespace at

#endif // AT_MKLDNN_ENABLED
#endif // AT_ONEDNN_ENABLED
@@ -3,7 +3,7 @@
#include <ATen/Config.h>

#if !AT_MKLDNN_ENABLED()
#if !AT_ONEDNN_ENABLED()

namespace at { namespace native {

@@ -17,10 +17,10 @@ std::tuple<Tensor, Tensor> mkldnn_prelu_backward(const Tensor& grad_output, cons

}}

#else // AT_MKLDNN_ENABLED
#else // AT_ONEDNN_ENABLED

#include <ATen/native/mkldnn/MKLDNNCommon.h>
#include <ATen/native/mkldnn/Utils.h>
#include <ATen/native/onednn/ONEDNNCommon.h>
#include <ATen/native/onednn/Utils.h>

namespace at { namespace native {

@@ -69,4 +69,4 @@ std::tuple<Tensor, Tensor> mkldnn_prelu_backward(const Tensor& grad_output, cons
}
}}

#endif // AT_MKLDNN_ENABLED
#endif // AT_ONEDNN_ENABLED

@@ -19,7 +19,7 @@
#include <ATen/ops/mkldnn_rnn_layer_native.h>
#endif

#if !AT_MKLDNN_ENABLED()
#if !AT_ONEDNN_ENABLED()

namespace at::native {

@@ -75,10 +75,10 @@ REGISTER_NO_CPU_DISPATCH(lstm_mkldnn_stub);

} // namespace at::native

#else // AT_MKLDNN_ENABLED
#else // AT_ONEDNN_ENABLED

#include <ATen/native/mkldnn/MKLDNNCommon.h>
#include <ATen/native/mkldnn/Utils.h>
#include <ATen/native/onednn/ONEDNNCommon.h>
#include <ATen/native/onednn/Utils.h>

namespace at::native {

@@ -563,4 +563,4 @@ REGISTER_ALL_CPU_DISPATCH(lstm_mkldnn_stub, &lstm_mkldnn);

} // namespace at::native

#endif // AT_MKLDNN_ENABLED
#endif // AT_ONEDNN_ENABLED
@@ -1,11 +1,11 @@
#include <ATen/Config.h>

#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()

#include <ATen/Tensor.h>
#include <ATen/native/mkldnn/ConvPrepack.h>
#include <ATen/native/mkldnn/OpContext.h>
#include <ATen/native/mkldnn/Utils.h>
#include <ATen/native/onednn/ConvPrepack.h>
#include <ATen/native/onednn/OpContext.h>
#include <ATen/native/onednn/Utils.h>
#include <torch/custom_class.h>
#include <torch/library.h>

@@ -28,7 +28,7 @@ static bool is_mkldnn_fp16_supported() {
}

constexpr bool is_mkldnn_acl_supported() {
return AT_MKLDNN_ACL_ENABLED();
return AT_ONEDNN_ACL_ENABLED();
}

TORCH_LIBRARY(mkldnn, m) {

@@ -100,9 +100,9 @@ TORCH_LIBRARY_IMPL(mkldnn_prepacked, CPU, m) {
} // namespace native
} // namespace at

#endif // AT_MKLDNN_ENABLED()
#endif // AT_ONEDNN_ENABLED()

#if AT_MKL_ENABLED() && AT_MKLDNN_ENABLED()
#if AT_MKL_ENABLED() && AT_ONEDNN_ENABLED()

namespace at {
namespace native {

@@ -119,4 +119,4 @@ TORCH_LIBRARY(mkl, m) {
} // namespace native
} // namespace at

#endif // AT_MKL_ENABLED && AT_MKLDNN_ENABLED
#endif // AT_MKL_ENABLED && AT_ONEDNN_ENABLED
@@ -9,7 +9,7 @@
#include <ATen/ops/threshold_backward_native.h> // for mkldnn_relu_backward
#endif

#if !AT_MKLDNN_ENABLED()
#if !AT_ONEDNN_ENABLED()

namespace at { namespace native {

@@ -27,10 +27,10 @@ Tensor mkldnn_relu_backward(const Tensor& grad_output, const Tensor& input, cons

}}

#else // AT_MKLDNN_ENABLED
#else // AT_ONEDNN_ENABLED

#include <ATen/native/mkldnn/MKLDNNCommon.h>
#include <ATen/native/mkldnn/Utils.h>
#include <ATen/native/onednn/ONEDNNCommon.h>
#include <ATen/native/onednn/Utils.h>

namespace at { namespace native {

@@ -73,4 +73,4 @@ Tensor mkldnn_relu_backward(const Tensor& grad_output, const Tensor& input, cons

}}

#endif // AT_MKLDNN_ENABLED
#endif // AT_ONEDNN_ENABLED

@@ -8,7 +8,7 @@
#include <ATen/ops/_softmax_native.h> // for mkldnn_softmax
#endif

#if !AT_MKLDNN_ENABLED()
#if !AT_ONEDNN_ENABLED()

namespace at {
namespace native {

@@ -23,9 +23,9 @@ Tensor mkldnn_softmax(
} // namespace native
} // namespace at

#else // AT_MKLDNN_ENABLED
#else // AT_ONEDNN_ENABLED

#include <ATen/native/mkldnn/MKLDNNCommon.h>
#include <ATen/native/onednn/ONEDNNCommon.h>

namespace at {
namespace native {

@@ -48,4 +48,4 @@ Tensor mkldnn_softmax(
} // namespace native
} // namespace at

#endif // AT_MKLDNN_ENABLED
#endif // AT_ONEDNN_ENABLED
@@ -1,6 +1,6 @@
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <ATen/core/Tensor.h>
#include <ATen/native/mkldnn/MKLDNNCommon.h>
#include <ATen/native/onednn/ONEDNNCommon.h>

#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/NativeFunctions.h>

@@ -10,7 +10,7 @@

namespace at { namespace native {

#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()

Tensor empty_mkldnn(IntArrayRef sizes, std::optional<ScalarType> dtype, std::optional<Layout> layout, std::optional<Device> device, std::optional<bool> pin_memory, std::optional<c10::MemoryFormat> optional_memory_format) {
TORCH_CHECK(

@@ -30,6 +30,6 @@ Tensor empty_mkldnn(IntArrayRef sizes, std::optional<ScalarType> dtype, std::opt
TORCH_CHECK(false, "empty_mkldnn: MKL-DNN build is disabled");
}

#endif // AT_MKLDNN_ENABLED()
#endif // AT_ONEDNN_ENABLED()

}}

@@ -13,7 +13,7 @@
#include <ATen/ops/view_native.h>
#endif

#if !AT_MKLDNN_ENABLED()
#if !AT_ONEDNN_ENABLED()

namespace at {
namespace native {

@@ -41,9 +41,9 @@ Tensor& mkldnn_transpose_(Tensor& self, int64_t dim0, int64_t dim1) {
} // namespace native
} // namespace at

#else // AT_MKLDNN_ENABLED
#else // AT_ONEDNN_ENABLED

#include <ATen/native/mkldnn/MKLDNNCommon.h>
#include <ATen/native/onednn/ONEDNNCommon.h>

namespace at {
namespace native {

@@ -98,4 +98,4 @@ Tensor& mkldnn_transpose_(Tensor& self, int64_t dim0, int64_t dim1) {
} // namespace native
} // namespace at

#endif // AT_MKLDNN_ENABLED
#endif // AT_ONEDNN_ENABLED

@@ -9,7 +9,7 @@
#include <ATen/ops/tanh_native.h> // for mkldnn_tanh, mkldnn_tanh_
#endif

#if !AT_MKLDNN_ENABLED()
#if !AT_ONEDNN_ENABLED()

namespace at {
namespace native {

@@ -33,9 +33,9 @@ Tensor& mkldnn_tanh_(Tensor& self) {
} // namespace native
} // namespace at

#else // AT_MKLDNN_ENABLED
#else // AT_ONEDNN_ENABLED

#include <ATen/native/mkldnn/MKLDNNCommon.h>
#include <ATen/native/onednn/ONEDNNCommon.h>

namespace at {
namespace native {

@@ -75,4 +75,4 @@ Tensor& mkldnn_tanh_(Tensor& self) {
} // namespace native
} // namespace at

#endif // AT_MKLDNN_ENABLED
#endif // AT_ONEDNN_ENABLED
@@ -1,5 +1,5 @@
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <ATen/native/mkldnn/Utils.h>
#include <ATen/native/onednn/Utils.h>
#include <ATen/native/Pool.h>
#include <c10/util/irange.h>

@@ -76,7 +76,7 @@ void check_mkldnn_binary_fusion_inputs(
mkldnn_check_low_precision(input.scalar_type(), "mkldnn pointwise binary");
}

#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()

#define ATTR_FUNC(NAME) \
[](torch::List<std::optional<at::Scalar>> scalars, \

@@ -173,5 +173,5 @@ const std::map<c10::string_view, ideep::algorithm>& fusion_binary_alg_map() {
return fusion_attr_map;
};

#endif // AT_MKLDNN_ENABLED()
#endif // AT_ONEDNN_ENABLED()
}}

@@ -10,9 +10,9 @@
#endif
#include <vector>

#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
#include <ideep/tensor.hpp>
#endif // AT_MKLDNN_ENABLED()
#endif // AT_ONEDNN_ENABLED()

namespace at { namespace native {

@@ -70,7 +70,7 @@ inline Tensor may_convert_to_default_contiguous_strides(const Tensor& input) {
return input;
}

#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()

using AttrFunction = std::function<ideep::attr_t(
torch::List<std::optional<at::Scalar>>,

@@ -82,7 +82,7 @@ const std::map<c10::string_view, ideep::algorithm>& fusion_unary_alg_map();

const std::map<c10::string_view, ideep::algorithm>& fusion_binary_alg_map();

#endif // AT_MKLDNN_ENABLED()
#endif // AT_ONEDNN_ENABLED()
};

#if defined(__aarch64__)

@@ -95,7 +95,7 @@ constexpr bool mkldnn_bf16_device_check_arm() {
}
#endif

#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
inline bool mkldnn_bf16_device_check() {
#if defined(__x86_64__) || (defined(_M_X64) && !defined(_M_ARM64EC))
// Use ideep to check bf16 on X64 as cpuinfo has no avx_ne_convert check.
@@ -1,7 +1,7 @@
#include <ATen/WrapDimUtilsMulti.h>
#include <ATen/native/Resize.h>
#include <torch/library.h>
#include <ATen/native/mkldnn/xpu/detail/oneDNN.h>
#include <ATen/native/onednn/xpu/detail/oneDNN.h>

namespace at::native::xpu {

@@ -8,7 +8,7 @@
#include <c10/util/Exception.h>
#include <optional>
#include <ATen/native/utils/ParamUtils.h>
#include <ATen/native/mkldnn/xpu/detail/oneDNN.h>
#include <ATen/native/onednn/xpu/detail/oneDNN.h>
#include <torch/library.h>
#include <ATen/native/ConvUtils.h>

@@ -3,8 +3,8 @@
#include <ATen/ATen.h>
#include <oneapi/dnnl/dnnl.hpp>
#include <oneapi/dnnl/dnnl_types.h>
#include <ATen/native/mkldnn/xpu/detail/Utils.h>
#include <ATen/native/mkldnn/xpu/detail/oneDNNContext.h>
#include <ATen/native/onednn/xpu/detail/Utils.h>
#include <ATen/native/onednn/xpu/detail/oneDNNContext.h>

namespace at::native::onednn {
/* oneDNN quantization usage:

@@ -5,8 +5,8 @@
#include <ATen/record_function.h>
#include <c10/core/MemoryFormat.h>

#include <ATen/native/mkldnn/xpu/detail/Attr.h>
#include <ATen/native/mkldnn/xpu/detail/Utils.h>
#include <ATen/native/onednn/xpu/detail/Attr.h>
#include <ATen/native/onednn/xpu/detail/Utils.h>

#include <oneapi/dnnl/dnnl.hpp>

@@ -2,9 +2,9 @@
#include <ATen/ATen.h>

#include <oneapi/dnnl/dnnl.hpp>
#include <ATen/native/mkldnn/xpu/detail/oneDNNContext.h>
#include <ATen/native/mkldnn/xpu/detail/Utils.h>
#include <ATen/native/mkldnn/xpu/detail/Attr.h>
#include <ATen/native/onednn/xpu/detail/oneDNNContext.h>
#include <ATen/native/onednn/xpu/detail/Utils.h>
#include <ATen/native/onednn/xpu/detail/Attr.h>

namespace at::native::onednn {

@@ -1,4 +1,4 @@
#include <ATen/native/mkldnn/xpu/detail/Utils.h>
#include <ATen/native/onednn/xpu/detail/Utils.h>

namespace at::native::onednn {

@@ -1,9 +1,9 @@
#pragma once

#include <ATen/ATen.h>
#include <ATen/native/mkldnn/xpu/detail/oneDNNContext.h>
#include <ATen/native/mkldnn/xpu/detail/Attr.h>
#include <ATen/native/mkldnn/xpu/detail/Utils.h>
#include <ATen/native/onednn/xpu/detail/oneDNNContext.h>
#include <ATen/native/onednn/xpu/detail/Attr.h>
#include <ATen/native/onednn/xpu/detail/Utils.h>

namespace at::native::onednn{

@@ -1,5 +1,5 @@
#include <ATen/native/mkldnn/xpu/detail/oneDNNContext.h>
#include <ATen/native/mkldnn/xpu/detail/Utils.h>
#include <ATen/native/onednn/xpu/detail/oneDNNContext.h>
#include <ATen/native/onednn/xpu/detail/Utils.h>

/* *
* Do NOT put any kernels or call any device binaries here!
@@ -134,9 +134,9 @@ std::tuple<at::Tensor, std::optional<at::Tensor>> PackedLinearWeightFp16::
}
#endif // USE_FBGEMM

#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
std::tuple<at::Tensor, std::optional<at::Tensor>> PackedLinearWeightsOnednn::unpack() {
return std::tuple<at::Tensor, std::optional<at::Tensor>>(
orig_weight_, orig_bias_);
}
#endif // #if AT_MKLDNN_ENABLED()
#endif // #if AT_ONEDNN_ENABLED()

@@ -1,7 +1,7 @@
#pragma once

#include <ATen/Config.h>
#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
#include <ATen/Tensor.h>
#include <ATen/native/quantized/PackedParams.h>
#include <ideep.hpp>

@@ -454,4 +454,4 @@ at::Tensor _qconv_prepack_onednn(
int64_t groups,
std::optional<torch::List<int64_t>> input_shape=std::nullopt);

#endif // #if AT_MKLDNN_ENABLED()
#endif // #if AT_ONEDNN_ENABLED()
@@ -332,7 +332,7 @@ c10::intrusive_ptr<ConvPackedParamsBase<kSpatialDim>> deserialize_conv(

#ifdef USE_FBGEMM
if (ctx.qEngine() == at::QEngine::X86) {
#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
bool use_onednn = onednn_utils::should_use_onednn_quant(
weight.value(), transpose, groups, output_padding);
if (use_onednn) {

@@ -393,7 +393,7 @@ c10::intrusive_ptr<ConvPackedParamsBase<kSpatialDim>> deserialize_conv(
);
}
#endif // USE_PYTORCH_QNNPACK
#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
if (ctx.qEngine() == at::QEngine::ONEDNN) {
return PackedConvWeightsOnednn<kSpatialDim>::prepack(
weight.value(),

@@ -406,7 +406,7 @@ c10::intrusive_ptr<ConvPackedParamsBase<kSpatialDim>> deserialize_conv(
transpose
);
}
#endif // AT_MKLDNN_ENABLED()
#endif // AT_ONEDNN_ENABLED()
TORCH_CHECK(
false,
"Didn't find engine for when deserializing ConvPackedParams: ",

@@ -479,7 +479,7 @@ TORCH_API int register_linear_params() {
std::move(weight), std::move(bias));
}
#endif // USE_PYTORCH_QNNPACK
#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
if (at::globalContext().qEngine() == at::QEngine::ONEDNN) {
TORCH_CHECK(
weight.scalar_type() == at::kQInt8,

@@ -488,7 +488,7 @@ TORCH_API int register_linear_params() {
return PackedLinearWeightsOnednn::prepack(
std::move(weight), std::move(bias));
}
#endif // #if AT_MKLDNN_ENABLED()
#endif // #if AT_ONEDNN_ENABLED()
TORCH_CHECK(false, "Unknown qengine");
})
.def("bias", [](const c10::intrusive_ptr<LinearPackedParamsBase>& self) {
@@ -20,7 +20,7 @@
#include <caffe2/utils/threadpool/pthreadpool-cpp.h>
#include <torch/library.h>
#include <ATen/quantized/Quantizer.h>
#include <ATen/native/mkldnn/MKLDNNCommon.h>
#include <ATen/native/onednn/ONEDNNCommon.h>

#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Functions.h>

@@ -1111,7 +1111,7 @@ template at::Tensor PackedConvWeightsQnnp<3>::apply_impl<false>(

#endif // USE_PYTORCH_QNNPACK

#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
template <int kSpatialDim>
at::Tensor PackedConvWeightsOnednn<kSpatialDim>::apply(
const at::Tensor& input,

@@ -1734,7 +1734,7 @@ static at::Tensor _quantized_convolution_onednn(
}
}

#endif // #if AT_MKLDNN_ENABLED()
#endif // #if AT_ONEDNN_ENABLED()

namespace at::native {
namespace {

@@ -1793,10 +1793,10 @@ class QConvAddInt8 final {
const c10::intrusive_ptr<ConvPackedParamsBase<kSpatialDim>>& packed_weight,
double output_scale,
int64_t output_zero_point) {
#if AT_MKLDNN_ENABLED() || !defined(STRIP_ERROR_MESSAGES)
#if AT_ONEDNN_ENABLED() || !defined(STRIP_ERROR_MESSAGES)
auto& ctx = at::globalContext();
#endif
#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
if (ctx.qEngine() == at::QEngine::ONEDNN) {
if (kReluFused) {
return dynamic_cast<PackedConvWeightsOnednn<kSpatialDim>*>(packed_weight.get())->apply_add_relu(

@@ -1884,7 +1884,7 @@ class QConvoneDNN final {
c10::string_view attr,
torch::List<std::optional<at::Scalar>> scalars,
std::optional<c10::string_view> algorithm) {
#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
if (act.dim() == 3 || act.dim() == 5) {
// Conv1D/3D post op check
TORCH_CHECK(

@@ -1938,7 +1938,7 @@ class QConvoneDNN final {
std::optional<c10::string_view> unary_attr,
torch::List<std::optional<at::Scalar>> unary_scalars,
std::optional<c10::string_view> unary_algorithm) {
#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
// Conv2D post op check
TORCH_CHECK(
act.dim() == 4 && binary_attr == "sum" && (
@@ -129,7 +129,7 @@ template at::Tensor PackedConvWeightsQnnp<3>::apply_dynamic(

#endif // USE_PYTORCH_QNNPACK

#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()

template <int kSpatialDim>
at::Tensor PackedConvWeightsOnednn<kSpatialDim>::apply_dynamic(

@@ -178,7 +178,7 @@ template at::Tensor PackedConvWeightsOnednn<3>::apply_dynamic(
const at::Tensor& input,
bool reduce_range);

#endif // AT_MKLDNN_ENABLED()
#endif // AT_ONEDNN_ENABLED()

namespace at {
namespace native {
@@ -12,7 +12,7 @@
#include <ATen/native/quantized/cpu/OnednnUtils.h>
#include <ATen/native/quantized/cpu/QuantUtils.h>
#include <torch/library.h>
#include <ATen/native/mkldnn/MKLDNNCommon.h>
#include <ATen/native/onednn/ONEDNNCommon.h>

#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Functions.h>

@@ -322,7 +322,7 @@ c10::intrusive_ptr<ConvPackedParamsBase<2>> PackedConvWeightsQnnp<
bool transpose);
#endif // USE_PYTORCH_QNNPACK

#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
template <int kSpatialDim>
c10::intrusive_ptr<ConvPackedParamsBase<kSpatialDim>> PackedConvWeightsOnednn<
kSpatialDim>::

@@ -613,7 +613,7 @@ at::Tensor _qconv_prepack_onednn(
return packed_weight;
}

#endif // #if AT_MKLDNN_ENABLED()
#endif // #if AT_ONEDNN_ENABLED()

namespace at {
namespace native {

@@ -663,7 +663,7 @@ class QConvPackWeightInt8 final {
auto& ctx = at::globalContext();
#ifdef USE_FBGEMM
if (ctx.qEngine() == at::QEngine::X86) {
#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
bool use_onednn = onednn_utils::should_use_onednn_quant(
weight, transpose, groups, output_padding);
if (use_onednn) {

@@ -674,7 +674,7 @@ class QConvPackWeightInt8 final {
return PackedConvWeight<kSpatialDim>::prepack(
weight, bias, stride, padding, output_padding, dilation, groups, transpose);
} // x86
#endif // defined(USE_FBGEMM) || AT_MKLDNN_ENABLED()
#endif // defined(USE_FBGEMM) || AT_ONEDNN_ENABLED()

#ifdef USE_FBGEMM
if (ctx.qEngine() == at::QEngine::FBGEMM) {

@@ -692,7 +692,7 @@ class QConvPackWeightInt8 final {
}
#endif

#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
if (ctx.qEngine() == at::QEngine::ONEDNN) {
return PackedConvWeightsOnednn<kSpatialDim>::prepack(
weight, bias, stride, padding, output_padding, dilation, groups,

@@ -756,7 +756,7 @@ class QConv1dPackWeightInt8 final {

#ifdef USE_FBGEMM
if (ctx.qEngine() == at::QEngine::X86) {
#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
bool use_onednn = onednn_utils::should_use_onednn_quant(
weight, transpose, groups, output_padding);
if (use_onednn) {

@@ -788,7 +788,7 @@ class QConv1dPackWeightInt8 final {
}
#endif

#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
if (ctx.qEngine() == at::QEngine::ONEDNN) {
return PackedConvWeightsOnednn<2>::prepack(
weight, bias, stride, padding, output_padding, dilation, groups,

@@ -815,7 +815,7 @@ class QConvPrepackOneDNN final {
torch::List<int64_t> dilation,
int64_t groups,
std::optional<torch::List<int64_t>> input_shape) {
#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
return _qconv_prepack_onednn(
weight, weight_scales, input_scale, input_zero_point,
stride, padding, dilation, groups, input_shape);

@@ -121,7 +121,7 @@ template std::tuple<at::Tensor, std::optional<at::Tensor>> PackedConvWeightsQnnp
3>::unpack();
#endif // USE_PYTORCH_QNNPACK

#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
template <int kSpatialDim>
std::tuple<at::Tensor, std::optional<at::Tensor>> PackedConvWeightsOnednn<
kSpatialDim>::unpack() {

@@ -133,4 +133,4 @@ template std::tuple<at::Tensor, std::optional<at::Tensor>> PackedConvWeightsOned
2>::unpack();
template std::tuple<at::Tensor, std::optional<at::Tensor>> PackedConvWeightsOnednn<
3>::unpack();
#endif // #if AT_MKLDNN_ENABLED()
#endif // #if AT_ONEDNN_ENABLED()

@@ -9,7 +9,7 @@
#include <ATen/native/quantized/cpu/XnnpackUtils.h>
#include <ATen/native/quantized/cpu/OnednnUtils.h>
#include <ATen/native/quantized/cpu/QuantUtils.h>
#include <ATen/native/mkldnn/MKLDNNCommon.h>
#include <ATen/native/onednn/ONEDNNCommon.h>
#include <caffe2/utils/threadpool/pthreadpool-cpp.h>
#include <torch/library.h>

@@ -786,7 +786,7 @@ at::Tensor PackedLinearWeightsQnnp::apply_relu(

#endif // USE_PYTORCH_QNNPACK

#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
template <PostOps post_op>
at::Tensor PackedLinearWeightsOnednn::apply_impl(
at::Tensor input,

@@ -1099,7 +1099,7 @@ static at::Tensor linear_int8_with_onednn_weight(
primitive.execute(ideep::stream::default_stream(), args);
return dim == 2 ? output : output.reshape(output_size);
}
#endif // #if AT_MKLDNN_ENABLED()
#endif // #if AT_ONEDNN_ENABLED()

namespace at {
namespace native {

@@ -1131,10 +1131,10 @@ class QLinearLeakyReluInt8 final {
double output_scale,
int64_t output_zero_point,
double negative_slope) {
#if AT_MKLDNN_ENABLED() || !defined(STRIP_ERROR_MESSAGES)
#if AT_ONEDNN_ENABLED() || !defined(STRIP_ERROR_MESSAGES)
auto& ctx = at::globalContext();
#endif
#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
if (ctx.qEngine() == at::QEngine::ONEDNN) {
return dynamic_cast<PackedLinearWeightsOnednn*>(packed_weight.get())->apply_leaky_relu(
std::move(input), output_scale, output_zero_point, negative_slope);

@@ -1155,10 +1155,10 @@ class QLinearTanhInt8 final {
const c10::intrusive_ptr<LinearPackedParamsBase>& packed_weight,
double output_scale,
int64_t output_zero_point) {
#if AT_MKLDNN_ENABLED() || !defined(STRIP_ERROR_MESSAGES)
#if AT_ONEDNN_ENABLED() || !defined(STRIP_ERROR_MESSAGES)
auto& ctx = at::globalContext();
#endif
#if AT_MKLDNN_ENABLED()
#if AT_ONEDNN_ENABLED()
if (ctx.qEngine() == at::QEngine::ONEDNN) {
return dynamic_cast<PackedLinearWeightsOnednn*>(packed_weight.get())->apply_tanh(
std::move(input), output_scale, output_zero_point);

@@ -1205,7 +1205,7 @@ class QLinearOnednn final {
c10::string_view post_op_name,
torch::List<std::optional<at::Scalar>> post_op_args,
c10::string_view post_op_algorithm) {
|
||||
#if AT_MKLDNN_ENABLED()
|
||||
#if AT_ONEDNN_ENABLED()
|
||||
static std::optional<at::Tensor> other = std::nullopt;
|
||||
static const c10::string_view binary_post_op = "none";
|
||||
return linear_int8_with_onednn_weight(
|
||||
@ -1234,7 +1234,7 @@ class QLinearOnednn final {
|
||||
c10::string_view post_op_name,
|
||||
torch::List<std::optional<at::Scalar>> post_op_args,
|
||||
c10::string_view post_op_algorithm) {
|
||||
#if AT_MKLDNN_ENABLED()
|
||||
#if AT_ONEDNN_ENABLED()
|
||||
TORCH_CHECK(act_scale.numel() == 1 && act_zero_point.numel() == 1,
|
||||
"onednn int8 linear: act scale/zp size should be 1");
|
||||
static std::optional<at::Tensor> other = std::nullopt;
|
||||
@ -1270,7 +1270,7 @@ class QLinearOnednn final {
|
||||
c10::string_view unary_post_op, // e.g. "none", "relu"
|
||||
torch::List<std::optional<at::Scalar>> unary_post_op_args,
|
||||
c10::string_view unary_post_op_algorithm) {
|
||||
#if AT_MKLDNN_ENABLED()
|
||||
#if AT_ONEDNN_ENABLED()
|
||||
return linear_int8_with_onednn_weight(
|
||||
act, act_scale, act_zero_point,
|
||||
onednn_weight, weight_scales, weight_zero_points,
|
||||
@ -1302,7 +1302,7 @@ class QLinearOnednn final {
|
||||
c10::string_view unary_post_op, // e.g. "none", "relu"
|
||||
torch::List<std::optional<at::Scalar>> unary_post_op_args,
|
||||
c10::string_view unary_post_op_algorithm) {
|
||||
#if AT_MKLDNN_ENABLED()
|
||||
#if AT_ONEDNN_ENABLED()
|
||||
TORCH_CHECK(act_scale.numel() == 1 && act_zero_point.numel() == 1,
|
||||
"onednn int8 linear: act scale/zp size should be 1");
|
||||
return linear_int8_with_onednn_weight(
|
||||
|
||||
@ -489,7 +489,7 @@ void PackedLinearWeightFp16::set_bias(std::optional<at::Tensor> bias) {
|
||||
|
||||
#endif // USE_FBGEMM
|
||||
|
||||
#if AT_MKLDNN_ENABLED()
|
||||
#if AT_ONEDNN_ENABLED()
|
||||
template <bool ReluFused>
|
||||
at::Tensor PackedLinearWeightsOnednn::apply_dynamic_impl(
|
||||
at::Tensor input,
|
||||
@ -607,7 +607,7 @@ at::Tensor PackedLinearWeightsOnednn::apply_dynamic_relu(
|
||||
std::move(input), reduce_range);
|
||||
}
|
||||
|
||||
#endif // #if AT_MKLDNN_ENABLED()
|
||||
#endif // #if AT_ONEDNN_ENABLED()
|
||||
|
||||
namespace at {
|
||||
namespace native {
|
||||
|
||||
@ -7,7 +7,7 @@
|
||||
#include <ATen/native/quantized/cpu/QnnpackUtils.h>
|
||||
#include <ATen/native/quantized/cpu/OnednnUtils.h>
|
||||
#include <ATen/native/quantized/cpu/QuantUtils.h>
|
||||
#include <ATen/native/mkldnn/MKLDNNCommon.h>
|
||||
#include <ATen/native/onednn/ONEDNNCommon.h>
|
||||
#include <ATen/quantized/Quantizer.h>
|
||||
#include <torch/custom_class.h>
|
||||
#include <torch/library.h>
|
||||
@ -205,7 +205,7 @@ c10::intrusive_ptr<LinearPackedParamsBase> PackedLinearWeightFp16::prepack(
|
||||
}
|
||||
#endif // USE_FBGEMM
|
||||
|
||||
#if AT_MKLDNN_ENABLED()
|
||||
#if AT_ONEDNN_ENABLED()
|
||||
c10::intrusive_ptr<LinearPackedParamsBase> PackedLinearWeightsOnednn::prepack(
|
||||
at::Tensor weight,
|
||||
std::optional<at::Tensor> bias) {
|
||||
@ -301,7 +301,7 @@ inline at::Tensor pack_weight_to_onednn_tensor(
|
||||
return packed_weight;
|
||||
}
|
||||
|
||||
#endif // #if AT_MKLDNN_ENABLED()
|
||||
#endif // #if AT_ONEDNN_ENABLED()
|
||||
|
||||
namespace at {
|
||||
namespace native {
|
||||
@ -334,11 +334,11 @@ class QLinearPackWeightInt8 final {
|
||||
std::move(weight), std::move(bias));
|
||||
}
|
||||
#endif
|
||||
#if AT_MKLDNN_ENABLED()
|
||||
#if AT_ONEDNN_ENABLED()
|
||||
if (ctx.qEngine() == at::QEngine::ONEDNN) {
|
||||
return PackedLinearWeightsOnednn::prepack(std::move(weight), std::move(bias));
|
||||
}
|
||||
#endif // #if AT_MKLDNN_ENABLED()
|
||||
#endif // #if AT_ONEDNN_ENABLED()
|
||||
TORCH_CHECK(
|
||||
false,
|
||||
"Didn't find engine for operation quantized::linear_prepack ",
|
||||
@ -370,14 +370,14 @@ class QLinearPackWeightFp16 final {
|
||||
"not supported by QNNPACK");
|
||||
}
|
||||
#endif // USE_PYTORCH_QNNPACK
|
||||
#if AT_MKLDNN_ENABLED()
|
||||
#if AT_ONEDNN_ENABLED()
|
||||
if (ctx.qEngine() == at::QEngine::ONEDNN) {
|
||||
TORCH_CHECK(
|
||||
false,
|
||||
"quantized::linear_prepack_fp16 is currently "
|
||||
"not supported by ONEDNN");
|
||||
}
|
||||
#endif // #if AT_MKLDNN_ENABLED()
|
||||
#endif // #if AT_ONEDNN_ENABLED()
|
||||
TORCH_CHECK(
|
||||
false,
|
||||
"Didn't find engine for operation quantized::linear_prepack_fp16 ",
|
||||
@ -408,7 +408,7 @@ class QLinearPackWeightInt8Onednn final {
|
||||
static at::Tensor run(
|
||||
at::Tensor weight, // Not QTensor
|
||||
std::optional<torch::List<int64_t>> input_shape) {
|
||||
#if AT_MKLDNN_ENABLED()
|
||||
#if AT_ONEDNN_ENABLED()
|
||||
return pack_weight_to_onednn_tensor(weight, input_shape);
|
||||
#else
|
||||
TORCH_CHECK(false, "Unimplemented as onednn is not available.");
|
||||
|
||||
@ -70,7 +70,7 @@ class QConvUnpackWeightsInt8 final {
|
||||
}
|
||||
#endif
|
||||
|
||||
#if AT_MKLDNN_ENABLED()
|
||||
#if AT_ONEDNN_ENABLED()
|
||||
if (ctx.qEngine() == at::QEngine::ONEDNN) {
|
||||
return packed_weight->unpack();
|
||||
}
|
||||
@ -108,7 +108,7 @@ class QConv1dUnpackWeightsInt8 final {
|
||||
}
|
||||
#endif
|
||||
|
||||
#if AT_MKLDNN_ENABLED()
|
||||
#if AT_ONEDNN_ENABLED()
|
||||
if (ctx.qEngine() == at::QEngine::ONEDNN) {
|
||||
std::tie(weight, bias) = packed_weight->unpack();
|
||||
at::Tensor new_weight = weight.clone();
|
||||
|
||||
@ -4,8 +4,8 @@
|
||||
#include <mkl.h>
|
||||
#endif
|
||||
|
||||
#if AT_MKLDNN_ENABLED()
|
||||
#include <ATen/native/mkldnn/MKLDNNCommon.h>
|
||||
#if AT_ONEDNN_ENABLED()
|
||||
#include <ATen/native/onednn/ONEDNNCommon.h>
|
||||
#endif
|
||||
#include <ATen/native/verbose_wrapper.h>
|
||||
|
||||
@ -25,7 +25,7 @@ int _mkl_set_verbose(int enable [[maybe_unused]]) {
|
||||
}
|
||||
|
||||
int _mkldnn_set_verbose(int level [[maybe_unused]]) {
|
||||
#if AT_MKLDNN_ENABLED()
|
||||
#if AT_ONEDNN_ENABLED()
|
||||
return at::native::set_verbose(level);
|
||||
#else
|
||||
return 0;
|
||||
|
||||
@ -8,12 +8,12 @@
|
||||
#error "AT_MKL_ENABLED should not be visible in public headers"
|
||||
#endif
|
||||
|
||||
#ifdef AT_MKLDNN_ENABLED
|
||||
#error "AT_MKLDNN_ENABLED should not be visible in public headers"
|
||||
#ifdef AT_ONEDNN_ENABLED
|
||||
#error "AT_ONEDNN_ENABLED should not be visible in public headers"
|
||||
#endif
|
||||
|
||||
#ifdef AT_MKLDNN_ACL_ENABLED
|
||||
#error "AT_MKLDNN_ACL_ENABLED should not be visible in public headers"
|
||||
#ifdef AT_ONEDNN_ACL_ENABLED
|
||||
#error "AT_ONEDNN_ACL_ENABLED should not be visible in public headers"
|
||||
#endif
|
||||
|
||||
#ifdef CAFFE2_STATIC_LINK_CUDA
|
||||
|
||||
@ -248,8 +248,8 @@ def get_aten_preprocessor_flags():
|
||||
"-DCPU_CAPABILITY=DEFAULT",
|
||||
"-DCAFFE2_USE_LITE_PROTO",
|
||||
"-DATEN_CUDNN_ENABLED_FBXPLAT=0",
|
||||
"-DATEN_MKLDNN_ENABLED_FBXPLAT=0",
|
||||
"-DATEN_MKLDNN_ACL_ENABLED_FBXPLAT=0",
|
||||
"-DATEN_ONEDNN_ENABLED_FBXPLAT=0",
|
||||
"-DATEN_ONEDNN_ACL_ENABLED_FBXPLAT=0",
|
||||
"-DATEN_NNPACK_ENABLED_FBXPLAT=0",
|
||||
"-DATEN_MKL_ENABLED_FBXPLAT=0",
|
||||
"-DATEN_MKL_SEQUENTIAL_FBXPLAT=0",
|
||||
@ -895,7 +895,7 @@ def define_buck_targets(
|
||||
# Remove the following after modifying codegen for mobile.
|
||||
("aten/src", "ATen/mkl/*.h"),
|
||||
("aten/src", "ATen/native/mkl/*.h"),
|
||||
("aten/src", "ATen/native/mkldnn/*.h"),
|
||||
("aten/src", "ATen/native/onednn/*.h"),
|
||||
]),
|
||||
visibility = ["PUBLIC"],
|
||||
labels = labels,
|
||||
@ -1078,11 +1078,11 @@ def define_buck_targets(
|
||||
"--output-file",
|
||||
"Config.h",
|
||||
"--replace",
|
||||
"@AT_MKLDNN_ENABLED@",
|
||||
"ATEN_MKLDNN_ENABLED_FBXPLAT",
|
||||
"@AT_ONEDNN_ENABLED@",
|
||||
"ATEN_ONEDNN_ENABLED_FBXPLAT",
|
||||
"--replace",
|
||||
"@AT_MKLDNN_ACL_ENABLED@",
|
||||
"ATEN_MKLDNN_ACL_ENABLED_FBXPLAT",
|
||||
"@AT_ONEDNN_ACL_ENABLED@",
|
||||
"ATEN_ONEDNN_ACL_ENABLED_FBXPLAT",
|
||||
"--replace",
|
||||
"@AT_MKL_ENABLED@",
|
||||
"ATEN_MKL_ENABLED_FBXPLAT",
|
||||
|
||||
@ -22,7 +22,7 @@ def define_targets(rules):
|
||||
"CAFFE2_PERF_WITH_AVX512",
|
||||
"CAFFE2_USE_EXCEPTION_PTR",
|
||||
"CAFFE2_USE_CUDNN",
|
||||
"USE_MKLDNN",
|
||||
"USE_ONEDNN",
|
||||
"CAFFE2_USE_ITT",
|
||||
"USE_ROCM_KERNEL_ASSERT",
|
||||
"EIGEN_MPL2_ONLY",
|
||||
|
||||
@ -270,7 +270,7 @@ core_sources_full_mobile_no_backend_interface_xplat = [
|
||||
"torch/csrc/jit/passes/frozen_conv_folding.cpp",
|
||||
"torch/csrc/jit/passes/frozen_linear_folding.cpp",
|
||||
"torch/csrc/jit/passes/frozen_linear_transpose.cpp",
|
||||
"torch/csrc/jit/passes/frozen_ops_to_mkldnn.cpp",
|
||||
"torch/csrc/jit/passes/frozen_ops_to_onednn.cpp",
|
||||
"torch/csrc/jit/passes/frozen_graph_optimizations.cpp",
|
||||
"torch/csrc/jit/passes/remove_expands.cpp",
|
||||
"torch/csrc/jit/passes/remove_dropout.cpp",
|
||||
@ -290,7 +290,7 @@ core_sources_full_mobile_no_backend_interface_xplat = [
|
||||
"torch/csrc/jit/passes/utils/subgraph_utils.cpp",
|
||||
"torch/csrc/jit/passes/utils/optimization_utils.cpp",
|
||||
"torch/csrc/jit/passes/utils/op_registry.cpp",
|
||||
"torch/csrc/jit/passes/mkldnn_rewrite.cpp",
|
||||
"torch/csrc/jit/passes/onednn_rewrite.cpp",
|
||||
"torch/csrc/jit/passes/xnnpack_rewrite.cpp",
|
||||
"torch/csrc/jit/passes/vulkan_rewrite.cpp",
|
||||
"torch/csrc/jit/passes/metal_rewrite.cpp",
|
||||
@ -470,7 +470,7 @@ inductor_core_resources = [
|
||||
"torch/csrc/inductor/aoti_runner/model_container_runner_cpu.cpp",
|
||||
"torch/csrc/inductor/aoti_torch/shim_common.cpp",
|
||||
"torch/csrc/inductor/aoti_torch/tensor_converter.cpp",
|
||||
"torch/csrc/inductor/aoti_torch/mkldnn_tensor.cpp",
|
||||
"torch/csrc/inductor/aoti_torch/onednn_tensor.cpp",
|
||||
"torch/csrc/inductor/aoti_torch/oss_proxy_executor.cpp",
|
||||
"torch/csrc/inductor/inductor_ops.cpp",
|
||||
]
|
||||
@ -1071,29 +1071,29 @@ aten_cpu_source_non_codegen_list = [
|
||||
"aten/src/ATen/native/ComparisonUtils.cpp",
|
||||
"aten/src/ATen/native/DispatchStub.cpp",
|
||||
"aten/src/ATen/native/UpSample.cpp",
|
||||
"aten/src/ATen/native/mkldnn/BinaryOps.cpp",
|
||||
"aten/src/ATen/native/mkldnn/Conv.cpp",
|
||||
"aten/src/ATen/native/mkldnn/ConvPrepack.cpp",
|
||||
"aten/src/ATen/native/mkldnn/Copy.cpp",
|
||||
"aten/src/ATen/native/mkldnn/Gelu.cpp",
|
||||
"aten/src/ATen/native/mkldnn/IDeepRegistration.cpp",
|
||||
"aten/src/ATen/native/mkldnn/Linear.cpp",
|
||||
"aten/src/ATen/native/mkldnn/MKLDNNCommon.cpp",
|
||||
"aten/src/ATen/native/mkldnn/MKLDNNConversions.cpp",
|
||||
"aten/src/ATen/native/mkldnn/MkldnnTensorMath.cpp",
|
||||
"aten/src/ATen/native/mkldnn/Normalization.cpp",
|
||||
"aten/src/ATen/native/mkldnn/OpContext.cpp",
|
||||
"aten/src/ATen/native/mkldnn/Pooling.cpp",
|
||||
"aten/src/ATen/native/mkldnn/Prelu.cpp",
|
||||
"aten/src/ATen/native/mkldnn/RegisterMkldnnOpContextClass.cpp",
|
||||
"aten/src/ATen/native/mkldnn/Relu.cpp",
|
||||
"aten/src/ATen/native/mkldnn/RNN.cpp",
|
||||
"aten/src/ATen/native/mkldnn/SoftMax.cpp",
|
||||
"aten/src/ATen/native/mkldnn/TensorFactories.cpp",
|
||||
"aten/src/ATen/native/mkldnn/TensorShape.cpp",
|
||||
"aten/src/ATen/native/mkldnn/UnaryOps.cpp",
|
||||
"aten/src/ATen/native/mkldnn/Utils.cpp",
|
||||
"aten/src/ATen/native/mkldnn/Matmul.cpp",
|
||||
"aten/src/ATen/native/onednn/BinaryOps.cpp",
|
||||
"aten/src/ATen/native/onednn/Conv.cpp",
|
||||
"aten/src/ATen/native/onednn/ConvPrepack.cpp",
|
||||
"aten/src/ATen/native/onednn/Copy.cpp",
|
||||
"aten/src/ATen/native/onednn/Gelu.cpp",
|
||||
"aten/src/ATen/native/onednn/IDeepRegistration.cpp",
|
||||
"aten/src/ATen/native/onednn/Linear.cpp",
|
||||
"aten/src/ATen/native/onednn/ONEDNNCommon.cpp",
|
||||
"aten/src/ATen/native/onednn/ONEDNNConversions.cpp",
|
||||
"aten/src/ATen/native/onednn/OnednnTensorMath.cpp",
|
||||
"aten/src/ATen/native/onednn/Normalization.cpp",
|
||||
"aten/src/ATen/native/onednn/OpContext.cpp",
|
||||
"aten/src/ATen/native/onednn/Pooling.cpp",
|
||||
"aten/src/ATen/native/onednn/Prelu.cpp",
|
||||
"aten/src/ATen/native/onednn/RegisterMkldnnOpContextClass.cpp",
|
||||
"aten/src/ATen/native/onednn/Relu.cpp",
|
||||
"aten/src/ATen/native/onednn/RNN.cpp",
|
||||
"aten/src/ATen/native/onednn/SoftMax.cpp",
|
||||
"aten/src/ATen/native/onednn/TensorFactories.cpp",
|
||||
"aten/src/ATen/native/onednn/TensorShape.cpp",
|
||||
"aten/src/ATen/native/onednn/UnaryOps.cpp",
|
||||
"aten/src/ATen/native/onednn/Utils.cpp",
|
||||
"aten/src/ATen/native/onednn/Matmul.cpp",
|
||||
"aten/src/ATen/native/quantized/cpu/init_qnnpack.cpp",
|
||||
# This is moved to aten_cpu because some of the custom ops use empty_with_tail_padding
|
||||
# which was available only within aten_native_cpu. Ideally the right fix is to make
|
||||
|
||||
@ -22,7 +22,7 @@
|
||||
#cmakedefine CAFFE2_USE_GOOGLE_GLOG
|
||||
#cmakedefine CAFFE2_USE_LITE_PROTO
|
||||
#cmakedefine CAFFE2_USE_MKL
|
||||
#cmakedefine USE_MKLDNN
|
||||
#cmakedefine USE_ONEDNN
|
||||
#cmakedefine CAFFE2_USE_NVTX
|
||||
#cmakedefine CAFFE2_USE_ITT
|
||||
|
||||
@ -63,7 +63,7 @@
|
||||
{"USE_EIGEN_FOR_BLAS", "${CAFFE2_USE_EIGEN_FOR_BLAS}"}, \
|
||||
{"USE_LITE_PROTO", "${CAFFE2_USE_LITE_PROTO}"}, \
|
||||
{"USE_MKL", "${CAFFE2_USE_MKL}"}, \
|
||||
{"USE_MKLDNN", "${USE_MKLDNN}"}, \
|
||||
{"USE_ONEDNN", "${USE_ONEDNN}"}, \
|
||||
{"USE_NVTX", "${CAFFE2_USE_NVTX}"}, \
|
||||
{"USE_ITT", "${CAFFE2_USE_ITT}"}, \
|
||||
{"USE_ROCM_KERNEL_ASSERT", "${USE_ROCM_KERNEL_ASSERT}"}, \
|
||||
|
||||
@ -106,8 +106,8 @@ if(@CAFFE2_USE_MKL@)
|
||||
include("${CMAKE_CURRENT_LIST_DIR}/public/mkl.cmake")
|
||||
endif()
|
||||
|
||||
if(@USE_MKLDNN@)
|
||||
include("${CMAKE_CURRENT_LIST_DIR}/public/mkldnn.cmake")
|
||||
if(@USE_ONEDNN@)
|
||||
include("${CMAKE_CURRENT_LIST_DIR}/public/onednn.cmake")
|
||||
endif()
|
||||
|
||||
# import targets
|
||||
|
||||
@ -145,15 +145,15 @@ endif()
|
||||
|
||||
# ---[ BLAS
|
||||
|
||||
set(AT_MKLDNN_ACL_ENABLED 0)
|
||||
set(AT_MKLDNN_ENABLED 0)
|
||||
set(AT_ONEDNN_ACL_ENABLED 0)
|
||||
set(AT_ONEDNN_ENABLED 0)
|
||||
set(AT_MKL_ENABLED 0)
|
||||
# setting default preferred BLAS options if not already present.
|
||||
if(NOT INTERN_BUILD_MOBILE)
|
||||
set(BLAS "MKL" CACHE STRING "Selected BLAS library")
|
||||
else()
|
||||
set(BLAS "Eigen" CACHE STRING "Selected BLAS library")
|
||||
set(AT_MKLDNN_ENABLED 0)
|
||||
set(AT_ONEDNN_ENABLED 0)
|
||||
set(AT_MKL_ENABLED 0)
|
||||
endif()
|
||||
set_property(CACHE BLAS PROPERTY STRINGS "ATLAS;BLIS;Eigen;FLAME;Generic;MKL;OpenBLAS;vecLib")
|
||||
@ -1451,29 +1451,29 @@ if(NOT INTERN_BUILD_MOBILE)
|
||||
set(AT_ROCM_ENABLED 1)
|
||||
endif()
|
||||
|
||||
if(USE_MKLDNN)
|
||||
if(USE_ONEDNN)
|
||||
if(NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
message(WARNING
|
||||
"x64 operating system is required for MKLDNN. "
|
||||
"Not compiling with MKLDNN. "
|
||||
"Turn this warning off by USE_MKLDNN=OFF.")
|
||||
set(USE_MKLDNN OFF)
|
||||
"x64 operating system is required for ONEDNN. "
|
||||
"Not compiling with ONEDNN. "
|
||||
"Turn this warning off by USE_ONEDNN=OFF.")
|
||||
set(USE_ONEDNN OFF)
|
||||
endif()
|
||||
if(USE_MKLDNN_ACL)
|
||||
set(AT_MKLDNN_ACL_ENABLED 1)
|
||||
if(USE_ONEDNN_ACL)
|
||||
set(AT_ONEDNN_ACL_ENABLED 1)
|
||||
endif()
|
||||
endif()
|
||||
if(USE_MKLDNN)
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/public/mkldnn.cmake)
|
||||
if(MKLDNN_FOUND)
|
||||
set(AT_MKLDNN_ENABLED 1)
|
||||
include_directories(AFTER SYSTEM ${MKLDNN_INCLUDE_DIR})
|
||||
if(USE_ONEDNN)
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/public/onednn.cmake)
|
||||
if(ONEDNN_FOUND)
|
||||
set(AT_ONEDNN_ENABLED 1)
|
||||
include_directories(AFTER SYSTEM ${ONEDNN_INCLUDE_DIR})
|
||||
else()
|
||||
message(WARNING "MKLDNN could not be found.")
|
||||
caffe2_update_option(USE_MKLDNN OFF)
|
||||
message(WARNING "ONEDNN could not be found.")
|
||||
caffe2_update_option(USE_ONEDNN OFF)
|
||||
endif()
|
||||
else()
|
||||
message("disabling MKLDNN because USE_MKLDNN is not set")
|
||||
message("disabling ONEDNN because USE_ONEDNN is not set")
|
||||
endif()
|
||||
|
||||
if(UNIX AND NOT APPLE)
|
||||
|
||||
@ -1,23 +1,23 @@
|
||||
# - Try to find MKLDNN
|
||||
# - Try to find ONEDNN
|
||||
#
|
||||
# The following variables are optionally searched for defaults
|
||||
# MKL_FOUND : set to true if a library implementing the CBLAS interface is found
|
||||
#
|
||||
# The following are set after configuration is done:
|
||||
# MKLDNN_FOUND : set to true if mkl-dnn is found.
|
||||
# MKLDNN_INCLUDE_DIR : path to mkl-dnn include dir.
|
||||
# MKLDNN_LIBRARIES : list of libraries for mkl-dnn
|
||||
# ONEDNN_FOUND : set to true if mkl-dnn is found.
|
||||
# ONEDNN_INCLUDE_DIR : path to mkl-dnn include dir.
|
||||
# ONEDNN_LIBRARIES : list of libraries for mkl-dnn
|
||||
#
|
||||
# The following variables are used:
|
||||
# MKLDNN_USE_NATIVE_ARCH : Whether native CPU instructions should be used in MKLDNN. This should be turned off for
|
||||
# ONEDNN_USE_NATIVE_ARCH : Whether native CPU instructions should be used in ONEDNN. This should be turned off for
|
||||
# general packaging to avoid incompatible CPU instructions. Default: OFF.
|
||||
|
||||
IF(NOT MKLDNN_FOUND)
|
||||
SET(MKLDNN_LIBRARIES)
|
||||
SET(MKLDNN_INCLUDE_DIR)
|
||||
IF(NOT ONEDNN_FOUND)
|
||||
SET(ONEDNN_LIBRARIES)
|
||||
SET(ONEDNN_INCLUDE_DIR)
|
||||
|
||||
SET(IDEEP_ROOT "${PROJECT_SOURCE_DIR}/third_party/ideep")
|
||||
SET(MKLDNN_ROOT "${PROJECT_SOURCE_DIR}/third_party/ideep/mkl-dnn")
|
||||
SET(ONEDNN_ROOT "${PROJECT_SOURCE_DIR}/third_party/ideep/mkl-dnn")
|
||||
|
||||
if(USE_XPU) # Build oneDNN GPU library
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
@ -45,9 +45,9 @@ IF(NOT MKLDNN_FOUND)
|
||||
if(LINUX)
|
||||
set(ABI_NEUTRAL_FLAGS -fpreview-breaking-changes)
|
||||
endif()
|
||||
ExternalProject_Add(xpu_mkldnn_proj
|
||||
SOURCE_DIR ${MKLDNN_ROOT}
|
||||
PREFIX ${XPU_MKLDNN_DIR_PREFIX}
|
||||
ExternalProject_Add(xpu_onednn_proj
|
||||
SOURCE_DIR ${ONEDNN_ROOT}
|
||||
PREFIX ${XPU_ONEDNN_DIR_PREFIX}
|
||||
BUILD_IN_SOURCE 0
|
||||
CMAKE_ARGS -DCMAKE_C_COMPILER=icx
|
||||
-DCMAKE_CXX_COMPILER=${SYCL_CXX_DRIVER}
|
||||
@ -61,21 +61,21 @@ IF(NOT MKLDNN_FOUND)
|
||||
-DDNNL_DPCPP_HOST_COMPILER=${DNNL_HOST_COMPILER} # Use global cxx compiler as host compiler
|
||||
-G ${CMAKE_GENERATOR} # Align Generator to Torch
|
||||
BUILD_COMMAND ${DNNL_MAKE_COMMAND}
|
||||
BUILD_BYPRODUCTS "xpu_mkldnn_proj-prefix/src/xpu_mkldnn_proj-build/src/${DNNL_LIB_NAME}"
|
||||
BUILD_BYPRODUCTS "xpu_onednn_proj-prefix/src/xpu_onednn_proj-build/src/${DNNL_LIB_NAME}"
|
||||
INSTALL_COMMAND ""
|
||||
)
|
||||
|
||||
ExternalProject_Get_Property(xpu_mkldnn_proj BINARY_DIR)
|
||||
set(__XPU_MKLDNN_BUILD_DIR ${BINARY_DIR})
|
||||
set(XPU_MKLDNN_LIBRARIES ${__XPU_MKLDNN_BUILD_DIR}/src/${DNNL_LIB_NAME})
|
||||
set(XPU_MKLDNN_INCLUDE ${__XPU_MKLDNN_BUILD_DIR}/include)
|
||||
ExternalProject_Get_Property(xpu_onednn_proj BINARY_DIR)
|
||||
set(__XPU_ONEDNN_BUILD_DIR ${BINARY_DIR})
|
||||
set(XPU_ONEDNN_LIBRARIES ${__XPU_ONEDNN_BUILD_DIR}/src/${DNNL_LIB_NAME})
|
||||
set(XPU_ONEDNN_INCLUDE ${__XPU_ONEDNN_BUILD_DIR}/include)
|
||||
# This target would be further linked to libtorch_xpu.so.
|
||||
# The libtorch_xpu.so would contain Conv&GEMM operators that depend on
|
||||
# oneDNN primitive implementations inside libdnnl.a.
|
||||
add_library(xpu_mkldnn INTERFACE)
|
||||
add_dependencies(xpu_mkldnn xpu_mkldnn_proj)
|
||||
target_link_libraries(xpu_mkldnn INTERFACE ${__XPU_MKLDNN_BUILD_DIR}/src/${DNNL_LIB_NAME})
|
||||
target_include_directories(xpu_mkldnn INTERFACE ${XPU_MKLDNN_INCLUDE})
|
||||
add_library(xpu_onednn INTERFACE)
|
||||
add_dependencies(xpu_onednn xpu_onednn_proj)
|
||||
target_link_libraries(xpu_onednn INTERFACE ${__XPU_ONEDNN_BUILD_DIR}/src/${DNNL_LIB_NAME})
target_include_directories(xpu_onednn INTERFACE ${XPU_ONEDNN_INCLUDE})
endif()

IF(NOT APPLE AND NOT WIN32 AND NOT BUILD_LITE_INTERPRETER)
@ -87,63 +87,63 @@ IF(NOT MKLDNN_FOUND)

FIND_PACKAGE(BLAS)
FIND_PATH(IDEEP_INCLUDE_DIR ideep.hpp PATHS ${IDEEP_ROOT} PATH_SUFFIXES include)
FIND_PATH(MKLDNN_INCLUDE_DIR dnnl.hpp dnnl.h PATHS ${MKLDNN_ROOT} PATH_SUFFIXES include/oneapi/dnnl)
IF(NOT MKLDNN_INCLUDE_DIR)
MESSAGE("MKLDNN_INCLUDE_DIR not found")
FIND_PATH(ONEDNN_INCLUDE_DIR dnnl.hpp dnnl.h PATHS ${ONEDNN_ROOT} PATH_SUFFIXES include/oneapi/dnnl)
IF(NOT ONEDNN_INCLUDE_DIR)
MESSAGE("ONEDNN_INCLUDE_DIR not found")
EXECUTE_PROCESS(COMMAND git${CMAKE_EXECUTABLE_SUFFIX} submodule update --init mkl-dnn WORKING_DIRECTORY ${IDEEP_ROOT})
FIND_PATH(MKLDNN_INCLUDE_DIR dnnl.hpp dnnl.h PATHS ${MKLDNN_ROOT} PATH_SUFFIXES include)
ENDIF(NOT MKLDNN_INCLUDE_DIR)
FIND_PATH(ONEDNN_INCLUDE_DIR dnnl.hpp dnnl.h PATHS ${ONEDNN_ROOT} PATH_SUFFIXES include)
ENDIF(NOT ONEDNN_INCLUDE_DIR)
IF(BUILD_ONEDNN_GRAPH)
FIND_PATH(LLGA_INCLUDE_DIR dnnl_graph.hpp PATHS ${LLGA_ROOT} PATH_SUFFIXES include/oneapi/dnnl)
ENDIF(BUILD_ONEDNN_GRAPH)

IF(NOT IDEEP_INCLUDE_DIR OR NOT MKLDNN_INCLUDE_DIR)
MESSAGE(STATUS "MKLDNN source files not found!")
IF(NOT IDEEP_INCLUDE_DIR OR NOT ONEDNN_INCLUDE_DIR)
MESSAGE(STATUS "ONEDNN source files not found!")
RETURN()
ENDIF(NOT IDEEP_INCLUDE_DIR OR NOT MKLDNN_INCLUDE_DIR)
LIST(APPEND MKLDNN_INCLUDE_DIR ${IDEEP_INCLUDE_DIR})
ENDIF(NOT IDEEP_INCLUDE_DIR OR NOT ONEDNN_INCLUDE_DIR)
LIST(APPEND ONEDNN_INCLUDE_DIR ${IDEEP_INCLUDE_DIR})
IF(BUILD_ONEDNN_GRAPH)
LIST(APPEND MKLDNN_INCLUDE_DIR ${LLGA_INCLUDE_DIR})
LIST(APPEND ONEDNN_INCLUDE_DIR ${LLGA_INCLUDE_DIR})
ENDIF(BUILD_ONEDNN_GRAPH)
IF(MKL_FOUND)
ADD_DEFINITIONS(-DIDEEP_USE_MKL)
# Append to mkldnn dependencies
LIST(APPEND MKLDNN_LIBRARIES ${MKL_LIBRARIES})
LIST(APPEND MKLDNN_INCLUDE_DIR ${MKL_INCLUDE_DIR})
LIST(APPEND ONEDNN_LIBRARIES ${MKL_LIBRARIES})
LIST(APPEND ONEDNN_INCLUDE_DIR ${MKL_INCLUDE_DIR})
ELSE(MKL_FOUND)
SET(MKLDNN_USE_MKL "NONE" CACHE STRING "" FORCE)
SET(ONEDNN_USE_MKL "NONE" CACHE STRING "" FORCE)
ENDIF(MKL_FOUND)

SET(MKL_cmake_included TRUE)
IF(NOT MKLDNN_CPU_RUNTIME)
SET(MKLDNN_CPU_RUNTIME "OMP" CACHE STRING "")
ELSEIF(MKLDNN_CPU_RUNTIME STREQUAL "TBB")
IF(NOT ONEDNN_CPU_RUNTIME)
SET(ONEDNN_CPU_RUNTIME "OMP" CACHE STRING "")
ELSEIF(ONEDNN_CPU_RUNTIME STREQUAL "TBB")
IF(TARGET TBB::tbb)
MESSAGE(STATUS "MKL-DNN is using TBB")

SET(TBB_cmake_included TRUE)
SET(Threading_cmake_included TRUE)

SET(DNNL_CPU_THREADING_RUNTIME ${MKLDNN_CPU_RUNTIME})
SET(DNNL_CPU_THREADING_RUNTIME ${ONEDNN_CPU_RUNTIME})
INCLUDE_DIRECTORIES(${TBB_INCLUDE_DIR})
LIST(APPEND EXTRA_SHARED_LIBS TBB::tbb)
ELSE()
MESSAGE(FATAL_ERROR "MKLDNN_CPU_RUNTIME is set to TBB but TBB is not used")
MESSAGE(FATAL_ERROR "ONEDNN_CPU_RUNTIME is set to TBB but TBB is not used")
ENDIF()
ENDIF()
MESSAGE(STATUS "MKLDNN_CPU_RUNTIME = ${MKLDNN_CPU_RUNTIME}")
MESSAGE(STATUS "ONEDNN_CPU_RUNTIME = ${ONEDNN_CPU_RUNTIME}")

SET(MKLDNN_CPU_RUNTIME ${MKLDNN_CPU_RUNTIME} CACHE STRING "" FORCE)
SET(ONEDNN_CPU_RUNTIME ${ONEDNN_CPU_RUNTIME} CACHE STRING "" FORCE)
SET(DNNL_BUILD_TESTS FALSE CACHE BOOL "" FORCE)
SET(DNNL_BUILD_EXAMPLES FALSE CACHE BOOL "" FORCE)
SET(DNNL_LIBRARY_TYPE STATIC CACHE STRING "" FORCE)
SET(DNNL_ENABLE_PRIMITIVE_CACHE TRUE CACHE BOOL "" FORCE)
SET(DNNL_GRAPH_CPU_RUNTIME ${MKLDNN_CPU_RUNTIME} CACHE STRING "" FORCE)
SET(DNNL_GRAPH_CPU_RUNTIME ${ONEDNN_CPU_RUNTIME} CACHE STRING "" FORCE)

IF(BUILD_ONEDNN_GRAPH)
SET(DNNL_GRAPH_LIBRARY_TYPE STATIC CACHE STRING "" FORCE)
ENDIF(BUILD_ONEDNN_GRAPH)
IF(MKLDNN_USE_NATIVE_ARCH) # Disable HostOpts in MKLDNN unless MKLDNN_USE_NATIVE_ARCH is set.
IF(ONEDNN_USE_NATIVE_ARCH) # Disable HostOpts in ONEDNN unless ONEDNN_USE_NATIVE_ARCH is set.
SET(DNNL_ARCH_OPT_FLAGS "HostOpts" CACHE STRING "" FORCE)
ELSE()
IF(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
@ -158,7 +158,7 @@ IF(NOT MKLDNN_FOUND)
ENDIF()
ENDIF()

ADD_SUBDIRECTORY(${MKLDNN_ROOT})
ADD_SUBDIRECTORY(${ONEDNN_ROOT})

IF(NOT TARGET dnnl)
MESSAGE("Failed to include MKL-DNN target")
@ -170,10 +170,10 @@ IF(NOT MKLDNN_FOUND)
TARGET_COMPILE_OPTIONS(dnnl PRIVATE -Wno-strict-overflow)
TARGET_COMPILE_OPTIONS(dnnl PRIVATE -Wno-error=strict-overflow)
ENDIF(NOT APPLE AND CMAKE_COMPILER_IS_GNUCC)
LIST(APPEND MKLDNN_LIBRARIES ${MKL_OPENMP_LIBRARY})
LIST(APPEND MKLDNN_LIBRARIES dnnl)
LIST(APPEND ONEDNN_LIBRARIES ${MKL_OPENMP_LIBRARY})
LIST(APPEND ONEDNN_LIBRARIES dnnl)

SET(MKLDNN_FOUND TRUE)
SET(ONEDNN_FOUND TRUE)
MESSAGE(STATUS "Found MKL-DNN: TRUE")

ENDIF(NOT MKLDNN_FOUND)
ENDIF(NOT ONEDNN_FOUND)
@ -134,10 +134,10 @@ function(caffe2_print_configuration_summary)
message(STATUS " USE_PYTORCH_METAL_EXPORT : ${USE_PYTORCH_METAL_EXPORT}")
message(STATUS " USE_MPS : ${USE_MPS}")
message(STATUS " USE_MKL : ${CAFFE2_USE_MKL}")
message(STATUS " USE_MKLDNN : ${USE_MKLDNN}")
if(${USE_MKLDNN})
message(STATUS " USE_MKLDNN_ACL : ${USE_MKLDNN_ACL}")
message(STATUS " USE_MKLDNN_CBLAS : ${USE_MKLDNN_CBLAS}")
message(STATUS " USE_ONEDNN : ${USE_ONEDNN}")
if(${USE_ONEDNN})
message(STATUS " USE_ONEDNN_ACL : ${USE_ONEDNN_ACL}")
message(STATUS " USE_ONEDNN_CBLAS : ${USE_ONEDNN_CBLAS}")
endif()
message(STATUS " USE_UCC : ${USE_UCC}")
if(${USE_UCC})

@ -109,7 +109,7 @@ else()
append_torchlib_if_found(fbgemm)
endif()

if(@USE_MKLDNN@)
if(@USE_ONEDNN@)
append_torchlib_if_found(dnnl mkldnn)
endif()


@ -4,7 +4,7 @@
# must be set with the env var ACL_ROOT_DIR. This path will be checked later
# as part of FindACL.cmake in oneDNN.

if(NOT USE_MKLDNN_ACL)
if(NOT USE_ONEDNN_ACL)
RETURN()
endif()


@ -1,18 +0,0 @@
set(MKLDNN_USE_NATIVE_ARCH ${USE_NATIVE_ARCH})

if(CPU_AARCH64)
include(${CMAKE_CURRENT_LIST_DIR}/ComputeLibrary.cmake)
endif()

find_package(MKLDNN QUIET)

if(NOT TARGET caffe2::mkldnn)
add_library(caffe2::mkldnn INTERFACE IMPORTED)
endif()

set_property(
TARGET caffe2::mkldnn PROPERTY INTERFACE_INCLUDE_DIRECTORIES
${MKLDNN_INCLUDE_DIR})
set_property(
TARGET caffe2::mkldnn PROPERTY INTERFACE_LINK_LIBRARIES
${MKLDNN_LIBRARIES})
18
cmake/public/onednn.cmake
Normal file
18
cmake/public/onednn.cmake
Normal file
@ -0,0 +1,18 @@
set(ONEDNN_USE_NATIVE_ARCH ${USE_NATIVE_ARCH})

if(CPU_AARCH64)
include(${CMAKE_CURRENT_LIST_DIR}/ComputeLibrary.cmake)
endif()

find_package(ONEDNN QUIET)

if(NOT TARGET caffe2::onednn)
add_library(caffe2::onednn INTERFACE IMPORTED)
endif()

set_property(
TARGET caffe2::onednn PROPERTY INTERFACE_INCLUDE_DIRECTORIES
${ONEDNN_INCLUDE_DIR})
set_property(
TARGET caffe2::onednn PROPERTY INTERFACE_LINK_LIBRARIES
${ONEDNN_LIBRARIES})
@ -142,7 +142,7 @@ Fast Fourier Transform (torch.fft)
- Mike Ruberry (`mruberry <https://github.com/mruberry>`__)
- Peter Bell (`peterbell10 <https://github.com/peterbell10>`__)

CPU Performance (Torch Inductor / MKLDNN)
CPU Performance (Torch Inductor / ONEDNN)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

- Mingfei Ma (`mingfeima <https://github.com/mingfeima>`__)

@ -77,7 +77,7 @@ libraries at the build time with the following build options:
+------------+------------------------+-----------------------------+----------------------------------------+
| MKL | ``MKL_THREADING`` | (same) | To enable MKL use ``BLAS=MKL`` |
+------------+------------------------+-----------------------------+----------------------------------------+
| MKL-DNN | ``MKLDNN_CPU_RUNTIME`` | (same) | To enable MKL-DNN use ``USE_MKLDNN=1`` |
| MKL-DNN | ``ONEDNN_CPU_RUNTIME`` | (same) | To enable MKL-DNN use ``USE_ONEDNN=1`` |
+------------+------------------------+-----------------------------+----------------------------------------+

It is recommended not to mix OpenMP and TBB within one build.

@ -110,7 +110,7 @@ CMAKE_ARGS+=("-DUSE_OPENCV=OFF")
CMAKE_ARGS+=("-DUSE_MPI=OFF")
CMAKE_ARGS+=("-DUSE_NUMPY=OFF")
CMAKE_ARGS+=("-DUSE_NNPACK=OFF")
CMAKE_ARGS+=("-DUSE_MKLDNN=OFF")
CMAKE_ARGS+=("-DUSE_ONEDNN=OFF")

# Metal
if [ "${USE_PYTORCH_METAL:-}" == "1" ]; then

@ -70,7 +70,7 @@ CMAKE_ARGS+=("-DUSE_GFLAGS=OFF")
CMAKE_ARGS+=("-DUSE_OPENCV=OFF")
CMAKE_ARGS+=("-DUSE_MPI=OFF")
CMAKE_ARGS+=("-DUSE_OPENMP=OFF")
CMAKE_ARGS+=("-DUSE_MKLDNN=OFF")
CMAKE_ARGS+=("-DUSE_ONEDNN=OFF")
CMAKE_ARGS+=("-DUSE_NNPACK=OFF")
CMAKE_ARGS+=("-DUSE_NUMPY=OFF")
CMAKE_ARGS+=("-DUSE_BLAS=OFF")

30
setup.py
30
setup.py
@ -50,14 +50,14 @@
# BUILD_TEST=0
# disables the test build
#
# USE_MKLDNN=0
# disables use of MKLDNN
# USE_ONEDNN=0
# disables use of ONEDNN
#
# USE_MKLDNN_ACL
# enables use of Compute Library backend for MKLDNN on Arm;
# USE_MKLDNN must be explicitly enabled.
# USE_ONEDNN_ACL
# enables use of Compute Library backend for ONEDNN on Arm;
# USE_ONEDNN must be explicitly enabled.
#
# MKLDNN_CPU_RUNTIME
# ONEDNN_CPU_RUNTIME
# MKL-DNN threading mode: TBB or OMP (default)
#
# USE_STATIC_MKL
@ -617,20 +617,20 @@ class build_ext(setuptools.command.build_ext.build_ext):
report("-- Detected XPU runtime at " + cmake_cache_vars["SYCL_LIBRARY_DIR"])
else:
report("-- Not using XPU")
if cmake_cache_vars["USE_MKLDNN"]:
report("-- Using MKLDNN")
if cmake_cache_vars["USE_MKLDNN_ACL"]:
report("-- Using Compute Library for the Arm architecture with MKLDNN")
if cmake_cache_vars["USE_ONEDNN"]:
report("-- Using ONEDNN")
if cmake_cache_vars["USE_ONEDNN_ACL"]:
report("-- Using Compute Library for the Arm architecture with ONEDNN")
else:
report(
"-- Not using Compute Library for the Arm architecture with MKLDNN"
"-- Not using Compute Library for the Arm architecture with ONEDNN"
)
if cmake_cache_vars["USE_MKLDNN_CBLAS"]:
report("-- Using CBLAS in MKLDNN")
if cmake_cache_vars["USE_ONEDNN_CBLAS"]:
report("-- Using CBLAS in ONEDNN")
else:
report("-- Not using CBLAS in MKLDNN")
report("-- Not using CBLAS in ONEDNN")
else:
report("-- Not using MKLDNN")
report("-- Not using ONEDNN")
if cmake_cache_vars["USE_NCCL"] and cmake_cache_vars["USE_SYSTEM_NCCL"]:
report(
"-- Using system provided NCCL library at {}, {}".format(

@ -117,8 +117,8 @@ if(USE_SYSTEM_ONNX)
target_link_libraries(test_jit PRIVATE onnx_proto onnx)
endif()

if(USE_MKLDNN)
target_link_libraries(test_jit PRIVATE caffe2::mkldnn)
if(USE_ONEDNN)
target_link_libraries(test_jit PRIVATE caffe2::onednn)
endif()

set(JIT_TEST_DEPENDENCIES torch gtest jitbackend_test backend_with_compiler)

@ -62,7 +62,7 @@ def change_cos_pass(graph):

class TestPostGradCustomPrePostPass(TestCustomPassBase):
# mkldnn fusion's pattern_matcher
# (torch/_inductor/fx_passes/mkldnn_fusion.py),
# (torch/_inductor/fx_passes/onednn_fusion.py),
# and apply it to custom post_grad_passes.
def _register_mkldnn_conv_relu_fusion(self, custom_pass_dict):
# pattern

@ -9,7 +9,7 @@ class TestMKLDNNVerbose(TestCase):
def test_verbose_on(self):
num = 0
loc = os.path.dirname(os.path.abspath(__file__))
with subprocess.Popen(f'{sys.executable} -u {loc}/mkldnn_verbose.py --verbose-level=1', shell=True,
with subprocess.Popen(f'{sys.executable} -u {loc}/onednn_verbose.py --verbose-level=1', shell=True,
stdout=subprocess.PIPE, stderr=subprocess.STDOUT) as p:
for line in p.stdout.readlines():
line = str(line, 'utf-8').strip()
@ -22,7 +22,7 @@ class TestMKLDNNVerbose(TestCase):
def test_verbose_off(self):
num = 0
loc = os.path.dirname(os.path.abspath(__file__))
with subprocess.Popen(f'{sys.executable} -u {loc}/mkldnn_verbose.py --verbose-level=0', shell=True,
with subprocess.Popen(f'{sys.executable} -u {loc}/onednn_verbose.py --verbose-level=0', shell=True,
stdout=subprocess.PIPE, stderr=subprocess.STDOUT) as p:
for line in p.stdout.readlines():
line = str(line, 'utf-8').strip()

@ -217,7 +217,7 @@ class CMake:
"INTEL_MKL_DIR",
"INTEL_OMP_DIR",
"MKL_THREADING",
"MKLDNN_CPU_RUNTIME",
"ONEDNN_CPU_RUNTIME",
"MSVC_Z7_OVERRIDE",
"CAFFE2_USE_MSVC_STATIC_RUNTIME",
"Numa_INCLUDE_DIR",

@ -10,10 +10,10 @@ from torch.types import _bool, _device, _dtype, _int, _size

${c_nn_function_hints}

# Defined in aten/src/ATen/native/mkldnn/Linear.cpp
# Defined in aten/src/ATen/native/onednn/Linear.cpp
def mkldnn_linear(input: Tensor, weight: Tensor, bias: Optional[Tensor]) -> Tensor: ...

# Defined at aten/src/ATen/native/mkldnn/MKLDNNConversions.cpp
# Defined at aten/src/ATen/native/onednn/ONEDNNConversions.cpp
def mkldnn_reorder_conv2d_weight(
self: Tensor,
padding: List,
@ -29,7 +29,7 @@ def mkldnn_reorder_conv3d_weight(
groups: int,
) -> Tensor: ...

# Defined in aten/src/ATen/native/mkldnn/Prelu.cpp
# Defined in aten/src/ATen/native/onednn/Prelu.cpp
def mkldnn_prelu(input: Tensor, weight: Tensor) -> Tensor: ...

# Defined at tools/autograd/templates/python_nn_functions.cpp

@ -3108,7 +3108,7 @@ def mkldnn_one_layer_lstm(inp, hidden, params, has_biases, reverse=False):

train = False
# If batch_first, inp has been permuted in _rnn_helper. Convert to contiguous here.
# Same as aten/src/ATen/native/mkldnn/RNN.cpp: mkldnn_rnn: input = input.contiguous();
# Same as aten/src/ATen/native/onednn/RNN.cpp: mkldnn_rnn: input = input.contiguous();
inp = inp.contiguous()
hx = hx.contiguous()
cx = cx.contiguous()

@ -74,7 +74,7 @@ def freezing_passes(gm: torch.fx.GraphModule, aot_example_inputs):
and config.cpp.weight_prepack
and config.layout_optimization
):
from .mkldnn_fusion import _eliminate_duplicate_packed_nodes
from .onednn_fusion import _eliminate_duplicate_packed_nodes

_eliminate_duplicate_packed_nodes(gm)

@ -86,7 +86,7 @@ def freezing_passes(gm: torch.fx.GraphModule, aot_example_inputs):
@init_once_fakemode
def lazy_init():
if torch._C._has_mkldnn and config.cpp.weight_prepack:
from .mkldnn_fusion import _mkldnn_weight_pack_init
from .onednn_fusion import _mkldnn_weight_pack_init

_mkldnn_weight_pack_init()