/*******************************************************************************
* Copyright 2020-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

/// @file
/// Graph C++ API

#ifndef ONEAPI_DNNL_DNNL_GRAPH_HPP
#define ONEAPI_DNNL_DNNL_GRAPH_HPP
// NOLINTBEGIN(readability-identifier-naming)

#include "oneapi/dnnl/dnnl_common.hpp"
#include "oneapi/dnnl/dnnl_graph.h"

#include <limits>
#include <memory>
#include <string>
#include <utility>
#include <vector>

/// @addtogroup dnnl_api
/// @{

namespace dnnl {

/// @addtogroup dnnl_graph_api Graph API
/// oneDNN Graph API
/// @{

/// oneDNN Graph namespace
namespace graph {

/// @cond DO_NOT_DOCUMENT_THIS

// Alias for common engine and stream API.
using engine = dnnl::engine;
using stream = dnnl::stream;
using fpmath_mode = dnnl::fpmath_mode;

/// @endcond

/// @addtogroup dnnl_graph_api_utils Utilities
/// Utility types and definitions
/// \ingroup dnnl_graph_api
/// @{

/// @cond DO_NOT_DOCUMENT_THIS

/// A class that provides the destructor for a oneDNN graph C API handle.
template <typename T>
struct graph_handle_traits : public dnnl::handle_traits<T> {};

template <>
struct graph_handle_traits<dnnl_graph_op_t> {
    static dnnl_status_t destructor(dnnl_graph_op_t p) {
        return dnnl_graph_op_destroy(p);
    }
};

template <>
struct graph_handle_traits<dnnl_graph_graph_t> {
    static dnnl_status_t destructor(dnnl_graph_graph_t p) {
        return dnnl_graph_graph_destroy(p);
    }
};

template <>
struct graph_handle_traits<dnnl_graph_tensor_t> {
    static dnnl_status_t destructor(dnnl_graph_tensor_t p) {
        return dnnl_graph_tensor_destroy(p);
    }
};

template <>
struct graph_handle_traits<dnnl_graph_partition_t> {
    static dnnl_status_t destructor(dnnl_graph_partition_t p) {
        return dnnl_graph_partition_destroy(p);
    }
};

template <>
struct graph_handle_traits<dnnl_graph_compiled_partition_t> {
    static dnnl_status_t destructor(dnnl_graph_compiled_partition_t p) {
        return dnnl_graph_compiled_partition_destroy(p);
    }
};

template <>
struct graph_handle_traits<dnnl_graph_allocator_t> {
    static dnnl_status_t destructor(dnnl_graph_allocator_t p) {
        return dnnl_graph_allocator_destroy(p);
    }
};

#define DNNL_GRAPH_HANDLE_ALIAS(type) \
    using type##_handle = dnnl::handle<dnnl_graph_##type##_t, \
            graph_handle_traits<dnnl_graph_##type##_t>>

DNNL_GRAPH_HANDLE_ALIAS(allocator);
DNNL_GRAPH_HANDLE_ALIAS(graph);
DNNL_GRAPH_HANDLE_ALIAS(op);
DNNL_GRAPH_HANDLE_ALIAS(tensor);
DNNL_GRAPH_HANDLE_ALIAS(compiled_partition);
DNNL_GRAPH_HANDLE_ALIAS(partition);

#undef DNNL_GRAPH_HANDLE_ALIAS

template <bool B>
using req = typename std::enable_if<B, bool>::type;

/// @endcond

/// @} dnnl_graph_api_utils

/// @addtogroup dnnl_graph_api_status Status
/// Definitions of status values returned by the library functions.
/// \ingroup dnnl_graph_api
/// @{

/// Status values returned by the library functions.
enum class status {
    /// The operation was successful
    success = dnnl_success,
    /// The operation failed due to an out-of-memory condition
    out_of_memory = dnnl_out_of_memory,
    /// The operation failed because of incorrect function arguments
    invalid_arguments = dnnl_invalid_arguments,
    /// The operation failed because the requested functionality is not
    /// implemented
    unimplemented = dnnl_unimplemented,
    /// The last available implementation has been reached
    last_impl_reached = dnnl_last_impl_reached,
    /// A primitive or an engine failed during execution
    runtime_error = dnnl_runtime_error,
    /// The queried element is not required for the given primitive
    not_required = dnnl_not_required,
    /// The graph is not legitimate
    invalid_graph = dnnl_invalid_graph,
    /// The operation is not legitimate according to the op schema
    invalid_graph_op = dnnl_invalid_graph_op,
    /// The shape cannot be inferred or compiled
    invalid_shape = dnnl_invalid_shape,
    /// The data type cannot be inferred or compiled
    invalid_data_type = dnnl_invalid_data_type,
};

/// @} dnnl_graph_api_status

/// @addtogroup dnnl_graph_api_allocator Allocator
///
/// Definitions of the allocator used to acquire memory resources during
/// partition compilation and execution. The SYCL allocator
/// (#dnnl::graph::sycl_interop::make_allocator) should be used for the SYCL
/// runtime and the host allocator for non-SYCL runtimes.
///
/// @{

/// Allocator
class allocator : public allocator_handle {
public:
    using allocator_handle::handle;

    /// Constructs an allocator according to the given function pointers
    ///
    /// @param host_malloc A pointer to a malloc function for the CPU
    /// @param host_free A pointer to a free function for the CPU
    allocator(dnnl_graph_host_allocate_f host_malloc,
            dnnl_graph_host_deallocate_f host_free) {
        dnnl_graph_allocator_t a = nullptr;
        error::wrap_c_api(
                dnnl_graph_allocator_create(&a, host_malloc, host_free),
                "could not create allocator for cpu");
        reset(a);
    }

    /// Default constructor
    allocator() {
        dnnl_graph_allocator_t a = nullptr;
        error::wrap_c_api(dnnl_graph_allocator_create(&a, nullptr, nullptr),
                "could not create allocator");
        reset(a);
    }
};
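
// A minimal usage sketch (illustrative, not part of the API): wrapping
// user-provided host allocation callbacks into an allocator. The names
// `my_malloc` and `my_free` are hypothetical; any functions matching the
// #dnnl_graph_host_allocate_f (size, alignment) and
// #dnnl_graph_host_deallocate_f (buffer pointer) signatures work, e.g.:
//
//     void *my_malloc(size_t size, size_t alignment) {
//         // aligned_alloc requires size to be a multiple of alignment.
//         size_t rounded = (size + alignment - 1) / alignment * alignment;
//         return aligned_alloc(alignment, rounded);
//     }
//     void my_free(void *buf) { free(buf); }
//
//     dnnl::graph::allocator alloc {my_malloc, my_free};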

/// @} dnnl_graph_api_allocator

/// @addtogroup dnnl_graph_api_engine Engine
/// @{

/// This API is a supplement to the existing oneDNN engine API.
inline engine make_engine_with_allocator(
        engine::kind kind, size_t index, const allocator &alloc) {
    dnnl_engine_t c_engine;
    error::wrap_c_api(
            dnnl_graph_make_engine_with_allocator(&c_engine,
                    static_cast<dnnl_engine_kind_t>(kind), index, alloc.get()),
            "could not make an engine with allocator");
    return engine(c_engine);
}
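
// Usage sketch (illustrative): creating a CPU engine that routes the
// library's internal allocations through the custom allocator above, e.g.:
//
//     dnnl::graph::allocator alloc {my_malloc, my_free};
//     dnnl::engine eng = dnnl::graph::make_engine_with_allocator(
//             dnnl::engine::kind::cpu, /*index=*/0, alloc);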

/// @} dnnl_graph_api_engine

/// @addtogroup dnnl_graph_api_logical_tensor Logical Tensor
///
/// A logical tensor describes the metadata of an input or output tensor: the
/// elements data type, number of dimensions, size of each dimension (shape),
/// layout, and the property of the tensor.
///
/// Each logical tensor has a unique ID. The library uses logical tensor IDs
/// to build up the connections between operations: the output of one
/// operation connects to the input of another when they share the same ID.
/// The metadata in a logical tensor may be enriched in the framework graph as
/// it progresses toward final execution. For example, the library doesn't
/// require detailed shape information at the operation and graph creation
/// stage, but the shape information of input logical tensors will be required
/// at the partition compilation stage. A logical tensor is not mutable. Users
/// must create a new logical tensor with the same ID to pass any new
/// additional information to the oneDNN Graph API. Note that the library also
/// uses unique IDs for operations. An ID should be unique among different
/// logical tensors, but a logical tensor and an operation can share the same
/// ID value.
///
/// @{
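
// A construction sketch (illustrative): two logical tensors that could serve
// as an op's input and weights. The IDs (0 and 1 here) are chosen by the user
// and must be unique among logical tensors, e.g.:
//
//     using lt = dnnl::graph::logical_tensor;
//     // 4D f32 input with a known shape and a strided (row-major) layout.
//     lt src {0, lt::data_type::f32, {8, 3, 224, 224},
//             lt::layout_type::strided};
//     // Constant weights; the property lets the library cache or
//     // pre-process them across iterations.
//     lt wei {1, lt::data_type::f32, {16, 3, 3, 3}, lt::layout_type::strided,
//             lt::property_type::constant};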

/// Logical tensor object
class logical_tensor {
    friend class op;
    friend class tensor;
    friend class partition;
    friend class compiled_partition;

    dnnl_graph_logical_tensor_t data;

public:
    /// Integer type for representing dimension sizes and indices.
    using dim = dnnl_dim_t;
    /// Vector of dimensions. Implementations are free to force a limit on the
    /// vector's length.
    using dims = std::vector<dim>;

    /// Data Type
    enum class data_type {
        undef = dnnl_data_type_undef,
        /// 16-bit/half-precision floating point.
        f16 = dnnl_f16,
        /// non-standard 16-bit (bfloat16 with a 7-bit mantissa) floating
        /// point.
        bf16 = dnnl_bf16,
        /// 32-bit/single-precision floating point.
        f32 = dnnl_f32,
        /// 32-bit signed integer.
        s32 = dnnl_s32,
        /// 8-bit signed integer.
        s8 = dnnl_s8,
        /// 8-bit unsigned integer.
        u8 = dnnl_u8,
        /// Boolean data type. Size is C++ implementation defined.
        boolean = dnnl_boolean,
        /// [OFP8 standard 8-bit
        /// floating-point](https://www.opencompute.org/documents/ocp-8-bit-floating-point-specification-ofp8-revision-1-0-2023-06-20-pdf)
        /// with a 5-bit exponent and a 2-bit mantissa.
        f8_e5m2 = dnnl_f8_e5m2,
        /// [OFP8 standard 8-bit
        /// floating-point](https://www.opencompute.org/documents/ocp-8-bit-floating-point-specification-ofp8-revision-1-0-2023-06-20-pdf)
        /// with a 4-bit exponent and a 3-bit mantissa.
        f8_e4m3 = dnnl_f8_e4m3,
        /// 4-bit signed integer.
        s4 = dnnl_s4,
        /// 4-bit unsigned integer.
        u4 = dnnl_u4,
    };

    /// Layout type
    enum class layout_type {
        /// Undefined layout type.
        undef = dnnl_graph_layout_type_undef,
        /// Any lets the library decide the layout for a tensor during
        /// partition compilation.
        any = dnnl_graph_layout_type_any,
        /// Strided means that the layout of a tensor is determined by the
        /// strides field in the logical tensor.
        strided = dnnl_graph_layout_type_strided,
        /// Opaque means that the layout of a tensor is library-specific.
        /// Usually, an opaque layout is generated by a partition which is
        /// compiled with layout type any.
        opaque = dnnl_graph_layout_type_opaque,
    };

    /// Tensor property
    enum class property_type {
        /// Undefined tensor property.
        undef = dnnl_graph_tensor_property_undef,
        /// Variable means the tensor may be changed during computation or
        /// between different iterations.
        variable = dnnl_graph_tensor_property_variable,
        /// Constant means the tensor will remain unchanged during computation
        /// and between different iterations. It's useful for the library to
        /// apply optimizations for constant tensors or cache constant tensors
        /// inside the library. For example, constant weight tensors in
        /// inference scenarios.
        constant = dnnl_graph_tensor_property_constant,
        /// Host scalar means the tensor will be a 0-D scalar tensor on the
        /// host. It should be used with a CPU engine when creating the
        /// tensor.
        host_scalar = dnnl_graph_tensor_property_host_scalar,
    };

    /// Default constructor.
    /// Constructs an empty object.
    logical_tensor() = default;

    /// Constructs a logical tensor object
    explicit logical_tensor(const dnnl_graph_logical_tensor_t &c_data)
        : data(c_data) {}

    /// Copy
    logical_tensor(const logical_tensor &other) = default;

    /// Assign
    logical_tensor &operator=(const logical_tensor &other) = default;

    /// Constructs a logical tensor object with ID, data type, ndims, layout
    /// type, and property type.
    ///
    /// @param tid Logical tensor ID.
    /// @param dtype Elements data type.
    /// @param ndims Number of dimensions. -1 means unknown (see
    ///     #DNNL_GRAPH_UNKNOWN_NDIMS) and 0 means a scalar tensor.
    /// @param ltype Layout type.
    /// @param ptype Property type.
    logical_tensor(size_t tid, data_type dtype, int32_t ndims,
            layout_type ltype, property_type ptype = property_type::undef) {
        dnnl_graph_logical_tensor_t val;
        error::wrap_c_api(
                dnnl_graph_logical_tensor_init(&val, tid, convert_to_c(dtype),
                        ndims, convert_to_c(ltype), convert_to_c(ptype)),
                "could not create logical_tensor with property");
        data = val;
    }

    /// Delegated constructor.
    ///
    /// @param tid Logical tensor ID.
    /// @param dtype Elements data type.
    /// @param ltype Layout type.
    logical_tensor(
            size_t tid, data_type dtype, layout_type ltype = layout_type::undef)
        : logical_tensor(tid, dtype, DNNL_GRAPH_UNKNOWN_NDIMS, ltype) {}

    /// Constructs a logical tensor object with basic information and detailed
    /// dims.
    ///
    /// @param tid Logical tensor ID.
    /// @param dtype Elements data type.
    /// @param adims Logical tensor dimensions. #DNNL_GRAPH_UNKNOWN_DIM means
    ///     the size of that dimension is unknown. 0 is used to define a
    ///     zero-dimension tensor.
    /// @param ltype Layout type. If it's strided, the strides field in the
    ///     output logical tensor will be deduced accordingly.
    /// @param ptype Property type.
    logical_tensor(size_t tid, data_type dtype, const dims &adims,
            layout_type ltype, property_type ptype = property_type::undef) {
        dnnl_graph_logical_tensor_t val;
        // if the dimension size equals 0, it's a scalar
        if (adims.empty())
            error::wrap_c_api(dnnl_graph_logical_tensor_init(&val, tid,
                                      convert_to_c(dtype), 0,
                                      convert_to_c(ltype), convert_to_c(ptype)),
                    "could not create logical_tensor with property");
        else
            error::wrap_c_api(
                    dnnl_graph_logical_tensor_init_with_dims(&val, tid,
                            convert_to_c(dtype),
                            static_cast<int32_t>(adims.size()), adims.data(),
                            convert_to_c(ltype), convert_to_c(ptype)),
                    "could not create logical_tensor with dims and property");
        data = val;
    }

    /// Constructs a logical tensor object with detailed dims and strides. The
    /// layout_type of the output logical tensor object will always be
    /// strided.
    ///
    /// @param tid Logical tensor ID.
    /// @param dtype Elements data type.
    /// @param adims Logical tensor dimensions. #DNNL_GRAPH_UNKNOWN_DIM means
    ///     the size of that dimension is unknown. 0 is used to define a
    ///     zero-dimension tensor.
    /// @param strides Logical tensor strides. #DNNL_GRAPH_UNKNOWN_DIM means
    ///     the stride of the dimension is unknown. The library currently
    ///     doesn't support other negative stride values.
    /// @param ptype Property type.
    logical_tensor(size_t tid, data_type dtype, const dims &adims,
            const dims &strides, property_type ptype = property_type::undef) {
        dnnl_graph_logical_tensor_t val;
        // TODO(lvtao): check the size of adims and strides.
        // They should be the same.
        error::wrap_c_api(
                dnnl_graph_logical_tensor_init_with_strides(&val, tid,
                        convert_to_c(dtype), static_cast<int32_t>(adims.size()),
                        adims.data(), strides.data(), convert_to_c(ptype)),
                "could not create logical_tensor with strides and property");
        data = val;
    }

    /// Constructs a logical tensor object with detailed dims and an opaque
    /// layout ID. The layout_type of the output logical tensor object will
    /// always be opaque.
    ///
    /// @param tid Logical tensor ID.
    /// @param dtype Elements data type.
    /// @param adims Logical tensor dimensions. #DNNL_GRAPH_UNKNOWN_DIM means
    ///     the size of that dimension is unknown. 0 is used to define a
    ///     zero-dimension tensor.
    /// @param lid Opaque layout ID.
    /// @param ptype Property type.
    logical_tensor(size_t tid, data_type dtype, const dims &adims, size_t lid,
            property_type ptype = property_type::undef) {
        dnnl_graph_logical_tensor_t val;

        if (adims.empty()) {
            error::wrap_c_api(dnnl_graph_logical_tensor_init(&val, tid,
                                      convert_to_c(dtype), 0,
                                      convert_to_c(layout_type::opaque),
                                      convert_to_c(ptype)),
                    "could not create logical_tensor");
        } else {
            error::wrap_c_api(
                    dnnl_graph_logical_tensor_init_with_dims(&val, tid,
                            convert_to_c(dtype),
                            static_cast<int32_t>(adims.size()), adims.data(),
                            convert_to_c(layout_type::opaque),
                            convert_to_c(ptype)),
                    "could not create logical_tensor with dims");
        }

        val.layout.layout_id = lid;
        data = val;
    }

    /// Returns dimensions of a logical tensor.
    ///
    /// @returns A vector describing the size of each dimension.
    dims get_dims() const {
        if (data.ndims < 0) {
            error::wrap_c_api(dnnl_invalid_arguments,
                    "cannot return dims when ndims < 0");
        }

        return {data.dims, data.dims + data.ndims};
    }

    /// Returns the unique ID of a logical tensor.
    ///
    /// @returns An integer value describing the ID.
    size_t get_id() const { return data.id; }

    /// Returns the data type of a logical tensor.
    ///
    /// @returns The data type.
    data_type get_data_type() const {
        return static_cast<data_type>(data.data_type);
    }

    /// Returns the property type of a logical tensor.
    ///
    /// @returns The property type.
    property_type get_property_type() const {
        return static_cast<property_type>(data.property);
    }

    /// Returns the layout type of a logical tensor.
    ///
    /// @returns The layout type.
    layout_type get_layout_type() const {
        return static_cast<layout_type>(data.layout_type);
    }

    /// Returns the layout ID of a logical tensor. The API should be called on
    /// a logical tensor with an opaque layout type. Otherwise, an exception
    /// will be raised.
    ///
    /// @returns Layout ID.
    size_t get_layout_id() const {
        if (get_layout_type() != layout_type::opaque) {
            error::wrap_c_api(
                    dnnl_invalid_arguments, "layout type should be opaque");
        }

        return data.layout.layout_id;
    }

    /// Returns the strides of a logical tensor. The API should be called on a
    /// logical tensor with a strided layout type. Otherwise, an exception
    /// will be raised.
    ///
    /// @returns A vector describing the stride size of each dimension.
    dims get_strides() const {
        if (get_layout_type() != layout_type::strided) {
            error::wrap_c_api(
                    dnnl_invalid_arguments, "layout type should be strided");
        }

        if (data.ndims < 0) {
            error::wrap_c_api(dnnl_invalid_arguments,
                    "cannot return strides when ndims < 0");
        }

        return {data.layout.strides, data.layout.strides + data.ndims};
    }

    /// Returns the memory size in bytes required by this logical tensor.
    ///
    /// @returns The memory size in bytes.
    size_t get_mem_size() const {
        size_t size = 0;
        error::wrap_c_api(dnnl_graph_logical_tensor_get_mem_size(&data, &size),
                "could not get memory size from the logical_tensor");
        return size;
    }

    /// Compares if two logical tensors are equal. Users can decide
    /// accordingly if layout reordering is needed for two logical tensors.
    /// The method will return true in the following two circumstances:
    ///
    /// 1. the two logical tensors are equal regarding each field in the
    ///    struct, e.g. id, ndims, dims, layout type, property, etc.
    /// 2. if all other fields are equal but the layout types in the two
    ///    logical tensors are different, the method will return true when the
    ///    underlying memory layout is the same. For example, if one logical
    ///    tensor has a strided layout type while the other has an opaque
    ///    layout type, but both layouts are NHWC underneath, the method will
    ///    still return true.
    ///
    /// @param lt The input logical tensor to be compared.
    /// @returns @c true if the two logical tensors are equal. @c false
    ///     otherwise.
    bool is_equal(const logical_tensor &lt) const {
        uint8_t equal = 0;
        error::wrap_c_api(
                dnnl_graph_logical_tensor_is_equal(&data, &lt.data, &equal),
                "could not compare between the two logical tensors");
        return equal != 0;
    }

private:
    static dnnl_data_type_t convert_to_c(data_type dtype) {
        return static_cast<dnnl_data_type_t>(dtype);
    }

    static dnnl_graph_layout_type_t convert_to_c(layout_type ltype) {
        return static_cast<dnnl_graph_layout_type_t>(ltype);
    }

    static dnnl_graph_tensor_property_t convert_to_c(property_type ptype) {
        return static_cast<dnnl_graph_tensor_property_t>(ptype);
    }
};

/// @} dnnl_graph_api_logical_tensor

/// @addtogroup dnnl_graph_api_tensor Tensor
///
/// Tensor is an abstraction for multi-dimensional input and output data
/// needed in the execution of a compiled partition. A tensor object
/// encapsulates a handle to a memory buffer allocated on a specific engine
/// and a logical tensor which describes the dimensions, elements data type,
/// and memory layout.
///
/// @{
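
// A construction sketch (illustrative): binding memory to a logical tensor.
// `eng` is an engine and `src` a logical tensor from the earlier sketches;
// `buf` is a hypothetical user-owned buffer of at least src.get_mem_size()
// bytes, e.g.:
//
//     // Library-allocated buffer (freed when the tensor is destroyed).
//     dnnl::graph::tensor t0 {src, eng};
//     // User-allocated buffer (ownership stays with the user).
//     dnnl::graph::tensor t1 {src, eng, buf};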

/// A tensor object
class tensor : public tensor_handle {
public:
    using tensor_handle::handle;

    /// Default constructor. Constructs an empty object.
    tensor() = default;

    /// Constructs a tensor object according to a given logical tensor, an
    /// engine, and a memory handle.
    ///
    /// @param lt The given logical tensor
    /// @param aengine Engine to store the data on.
    /// @param handle Handle of the memory buffer to use as an underlying
    ///     storage.
    ///     - A pointer to the user-allocated buffer. In this case the library
    ///       doesn't own the buffer.
    ///     - The DNNL_MEMORY_ALLOCATE special value. Instructs the library to
    ///       allocate the buffer for the tensor. In this case the library
    ///       owns the buffer.
    ///     - DNNL_MEMORY_NONE to create a tensor without an underlying
    ///       buffer.
    tensor(const logical_tensor &lt, const engine &aengine, void *handle) {
        dnnl_graph_tensor_t t = nullptr;
        error::wrap_c_api(
                dnnl_graph_tensor_create(&t, &(lt.data), aengine.get(), handle),
                "could not create tensor object with the logical_tensor, "
                "engine, and handle");
        reset(t);
    }

    /// Constructs a tensor object.
    /// The underlying buffer for the memory will be allocated by the library.
    ///
    /// @param lt The given logical tensor
    /// @param aengine Engine to store the data on.
    tensor(const logical_tensor &lt, const engine &aengine)
        : tensor(lt, aengine, DNNL_MEMORY_ALLOCATE) {}

    /// Creates a tensor object for a host-side scalar value. The data type
    /// contained in the logical tensor parameter will be used to interpret
    /// the scalar pointer. The property type in the logical tensor must be
    /// `host_scalar`.
    ///
    /// @param lt The logical tensor describing the host scalar
    /// @param scalar The pointer to the scalar value
    /// @returns Created tensor object
    static tensor make_scalar_tensor(const logical_tensor &lt, void *scalar) {
        dnnl_graph_tensor_t t = nullptr;
        error::wrap_c_api(
                dnnl_graph_tensor_create_scalar(&t, &(lt.data), scalar),
                "could not create a scalar tensor object");

        return tensor(t);
    }

    /// Returns the underlying memory buffer.
    ///
    /// On the CPU engine, or when using USM, this is a pointer to the
    /// allocated memory.
    void *get_data_handle() const {
        void *handle = nullptr;
        error::wrap_c_api(dnnl_graph_tensor_get_data_handle(get(), &handle),
                "could not get data handle from the tensor");
        return handle;
    }

    /// Sets the underlying memory handle.
    ///
    /// @param handle Memory handle.
    void set_data_handle(void *handle) {
        error::wrap_c_api(dnnl_graph_tensor_set_data_handle(get(), handle),
                "setting data handle to the tensor failed");
    }

    /// Returns the associated engine.
    ///
    /// @returns An engine object
    engine get_engine() const {
        dnnl_engine_t c_engine = nullptr;
        error::wrap_c_api(dnnl_graph_tensor_get_engine(get(), &c_engine),
                "could not get an engine from a tensor object");
        return engine(c_engine, true);
    }

    /// Returns the logical tensor of a tensor object.
    ///
    /// @returns A logical_tensor object.
    logical_tensor get_logical_tensor() const {
        dnnl_graph_logical_tensor_t lt;
        error::wrap_c_api(dnnl_graph_tensor_get_logical_tensor(get(), &lt),
                "could not get logical tensor from a tensor object");
        return logical_tensor(lt);
    }
};

/// @} dnnl_graph_api_tensor

/// @addtogroup dnnl_graph_api_compiled_partition Compiled Partition
///
/// A compiled partition represents the generated kernels specialized for a
/// partition on target hardware (an engine), with the input and output
/// information specified by the logical tensors.
///
/// @{

/// A compiled partition object.
class compiled_partition : public compiled_partition_handle {
public:
    /// Default constructor. Constructs an empty object.
    compiled_partition() = default;

    /// Constructs a compiled partition object
    compiled_partition(dnnl_graph_compiled_partition_t compiled_partition) {
        reset(compiled_partition, false);
    }

    /// Queries an input or output logical tensor according to the tensor ID.
    /// If the tensor ID doesn't belong to any input or output of the compiled
    /// partition, an exception will be raised by the API.
    ///
    /// @param tid The unique ID of the required tensor.
    /// @returns The logical tensor.
    logical_tensor query_logical_tensor(size_t tid) const {
        dnnl_graph_logical_tensor_t lt;
        error::wrap_c_api(dnnl_graph_compiled_partition_query_logical_tensor(
                                  get(), tid, &lt),
                "query logical tensor from compiled_partition failed");
        return logical_tensor {lt};
    }

    /// Returns the hint of in-place pairs from a compiled partition. It
    /// indicates that an input and an output of the partition can share the
    /// same memory buffer for computation. In-place computation helps to
    /// reduce the memory footprint and improves cache locality. But since the
    /// library may not have a global view of the user's application, it's
    /// possible that the input tensor is used at other places in the user's
    /// computation graph. In this case, the user should take the in-place
    /// pair as a hint and pass a different memory buffer for the output
    /// tensor to avoid overwriting the input memory buffer, which would
    /// probably cause unexpected incorrect results.
    ///
    /// @returns A list of pairs of input and output IDs.
    std::vector<std::pair<size_t, size_t>> get_inplace_ports() const {
        size_t num = 0;
        const dnnl_graph_inplace_pair_t *inplace_pairs;

        error::wrap_c_api(dnnl_graph_compiled_partition_get_inplace_ports(
                                  get(), &num, &inplace_pairs),
                "could not get the in-place pairs from a compiled partition");
        if (num == 0) return {};

        std::vector<std::pair<size_t, size_t>> inplace_options;
        inplace_options.reserve(num);
        for (size_t i = 0; i < num; ++i) {
            const dnnl_graph_inplace_pair_t *inplace_pair = inplace_pairs + i;
            inplace_options.emplace_back(
                    inplace_pair->input_id, inplace_pair->output_id);
        }
        return inplace_options;
    }

    /// Executes a compiled partition.
    ///
    /// @param astream Stream object to run over.
    /// @param inputs A list of input tensors.
    /// @param outputs A list of output tensors.
    void execute(stream &astream, const std::vector<tensor> &inputs,
            const std::vector<tensor> &outputs) const {
        std::vector<const_dnnl_graph_tensor_t> c_inputs;
        c_inputs.reserve(inputs.size());
        for (auto &in : inputs) {
            c_inputs.push_back(in.get());
        }
        std::vector<const_dnnl_graph_tensor_t> c_outputs;
        c_outputs.reserve(outputs.size());
        for (auto &out : outputs) {
            c_outputs.push_back(out.get());
        }

        error::wrap_c_api(
                dnnl_graph_compiled_partition_execute(get(), astream.get(),
                        c_inputs.size(), c_inputs.data(), c_outputs.size(),
                        c_outputs.data()),
                "could not execute the compiled_partition");
    }
};
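
// An execution sketch (illustrative): querying compiled shapes/layouts,
// honoring in-place hints, and executing. `cp` is a compiled partition,
// `strm` a dnnl::stream on the same engine, and `t1` an input tensor; all are
// assumed from context, e.g.:
//
//     auto out_lt = cp.query_logical_tensor(/*tid=*/2);
//     dnnl::graph::tensor dst {out_lt, eng}; // library-allocated output
//
//     // If (input_id, output_id) appears in the in-place hints and the
//     // input buffer is not used elsewhere, the output may reuse it.
//     for (const auto &pair : cp.get_inplace_ports()) {
//         // pair.first is the input ID, pair.second the output ID.
//     }
//
//     cp.execute(strm, {t1}, {dst});
//     strm.wait();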

/// @} dnnl_graph_api_compiled_partition

/// @addtogroup dnnl_graph_api_op Op
///
/// An op is an abstraction of computation logic for deep neural network
/// operations. An op object encapsulates an operation kind which describes
/// the computation logic, a unique ID which differentiates operations with
/// the same kind, and logical tensors which describe the input and output of
/// the operation and its connections to other operations in the graph.
///
/// @{

/// An op object.
class op : public op_handle {
public:
    /// Kinds of operations
    enum class kind {
        Abs = dnnl_graph_op_abs,
        AbsBackward = dnnl_graph_op_abs_backward,
        Add = dnnl_graph_op_add,
        AvgPool = dnnl_graph_op_avg_pool,
        AvgPoolBackward = dnnl_graph_op_avg_pool_backward,
        BatchNormForwardTraining = dnnl_graph_op_batch_norm_forward_training,
        BatchNormInference = dnnl_graph_op_batch_norm_inference,
        BatchNormTrainingBackward = dnnl_graph_op_batch_norm_backward,
        BiasAdd = dnnl_graph_op_bias_add,
        BiasAddBackward = dnnl_graph_op_bias_add_backward,
        Clamp = dnnl_graph_op_clamp,
        ClampBackward = dnnl_graph_op_clamp_backward,
        Concat = dnnl_graph_op_concat,
        Convolution = dnnl_graph_op_convolution,
        ConvolutionBackwardData = dnnl_graph_op_convolution_backward_data,
        ConvolutionBackwardWeights = dnnl_graph_op_convolution_backward_weights,
        ConvTranspose = dnnl_graph_op_conv_transpose,
        ConvTransposeBackwardData = dnnl_graph_op_conv_transpose_backward_data,
        ConvTransposeBackwardWeights
                = dnnl_graph_op_conv_transpose_backward_weights,
        Dequantize = dnnl_graph_op_dequantize,
        Divide = dnnl_graph_op_divide,
        DynamicDequantize = dnnl_graph_op_dynamic_dequantize,
        DynamicQuantize = dnnl_graph_op_dynamic_quantize,
        Elu = dnnl_graph_op_elu,
        EluBackward = dnnl_graph_op_elu_backward,
        End = dnnl_graph_op_end,
        Exp = dnnl_graph_op_exp,
        GELU = dnnl_graph_op_gelu,
        GELUBackward = dnnl_graph_op_gelu_backward,
        GroupNorm = dnnl_graph_op_group_norm,
        HardSigmoid = dnnl_graph_op_hard_sigmoid,
        HardSigmoidBackward = dnnl_graph_op_hard_sigmoid_backward,
        HardSwish = dnnl_graph_op_hard_swish,
        HardSwishBackward = dnnl_graph_op_hard_swish_backward,
        Interpolate = dnnl_graph_op_interpolate,
        InterpolateBackward = dnnl_graph_op_interpolate_backward,
        LayerNorm = dnnl_graph_op_layer_norm,
        LayerNormBackward = dnnl_graph_op_layer_norm_backward,
        LeakyReLU = dnnl_graph_op_leaky_relu,
        Log = dnnl_graph_op_log,
        LogSoftmax = dnnl_graph_op_log_softmax,
        LogSoftmaxBackward = dnnl_graph_op_log_softmax_backward,
        MatMul = dnnl_graph_op_matmul,
        Maximum = dnnl_graph_op_maximum,
        MaxPool = dnnl_graph_op_max_pool,
        MaxPoolBackward = dnnl_graph_op_max_pool_backward,
        Minimum = dnnl_graph_op_minimum,
        Mish = dnnl_graph_op_mish,
        MishBackward = dnnl_graph_op_mish_backward,
        Multiply = dnnl_graph_op_multiply,
        Pow = dnnl_graph_op_pow,
        PReLU = dnnl_graph_op_prelu,
        PReLUBackward = dnnl_graph_op_prelu_backward,
        Quantize = dnnl_graph_op_quantize,
        Reciprocal = dnnl_graph_op_reciprocal,
        ReduceL1 = dnnl_graph_op_reduce_l1,
        ReduceL2 = dnnl_graph_op_reduce_l2,
        ReduceMax = dnnl_graph_op_reduce_max,
        ReduceMean = dnnl_graph_op_reduce_mean,
        ReduceMin = dnnl_graph_op_reduce_min,
        ReduceProd = dnnl_graph_op_reduce_prod,
        ReduceSum = dnnl_graph_op_reduce_sum,
        ReLU = dnnl_graph_op_relu,
        ReLUBackward = dnnl_graph_op_relu_backward,
        Reorder = dnnl_graph_op_reorder,
        Round = dnnl_graph_op_round,
        Select = dnnl_graph_op_select,
        Sigmoid = dnnl_graph_op_sigmoid,
        SigmoidBackward = dnnl_graph_op_sigmoid_backward,
        SoftMax = dnnl_graph_op_softmax,
        SoftMaxBackward = dnnl_graph_op_softmax_backward,
        SoftPlus = dnnl_graph_op_softplus,
        SoftPlusBackward = dnnl_graph_op_softplus_backward,
        Sqrt = dnnl_graph_op_sqrt,
        SqrtBackward = dnnl_graph_op_sqrt_backward,
        Square = dnnl_graph_op_square,
        SquaredDifference = dnnl_graph_op_squared_difference,
        StaticReshape = dnnl_graph_op_static_reshape,
        StaticTranspose = dnnl_graph_op_static_transpose,
        Subtract = dnnl_graph_op_subtract,
        Tanh = dnnl_graph_op_tanh,
        TanhBackward = dnnl_graph_op_tanh_backward,
        TypeCast = dnnl_graph_op_type_cast,
        Wildcard = dnnl_graph_op_wildcard,
        GenIndex = dnnl_graph_op_gen_index,
        GreaterEqual = dnnl_graph_op_greater_equal,
        // Sentinel
        LastSymbol = dnnl_graph_op_last_symbol,
    };

    /// Attributes of operations. Different operations support different
    /// attributes. Check the documentation of each operation for the
    /// attributes it supports and their potential values. A missing required
    /// attribute or an illegal attribute value may lead to a failure when
    /// adding the operation to a graph.
    enum class attr {
        /// Undefined op attribute.
        undef = dnnl_graph_op_attr_undef,

        // float32 attributes. The value of these attributes can be any single
        // float32 number.

        /// Specifies an alpha attribute to an op.
        alpha = dnnl_graph_op_attr_alpha,
        /// Specifies a beta attribute to an op.
        beta = dnnl_graph_op_attr_beta,
        /// Specifies an epsilon attribute to an op.
        epsilon = dnnl_graph_op_attr_epsilon,
        /// Specifies a max attribute to an op.
        max = dnnl_graph_op_attr_max,
        /// Specifies a min attribute to an op.
        min = dnnl_graph_op_attr_min,
        /// Specifies a momentum attribute to an op.
        momentum = dnnl_graph_op_attr_momentum,

        // float32 vector attributes. The value of these attributes can be a
        // vector of float32 numbers.

        /// Specifies a scales attribute to an op.
        scales = dnnl_graph_op_attr_scales,

        // int64_t attributes. The value of these attributes can be any single
        // int64 number.

        /// Specifies an axis attribute to an op.
        axis = dnnl_graph_op_attr_axis,
        /// Specifies a begin_norm_axis attribute to an op.
        begin_norm_axis = dnnl_graph_op_attr_begin_norm_axis,
        /// Specifies a groups attribute to an op.
        groups = dnnl_graph_op_attr_groups,

        // int64_t vector attributes. The value of these attributes can be a
        // vector of int64 numbers.

        /// Specifies an axes attribute to an op.
        axes = dnnl_graph_op_attr_axes,
        /// Specifies a dilations attribute to an op.
        dilations = dnnl_graph_op_attr_dilations,
        /// Specifies a dst_shape attribute to an op.
        dst_shape = dnnl_graph_op_attr_dst_shape,
        /// Specifies a kernel attribute to an op.
        kernel = dnnl_graph_op_attr_kernel,
        /// Specifies an order attribute to an op.
        order = dnnl_graph_op_attr_order,
        /// Specifies an output_padding attribute to an op.
        output_padding = dnnl_graph_op_attr_output_padding,
        /// Specifies a pads_begin attribute to an op.
        pads_begin = dnnl_graph_op_attr_pads_begin,
        /// Specifies a pads_end attribute to an op.
        pads_end = dnnl_graph_op_attr_pads_end,
        /// Specifies a shape attribute to an op.
        shape = dnnl_graph_op_attr_shape,
        /// Specifies a sizes attribute to an op.
        sizes = dnnl_graph_op_attr_sizes,
        /// Specifies an src_shape attribute to an op.
        src_shape = dnnl_graph_op_attr_src_shape,
        /// Specifies a strides attribute to an op.
        strides = dnnl_graph_op_attr_strides,
        /// Specifies a weights_shape attribute to an op.
        weights_shape = dnnl_graph_op_attr_weights_shape,
        /// Specifies a zps attribute to an op.
        zps = dnnl_graph_op_attr_zps,
        /// Specifies the group shape of an op. The size of the vector should
        /// match that of the input. For the dimensions where grouped
        /// quantization occurs, the values should correspond to the group
        /// size, which indicates the number of elements that will share the
        /// same scaling factor.
        group_shape = dnnl_graph_op_attr_group_shape,

        // bool attributes. The value of these attributes can be any single
        // bool value.

        /// Specifies an exclude_pad attribute to an op.
        exclude_pad = dnnl_graph_op_attr_exclude_pad,
        /// Specifies a keep_dims attribute to an op.
        keep_dims = dnnl_graph_op_attr_keep_dims,
        /// Specifies a keep_stats attribute to an op.
        keep_stats = dnnl_graph_op_attr_keep_stats,
        /// Specifies a per_channel_broadcast attribute to an op.
        per_channel_broadcast = dnnl_graph_op_attr_per_channel_broadcast,
        /// Specifies a special_zero attribute to an op.
        special_zero = dnnl_graph_op_attr_special_zero,
        /// Specifies a transpose_a attribute to an op.
        transpose_a = dnnl_graph_op_attr_transpose_a,
        /// Specifies a transpose_b attribute to an op.
        transpose_b = dnnl_graph_op_attr_transpose_b,
        /// Specifies a use_affine attribute to an op.
        use_affine = dnnl_graph_op_attr_use_affine,
        /// Specifies a use_dst attribute to an op.
        use_dst = dnnl_graph_op_attr_use_dst,

        // string attributes. The value of these attributes can be a string.

        /// Specifies an auto_broadcast attribute to an op. The value can be
        /// "none" or "numpy".
        auto_broadcast = dnnl_graph_op_attr_auto_broadcast,
        /// Specifies an auto_pad attribute to an op. The value can be "none",
        /// "same_upper", "same_lower", or "valid".
        auto_pad = dnnl_graph_op_attr_auto_pad,
        /// Specifies a coordinate_transformation_mode attribute to an op. The
        /// value can be "half_pixel" or "align_corners". The attribute is
        /// defined for Interpolate operations.
        coordinate_transformation_mode
                = dnnl_graph_op_attr_coordinate_transformation_mode,
        /// Specifies a data_format of an op. The value can be "NCX" or "NXC".
        data_format = dnnl_graph_op_attr_data_format,
        /// Specifies a mode attribute of an op.
        /// Interpolate: "nearest", "linear", "bilinear", or "trilinear".
        /// SoftMax: "none", "inf_as_zero".
        /// GELU/GELUBackward: "gelu_erf", "gelu_tanh".
        mode = dnnl_graph_op_attr_mode,
        /// Specifies a qtype attribute to an op. The value can be
        /// "per_channel" or "per_tensor". The attribute is defined for
        /// quantization operations.
        qtype = dnnl_graph_op_attr_qtype,
        /// Specifies a rounding_type attribute to an op. The value can be
        /// "ceil" or "floor".
        rounding_type = dnnl_graph_op_attr_rounding_type,
        /// Specifies a weights_format of an op. The value can be "OIX",
        /// "XIO", "IOX", or "XOI". Different operations may support different
        /// values.
        weights_format = dnnl_graph_op_attr_weights_format,
        /// Specifies an accumulation_mode attribute to an op. The value can
        /// be "strict", "relaxed", "any", "f32", "s32", or "f16".
        accumulation_mode = dnnl_graph_op_attr_accumulation_mode,

        /// Specifies the end of all above external attributes for checking.
        end = dnnl_graph_op_attr_end,
    };

    /// Constructs an op object with a unique ID, an operation kind, and a
    /// name string.
    ///
    /// @param id The unique ID of the op.
    /// @param akind The op kind specifies which computation is represented by
    ///     the op, such as Convolution or ReLU.
    /// @param verbose_name The string added as the op name.
    op(size_t id, kind akind, const std::string &verbose_name = "") {
        dnnl_graph_op_t op = nullptr;
        error::wrap_c_api(dnnl_graph_op_create(&op, id, convert_to_c(akind),
                                  verbose_name.c_str()),
                "could not create op with id and op kind");
        reset(op);
    }

    /// Constructs an op object with a unique ID, an operation kind, and
    /// input/output logical tensors.
    ///
    /// @param id The unique ID of this op.
    /// @param akind The op kind specifies which computation is represented by
    ///     this op, such as Convolution or ReLU.
    /// @param inputs Input logical tensors to be bound to this op.
    /// @param outputs Output logical tensors to be bound to this op.
    /// @param verbose_name The string added as the op name.
    op(size_t id, kind akind, const std::vector<logical_tensor> &inputs,
            const std::vector<logical_tensor> &outputs,
            const std::string &verbose_name = "")
        : op(id, akind, verbose_name) {
        for (const auto &input : inputs) {
            error::wrap_c_api(dnnl_graph_op_add_input(get(), &(input.data)),
                    "adding input to the op failed");
        }
        for (const auto &output : outputs) {
            error::wrap_c_api(dnnl_graph_op_add_output(get(), &(output.data)),
                    "adding output to the op failed");
        }
    }

    /// Adds an input logical tensor to the op.
    ///
    /// @param t Input logical tensor.
    void add_input(const logical_tensor &t) {
        error::wrap_c_api(dnnl_graph_op_add_input(get(), &(t.data)),
                "adding input to the op failed");
    }

    /// Adds a vector of input logical tensors to the op.
    ///
    /// @param ts The list of input logical tensors.
    void add_inputs(const std::vector<logical_tensor> &ts) {
        for (const auto &t : ts) {
            error::wrap_c_api(dnnl_graph_op_add_input(get(), &(t.data)),
                    "adding input to the op failed");
        }
    }

    /// Adds an output logical tensor to the op.
    ///
    /// @param t Output logical tensor.
    void add_output(const logical_tensor &t) {
        error::wrap_c_api(dnnl_graph_op_add_output(get(), &(t.data)),
                "adding output to the op failed");
    }

    /// Adds a vector of output logical tensors to the op.
    ///
    /// @param ts The list of output logical tensors.
    void add_outputs(const std::vector<logical_tensor> &ts) {
        for (const auto &t : ts) {
            error::wrap_c_api(dnnl_graph_op_add_output(get(), &(t.data)),
                    "adding output to the op failed");
        }
    }

    /// Sets the attribute according to the name and type (int64_t).
    ///
    /// @tparam Type_i Attribute's type.
    /// @param name Attribute's name.
    /// @param value The attribute's value.
    /// @returns The op itself.
    template <typename Type_i, req<std::is_same<Type_i, int64_t>::value> = true>
    op &set_attr(attr name, const Type_i &value) {
        dnnl_graph_op_attr_t attr = convert_to_c(name);
        error::wrap_c_api(dnnl_graph_op_set_attr_s64(get(), attr, &value, 1),
                "could not set attribute to the op");
        return *this;
    }

    /// Sets the attribute according to the name and type (float).
    ///
    /// @tparam Type_f Attribute's type.
    /// @param name Attribute's name.
    /// @param value The attribute's value.
    /// @returns The op itself.
    template <typename Type_f, req<std::is_same<Type_f, float>::value> = true>
    op &set_attr(attr name, const Type_f &value) {
        dnnl_graph_op_attr_t attr = convert_to_c(name);
        error::wrap_c_api(dnnl_graph_op_set_attr_f32(get(), attr, &value, 1),
                "could not set attribute to the op");
        return *this;
    }

    /// Sets the attribute according to the name and type (bool).
    ///
    /// @tparam Type_b Attribute's type.
    /// @param name Attribute's name.
    /// @param value The attribute's value.
    /// @returns The op itself.
    template <typename Type_b, req<std::is_same<Type_b, bool>::value> = true>
    op &set_attr(attr name, const Type_b &value) {
        dnnl_graph_op_attr_t attr = convert_to_c(name);
        const uint8_t val = value;
        error::wrap_c_api(dnnl_graph_op_set_attr_bool(get(), attr, &val, 1),
                "could not set attribute to the op");
        return *this;
    }

    /// Sets the attribute according to the name and type (string).
    ///
    /// @tparam Type_s Attribute's type.
    /// @param name Attribute's name.
    /// @param value The attribute's value.
    /// @returns The op itself.
    template <typename Type_s,
            req<std::is_same<Type_s, std::string>::value> = true>
    op &set_attr(attr name, const Type_s &value) {
        dnnl_graph_op_attr_t attr = convert_to_c(name);
        error::wrap_c_api(dnnl_graph_op_set_attr_str(
                                  get(), attr, value.c_str(), value.size()),
                "could not set attribute to the op");
        return *this;
    }

    /// Sets the attribute according to the name and type
    /// (std::vector<int64_t>).
    ///
    /// @tparam Type_is Attribute's type.
    /// @param name Attribute's name.
    /// @param value The attribute's value.
    /// @returns The op itself.
    template <typename Type_is,
            req<std::is_same<Type_is, std::vector<int64_t>>::value> = true>
    op &set_attr(attr name, const Type_is &value) {
        dnnl_graph_op_attr_t attr = convert_to_c(name);
        error::wrap_c_api(dnnl_graph_op_set_attr_s64(
                                  get(), attr, value.data(), value.size()),
                "could not set attribute to the op");
        return *this;
    }

    /// Sets the attribute according to the name and type
    /// (std::vector<float>).
    ///
    /// @tparam Type_fs Attribute's type.
    /// @param name Attribute's name.
    /// @param value The attribute's value.
    /// @returns The op itself.
    template <typename Type_fs,
            req<std::is_same<Type_fs, std::vector<float>>::value> = true>
    op &set_attr(attr name, const Type_fs &value) {
        dnnl_graph_op_attr_t attr = convert_to_c(name);
        error::wrap_c_api(dnnl_graph_op_set_attr_f32(
                                  get(), attr, value.data(), value.size()),
                "could not set attribute to the op");
        return *this;
    }

private:
    dnnl_graph_op_kind_t convert_to_c(kind akind) {
        return static_cast<dnnl_graph_op_kind_t>(akind);
    }

    dnnl_graph_op_attr_t convert_to_c(attr aattr) {
        return static_cast<dnnl_graph_op_attr_t>(aattr);
    }
};
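
// A construction sketch (illustrative): a MatMul op bound to fresh logical
// tensors, with the output shape left for the library to infer. IDs and names
// here are arbitrary examples, e.g.:
//
//     using lt = dnnl::graph::logical_tensor;
//     lt a {0, lt::data_type::f32, {64, 128}, lt::layout_type::strided};
//     lt b {1, lt::data_type::f32, {256, 128}, lt::layout_type::strided};
//     // Output shape is left unknown; partition compilation will deduce it.
//     lt c {2, lt::data_type::f32, lt::layout_type::strided};
//
//     dnnl::graph::op mm {0, dnnl::graph::op::kind::MatMul, {a, b}, {c},
//             "mm"};
//     // b is 256 x 128, so request c = a * b^T. Note that the op ID 0 may
//     // legally coincide with the logical tensor ID 0.
//     mm.set_attr<bool>(dnnl::graph::op::attr::transpose_b, true);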

/// @} dnnl_graph_api_op

/// @addtogroup dnnl_graph_api_partition Partition
///
/// A partition represents a collection of operations and their input and
/// output logical tensors identified by the library as the basic unit for
/// compilation and execution.
///
/// @{

/// A partition object.
class partition : public partition_handle {
public:
    /// Policy specifications for partitioning.
    enum class policy {
        /// Fusion policy returns partitions with typical post-op fusions,
        /// e.g. Convolution + ReLU or other element-wise operations, or a
        /// chain of post-ops.
        fusion = dnnl_graph_partition_policy_fusion,
        /// Debug policy doesn't apply any fusions. It returns partitions with
        /// a single operation in each partition. The policy is useful when
        /// users notice any bug or correctness issue in the fusion policy.
        debug = dnnl_graph_partition_policy_debug,
    };

    partition() = default;

    /// Constructs a partition object
    ///
    /// @param p A raw pointer to the C API handle
    partition(dnnl_graph_partition_t p) { reset(p, false); }

    /// Creates a new partition with a given operator and engine kind. The API
    /// is used to create a partition from an operation directly, without
    /// creating the graph and calling `get_partitions()`. The output
    /// partition contains only one operation.
    ///
    /// @param aop An operation used to create the partition.
    /// @param ekind Engine kind.
    partition(const op &aop, engine::kind ekind) {
        dnnl_graph_partition_t p = nullptr;
        error::wrap_c_api(dnnl_graph_partition_create_with_op(&p, aop.get(),
                                  static_cast<dnnl_engine_kind_t>(ekind)),
                "could not create a partition with the op and engine kind");
        reset(p);
    }

    /// Returns the number of operations contained in the partition.
    ///
    /// @returns Number of operations.
    size_t get_ops_num() const {
        size_t num {0};
        error::wrap_c_api(dnnl_graph_partition_get_op_num(get(), &num),
                "could not get number of ops from the partition");
        return num;
    }

    /// Returns all operation IDs contained in the partition.
    ///
    /// @returns A vector of operation IDs.
    std::vector<size_t> get_ops() const {
        auto num = get_ops_num();
        std::vector<size_t> ops(num);

        error::wrap_c_api(dnnl_graph_partition_get_ops(get(), num, ops.data()),
                "could not get op ids from the partition");
        return ops;
    }

    /// Returns the unique ID of the partition. The partition ID is generated
    /// by the library internally. The ID can be used for debugging or verbose
    /// output.
    ///
    /// @returns ID of the partition.
    size_t get_id() const {
        size_t id {};
        error::wrap_c_api(dnnl_graph_partition_get_id(get(), &id),
                "could not get id of the partition");
        return id;
    }

    /// Compiles a partition with given input and output logical tensors. The
    /// output logical tensors can contain unknown dimensions; in this case,
    /// the compilation will deduce the output shapes according to the input
    /// shapes. The output logical tensors can also have layout type `any`;
    /// the compilation will then choose the optimal layout for the output
    /// tensors. The optimal layout will be represented as an opaque layout ID
    /// saved in the output logical tensor.
    ///
    /// @param inputs A list of input logical tensors.
    /// @param outputs A list of output logical tensors.
    /// @param e The engine used to compile the partition.
    /// @returns A compiled partition.
    compiled_partition compile(const std::vector<logical_tensor> &inputs,
            const std::vector<logical_tensor> &outputs, const engine &e) const {
        if (!is_supported()) {
            error::wrap_c_api(dnnl_invalid_arguments,
                    "could not compile an unsupported partition");
        }

        return compile_(inputs, outputs, e);
    }

    /// Returns the supporting status of a partition. Some operations may not
    /// be supported by the library under certain circumstances. During the
    /// partitioning stage, unsupported partitions will be returned to users,
    /// with each containing an unsupported operation. Users should check the
    /// supporting status of a partition before transforming the computation
    /// graph or compiling the partition.
    ///
    /// @returns @c true if this partition is supported or @c false if this
    ///     partition isn't supported by the library
    bool is_supported() const {
        uint8_t supported {0};
        error::wrap_c_api(dnnl_graph_partition_is_supported(get(), &supported),
                "could not get supporting status of the partition");
        return supported != 0;
    }

    /// Returns a list of input logical tensors from the partition.
    ///
    /// @returns A list of input logical tensors.
    std::vector<logical_tensor> get_input_ports() const {
        size_t num = 0;
        error::wrap_c_api(dnnl_graph_partition_get_input_ports_num(get(), &num),
                "could not get number of inputs of the partition");
        if (num == 0) return {};

        std::vector<dnnl_graph_logical_tensor_t> c_inputs(num);
        error::wrap_c_api(dnnl_graph_partition_get_input_ports(
                                  get(), num, c_inputs.data()),
                "could not get input logical tensors of the partition");

        std::vector<logical_tensor> inputs;
        inputs.reserve(num);
        for (auto &c_lt : c_inputs)
            inputs.emplace_back(c_lt);
        return inputs;
    }

    /// Returns a list of output logical tensors from the partition.
    ///
    /// @returns A list of output logical tensors.
    std::vector<logical_tensor> get_output_ports() const {
        size_t num = 0;
        error::wrap_c_api(
                dnnl_graph_partition_get_output_ports_num(get(), &num),
                "cannot get number of outputs of the partition");
        if (num == 0) return {};

        std::vector<dnnl_graph_logical_tensor_t> c_outputs(num);
        error::wrap_c_api(dnnl_graph_partition_get_output_ports(
                                  get(), num, c_outputs.data()),
                "could not get output logical tensors of the partition");

        std::vector<logical_tensor> outputs;
        outputs.reserve(num);
        for (auto &c_lt : c_outputs)
            outputs.emplace_back(c_lt);
        return outputs;
    }

    /// Returns the engine kind of the partition
    ///
    /// @returns The engine kind
    engine::kind get_engine_kind() const {
        dnnl_engine_kind_t akind;
        error::wrap_c_api(dnnl_graph_partition_get_engine_kind(get(), &akind),
                "cannot get the engine kind from the partition");

        return static_cast<engine::kind>(akind);
    }

private:
    compiled_partition compile_(const std::vector<logical_tensor> &inputs,
            const std::vector<logical_tensor> &outputs, const engine &e) const {
        std::vector<const dnnl_graph_logical_tensor_t *> c_inputs;
        std::vector<const dnnl_graph_logical_tensor_t *> c_outputs;

        c_inputs.reserve(inputs.size());
        for (const auto &in : inputs) {
            c_inputs.push_back(&(in.data));
        }

        c_outputs.reserve(outputs.size());
        for (const auto &out : outputs) {
            c_outputs.push_back(&(out.data));
        }

        dnnl_graph_compiled_partition_t cpartitions = nullptr;
        error::wrap_c_api(
                dnnl_graph_compiled_partition_create(&cpartitions, get()),
                "could not create compiled_partition");
        error::wrap_c_api(dnnl_graph_partition_compile(get(), cpartitions,
                                  c_inputs.size(), c_inputs.data(),
                                  c_outputs.size(), c_outputs.data(), e.get()),
                "partition compile failed");

        return compiled_partition(cpartitions);
    }
};
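
// A compilation sketch (illustrative): compiling a partition with an `any`
// output layout and checking whether a reorder is needed. `part`, `eng`, and
// the logical tensors `a`/`b` are assumed from the earlier sketches, e.g.:
//
//     using lt = dnnl::graph::logical_tensor;
//     // Let the library pick the optimal output layout.
//     lt c_any {2, lt::data_type::f32, {64, 256}, lt::layout_type::any};
//
//     auto cp = part.compile({a, b}, {c_any}, eng);
//     lt c_opt = cp.query_logical_tensor(2);
//     // Compare c_opt against the layout the consumer expects (e.g. with
//     // is_equal()); if they differ, insert a Reorder before consuming the
//     // output elsewhere.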
|
|
|
|
/// @} dnnl_graph_api_partition
|
|
|
|
/// @addtogroup dnnl_graph_api_graph Graph
|
|
///
|
|
/// Graph represents a computational DAG with a set of operations.
|
|
/// #dnnl::graph::graph::add_op() adds an operation and its input and output
|
|
/// logical tensors into a graph. The library accumulates the operations and
|
|
/// logical tensors and constructs and validates the graph as an internal state.
|
|
/// A graph object is associated to a specific engine kind. The partitions
|
|
/// returned from the graph will inherit the engine kind of the graph.
|
|
///
|
|
/// @{
|
|
|
|
/// A graph object.
|
|
class graph : public graph_handle {
|
|
public:
|
|
/// Constructs a graph with an engine kind.
|
|
///
|
|
/// @param engine_kind Engine kind.
|
|
graph(engine::kind engine_kind) {
|
|
dnnl_graph_graph_t g = nullptr;
|
|
error::wrap_c_api(
|
|
dnnl_graph_graph_create(&g, convert_to_c(engine_kind)),
|
|
"could not create graph with engine kind");
|
|
reset(g);
|
|
}
|
|
|
|
/// Creates a new empty graph with an engine kind and a floating-point math
|
|
/// mode. All partitions returned from the graph will inherit the engine
|
|
/// kind and floating-point math mode.
|
|
///
|
|
/// Setting the floating-point math mode enables automatic down-conversion
|
|
/// of inputs for the given graph, promoting speedup by using
|
|
/// lower-precision data types when available.
|
|
///
|
|
/// @param engine_kind Engine kind.
|
|
/// @param mode Floating-point math mode.
|
|
graph(engine::kind engine_kind, fpmath_mode mode) {
|
|
dnnl_graph_graph_t g = nullptr;
|
|
error::wrap_c_api(
|
|
dnnl_graph_graph_create_with_fpmath_mode(
|
|
&g, convert_to_c(engine_kind), convert_to_c(mode)),
|
|
"could not create graph with engine kind and math mode");
|
|
reset(g);
|
|
}
|
|
|
|
/// Set the floating point math mode for a graph. Users can enforce the
|
|
/// graph to comply with the mode by specifying a boolean flag with the
|
|
/// setter function.
|
|
///
|
|
/// @param mode The floating-point math mode.
|
|
/// @param apply_to_int The flag that controls whether to use
|
|
/// floating-point arithmetic for integral operations.
|
|
void set_fpmath_mode(fpmath_mode mode, bool apply_to_int = false) {
|
|
error::wrap_c_api(dnnl_graph_graph_set_fpmath_mode(
|
|
get(), convert_to_c(mode), apply_to_int),
|
|
"could not set fpmath mode graph attribute");
|
|
}
|
|
|
|
/// Get the floating point math mode and the boolean flag that specifies
|
|
/// whether the graph will be enforced to comply the mode.
|
|
///
|
|
/// @param mode The floating-point math mode.
|
|
/// @param apply_to_int The flag that controls whether to use
|
|
/// floating-point arithmetic for integral operations.
|
|
void get_fpmath_mode(fpmath_mode &mode, bool &apply_to_int) const {
|
|
dnnl_fpmath_mode_t c_mode;
|
|
int c_apply_to_int;
|
|
|
|
error::wrap_c_api(dnnl_graph_graph_get_fpmath_mode(
|
|
get(), &c_mode, &c_apply_to_int),
|
|
"could not get fpmath mode graph attribute");
|
|
|
|
mode = fpmath_mode(c_mode);
|
|
apply_to_int = static_cast<bool>(c_apply_to_int);
|
|
}

    /// Adds an op into the graph to construct a computational DAG. The API
    /// will return failure if the operator has already been added to the
    /// graph or the operation cannot pass the schema check in the library
    /// (e.g., input and output numbers and data types, the attributes of the
    /// operation, etc.).
    ///
    /// @param op An operation to be added.
    /// @param allow_exception A flag indicating whether the method is allowed
    ///     to throw an exception if it fails to add the op to the graph.
    /// @returns #status::success or a status describing the error otherwise.
    status add_op(const op &op, bool allow_exception = true) {
        dnnl_status_t ret = dnnl_graph_add_op(get(), op.get());

        if (allow_exception) {
            error::wrap_c_api(ret, "could not add op to the graph");
        }

        return static_cast<status>(ret);
    }
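
    // Illustrative sketch: add ops without exceptions and branch on the
    // returned status instead (`conv` and `relu` stand for previously built
    // dnnl::graph::op objects).
    //
    //     if (g.add_op(conv, /*allow_exception=*/false) != status::success) {
    //         // handle the schema-check failure here
    //     }
    //     g.add_op(relu); // throws dnnl::error on failure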

    /// Finalizes a graph. It means users have finished adding operations into
    /// the graph and the graph is ready for partitioning. Adding a new
    /// operation into a finalized graph will return failures. Similarly,
    /// partitioning an unfinalized graph will also return failures.
    void finalize() {
        error::wrap_c_api(dnnl_graph_graph_finalize(get()),
                "could not finalize the graph");
    }

    /// Checks if a graph is finalized.
    ///
    /// @return True if the graph is finalized or false if the graph is not
    ///     finalized.
    bool is_finalized() const {
        uint8_t ret = 0;
        error::wrap_c_api(dnnl_graph_graph_is_finalized(get(), &ret),
                "could not get the finalization status of the graph");

        return ret != 0;
    }
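
    // Illustrative sketch: finalize exactly once before partitioning.
    //
    //     if (!g.is_finalized()) g.finalize();
    //     // from this point on, add_op() on `g` will return failures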

    /// Gets filtered partitions from a graph. Partitions will be claimed
    /// internally according to the capability of the library, the engine kind
    /// of the graph, and the policy.
    ///
    /// @param policy Partition policy, defaults to policy
    ///     #dnnl::graph::partition::policy::fusion.
    /// @return A vector storing the partitions.
    std::vector<partition> get_partitions(
            partition::policy policy = partition::policy::fusion) {
        if (!is_finalized()) {
            error::wrap_c_api(
                    dnnl_invalid_graph, "the graph is not finalized yet");
        }

        error::wrap_c_api(
                dnnl_graph_graph_filter(get(),
                        static_cast<dnnl_graph_partition_policy_t>(policy)),
                "could not filter the graph");

        size_t num = 0;
        error::wrap_c_api(dnnl_graph_graph_get_partition_num(get(), &num),
                "could not get number of partitions from the graph");

        // Return early if there are no partitions in the graph.
        if (num == 0) return {};

        std::vector<partition> out_list;
        out_list.reserve(num);

        std::vector<dnnl_graph_partition_t> partitions(num);
        error::wrap_c_api(
                dnnl_graph_graph_get_partitions(get(), num, partitions.data()),
                "could not get partitions from the graph");

        for (auto p : partitions) {
            out_list.emplace_back(p);
        }

        return out_list;
    }

private:
    static dnnl_fpmath_mode_t convert_to_c(fpmath_mode mode) {
        return static_cast<dnnl_fpmath_mode_t>(mode);
    }

    static dnnl_engine_kind_t convert_to_c(engine::kind akind) {
        return static_cast<dnnl_engine_kind_t>(akind);
    }
};
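
// A minimal end-to-end sketch of the graph workflow (illustrative only:
// `conv` stands for any previously created dnnl::graph::op, and engine,
// tensor, and execution setup are elided):
//
//     dnnl::graph::graph g(dnnl::engine::kind::cpu);
//     g.add_op(conv);
//     g.finalize();
//     auto partitions = g.get_partitions();
//     for (const auto &p : partitions) {
//         if (!p.is_supported()) continue; // handle unsupported ops yourself
//         // Query p.get_input_ports() / p.get_output_ports(), then compile
//         // and execute the partition with concrete logical tensors.
//     }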

/// @} dnnl_graph_api_graph

/// @addtogroup dnnl_graph_api_compiled_partition_cache Compiled Partition Cache
///
/// A set of functions that provide compiled partition cache control.
///
/// @{

/// Returns the number of compiled partitions that can be held in the compiled
/// partition cache at the same time.
inline int get_compiled_partition_cache_capacity() {
    int result = 0;
    error::wrap_c_api(dnnl_graph_get_compiled_partition_cache_capacity(&result),
            "could not get compiled partition cache capacity");
    return result;
}

/// @copydoc dnnl_graph_set_compiled_partition_cache_capacity(int capacity)
inline void set_compiled_partition_cache_capacity(int capacity) {
    error::wrap_c_api(
            dnnl_graph_set_compiled_partition_cache_capacity(capacity),
            "could not set compiled partition cache capacity");
}
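
// Illustrative sketch: bound the number of cached compiled partitions. The
// right capacity is application-specific; 1024 below is only an example.
//
//     dnnl::graph::set_compiled_partition_cache_capacity(1024);
//     int cap = dnnl::graph::get_compiled_partition_cache_capacity();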

/// @} dnnl_graph_api_compiled_partition_cache

/// @addtogroup dnnl_graph_api_constant_tensor_cache Constant Tensor Cache
///
/// A set of functions that provide constant tensor cache control.
///
/// @{

/// Controls enabling or disabling of the constant tensor cache. This API must
/// be called once before the compilation stage. By default, the constant
/// tensor cache is disabled in the library.
/// @note This API is deprecated and will be removed in a future release.
///     Please use the set_constant_tensor_cache_capacity API to disable the
///     constant tensor cache by setting its capacity to zero.
///
/// @param flag Set to a positive value to enable the cache and set to 0 to
///     disable the cache. Negative values are invalid.
inline void set_constant_tensor_cache(int flag) {
    error::wrap_c_api(dnnl_graph_set_constant_tensor_cache(flag),
            "failed to set constant tensor cache");
}

/// Returns the enabling status of the constant tensor cache.
/// @note This API is deprecated and will be removed in a future release.
///     Please use the get_constant_tensor_cache_capacity API to check the
///     enabling status by checking its capacity.
inline int get_constant_tensor_cache() {
    int result = 0;
    error::wrap_c_api(dnnl_graph_get_constant_tensor_cache(&result),
            "failed to get constant tensor cache");
    return result;
}

/// Controls the capacity of the constant tensor cache used for a specific
/// engine kind. This API is thread-safe and can be called multiple times at
/// runtime. The capacity is set to zero by default, which means the cache is
/// disabled. When calling this API, the corresponding cache will be flushed.
/// Setting the capacity to 0 clears all cached tensors and disables the
/// cache. Once the capacity limit is reached, no new tensors will be cached.
/// If there are multiple devices for an engine kind, the capacity set here is
/// for each device.
///
/// @param kind The engine kind that the constant tensor cache is used for.
/// @param size The constant tensor cache capacity size to set.
inline void set_constant_tensor_cache_capacity(engine::kind kind, size_t size) {
    error::wrap_c_api(dnnl_graph_set_constant_tensor_cache_capacity(
                              static_cast<dnnl_engine_kind_t>(kind), size),
            "failed to set constant tensor cache capacity");
}

/// Returns the current capacity of the constant tensor cache.
///
/// @param kind The engine kind that the constant tensor cache is used for.
inline size_t get_constant_tensor_cache_capacity(engine::kind kind) {
    size_t size = 0;
    error::wrap_c_api(dnnl_graph_get_constant_tensor_cache_capacity(
                              static_cast<dnnl_engine_kind_t>(kind), &size),
            "failed to get constant tensor cache capacity");
    return size;
}
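
// Illustrative sketch: size the constant tensor cache for CPU, query it back,
// and later disable it. The value 1024 is only an example; consult the
// library documentation for the unit of the capacity.
//
//     dnnl::graph::set_constant_tensor_cache_capacity(
//             dnnl::engine::kind::cpu, 1024);
//     size_t cap = dnnl::graph::get_constant_tensor_cache_capacity(
//             dnnl::engine::kind::cpu);
//     // Setting the capacity to zero flushes and disables the cache:
//     dnnl::graph::set_constant_tensor_cache_capacity(
//             dnnl::engine::kind::cpu, 0);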

/// @} dnnl_graph_api_constant_tensor_cache

} // namespace graph

/// @} dnnl_graph_api

} // namespace dnnl

/// @cond DO_NOT_DOCUMENT_THIS

/// oneAPI namespace
// Contains the oneapi::dnnl namespace as an alias to the ::dnnl namespace.
namespace oneapi {
// Note: without this guard, doxygen warns of potentially recursive namespace
#ifndef DOXYGEN_SHOULD_SKIP_THIS
/// oneDNN alias namespace
namespace dnnl = ::dnnl;
#endif
} // namespace oneapi

/// @endcond

/// @} dnnl_api

// NOLINTEND(readability-identifier-naming)
#endif /* ONEAPI_DNNL_DNNL_GRAPH_HPP */