common: promote sparse functionality
.github/automation/x64/build_linters.sh (vendored, 3 changes)
@@ -13,7 +13,6 @@ if [[ "$ONEDNN_ACTION" == "configure" ]]; then
-DCMAKE_BUILD_TYPE=debug \
-DONEDNN_BUILD_GRAPH=ON \
-DDNNL_EXPERIMENTAL=ON \
-DDNNL_EXPERIMENTAL_SPARSE=ON \
-DDNNL_EXPERIMENTAL_PROFILING=ON \
-DDNNL_EXPERIMENTAL_UKERNEL=ON \
-DONEDNN_EXPERIMENTAL_LOGGING=ON \
@@ -24,7 +23,7 @@ if [[ "$ONEDNN_ACTION" == "configure" ]]; then
set +x
elif [[ "$GITHUB_JOB" == "pr-format-tags" ]]; then
set -x
cmake -B../build -S. -DONEDNN_BUILD_GRAPH=OFF -DDNNL_EXPERIMENTAL_SPARSE=ON
cmake -B../build -S. -DONEDNN_BUILD_GRAPH=OFF
set +x
else
echo "Unknown linter job: $GITHUB_JOB"
@@ -1,5 +1,5 @@
#===============================================================================
# Copyright 2021-2024 Intel Corporation
# Copyright 2021-2025 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -34,7 +34,6 @@ endmacro()

set(COMPAT_CACHE_BOOL_VARS
"EXPERIMENTAL"
"EXPERIMENTAL_SPARSE"
"EXPERIMENTAL_UKERNEL"
"EXPERIMENTAL_LOGGING"
"VERBOSE"
@@ -203,11 +203,6 @@ option(DNNL_EXPERIMENTAL
using environment variables."
OFF) # disabled by default

option(DNNL_EXPERIMENTAL_SPARSE
"Enable experimental functionality for sparse domain. This option works
independently from DNNL_EXPERIMENTAL."
OFF) # disabled by default

option(DNNL_EXPERIMENTAL_UKERNEL
"Enable experimental functionality for ukernels. This option works
independently from DNNL_EXPERIMENTAL."
@@ -1,5 +1,5 @@
#===============================================================================
# Copyright 2016-2022 Intel Corporation
# Copyright 2016-2025 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -1962,7 +1962,7 @@ INCLUDE_FILE_PATTERNS =
# recursively expanded use the := operator instead of the = operator.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.

PREDEFINED = DOXYGEN_SHOULD_SKIP_THIS DNNL_GPU_RUNTIME=DNNL_RUNTIME_OCL DNNL_WITH_SYCL DNNL_USE_SYCL_BUFFERS DNNL_EXPERIMENTAL_SPARSE DNNL_EXPERIMENTAL_UKERNEL DNNL_EXPERIMENTAL_LOGGING
PREDEFINED = DOXYGEN_SHOULD_SKIP_THIS DNNL_GPU_RUNTIME=DNNL_RUNTIME_OCL DNNL_WITH_SYCL DNNL_USE_SYCL_BUFFERS DNNL_EXPERIMENTAL_UKERNEL DNNL_EXPERIMENTAL_LOGGING

# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
# tag can be used to specify a list of macro names that should be expanded. The
@@ -27,249 +27,12 @@ Both kinds of experimental features can be enabled simultaneously.

| Build time option             | Description                                                   |
|:------------------------------|:--------------------------------------------------------------|
| ONEDNN_EXPERIMENTAL_SPARSE    | Enable experimental API and functionality for sparse domain.  |
| ONEDNN_EXPERIMENTAL_UKERNEL   | Enable experimental microkernel APIs and functionalities.     |
| ONEDNN_EXPERIMENTAL_PROFILING | Enable experimental profiling API.                            |
| ONEDNN_EXPERIMENTAL_LOGGING   | Enable experimental logging support for oneDNN verbose mode.  |

## Features details

### ONEDNN_EXPERIMENTAL_SPARSE

This option extends the existing API and adds a new one to support sparse
functionality in oneDNN.

#### API

The main change is in oneDNN memory object semantics. Now, the memory object can
have multiple underlying buffers. In the case of regular dense computations, the
memory object always contains a single buffer. But in the case of sparse
computations, the memory object always contains one buffer for values and an
arbitrary number of additional buffers for metadata.

The underlying buffers are enumerated starting with 0, meaning that each buffer
has its own number. The buffer with values always has index 0.

In most cases, the API that works with underlying buffers takes a buffer index. The
exception is the API for creating a memory object. In that case, the API takes a vector
of buffers. The order of the buffers in the vector matters and should correspond to
the buffers' indices.

oneDNN also introduces a new format kind dnnl::memory::format_kind::sparse.
Sparse encoding (a.k.a. sparse format) is an enumeration type that specifies
how data is encoded. Currently, oneDNN supports Compressed Sparse Row (CSR),
Sorted Co-ordinate (COO) Sparse Format, and PACKED sparse encodings
(dnnl::memory::sparse_encoding::csr, dnnl::memory::sparse_encoding::coo,
dnnl::memory::sparse_encoding::packed) for the CPU engine, and only sorted
COO (Co-ordinate Sparse Format) for the GPU engine.

The memory descriptor has dedicated static member functions for creating memory
descriptors for different sparse encodings.

Each encoding defines the number and meaning of the buffers.

| Sparse encoding | Buffers                                                                     |
|:----------------|:----------------------------------------------------------------------------|
| CSR             | 0 - values, 1 - indices, 2 - pointers                                       |
| Sorted COO      | 0 - values, 1 to *ndims* - indices (*ndims* - number of tensor dimensions)  |
| PACKED          | The meaning and content are unspecified                                     |

The pseudocode below demonstrates how to create a memory object
for the CSR and COO sparse encodings and use the new API to work with the
underlying handles.

###### CSR Encoding:
~~~cpp
using namespace dnnl;
const memory::dim M = 4, N = 6;
const memory::dim nnz = 5;
const auto values_dt = memory::data_type::f32;
const auto indices_dt = memory::data_type::s32;
const auto pointers_dt = memory::data_type::s32;

// Create a memory descriptor for CSR sparse encoding.
const auto csr_md = memory::desc::csr(
        {M, N}, // Dimensions
        values_dt, // Data type of values
        nnz, // Number of non-zero entries
        indices_dt, // Data type of indices (metadata)
        pointers_dt); // Data type of pointers (metadata)

// A sparse matrix represented in the CSR format.
std::vector<float> csr_values = {2.5f, 1.5f, 1.5f, 2.5f, 2.0f};
std::vector<int32_t> csr_indices = {0, 2, 0, 5, 1};
std::vector<int32_t> csr_pointers = {0, 1, 2, 4, 5, 5};

// Create a memory object for the given buffers with values and metadata.
memory csr_mem(csr_md, engine, {
        csr_values.data(), // Buffer with values
        csr_indices.data(), // Buffer with indices (metadata)
        csr_pointers.data() // Buffer with pointers (metadata)
        });

const auto values_sz = csr_mem.get_size(0);
const auto indices_sz = csr_mem.get_size(1);
const auto pointers_sz = csr_mem.get_size(2);

assert(values_sz == csr_values.size() * sizeof(float));
assert(indices_sz == csr_indices.size() * sizeof(int32_t));
assert(pointers_sz == csr_pointers.size() * sizeof(int32_t));

void *values_handle = csr_mem.get_data_handle(0);
void *indices_handle = csr_mem.get_data_handle(1);
void *pointers_handle = csr_mem.get_data_handle(2);

assert(values_handle == (void *)csr_values.data());
assert(indices_handle == (void *)csr_indices.data());
assert(pointers_handle == (void *)csr_pointers.data());
~~~

###### Sorted COO Encoding:
~~~cpp
using namespace dnnl;
const memory::dim M = 4, N = 6;
const memory::dim nnz = 5;
const auto values_dt = memory::data_type::f32;
const auto indices_dt = memory::data_type::s32;

// Create a memory descriptor for COO sparse encoding.
const auto coo_md = memory::desc::coo(
        {M, N}, // Dimensions
        values_dt, // Data type of values
        nnz, // Number of non-zero entries
        indices_dt); // Data type of indices (metadata)

// A sparse matrix represented in the COO format.
std::vector<float> coo_values = {2.5f, 1.5f, 1.5f, 2.5f, 2.0f};
std::vector<int32_t> coo_row_indices = {0, 1, 2, 2, 3};
std::vector<int32_t> coo_col_indices = {0, 2, 0, 5, 1};

// Create a memory object for the given buffers with values and metadata.
memory coo_mem(coo_md, engine, {
        coo_values.data(), // Buffer with values
        coo_row_indices.data(), // Buffer with row indices (metadata)
        coo_col_indices.data() // Buffer with column indices (metadata)
        });

const auto values_sz = coo_mem.get_size(0);
const auto indices_sz = coo_mem.get_size(1);

assert(values_sz == coo_values.size() * sizeof(float));
assert(indices_sz == coo_row_indices.size() * sizeof(int32_t));
assert(indices_sz == coo_col_indices.size() * sizeof(int32_t));

void *values_handle = coo_mem.get_data_handle(0);
void *row_indices_handle = coo_mem.get_data_handle(1);
void *col_indices_handle = coo_mem.get_data_handle(2);

assert(values_handle == (void *)coo_values.data());
assert(row_indices_handle == (void *)coo_row_indices.data());
assert(col_indices_handle == (void *)coo_col_indices.data());
~~~

A memory descriptor created for the sparse encoding PACKED cannot
be used to create a memory object. It can only be used to create
a primitive descriptor to query the actual memory descriptor
(similar to the format tag `any`).

#### Primitives

##### Matrix Multiplication

This option enables the matmul primitive that can work with
sparse input tensors.

###### CSR encoding
Supported only for the CPU engine. Only one of the input tensors can be sparse.
The output tensor is always dense.

The following data type combinations are supported:

| Values (src, weight, dst) | Indices |
|:--------------------------|:--------|
| f16, f16, f16             | s32     |
| f32, f32, f32             | s32     |

The following format tags are supported for dense input/output
tensors:

* ab

See the example [here](@ref cpu_matmul_csr_cpp).

Benchdnn can be used to test matmul with a CSR input tensor as follows:
`./benchdnn --matmul --encoding=csr+0.99:: --wtag=ab --dtag=ab 4x1000000:1000000x128`

For the case above, the number of non-zero elements for the source tensor is
calculated as max(4 * 1000000 * (1 - 0.99), 1).

###### COO encoding
Supported only for the CPU and GPU engines. Only one of the input tensors can
be sparse. The output tensor is always dense.

The following data type combinations are supported:

| Values (src, weight, dst) | Indices |
|:--------------------------|:--------|
| f16, f16, f16             | s32     |
| f32, f32, f32             | s32     |

The following format tags are supported for the dense weights tensor:

* ab
* ba

The following format tags are supported for the dense destination tensor:

* ab

See the example [here](@ref cpu_matmul_coo_cpp).

Benchdnn can be used to test matmul with a COO input tensor as follows:
`./benchdnn --matmul --encoding=coo+0.99:: --wtag=ab --dtag=ab 4x1000000:1000000x128`

For the case above, the number of non-zero elements for the source tensor is
calculated as max(4 * 1000000 * (1 - 0.99), 1).

###### PACKED encoding

Only the weights tensor is allowed to be sparse. The other tensors
are always dense.

In general, it is expected that all matmul-related functionality (e.g. post-ops,
scales, zero-points, etc.) that is supported for the dense weights should
also work for the sparse weights.

Currently, matmul has the following limitations for the PACKED encoding:
* Supported only for the CPU engine
* Only Intel Advanced Matrix Extensions (Intel AMX) instruction set
architecture (ISA) is supported
* Only `s8` data type for the weights is supported
* Only 1 batch dimension is supported

See the example [here](@ref cpu_matmul_weights_compression_cpp).

Benchdnn can be used to test matmul with the PACKED weights tensor as follows:
`./benchdnn --matmul --dt=s8:s8:s32 --encoding=:packed+0.99: 3x512x1024:1x1024x512`

For the case above, the number of non-zero elements for the weights tensor is
calculated as max(1024 * 512 * (1 - 0.99), 1).

##### Reorder

Currently, there is only one reorder for packing a dense tensor, i.e. converting
a dense tensor that is in `ab` format to a sparse tensor that is encoded with
the `PACKED` encoding.

In general, it is expected that all reorder-related functionality
(e.g. scales, zero-points, etc.) that is supported for the dense
destination tensor should also work for the sparse one.

#### Common Limitations
* The interoperability API to get/set data handles is not supported. Use the
runtime-agnostic API to do that.
* Sparse memory and memory descriptor can only be used with the Matrix
Multiplication and Reorder primitives.

### ONEDNN_EXPERIMENTAL_UKERNEL

This option enables a new set of CPU-only APIs to support block-level

doc/advanced/sparsity.md (new file, 121 lines)
@@ -0,0 +1,121 @@
Sparse memory formats {#dev_guide_sparsity}
===============================================

# API

oneDNN supports the format kind dnnl::memory::format_kind::sparse to describe sparse tensors.
Sparse encoding (a.k.a. sparse format) is an enumeration type that specifies
how data is encoded. Currently, oneDNN supports Compressed Sparse Row (CSR),
Sorted Co-ordinate (COO) Sparse Format, and PACKED sparse encodings
(dnnl::memory::sparse_encoding::csr, dnnl::memory::sparse_encoding::coo,
dnnl::memory::sparse_encoding::packed) for the CPU engine, and only sorted
COO (Co-ordinate Sparse Format) for the GPU engine.

The memory descriptor has dedicated static member functions for creating memory
descriptors for different sparse encodings.

Each encoding defines the number and meaning of the buffers.

| Sparse encoding | Buffers                                                                     |
|:----------------|:----------------------------------------------------------------------------|
| CSR             | 0 - values, 1 - indices, 2 - pointers                                       |
| Sorted COO      | 0 - values, 1 to *ndims* - indices (*ndims* - number of tensor dimensions)  |
| PACKED          | The meaning and content are unspecified                                     |

The pseudocode below demonstrates how to create a memory object
for the CSR and COO sparse encodings and use the new API to work with the
underlying handles.

# CSR Encoding:
~~~cpp
using namespace dnnl;
const memory::dim M = 4, N = 6;
const memory::dim nnz = 5;
const auto values_dt = memory::data_type::f32;
const auto indices_dt = memory::data_type::s32;
const auto pointers_dt = memory::data_type::s32;

// Create a memory descriptor for CSR sparse encoding.
const auto csr_md = memory::desc::csr(
        {M, N}, // Dimensions
        values_dt, // Data type of values
        nnz, // Number of non-zero entries
        indices_dt, // Data type of indices (metadata)
        pointers_dt); // Data type of pointers (metadata)

// A sparse matrix represented in the CSR format.
std::vector<float> csr_values = {2.5f, 1.5f, 1.5f, 2.5f, 2.0f};
std::vector<int32_t> csr_indices = {0, 2, 0, 5, 1};
std::vector<int32_t> csr_pointers = {0, 1, 2, 4, 5, 5};

// Create a memory object for the given buffers with values and metadata.
memory csr_mem(csr_md, engine, {
        csr_values.data(), // Buffer with values
        csr_indices.data(), // Buffer with indices (metadata)
        csr_pointers.data() // Buffer with pointers (metadata)
        });

const auto values_sz = csr_mem.get_size(0);
const auto indices_sz = csr_mem.get_size(1);
const auto pointers_sz = csr_mem.get_size(2);

assert(values_sz == csr_values.size() * sizeof(float));
assert(indices_sz == csr_indices.size() * sizeof(int32_t));
assert(pointers_sz == csr_pointers.size() * sizeof(int32_t));

void *values_handle = csr_mem.get_data_handle(0);
void *indices_handle = csr_mem.get_data_handle(1);
void *pointers_handle = csr_mem.get_data_handle(2);

assert(values_handle == (void *)csr_values.data());
assert(indices_handle == (void *)csr_indices.data());
assert(pointers_handle == (void *)csr_pointers.data());
~~~

# Sorted COO Encoding:
~~~cpp
using namespace dnnl;
const memory::dim M = 4, N = 6;
const memory::dim nnz = 5;
const auto values_dt = memory::data_type::f32;
const auto indices_dt = memory::data_type::s32;

// Create a memory descriptor for COO sparse encoding.
const auto coo_md = memory::desc::coo(
        {M, N}, // Dimensions
        values_dt, // Data type of values
        nnz, // Number of non-zero entries
        indices_dt); // Data type of indices (metadata)

// A sparse matrix represented in the COO format.
std::vector<float> coo_values = {2.5f, 1.5f, 1.5f, 2.5f, 2.0f};
std::vector<int32_t> coo_row_indices = {0, 1, 2, 2, 3};
std::vector<int32_t> coo_col_indices = {0, 2, 0, 5, 1};

// Create a memory object for the given buffers with values and metadata.
memory coo_mem(coo_md, engine, {
        coo_values.data(), // Buffer with values
        coo_row_indices.data(), // Buffer with row indices (metadata)
        coo_col_indices.data() // Buffer with column indices (metadata)
        });

const auto values_sz = coo_mem.get_size(0);
const auto indices_sz = coo_mem.get_size(1);

assert(values_sz == coo_values.size() * sizeof(float));
assert(indices_sz == coo_row_indices.size() * sizeof(int32_t));
assert(indices_sz == coo_col_indices.size() * sizeof(int32_t));

void *values_handle = coo_mem.get_data_handle(0);
void *row_indices_handle = coo_mem.get_data_handle(1);
void *col_indices_handle = coo_mem.get_data_handle(2);

assert(values_handle == (void *)coo_values.data());
assert(row_indices_handle == (void *)coo_row_indices.data());
assert(col_indices_handle == (void *)coo_col_indices.data());
~~~

A memory descriptor created for the sparse encoding PACKED cannot
be used to create a memory object. It can only be used to create
a primitive descriptor to query the actual memory descriptor
(similar to the format tag `any`).
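
The snippet below is an illustrative sketch of that flow (it is not one of the
library's documentation examples): the packed weights descriptor is only used
to create a matmul primitive descriptor, and the queried weights descriptor
then backs a real memory object. The dimensions, data types, and nnz value are
assumptions chosen for illustration.

~~~cpp
using namespace dnnl;

engine eng(engine::kind::cpu, 0);

const memory::dim M = 512, K = 1024, N = 512;
// Assume roughly 90% of the weights are zeros; nnz is the number of
// non-zero elements the packed descriptor is created for.
const memory::dim nnz = K * N / 10;

// Dense source and destination (s8/s32 chosen for illustration).
const auto src_md = memory::desc(
        {M, K}, memory::data_type::s8, memory::format_tag::ab);
const auto dst_md = memory::desc(
        {M, N}, memory::data_type::s32, memory::format_tag::ab);

// PACKED weights descriptor: opaque, cannot back a memory object directly.
const auto wei_packed_md
        = memory::desc::packed({K, N}, memory::data_type::s8, nnz);

// Create a matmul primitive descriptor and query the actual (opaque)
// weights descriptor, similar to using the format tag `any`.
matmul::primitive_desc matmul_pd(eng, src_md, wei_packed_md, dst_md);
const auto wei_md = matmul_pd.weights_desc();

// The queried descriptor can be used to create a memory object; dense `ab`
// weights are then packed into it with a reorder primitive.
memory wei_mem(wei_md, eng);
~~~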

@@ -171,6 +171,87 @@ memory buffer that shares its shape with the destination buffer).

@note Please check tutorials below to see run-time attributes in use.

### Sparsity

#### CSR encoding
Supported only for the CPU engine. Only one of the input tensors can be sparse.
The output tensor is always dense.

The following data type combinations are supported:

| Values (src, weight, dst) | Indices |
|:--------------------------|:--------|
| f16, f16, f16             | s32     |
| f32, f32, f32             | s32     |

The following format tags are supported for dense input/output
tensors:

* ab

See the example [here](@ref cpu_matmul_csr_cpp).

Benchdnn can be used to test matmul with a CSR input tensor as follows:
`./benchdnn --matmul --encoding=csr+0.99:: --wtag=ab --dtag=ab 4x1000000:1000000x128`

For the case above, the number of non-zero elements for the source tensor is
calculated as max(4 * 1000000 * (1 - 0.99), 1).
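
As an illustrative sketch only (assuming a CPU engine, the f32/s32 CSR
combination from the table above, and library-allocated buffers; filling the
source data is omitted), a matmul with a CSR source could be set up as follows:

~~~cpp
using namespace dnnl;

engine eng(engine::kind::cpu, 0);
stream strm(eng);

const memory::dim M = 4, K = 1000000, N = 128;
const memory::dim nnz = 40000; // illustrative number of non-zero source elements

// CSR source: f32 values, s32 indices and pointers.
const auto src_md = memory::desc::csr(
        {M, K}, memory::data_type::f32, nnz,
        memory::data_type::s32, memory::data_type::s32);
// Dense weights and destination in `ab` format.
const auto wei_md = memory::desc(
        {K, N}, memory::data_type::f32, memory::format_tag::ab);
const auto dst_md = memory::desc(
        {M, N}, memory::data_type::f32, memory::format_tag::ab);

matmul::primitive_desc matmul_pd(eng, src_md, wei_md, dst_md);
matmul matmul_prim(matmul_pd);

// Let the library allocate the values and metadata buffers; populating them
// (e.g. via the runtime-agnostic map/unmap API) is omitted here.
memory src_mem(src_md, eng);
memory wei_mem(wei_md, eng);
memory dst_mem(dst_md, eng);

matmul_prim.execute(strm,
        {{DNNL_ARG_SRC, src_mem}, {DNNL_ARG_WEIGHTS, wei_mem},
                {DNNL_ARG_DST, dst_mem}});
strm.wait();
~~~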

#### COO encoding
Supported only for the CPU and GPU engines. Only one of the input tensors can
be sparse. The output tensor is always dense.

The following data type combinations are supported:

| Values (src, weight, dst) | Indices |
|:--------------------------|:--------|
| f16, f16, f16             | s32     |
| f32, f32, f32             | s32     |

The following format tags are supported for the dense weights tensor:

* ab
* ba

The following format tags are supported for the dense destination tensor:

* ab

See the example [here](@ref cpu_matmul_coo_cpp).

Benchdnn can be used to test matmul with a COO input tensor as follows:
`./benchdnn --matmul --encoding=coo+0.99:: --wtag=ab --dtag=ab 4x1000000:1000000x128`

For the case above, the number of non-zero elements for the source tensor is
calculated as max(4 * 1000000 * (1 - 0.99), 1).

#### PACKED encoding

Only the weights tensor is allowed to be sparse. The other tensors
are always dense.

In general, it is expected that all matmul-related functionality (e.g. post-ops,
scales, zero-points, etc.) that is supported for the dense weights should
also work for the sparse weights.

Currently, matmul has the following limitations for the PACKED encoding:
* Supported only for the CPU engine
* Only Intel Advanced Matrix Extensions (Intel AMX) instruction set
architecture (ISA) is supported
* Only `s8` data type for the weights is supported
* Only 1 batch dimension is supported

See the example [here](@ref cpu_matmul_weights_compression_cpp).

Benchdnn can be used to test matmul with the PACKED weights tensor as follows:
`./benchdnn --matmul --dt=s8:s8:s32 --encoding=:packed+0.99: 3x512x1024:1x1024x512`

For the case above, the number of non-zero elements for the weights tensor is
calculated as max(1024 * 512 * (1 - 0.99), 1).

Refer to the [Sparsity Advanced Topic](@ref dev_guide_sparsity) page for more
information on sparse encodings.

## Implementation Limitations

1. Check @ref dev_guide_data_types.

@@ -128,6 +128,25 @@ would lead to the following operation:

multiplication of tensor values by a scale value. Using \f$scale_{dst}\f$
argument will lead to division of tensor values by a scale value.

### Sparsity

Currently, there is only one reorder for packing a dense tensor, i.e. converting
a dense tensor that is in `ab` format to a sparse tensor that is encoded with
the `PACKED` encoding.

In general, it is expected that all reorder-related functionality
(e.g. scales, zero-points, etc.) that is supported for the dense
destination tensor should also work for the sparse one.
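
The sketch below is an illustrative assumption, not library documentation: it
assumes `matmul_pd` is a matmul primitive descriptor that was created with a
PACKED weights descriptor (see the matmul documentation), and packs dense `ab`
weights into the queried opaque descriptor.

~~~cpp
using namespace dnnl;

engine eng(engine::kind::cpu, 0);
stream strm(eng);

const memory::dim K = 1024, N = 512;

// Dense s8 weights in plain `ab` format, assumed to be filled by the user.
const auto dense_md = memory::desc(
        {K, N}, memory::data_type::s8, memory::format_tag::ab);
memory dense_mem(dense_md, eng);

// `matmul_pd` is assumed to be a matmul primitive descriptor created with a
// PACKED weights descriptor (memory::desc::packed(...)); the packed
// descriptor itself cannot be used to create a memory object.
const auto packed_md = matmul_pd.weights_desc();
memory packed_mem(packed_md, eng); // library allocates the opaque buffers

// Pack the dense `ab` tensor into the PACKED sparse encoding.
reorder(dense_mem, packed_mem).execute(strm, dense_mem, packed_mem);
strm.wait();
~~~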

#### Common Limitations
* The interoperability API to get/set data handles is not supported. Use the
runtime-agnostic API to do that.
* Sparse memory and memory descriptor can only be used with the Matrix
Multiplication and Reorder primitives.

Refer to the [Sparsity Advanced Topic](@ref dev_guide_sparsity) page for more
information on sparse encodings.

## Implementation Limitations

1. Refer to @ref dev_guide_data_types for limitations related to data types

@@ -10,4 +10,5 @@ Advanced Topics

dev_guide_primitive_cache
dev_guide_persistent_cache
dev_guide_threadpool
dev_guide_sparsity
dev_guide_experimental

@@ -55,12 +55,6 @@ append_host_compiler_options(CMAKE_CXX_FLAGS "${DPCPP_CXX_NOWARN_FLAGS}")

file(GLOB_RECURSE sources *.cpp *.c)
file(GLOB_RECURSE headers *.hpp *.h)

if(NOT DNNL_EXPERIMENTAL_SPARSE)
    list(REMOVE_ITEM sources ${CMAKE_CURRENT_SOURCE_DIR}/cpu_matmul_csr.cpp)
    list(REMOVE_ITEM sources ${CMAKE_CURRENT_SOURCE_DIR}/cpu_matmul_coo.cpp)
    list(REMOVE_ITEM sources ${CMAKE_CURRENT_SOURCE_DIR}/cpu_matmul_weights_compression.cpp)
endif()

if(NOT DNNL_EXPERIMENTAL_UKERNEL)
    list(REMOVE_ITEM sources ${CMAKE_CURRENT_SOURCE_DIR}/ukernels/cpu_brgemm.cpp)
endif()

@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright 2016-2024 Intel Corporation
|
||||
* Copyright 2016-2025 Intel Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -881,7 +881,6 @@ dnnl_status_t DNNL_API dnnl_memory_desc_create_with_tag(
|
||||
dnnl_memory_desc_t *memory_desc, int ndims, const dnnl_dims_t dims,
|
||||
dnnl_data_type_t data_type, dnnl_format_tag_t tag);
|
||||
|
||||
#ifdef DNNL_EXPERIMENTAL_SPARSE
|
||||
/// Creates a memory descriptor for CSR encoding.
|
||||
///
|
||||
/// @param memory_desc Output memory descriptor.
|
||||
@ -940,10 +939,10 @@ dnnl_status_t DNNL_API dnnl_memory_desc_create_with_coo_encoding(
|
||||
/// @param nnz Number of non-zero entries.
|
||||
/// @returns #dnnl_success on success and a status describing the error
|
||||
/// otherwise.
|
||||
/// @sa @ref dev_guide_sparsity
|
||||
dnnl_status_t DNNL_API dnnl_memory_desc_create_with_packed_encoding(
|
||||
dnnl_memory_desc_t *memory_desc, int ndims, const dnnl_dims_t dims,
|
||||
dnnl_data_type_t data_type, dnnl_dim_t nnz);
|
||||
#endif
|
||||
|
||||
/// Creates a memory descriptor for a region inside an area
|
||||
/// described by an existing memory descriptor.
|
||||
@ -1109,7 +1108,6 @@ dnnl_status_t DNNL_API dnnl_memory_desc_permute_axes(
|
||||
dnnl_status_t DNNL_API dnnl_memory_desc_query(
|
||||
const_dnnl_memory_desc_t memory_desc, dnnl_query_t what, void *result);
|
||||
|
||||
#ifdef DNNL_EXPERIMENTAL_SPARSE
|
||||
/// Queries a memory descriptor for various pieces of information. This version
|
||||
/// supports additional queries #dnnl_query_sparse_encoding, #dnnl_query_nnz_s64,
|
||||
/// #dnnl_query_num_handles_s32 and #dnnl_query_data_type for a particular
|
||||
@ -1164,10 +1162,10 @@ dnnl_status_t DNNL_API dnnl_memory_desc_query(
|
||||
/// it must be a @c dnnl_dims_t** if querying for a strides.
|
||||
/// @returns #dnnl_success on success and a status describing the error
|
||||
/// otherwise.
|
||||
/// @sa @ref dev_guide_sparsity
|
||||
dnnl_status_t DNNL_API dnnl_memory_desc_query_v2(
|
||||
const_dnnl_memory_desc_t memory_desc, dnnl_query_t what, int index,
|
||||
void *result);
|
||||
#endif
|
||||
|
||||
/// Compares two memory descriptors.
|
||||
///
|
||||
@ -1188,7 +1186,6 @@ int DNNL_API dnnl_memory_desc_equal(
|
||||
/// descriptor.
|
||||
size_t DNNL_API dnnl_memory_desc_get_size(const_dnnl_memory_desc_t memory_desc);
|
||||
|
||||
#ifdef DNNL_EXPERIMENTAL_SPARSE
|
||||
/// Returns the size of the data that corresponds to the given index.
|
||||
///
|
||||
/// @param memory_desc Memory descriptor.
|
||||
@ -1197,7 +1194,6 @@ size_t DNNL_API dnnl_memory_desc_get_size(const_dnnl_memory_desc_t memory_desc);
|
||||
/// @returns The number of bytes required for the requested data.
|
||||
size_t DNNL_API dnnl_memory_desc_get_size_v2(
|
||||
const_dnnl_memory_desc_t memory_desc, int index);
|
||||
#endif
|
||||
|
||||
/// Returns the size of data type.
|
||||
///
|
||||
@ -1229,7 +1225,6 @@ dnnl_status_t DNNL_API dnnl_memory_create(dnnl_memory_t *memory,
|
||||
const_dnnl_memory_desc_t memory_desc, dnnl_engine_t engine,
|
||||
void *handle);
|
||||
|
||||
#ifdef DNNL_EXPERIMENTAL_SPARSE
|
||||
/// Creates a memory object with multiple handles.
|
||||
///
|
||||
/// @param memory Output memory object.
|
||||
@ -1250,7 +1245,6 @@ dnnl_status_t DNNL_API dnnl_memory_create(dnnl_memory_t *memory,
|
||||
dnnl_status_t DNNL_API dnnl_memory_create_v2(dnnl_memory_t *memory,
|
||||
const_dnnl_memory_desc_t memory_desc, dnnl_engine_t engine,
|
||||
int nhandles, void **handles);
|
||||
#endif
|
||||
|
||||
/// Returns the memory descriptor for a memory object.
|
||||
///
|
||||
@ -1296,7 +1290,6 @@ dnnl_status_t DNNL_API dnnl_memory_get_engine(
|
||||
dnnl_status_t DNNL_API dnnl_memory_map_data(
|
||||
const_dnnl_memory_t memory, void **mapped_ptr);
|
||||
|
||||
#ifdef DNNL_EXPERIMENTAL_SPARSE
|
||||
/// Maps a memory object and returns a host-side pointer to a memory buffer
|
||||
/// with a copy of its contents. The memory buffer corresponds to the given
|
||||
/// index.
|
||||
@ -1324,7 +1317,6 @@ dnnl_status_t DNNL_API dnnl_memory_map_data(
|
||||
/// otherwise.
|
||||
dnnl_status_t DNNL_API dnnl_memory_map_data_v2(
|
||||
const_dnnl_memory_t memory, void **mapped_ptr, int index);
|
||||
#endif
|
||||
|
||||
/// Unmaps a memory object and writes back any changes made to the previously
|
||||
/// mapped memory buffer. The pointer to the mapped buffer must be obtained
|
||||
@ -1343,7 +1335,6 @@ dnnl_status_t DNNL_API dnnl_memory_map_data_v2(
|
||||
dnnl_status_t DNNL_API dnnl_memory_unmap_data(
|
||||
const_dnnl_memory_t memory, void *mapped_ptr);
|
||||
|
||||
#ifdef DNNL_EXPERIMENTAL_SPARSE
|
||||
/// Unmaps a memory object and writes back any changes made to the previously
|
||||
/// mapped memory buffer. The pointer to the mapped buffer must be obtained
|
||||
/// via the dnnl_memory_map_data() call. The buffer corresponds to the given
|
||||
@ -1362,7 +1353,6 @@ dnnl_status_t DNNL_API dnnl_memory_unmap_data(
|
||||
/// otherwise.
|
||||
dnnl_status_t DNNL_API dnnl_memory_unmap_data_v2(
|
||||
const_dnnl_memory_t memory, void *mapped_ptr, int index);
|
||||
#endif
|
||||
|
||||
/// Returns memory object's data handle.
|
||||
///
|
||||
@ -1385,7 +1375,6 @@ dnnl_status_t DNNL_API dnnl_memory_get_data_handle(
|
||||
dnnl_status_t DNNL_API dnnl_memory_set_data_handle(
|
||||
dnnl_memory_t memory, void *handle);
|
||||
|
||||
#ifdef DNNL_EXPERIMENTAL_SPARSE
|
||||
/// Returns an underlying memory buffer that corresponds to the given index.
|
||||
///
|
||||
/// @param memory Memory object.
|
||||
@ -1409,7 +1398,6 @@ dnnl_status_t DNNL_API dnnl_memory_get_data_handle_v2(
|
||||
/// otherwise.
|
||||
dnnl_status_t DNNL_API dnnl_memory_set_data_handle_v2(
|
||||
dnnl_memory_t memory, void *handle, int index);
|
||||
#endif
|
||||
|
||||
/// Destroys a memory object.
|
||||
///
|
||||
|
@ -738,14 +738,12 @@ enum class query {
|
||||
inner_blks = dnnl_query_inner_blks,
|
||||
/// vector of logical indices of the blocks
|
||||
inner_idxs = dnnl_query_inner_idxs,
|
||||
#ifdef DNNL_EXPERIMENTAL_SPARSE
|
||||
/// Sparse encoding
|
||||
sparse_encoding = dnnl_query_sparse_encoding,
|
||||
/// Number of non-zero entries
|
||||
nnz_s64 = dnnl_query_nnz_s64,
|
||||
/// Number of buffers required for a memory descriptor
|
||||
num_handles_s32 = dnnl_query_num_handles_s32,
|
||||
#endif
|
||||
};
|
||||
|
||||
/// Converts query enum value from C++ API to C API type.
|
||||
@ -905,31 +903,28 @@ struct memory : public handle<dnnl_memory_t> {
|
||||
/// A tensor in a generic format described by the stride and blocking
|
||||
/// values in each dimension.
|
||||
blocked = dnnl_blocked,
|
||||
#ifdef DNNL_EXPERIMENTAL_SPARSE
|
||||
/// Format kind for sparse tensors.
|
||||
sparse = dnnl_format_kind_sparse,
|
||||
#endif
|
||||
/// A special format kind that indicates that tensor format is opaque.
|
||||
opaque = dnnl_format_kind_opaque,
|
||||
};
|
||||
|
||||
#ifdef DNNL_EXPERIMENTAL_SPARSE
|
||||
/// Sparse encodings.
|
||||
/// @sa @ref dev_guide_sparsity
|
||||
enum class sparse_encoding {
|
||||
/// Undefined sparse encoding kind, used for empty memory descriptors.
|
||||
undef = dnnl_sparse_encoding_undef,
|
||||
/// Compressed Sparse Row (CSR) encoding.
|
||||
csr = dnnl_csr,
|
||||
/// An encoding that is used for an opaque storage schema for
|
||||
/// tensors with unstructured sparsity. A memory descriptor with the
|
||||
/// packed encoding cannot be used to create a memory object. It can
|
||||
/// only be used to create a primitive descriptor to query the
|
||||
/// actual memory descriptor (similar to the format tag `any`).
|
||||
packed = dnnl_packed,
|
||||
/// Coordinate Sparse (COO) encoding.
|
||||
coo = dnnl_coo,
|
||||
/// Undefined sparse encoding kind, used for empty memory descriptors.
|
||||
undef = dnnl_sparse_encoding_undef,
|
||||
/// Compressed Sparse Row (CSR) encoding.
|
||||
csr = dnnl_csr,
|
||||
/// An encoding that is used for an opaque storage schema for
|
||||
/// tensors with unstructured sparsity. A memory descriptor with the
|
||||
/// packed encoding cannot be used to create a memory object. It can
|
||||
/// only be used to create a primitive descriptor to query the
|
||||
/// actual memory descriptor (similar to the format tag `any`).
|
||||
packed = dnnl_packed,
|
||||
/// Coordinate Sparse (COO) encoding.
|
||||
coo = dnnl_coo,
|
||||
};
|
||||
#endif
|
||||
|
||||
/// Memory format tag specification.
|
||||
///
|
||||
@ -2823,7 +2818,7 @@ struct memory : public handle<dnnl_memory_t> {
|
||||
"strides");
|
||||
reset(md);
|
||||
}
|
||||
#ifdef DNNL_EXPERIMENTAL_SPARSE
|
||||
|
||||
/// Function for creating a memory descriptor for CSR sparse encoding.
|
||||
///
|
||||
/// The created memory descriptor will describe a memory object that
|
||||
@ -2842,6 +2837,7 @@ struct memory : public handle<dnnl_memory_t> {
|
||||
/// allowed to fail without throwing an exception. In this case a
|
||||
/// zero memory descriptor will be constructed. This flag is
|
||||
/// optional and defaults to false.
|
||||
/// @sa @ref dev_guide_sparsity
|
||||
static desc csr(const dims &adims, data_type adata_type, dim nnz,
|
||||
data_type index_dt, data_type pointer_dt,
|
||||
bool allow_empty = false) {
|
||||
@ -2876,6 +2872,7 @@ struct memory : public handle<dnnl_memory_t> {
|
||||
/// allowed to fail without throwing an exception. In this case a
|
||||
/// zero memory descriptor will be constructed. This flag is
|
||||
/// optional and defaults to false.
|
||||
/// @sa @ref dev_guide_sparsity
|
||||
static desc coo(const dims &adims, data_type adata_type, dim nnz,
|
||||
data_type index_dt, bool allow_empty = false) {
|
||||
validate_dims(adims);
|
||||
@ -2910,6 +2907,7 @@ struct memory : public handle<dnnl_memory_t> {
|
||||
/// allowed to fail without throwing an exception. In this case a
|
||||
/// zero memory descriptor will be constructed. This flag is
|
||||
/// optional and defaults to false.
|
||||
/// @sa @ref dev_guide_sparsity
|
||||
static desc packed(const dims &adims, data_type adata_type, dim nnz,
|
||||
bool allow_empty = false) {
|
||||
validate_dims(adims);
|
||||
@ -2923,7 +2921,7 @@ struct memory : public handle<dnnl_memory_t> {
|
||||
"sparse encoding");
|
||||
return desc {md};
|
||||
}
|
||||
#endif
|
||||
|
||||
/// Construct a memory descriptor from a C API ::dnnl_memory_desc_t
|
||||
/// handle. The resulting handle is not weak and the C handle will be
|
||||
/// destroyed during the destruction of the C++ object.
|
||||
@ -3146,7 +3144,6 @@ struct memory : public handle<dnnl_memory_t> {
|
||||
return query_dims(query::inner_idxs);
|
||||
}
|
||||
|
||||
#ifdef DNNL_EXPERIMENTAL_SPARSE
|
||||
/// Returns number of handles.
|
||||
///
|
||||
/// @returns A number of handles.
|
||||
@ -3170,6 +3167,7 @@ struct memory : public handle<dnnl_memory_t> {
|
||||
/// Returns the sparse encoding of the memory descriptor.
|
||||
///
|
||||
/// @returns the sparse encoding kind.
|
||||
/// @sa @ref dev_guide_sparsity
|
||||
memory::sparse_encoding get_sparse_encoding() const {
|
||||
dnnl_sparse_encoding_t sparse_encoding;
|
||||
dnnl_status_t status = dnnl_memory_desc_query_v2(
|
||||
@ -3186,14 +3184,6 @@ struct memory : public handle<dnnl_memory_t> {
|
||||
memory::data_type get_data_type(int index = 0) const {
|
||||
return query_data_type(query::data_type, index);
|
||||
}
|
||||
#else
|
||||
/// Returns the data type of the memory descriptor.
|
||||
///
|
||||
/// @returns The data type.
|
||||
memory::data_type get_data_type() const {
|
||||
return query_data_type(query::data_type);
|
||||
}
|
||||
#endif
|
||||
|
||||
/// Returns the format kind of the memory descriptor.
|
||||
///
|
||||
@ -3213,7 +3203,6 @@ struct memory : public handle<dnnl_memory_t> {
|
||||
/// @returns A copy of the dimensions vector.
|
||||
memory::dims get_dims() const { return query_dims(query::dims); }
|
||||
|
||||
#ifdef DNNL_EXPERIMENTAL_SPARSE
|
||||
/// Returns size of the memory descriptor in bytes.
|
||||
/// @param index Data index. Defaults to 0.
|
||||
/// @returns The number of bytes required to allocate a memory buffer
|
||||
@ -3222,13 +3211,6 @@ struct memory : public handle<dnnl_memory_t> {
|
||||
size_t get_size(int index = 0) const {
|
||||
return dnnl_memory_desc_get_size_v2(get(), index);
|
||||
}
|
||||
#else
|
||||
/// Returns size of the memory descriptor in bytes.
|
||||
/// @returns The number of bytes required to allocate a memory buffer
|
||||
/// for the memory object described by this memory descriptor
|
||||
/// including the padding area.
|
||||
size_t get_size() const { return dnnl_memory_desc_get_size(get()); }
|
||||
#endif
|
||||
|
||||
/// Returns a binary blob associated with the given memory descriptor
|
||||
/// @returns The memory descriptor blob associated with the memory descriptor
|
||||
@ -3265,7 +3247,6 @@ struct memory : public handle<dnnl_memory_t> {
|
||||
bool operator!=(const desc &other) const { return !operator==(other); }
|
||||
|
||||
private:
|
||||
#ifdef DNNL_EXPERIMENTAL_SPARSE
|
||||
memory::data_type query_data_type(query what, int index) const {
|
||||
dnnl_data_type_t data_type;
|
||||
dnnl_status_t status = dnnl_memory_desc_query_v2(
|
||||
@ -3274,16 +3255,6 @@ struct memory : public handle<dnnl_memory_t> {
|
||||
? static_cast<dnnl::memory::data_type>(data_type)
|
||||
: dnnl::memory::data_type::undef;
|
||||
}
|
||||
#else
|
||||
memory::data_type query_data_type(query what) const {
|
||||
dnnl_data_type_t data_type;
|
||||
dnnl_status_t status = dnnl_memory_desc_query(
|
||||
get(), dnnl::convert_to_c(what), &data_type);
|
||||
return status == dnnl_success
|
||||
? static_cast<dnnl::memory::data_type>(data_type)
|
||||
: dnnl::memory::data_type::undef;
|
||||
}
|
||||
#endif
|
||||
|
||||
int query_s32(query what) const {
|
||||
int res;
|
||||
@ -3314,7 +3285,6 @@ struct memory : public handle<dnnl_memory_t> {
|
||||
/// absence of a parameter.
|
||||
memory() = default;
|
||||
|
||||
#ifdef DNNL_EXPERIMENTAL_SPARSE
|
||||
/// Constructs a memory object.
|
||||
///
|
||||
/// Unless @p handle is equal to #DNNL_MEMORY_NONE, the constructed memory
|
||||
@ -3383,43 +3353,6 @@ struct memory : public handle<dnnl_memory_t> {
|
||||
error::wrap_c_api(status, "could not create a memory object");
|
||||
reset(result);
|
||||
}
|
||||
#else
|
||||
/// Constructs a memory object.
|
||||
///
|
||||
/// Unless @p handle is equal to #DNNL_MEMORY_NONE, the constructed memory
|
||||
/// object will have the underlying buffer set. In this case, the buffer
|
||||
/// will be initialized as if #dnnl::memory::set_data_handle() had been
|
||||
/// called.
|
||||
///
|
||||
/// @sa memory::set_data_handle()
|
||||
///
|
||||
/// @param md Memory descriptor.
|
||||
/// @param aengine Engine to store the data on.
|
||||
/// @param handle Handle of the memory buffer to use.
|
||||
/// - A pointer to the user-allocated buffer. In this case the library
|
||||
/// doesn't own the buffer.
|
||||
/// - The #DNNL_MEMORY_ALLOCATE special value. Instructs the library to
|
||||
/// allocate the buffer for the memory object. In this case the
|
||||
/// library owns the buffer.
|
||||
/// - #DNNL_MEMORY_NONE to create dnnl::memory without an underlying
|
||||
/// buffer.
|
||||
memory(const desc &md, const engine &aengine, void *handle) {
|
||||
dnnl_memory_t result;
|
||||
error::wrap_c_api(
|
||||
dnnl_memory_create(&result, md.get(), aengine.get(), handle),
|
||||
"could not create a memory object");
|
||||
reset(result);
|
||||
}
|
||||
|
||||
/// Constructs a memory object.
|
||||
///
|
||||
/// The underlying buffer for the memory will be allocated by the library.
|
||||
///
|
||||
/// @param md Memory descriptor.
|
||||
/// @param aengine Engine to store the data on.
|
||||
memory(const desc &md, const engine &aengine)
|
||||
: memory(md, aengine, DNNL_MEMORY_ALLOCATE) {}
|
||||
#endif
|
||||
|
||||
/// Returns the associated memory descriptor.
|
||||
desc get_desc() const {
|
||||
@ -3440,7 +3373,6 @@ struct memory : public handle<dnnl_memory_t> {
|
||||
return engine(c_engine, true);
|
||||
}
|
||||
|
||||
#ifdef DNNL_EXPERIMENTAL_SPARSE
|
||||
/// Returns an underlying memory buffer that corresponds to the given index.
|
||||
///
|
||||
/// On the CPU engine, or when using USM, this is a pointer to the
|
||||
@ -3511,73 +3443,6 @@ struct memory : public handle<dnnl_memory_t> {
|
||||
error::wrap_c_api(dnnl_memory_unmap_data_v2(get(), mapped_ptr, index),
|
||||
"could not unmap memory object data");
|
||||
}
|
||||
#else
|
||||
/// Returns the underlying memory buffer.
|
||||
///
|
||||
/// On the CPU engine, or when using USM, this is a pointer to the
|
||||
/// allocated memory.
|
||||
void *get_data_handle() const {
|
||||
void *handle;
|
||||
error::wrap_c_api(dnnl_memory_get_data_handle(get(), &handle),
|
||||
"could not get a native handle from a memory object");
|
||||
return handle;
|
||||
}
|
||||
|
||||
/// Sets the underlying memory buffer.
|
||||
///
|
||||
/// @param handle Memory buffer to use. On the CPU engine or when USM is
|
||||
/// used, the memory buffer is a pointer to the actual data. For OpenCL
|
||||
/// it is a cl_mem. It must have at least
|
||||
/// #dnnl::memory::desc::get_size() bytes allocated.
|
||||
void set_data_handle(void *handle) const {
|
||||
error::wrap_c_api(dnnl_memory_set_data_handle(get(), handle),
|
||||
"could not set native handle of a memory object");
|
||||
}
|
||||
|
||||
/// Maps a memory object and returns a host-side pointer to a memory
|
||||
/// buffer with a copy of its contents.
|
||||
///
|
||||
/// Mapping enables read/write directly from/to the memory contents for
|
||||
/// engines that do not support direct memory access.
|
||||
///
|
||||
/// Mapping is an exclusive operation - a memory object cannot be used in
|
||||
/// other operations until it is unmapped via #dnnl::memory::unmap_data()
|
||||
/// call.
|
||||
///
|
||||
/// @note
|
||||
/// Any primitives working with the memory should be completed before
|
||||
/// the memory is mapped. Use #dnnl::stream::wait() to synchronize the
|
||||
/// corresponding execution stream.
|
||||
///
|
||||
/// @note
|
||||
/// The map_data and unmap_data functions are provided mainly for
|
||||
/// debug and testing purposes and their performance may be suboptimal.
|
||||
///
|
||||
/// @tparam T Data type to return a pointer to.
|
||||
/// @returns Pointer to the mapped memory.
|
||||
template <typename T = void>
|
||||
T *map_data() const {
|
||||
void *mapped_ptr;
|
||||
error::wrap_c_api(dnnl_memory_map_data(get(), &mapped_ptr),
|
||||
"could not map memory object data");
|
||||
return static_cast<T *>(mapped_ptr);
|
||||
}
|
||||
|
||||
/// Unmaps a memory object and writes back any changes made to the
|
||||
/// previously mapped memory buffer.
|
||||
///
|
||||
/// @note
|
||||
/// The map_data and unmap_data functions are provided mainly for
|
||||
/// debug and testing purposes and their performance may be
|
||||
/// suboptimal.
|
||||
///
|
||||
/// @param mapped_ptr A pointer previously returned by
|
||||
/// #dnnl::memory::map_data().
|
||||
void unmap_data(void *mapped_ptr) const {
|
||||
error::wrap_c_api(dnnl_memory_unmap_data(get(), mapped_ptr),
|
||||
"could not unmap memory object data");
|
||||
}
|
||||
#endif
|
||||
|
||||
static dnnl_data_type_t convert_to_c(data_type adata_type) {
|
||||
return static_cast<dnnl_data_type_t>(adata_type);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright 2019-2024 Intel Corporation
|
||||
* Copyright 2019-2025 Intel Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -163,9 +163,6 @@
|
||||
// When defined, experimental features are enabled.
|
||||
#cmakedefine DNNL_EXPERIMENTAL
|
||||
|
||||
// When defined, experimental functionality for sparse domain is enabled.
|
||||
#cmakedefine DNNL_EXPERIMENTAL_SPARSE
|
||||
|
||||
// When defined, experimental functionality for ukernels is enabled.
|
||||
#cmakedefine DNNL_EXPERIMENTAL_UKERNEL
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright 2018-2024 Intel Corporation
|
||||
* Copyright 2018-2025 Intel Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -37,9 +37,7 @@ const char DNNL_API *dnnl_dt2str(dnnl_data_type_t v);
|
||||
const char DNNL_API *dnnl_fpmath_mode2str(dnnl_fpmath_mode_t v);
|
||||
const char DNNL_API *dnnl_accumulation_mode2str(dnnl_accumulation_mode_t v);
|
||||
const char DNNL_API *dnnl_engine_kind2str(dnnl_engine_kind_t v);
|
||||
#ifdef DNNL_EXPERIMENTAL_SPARSE
|
||||
const char DNNL_API *dnnl_sparse_encoding2str(dnnl_sparse_encoding_t v);
|
||||
#endif
|
||||
const char DNNL_API *dnnl_fmt_tag2str(dnnl_format_tag_t v);
|
||||
const char DNNL_API *dnnl_prop_kind2str(dnnl_prop_kind_t v);
|
||||
const char DNNL_API *dnnl_prim_kind2str(dnnl_primitive_kind_t v);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright 2020-2024 Intel Corporation
|
||||
* Copyright 2020-2025 Intel Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -75,7 +75,6 @@ dnnl_status_t DNNL_API dnnl_ocl_interop_memory_create(dnnl_memory_t *memory,
|
||||
const_dnnl_memory_desc_t memory_desc, dnnl_engine_t engine,
|
||||
dnnl_ocl_interop_memory_kind_t memory_kind, void *handle);
|
||||
|
||||
#ifdef DNNL_EXPERIMENTAL_SPARSE
|
||||
/// Creates a memory object with multiple handles.
|
||||
///
|
||||
/// @param memory Output memory object.
|
||||
@ -102,7 +101,6 @@ dnnl_status_t DNNL_API dnnl_ocl_interop_memory_create_v2(dnnl_memory_t *memory,
|
||||
const_dnnl_memory_desc_t memory_desc, dnnl_engine_t engine,
|
||||
dnnl_ocl_interop_memory_kind_t memory_kind, int nhandles,
|
||||
void **handles);
|
||||
#endif
|
||||
|
||||
/// Returns the memory allocation kind associated with a memory object.
|
||||
///
|
||||
|
@ -236,7 +236,6 @@ inline memory_kind get_memory_kind(const memory &amemory) {
|
||||
return static_cast<memory_kind>(ckind);
|
||||
}
|
||||
|
||||
#ifdef DNNL_EXPERIMENTAL_SPARSE
|
||||
/// Creates a memory object with multiple handles.
|
||||
///
|
||||
/// @param memory_desc Memory descriptor.
|
||||
@ -340,61 +339,6 @@ inline memory make_memory(const memory::desc &memory_desc,
|
||||
const engine &aengine, cl_mem mem_object) {
|
||||
return make_memory(memory_desc, aengine, std::vector<cl_mem> {mem_object});
|
||||
}
|
||||
#else
|
||||
|
||||
/// Creates a memory object.
|
||||
///
|
||||
/// Unless @p handle is equal to DNNL_MEMORY_NONE or DNNL_MEMORY_ALLOCATE, the
|
||||
/// constructed memory object will have the underlying buffer set. In this
|
||||
/// case, the buffer will be initialized as if:
|
||||
/// - dnnl::memory::set_data_handle() had been called, if @p memory_kind is
|
||||
/// equal to dnnl::ocl_interop::memory_kind::usm, or
|
||||
/// - dnnl::ocl_interop::set_mem_object() has been called, if @p memory_kind is
|
||||
/// equal to dnnl::ocl_interop::memory_kind::buffer.
|
||||
///
|
||||
/// @param memory_desc Memory descriptor.
|
||||
/// @param aengine Engine to use.
|
||||
/// @param kind Memory allocation kind to specify the type of handle.
|
||||
/// @param handle Handle of the memory buffer to use as an underlying storage.
|
||||
/// - A USM pointer to the user-allocated buffer. In this case the library
|
||||
/// doesn't own the buffer. Requires @p memory_kind to be equal to
|
||||
/// dnnl::ocl_interop::memory_kind::usm.
|
||||
/// - An OpenCL buffer. In this case the library doesn't own the buffer.
|
||||
/// Requires @p memory_kind to be equal to
|
||||
/// dnnl::ocl_interop::memory_kind::buffer.
|
||||
/// - The DNNL_MEMORY_ALLOCATE special value. Instructs the library to
|
||||
/// allocate the buffer that corresponds to the memory allocation kind
|
||||
/// @p memory_kind for the memory object. In this case the library
|
||||
/// owns the buffer.
|
||||
/// - The DNNL_MEMORY_NONE specific value. Instructs the library to
|
||||
/// create memory object without an underlying buffer.
|
||||
///
|
||||
/// @returns Created memory object.
|
||||
inline memory make_memory(const memory::desc &memory_desc,
|
||||
const engine &aengine, memory_kind kind,
|
||||
void *handle = DNNL_MEMORY_ALLOCATE) {
|
||||
dnnl_memory_t c_memory;
|
||||
error::wrap_c_api(
|
||||
dnnl_ocl_interop_memory_create(&c_memory, memory_desc.get(),
|
||||
aengine.get(), convert_to_c(kind), handle),
|
||||
"could not create a memory");
|
||||
return memory(c_memory);
|
||||
}
|
||||
|
||||
/// Constructs a memory object from an OpenCL buffer.
|
||||
///
|
||||
/// @param memory_desc Memory descriptor.
|
||||
/// @param aengine Engine to use.
|
||||
/// @param mem_object An OpenCL buffer to use.
|
||||
///
|
||||
/// @returns Created memory object.
|
||||
inline memory make_memory(const memory::desc &memory_desc,
|
||||
const engine &aengine, cl_mem mem_object) {
|
||||
memory amemory(memory_desc, aengine, DNNL_MEMORY_NONE);
|
||||
set_mem_object(amemory, mem_object);
|
||||
return amemory;
|
||||
}
|
||||
#endif
|
||||
|
||||
/// Executes computations specified by the primitive in a specified stream and
|
||||
/// returns a SYCL event.
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright 2020-2024 Intel Corporation
|
||||
* Copyright 2020-2025 Intel Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -95,7 +95,6 @@ dnnl_status_t DNNL_API dnnl_sycl_interop_memory_create(dnnl_memory_t *memory,
|
||||
const_dnnl_memory_desc_t memory_desc, dnnl_engine_t engine,
|
||||
dnnl_sycl_interop_memory_kind_t memory_kind, void *handle);
|
||||
|
||||
#ifdef DNNL_EXPERIMENTAL_SPARSE
|
||||
/// Creates a memory object with multiple handles.
|
||||
///
|
||||
/// @param memory Output memory object.
|
||||
@ -123,7 +122,6 @@ dnnl_status_t DNNL_API dnnl_sycl_interop_memory_create_v2(dnnl_memory_t *memory,
|
||||
const_dnnl_memory_desc_t memory_desc, dnnl_engine_t engine,
|
||||
dnnl_sycl_interop_memory_kind_t memory_kind, int nhandles,
|
||||
void **handles);
|
||||
#endif
|
||||
|
||||
/// Returns the memory allocation kind associated with a memory object.
|
||||
///
|
||||
|
@ -206,7 +206,6 @@ inline memory_kind get_memory_kind(const memory &amemory) {
|
||||
return static_cast<memory_kind>(ckind);
|
||||
}
|
||||
|
||||
#ifdef DNNL_EXPERIMENTAL_SPARSE
|
||||
/// Creates a memory object with multiple handles.
|
||||
///
|
||||
/// @param memory_desc Memory descriptor.
|
||||
@ -281,47 +280,6 @@ inline memory make_memory(const memory::desc &memory_desc,
|
||||
return make_memory(
|
||||
memory_desc, aengine, kind, std::vector<void *> {handle});
|
||||
}
|
||||
#else
|
||||
|
||||
/// Creates a memory object.
|
||||
///
|
||||
/// Unless @p handle is equal to DNNL_MEMORY_NONE or DNNL_MEMORY_ALLOCATE, the
|
||||
/// constructed memory object will have the underlying buffer set. In this
|
||||
/// case, the buffer will be initialized as if:
|
||||
/// - dnnl::memory::set_data_handle() had been called, if @p memory_kind is
|
||||
/// equal to dnnl::sycl_interop::memory_kind::usm, or
|
||||
/// - dnnl::sycl_interop::set_buffer() has been called, if @p memory_kind is
|
||||
/// equal to dnnl::sycl_interop::memory_kind::buffer.
|
||||
///
|
||||
/// @param memory_desc Memory descriptor.
|
||||
/// @param aengine Engine to use.
|
||||
/// @param kind Memory allocation kind to specify the type of handle.
|
||||
/// @param handle Handle of the memory buffer to use as an underlying storage.
|
||||
/// - A USM pointer to the user-allocated buffer. In this case the library
|
||||
/// doesn't own the buffer. Requires @p memory_kind to be equal to
|
||||
/// dnnl::sycl_interop::memory_kind::usm.
|
||||
/// - A pointer to SYCL buffer. In this case the library doesn't own the
|
||||
/// buffer. Requires @p memory_kind to be equal to
|
||||
/// dnnl::sycl_interop::memory_kind::buffer.
|
||||
/// - The DNNL_MEMORY_ALLOCATE special value. Instructs the library to
|
||||
/// allocate the buffer that corresponds to the memory allocation kind
|
||||
/// @p memory_kind for the memory object. In this case the library
|
||||
/// owns the buffer.
|
||||
/// - The DNNL_MEMORY_NONE specific value. Instructs the library to
|
||||
/// create memory object without an underlying buffer.
|
||||
///
|
||||
/// @returns Created memory object.
|
||||
inline memory make_memory(const memory::desc &memory_desc,
|
||||
const engine &aengine, memory_kind kind,
|
||||
void *handle = DNNL_MEMORY_ALLOCATE) {
|
||||
dnnl_memory_t c_memory;
|
||||
error::wrap_c_api(
|
||||
dnnl_sycl_interop_memory_create(&c_memory, memory_desc.get(),
|
||||
aengine.get(), convert_to_c(kind), handle),
|
||||
"could not create a memory");
|
||||
return memory(c_memory);
|
||||
}
|
||||
#endif
|
||||
|
||||
/// Constructs a memory object from a SYCL buffer.
|
||||
///
|
||||
|
@ -52,18 +52,16 @@ typedef enum {
|
||||
dnnl_blocked,
|
||||
/// A special format kind that indicates that tensor format is opaque.
|
||||
dnnl_format_kind_opaque,
|
||||
#ifdef DNNL_EXPERIMENTAL_SPARSE
|
||||
/// Format kind for sparse tensors.
|
||||
dnnl_format_kind_sparse,
|
||||
#endif
|
||||
/// Parameter to allow internal only format kinds without undefined
|
||||
/// behavior. This parameter is chosen to be valid for so long as
|
||||
/// sizeof(int) >= 2.
|
||||
dnnl_format_kind_max = 0x7fff,
|
||||
} dnnl_format_kind_t;
|
||||
|
||||
#ifdef DNNL_EXPERIMENTAL_SPARSE
|
||||
/// Sparse encodings.
|
||||
/// @sa @ref dev_guide_sparsity
|
||||
typedef enum {
|
||||
/// Undefined sparse encoding kind, used for empty memory descriptors.
|
||||
dnnl_sparse_encoding_undef = 0,
|
||||
@ -78,7 +76,6 @@ typedef enum {
|
||||
/// Coordinate Sparse Encoding (COO).
|
||||
dnnl_coo,
|
||||
} dnnl_sparse_encoding_t;
|
||||
#endif
|
||||
|
||||
#ifdef DNNL_EXPERIMENTAL_PROFILING
|
||||
/// Profiling data kind.
@ -2831,12 +2828,9 @@ typedef enum {
    dnnl_query_inner_nblks_s32, ///< number of innermost blocks
    dnnl_query_inner_blks, ///< vector of sizes of the innermost blocks
    dnnl_query_inner_idxs, ///< vector of logical indices of the blocks
#ifdef DNNL_EXPERIMENTAL_SPARSE
    dnnl_query_sparse_encoding, ///< Sparse encoding
    dnnl_query_nnz_s64, ///< Number of non-zero entries
    dnnl_query_num_handles_s32, ///< Number of buffers required for a memory
                                /// descriptor
#endif
    dnnl_query_num_handles_s32, ///< Number of buffers required for a memory descriptor
    // Max value to prevent UB for internal use only dnnl_query_t
    dnnl_query_max = 0x7fff,
} dnnl_query_t;
@ -10,8 +10,7 @@ should be executed to re-generate the debug header and relevant source code.

```sh
# Generate dnnl_config.h
# -DDNNL_EXPERIMENTAL_SPARSE=ON is required to preserve sparse-specific symbols
$ (mkdir -p build && cd build && cmake -DONEDNN_BUILD_GRAPH=OFF -DDNNL_EXPERIMENTAL_SPARSE=ON ..)
$ (mkdir -p build && cd build && cmake -DONEDNN_BUILD_GRAPH=OFF ..)

# Generate types.xml
# CastXML can be found at https://github.com/CastXML/CastXML
@ -103,9 +103,7 @@ const char *dt2str(dnnl_data_type_t dt);
const char *fmt_tag2str(dnnl_format_tag_t tag);

/* encoding */
#ifdef DNNL_EXPERIMENTAL_SPARSE
const char *sparse_encoding2str(dnnl_sparse_encoding_t encoding);
#endif

/* engine kind */
const char *engine_kind2str(dnnl_engine_kind_t kind);
@ -155,11 +153,9 @@ const char *fmt_tag2str(dnnl_format_tag_t tag) {
    return dnnl_fmt_tag2str(tag);
}

#ifdef DNNL_EXPERIMENTAL_SPARSE
const char *sparse_encoding2str(dnnl_sparse_encoding_t encoding) {
    return dnnl_sparse_encoding2str(encoding);
}
#endif

const char *engine_kind2str(dnnl_engine_kind_t kind) {
    return dnnl_engine_kind2str(kind);
@ -307,31 +303,15 @@ def generate(ifile, banners):
            continue
        values = [v_value.attrib["name"] for v_value in v_enum.findall("EnumValue")]

        if enum in ["dnnl_sparse_encoding_t"]:
            h_body += "#ifdef DNNL_EXPERIMENTAL_SPARSE\n"
            s_body += "#ifdef DNNL_EXPERIMENTAL_SPARSE\n"

        h_body += func_to_str_decl(enum, is_header=True) + ";\n"
        s_body += func_to_str(enum, values) + "\n"

        if enum in ["dnnl_sparse_encoding_t"]:
            h_body += "#endif\n"
            s_body += "#endif\n"

        if enum in ["dnnl_format_tag_t", "dnnl_data_type_t", "dnnl_sparse_encoding_t"]:
            if enum in ["dnnl_sparse_encoding_t"]:
                h_benchdnn_body += "#ifdef DNNL_EXPERIMENTAL_SPARSE\n"
                s_benchdnn_body += "#ifdef DNNL_EXPERIMENTAL_SPARSE\n"

            h_benchdnn_body += (
                str_to_func_decl(enum, is_header=True, is_dnnl=False) + ";\n"
            )
            s_benchdnn_body += str_to_func(enum, values, is_dnnl=False) + "\n"

            if enum in ["dnnl_sparse_encoding_t"]:
                h_benchdnn_body += "#endif\n"
                s_benchdnn_body += "#endif\n"

    bodies = [
        header(h_body),
        source(s_body),
@ -349,8 +329,7 @@ def usage():
Generates oneDNN debug header and source files with enum to string mapping.
Input types.xml file can be obtained with CastXML[1]:
    $ castxml --castxml-cc-gnu-c clang --castxml-output=1 \\
        -DDNNL_EXPERIMENTAL_SPARSE -Iinclude -Ibuild/include \\
        include/oneapi/dnnl/dnnl_types.h -o types.xml
        -Iinclude -Ibuild/include include/oneapi/dnnl/dnnl_types.h -o types.xml

[1] https://github.com/CastXML/CastXML"""
        % sys.argv[0]
@ -76,10 +76,6 @@ if(DNNL_EXPERIMENTAL)
    message(STATUS "Experimental features are enabled")
endif()

if(DNNL_EXPERIMENTAL_SPARSE)
    message(STATUS "Experimental functionality for sparse domain is enabled")
endif()

if(DNNL_EXPERIMENTAL_UKERNEL)
    if(DNNL_TARGET_ARCH STREQUAL "X64" OR DNNL_TARGET_ARCH STREQUAL "AARCH64")
        message(STATUS "Experimental functionality for ukernels is enabled")
@ -206,7 +206,6 @@ const rounding_mode_t environment = dnnl_rounding_mode_environment;
const rounding_mode_t stochastic = dnnl_rounding_mode_stochastic;
} // namespace rounding_mode

#ifdef DNNL_EXPERIMENTAL_SPARSE
using sparse_encoding_t = dnnl_sparse_encoding_t;
namespace sparse_encoding {
const sparse_encoding_t undef = dnnl_sparse_encoding_undef;
@ -214,16 +213,6 @@ const sparse_encoding_t csr = dnnl_csr;
const sparse_encoding_t coo = dnnl_coo;
const sparse_encoding_t packed = dnnl_packed;
} // namespace sparse_encoding
#else
// Declare dummy values to avoid guarding internal implementation.
using sparse_encoding_t = int;
namespace sparse_encoding {
const sparse_encoding_t undef = 0;
const sparse_encoding_t csr = 1;
const sparse_encoding_t packed = 2;
const sparse_encoding_t coo = 3;
} // namespace sparse_encoding
#endif

using format_kind_t = dnnl_format_kind_t;
namespace format_kind {
@ -231,11 +220,7 @@ const format_kind_t undef = dnnl_format_kind_undef;
const format_kind_t any = dnnl_format_kind_any;
const format_kind_t blocked = dnnl_blocked;
const format_kind_t opaque = dnnl_format_kind_opaque;
#ifdef DNNL_EXPERIMENTAL_SPARSE
const format_kind_t sparse = dnnl_format_kind_sparse;
#else
const format_kind_t sparse = static_cast<format_kind_t>(4);
#endif

// Internal only format kinds.
const format_kind_t internal_only_start = (format_kind_t)(1 << 8);
@ -2054,15 +2039,9 @@ const query_t inner_nblks_s32 = dnnl_query_inner_nblks_s32;
const query_t inner_blks = dnnl_query_inner_blks;
const query_t inner_idxs = dnnl_query_inner_idxs;

#ifdef DNNL_EXPERIMENTAL_SPARSE
const query_t sparse_encoding = dnnl_query_sparse_encoding;
const query_t nnz_s64 = dnnl_query_nnz_s64;
const query_t num_handles_s32 = dnnl_query_num_handles_s32;
#else
const query_t sparse_encoding = static_cast<query_t>(266);
const query_t nnz_s64 = static_cast<query_t>(267);
const query_t num_handles_s32 = static_cast<query_t>(268);
#endif

// Internal only query kinds.
const query_t internal_only_start = (query_t)(1 << 12);
@ -1,5 +1,5 @@
/*******************************************************************************
 * Copyright 2019-2023 Intel Corporation
 * Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@ -46,9 +46,7 @@ const char *dnnl_fmt_kind2str(dnnl_format_kind_t v) {
    if (v == dnnl_format_kind_undef) return "undef";
    if (v == dnnl_format_kind_any) return "any";
    if (v == dnnl_blocked) return "blocked";
#ifdef DNNL_EXPERIMENTAL_SPARSE
    if (v == dnnl_format_kind_sparse) return "sparse";
#endif
    if (v == format_kind::wino || v == format_kind::rnn_packed
            || v == format_kind::cublaslt_blocked)
        return "opaque";
@ -94,7 +94,6 @@ const char *dnnl_engine_kind2str(dnnl_engine_kind_t v) {
    return "unknown engine_kind";
}

#ifdef DNNL_EXPERIMENTAL_SPARSE
const char *dnnl_sparse_encoding2str(dnnl_sparse_encoding_t v) {
    if (v == dnnl_sparse_encoding_undef) return "undef";
    if (v == dnnl_csr) return "csr";
@ -104,7 +103,6 @@ const char *dnnl_sparse_encoding2str(dnnl_sparse_encoding_t v) {
    return "unknown sparse_encoding";
}

#endif
const char *dnnl_fmt_tag2str(dnnl_format_tag_t v) {
    if (v == dnnl_format_tag_undef) return "undef";
    if (v == dnnl_format_tag_any) return "any";
@ -91,7 +91,6 @@ dnnl_memory::dnnl_memory(dnnl::impl::engine_t *engine,
    this->reset_memory_storage(std::move(memory_storage));
}

#ifdef DNNL_EXPERIMENTAL_SPARSE
dnnl_memory::dnnl_memory(dnnl::impl::engine_t *engine,
        const dnnl::impl::memory_desc_t *md,
        std::vector<std::unique_ptr<dnnl::impl::memory_storage_t>>
@ -100,7 +99,6 @@ dnnl_memory::dnnl_memory(dnnl::impl::engine_t *engine,
    , md_(*md)
    , memory_storages_(std::move(memory_storages))
    , counter_(1) {}
#endif

status_t dnnl_memory::set_data_handle(void *handle, int index) const {
    using namespace dnnl::impl;
@ -170,7 +168,6 @@ status_t dnnl_memory_create(memory_t **memory, const memory_desc_t *md,
    return success;
}

#ifdef DNNL_EXPERIMENTAL_SPARSE
status_t dnnl_memory_create_v2(memory_t **memory, const memory_desc_t *md,
        engine_t *engine, int nhandles, void **handles) {
    const bool args_ok = !any_null(memory, engine, handles) && nhandles > 0;
@ -213,7 +210,6 @@ status_t dnnl_memory_create_v2(memory_t **memory, const memory_desc_t *md,
    *memory = _memory;
    return success;
}
#endif

status_t dnnl_memory_get_memory_desc(
        const memory_t *memory, const memory_desc_t **md) {
@ -1,5 +1,5 @@
/*******************************************************************************
 * Copyright 2018-2024 Intel Corporation
 * Copyright 2018-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@ -55,12 +55,10 @@ struct dnnl_memory : public dnnl::impl::c_compatible {
    dnnl_memory(dnnl::impl::engine_t *engine,
            const dnnl::impl::memory_desc_t *md,
            std::unique_ptr<dnnl::impl::memory_storage_t> &&memory_storage);
#ifdef DNNL_EXPERIMENTAL_SPARSE
    dnnl_memory(dnnl::impl::engine_t *engine,
            const dnnl::impl::memory_desc_t *md,
            std::vector<std::unique_ptr<dnnl::impl::memory_storage_t>>
                    &&memory_storage);
#endif

    /** returns memory's engine */
    dnnl::impl::engine_t *engine() const { return engine_; }
@ -755,7 +755,6 @@ status_t dnnl_memory_desc_query(
    return status::success;
}

#ifdef DNNL_EXPERIMENTAL_SPARSE
status_t dnnl_memory_desc_query_v2(
        const memory_desc_t *md, query_t what, int index, void *result) {
    if (any_null(md, result)) return invalid_arguments;
@ -801,7 +800,6 @@ status_t dnnl_memory_desc_query_v2(
    }
    return status::success;
}
#endif

status_t dnnl_memory_desc_destroy(memory_desc_t *memory_desc) {
    delete memory_desc;
@ -174,7 +174,6 @@ size_t get_md_hash(const memory_desc_t &md) {
                    seed, md.format_desc.rnn_packed_desc.offset_compensation);
            seed = hash_combine(seed, md.format_desc.rnn_packed_desc.size);
            break;
#ifdef DNNL_EXPERIMENTAL_SPARSE
        case format_kind::sparse:
            seed = hash_combine(seed,
                    static_cast<size_t>(md.format_desc.sparse_desc.encoding));
@ -185,7 +184,6 @@ size_t get_md_hash(const memory_desc_t &md) {
            // User cannot initialize `packed_desc` therefore `packed_desc`
            // is always zero initialized.
            break;
#endif
        default: assert(!"unknown format_kind");
    }

@ -998,10 +998,8 @@ inline bool operator==(const sdpa_desc_t &lhs, const sdpa_desc_t &rhs) {

inline bool is_dense_format_kind(
        const std::vector<const memory_desc_t *> &mds) {
#ifdef DNNL_EXPERIMENTAL_SPARSE
    for (const auto *md : mds)
        if (md->format_kind == format_kind::sparse) return false;
#endif
    return true;
}

@ -131,13 +131,6 @@ void print_header() noexcept {
    verbose_printf("info,GPU convolution v2 is %s\n",
            experimental::use_gpu_conv_v2() ? "enabled" : "disabled");
#endif

#ifdef DNNL_EXPERIMENTAL_SPARSE
    verbose_printf(
            "info,experimental functionality for sparse domain is "
            "enabled\n");
#endif

    verbose_printf(
            "primitive,info,template:%soperation,engine,primitive,"
            "implementation,prop_kind,memory_descriptors,attributes,"
@ -354,12 +347,10 @@ std::ostream &operator<<(std::ostream &ss, format_kind_t format_kind) {
    return ss;
}

#ifdef DNNL_EXPERIMENTAL_SPARSE
std::ostream &operator<<(std::ostream &ss, sparse_encoding_t encoding) {
    ss << dnnl_sparse_encoding2str(encoding);
    return ss;
}
#endif

std::string normalization_flags2str(unsigned flags) {
    std::string s;
@ -32,6 +32,9 @@ namespace aarch64 {
status_t acl_binary_t::pd_t::init(engine_t *engine) {
    using namespace acl_utils;

    if (!impl::is_dense_format_kind({src_md(0), src_md(1), dst_md()}))
        return status::unimplemented;

    // Only support f16/f32/s32 for now
    data_type_t ddt = dst_md(0)->data_type;
    if (!utils::one_of(ddt, data_type::f16, data_type::f32, data_type::s32))
@ -50,7 +50,8 @@ status_t acl_depthwise_convolution_fwd_t::pd_t::init(engine_t *engine) {
            primitive_attr_t::skip_mask_t::post_ops, f32);
    bool ok = is_fwd() && set_default_alg_kind(alg_kind::convolution_direct)
            && utils::one_of(true, is_fp16_ok, is_fp32_ok)
            && !has_zero_dim_memory();
            && !has_zero_dim_memory()
            && impl::is_dense_format_kind({src_md(), weights_md(), dst_md()});
    if (!ok) return status::unimplemented;

    if (weights_md_.ndims != 5) return status::unimplemented;
@ -73,8 +73,9 @@ status_t acl_eltwise_fwd_t::pd_t::init(engine_t *engine) {

    bool ok = is_fwd() && one_of(src_d.data_type(), f32, f16, s32, s8)
            && !has_zero_dim_memory() && attr()->has_default_values()
            && set_default_formats_common() && src_d.is_dense()
            && src_d == memory_desc_wrapper(dst_md());
            && set_default_formats_common()
            && src_d == memory_desc_wrapper(dst_md())
            && impl::is_dense_format_kind({src_md(), dst_md()});
    if (!ok) return status::unimplemented;

    // Workaround for the inaccuracies caused by
@ -58,7 +58,8 @@ status_t acl_gemm_convolution_fwd_t<src_t, wei_t, dst_t, bia_t>::pd_t::init(
            && expect_data_types(src_t, wei_t, bia_t, dst_t, undef)
            && !has_zero_dim_memory()
            && attr()->has_default_values(
                    smask_t::post_ops | smask_t::fpmath_mode, dst_t);
                    smask_t::post_ops | smask_t::fpmath_mode, dst_t)
            && impl::is_dense_format_kind({src_md(), weights_md(), dst_md()});
    if (!ok) return status::unimplemented;

    if (weights_md_.ndims != 4) return status::unimplemented;
@ -103,7 +103,8 @@ status_t acl_indirect_gemm_convolution_fwd_t::pd_t::init(engine_t *engine) {
            smask_t::post_ops | smask_t::fpmath_mode, f32);
    bool ok = is_fwd() && set_default_alg_kind(alg_kind::convolution_direct)
            && utils::one_of(true, is_fp16_ok, is_bf16_ok, is_fp32_ok)
            && !has_zero_dim_memory();
            && !has_zero_dim_memory()
            && impl::is_dense_format_kind({src_md(), weights_md(), dst_md()});
    if (!ok) return status::unimplemented;

    CHECK(init_conf());
@ -34,7 +34,8 @@ status_t acl_softmax_fwd_t::pd_t::init(engine_t *engine) {
            && *src_md() == *dst_md()
            && utils::one_of(
                    src_md()->data_type, data_type::f32, data_type::f16)
            && attr()->has_default_values();
            && attr()->has_default_values()
            && impl::is_dense_format_kind({src_md(), dst_md()});
    if (!ok) return status::unimplemented;

    // Get memory desc to find sizes and dims
@ -117,7 +117,8 @@ status_t brdgmm_dw_convolution_fwd_t<isa>::pd_t::init(engine_t *engine) {
                    one_of(bia_type, data_type::undef, f32, s32, s8, u8))
            && IMPLICATION(!is_int8,
                    one_of(bia_type, data_type::undef, src_type, dst_type))
            && attr()->has_default_values(skip_mask) && !has_zero_dim_memory();
            && attr()->has_default_values(skip_mask) && !has_zero_dim_memory()
            && impl::is_dense_format_kind({src_md(), weights_md(), dst_md()});
    if (!ok) { return status::unimplemented; }

    auto &jcp = jcp_;
@ -67,7 +67,8 @@ status_t brgemm_1x1_convolution_fwd_t<isa>::pd_t::init(engine_t *engine) {
                    one_of(bias_md_.data_type, data_type::undef, f32, src_type))
            && attr()->has_default_values(skip_mask, dst_type)
            && attr()->post_ops_.check_sum_consistency(dst_type, is_int8)
            && !has_zero_dim_memory() && zero_points_ok() && arg_scales_ok();
            && !has_zero_dim_memory() && zero_points_ok() && arg_scales_ok()
            && impl::is_dense_format_kind({src_md(), weights_md(), dst_md()});
    if (!ok) return status::unimplemented;

    CHECK(brgemm_convolution_utils::init_1x1_conf(jcp_, isa, *desc(), src_md_,
@ -322,7 +322,8 @@ status_t brgemm_convolution_fwd_t<isa>::pd_t::init(engine_t *engine) {
                    one_of(bias_md_.data_type, data_type::undef, f32, src_type))
            && attr()->has_default_values(skip_mask, dst_type)
            && attr()->post_ops_.check_sum_consistency(dst_type, is_int8)
            && !has_zero_dim_memory() && zero_points_ok() && arg_scales_ok();
            && !has_zero_dim_memory() && zero_points_ok() && arg_scales_ok()
            && impl::is_dense_format_kind({src_md(), weights_md(), dst_md()});
    if (!ok) return status::unimplemented;

    CHECK(brgemm_convolution_utils::init_conf(jcp_, isa, *desc(), src_md_,
@ -108,6 +108,10 @@ status_t brgemm_convolution_bwd_t<isa>::pd_t::init(engine_t *engine) {
            VERBOSE_BAD_ALGORITHM);
    VDISPATCH_CONV(!has_zero_dim_memory(), VERBOSE_EMPTY_TENSOR, "");
    VDISPATCH_CONV(attr()->has_default_values(), VERBOSE_UNSUPPORTED_ATTR);
    VDISPATCH_CONV(
            impl::is_dense_format_kind({src_md(), diff_src_md(), weights_md(0),
                    weights_md(1), dst_md(), diff_dst_md()}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    convolution_desc_t fwd_conv_d = convolution_desc_t();
    CHECK(fwd_conv_desc_create(&fwd_conv_d, desc()));
@ -2288,7 +2288,8 @@ status_t jit_uni_batch_normalization_fwd_t<isa>::pd_t::init(engine_t *engine) {
            && (attr()->has_default_values()
                    || with_relu_post_op(is_training()))
            && set_default_formats_common()
            && memory_desc_wrapper(src_md()) == memory_desc_wrapper(dst_md());
            && memory_desc_wrapper(src_md()) == memory_desc_wrapper(dst_md())
            && impl::is_dense_format_kind({src_md(), dst_md()});
    if (!ok) return status::unimplemented;

    // BN+Add+Relu fusion is not currently implemented
@ -2394,7 +2395,9 @@ status_t jit_uni_batch_normalization_bwd_t<isa>::pd_t::init(engine_t *engine) {
            && check_scale_shift_data_type() && attr()->has_default_values()
            && set_default_formats_common()
            && memory_desc_wrapper(diff_src_md())
                    == memory_desc_wrapper(diff_dst_md());
                    == memory_desc_wrapper(diff_dst_md())
            && impl::is_dense_format_kind(
                    {src_md(), diff_src_md(), dst_md(), diff_dst_md()});
    if (!ok) return status::unimplemented;

    // BN+Add+Relu fusion is not currently implemented
@ -419,7 +419,8 @@ status_t jit_uni_batch_normalization_s8_fwd_t<isa>::pd_t::init(
            && memory_desc_matches_tag(*src_md(), desired_fmt_tag)
            && (attr()->has_default_values() || this->with_relu_post_op(false))
            && set_default_formats_common()
            && memory_desc_wrapper(src_md()) == memory_desc_wrapper(dst_md());
            && memory_desc_wrapper(src_md()) == memory_desc_wrapper(dst_md())
            && impl::is_dense_format_kind({src_md(), dst_md()});
    if (!ok) return status::unimplemented;

    // BN+Add+Relu fusion is not currently implemented
@ -354,7 +354,7 @@ status_t jit_uni_eltwise_int_fwd_t<isa, d_type>::pd_t::init(engine_t *engine) {
            && utils::one_of(desc()->alg_kind, alg_kind::eltwise_relu,
                    alg_kind::eltwise_linear)
            && !has_zero_dim_memory()
            && memory_desc_wrapper(src_md()).is_dense(true)
            && impl::is_dense_format_kind({src_md(), dst_md()})
            && attr()->has_default_values() && set_default_formats_common()
            && memory_desc_wrapper(src_md()) == memory_desc_wrapper(dst_md());

@ -64,6 +64,7 @@ status_t acl_lowp_matmul_t::pd_t::init(engine_t *engine) {
    VDISPATCH_MATMUL(attr()->has_default_values(smask_t::scales
                             | smask_t::zero_points | smask_t::post_ops),
            "only scale, zero point and post-ops attrs supported");
    VDISPATCH_MATMUL(is_dense_format_kind(), VERBOSE_UNSUPPORTED_SPARSE_CFG);

    static const std::vector<int> supported_args {
            DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST};
@ -51,6 +51,7 @@ status_t acl_lowp_matmul_sq_t::pd_t::init(engine_t *engine) {
    VDISPATCH_MATMUL(attr()->has_default_values(smask_t::scales
                             | smask_t::zero_points | smask_t::post_ops),
            "only scale, zero point and post-ops attrs supported");
    VDISPATCH_MATMUL(is_dense_format_kind(), VERBOSE_UNSUPPORTED_SPARSE_CFG);

    static const std::vector<int> supported_args {
            DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST};
@ -636,6 +636,7 @@ status_t jit_int8_matmul_t::pd_t::init(engine_t *engine) {

    VDISPATCH_MATMUL(
            no_runtime_dims_or_strides, VERBOSE_RUNTIMEDIM_UNSUPPORTED);
    VDISPATCH_MATMUL(is_dense_format_kind(), VERBOSE_UNSUPPORTED_SPARSE_CFG);

    bool is_s8_wei = utils::everyone_is(s8, wei_type);
    bool is_u8 = utils::everyone_is(u8, src_type, wei_type);
@ -40,6 +40,10 @@ status_t jit_uni_shuffle_t<isa>::pd_t::init(engine_t *engine) {
    const memory_desc_wrapper src_d(is_fwd() ? src_md() : diff_src_md());
    const memory_desc_wrapper dst_d(is_fwd() ? dst_md() : diff_dst_md());

    if (!impl::is_dense_format_kind({is_fwd() ? src_md() : diff_src_md(),
                is_fwd() ? dst_md() : diff_dst_md()}))
        return status::unimplemented;

    conf_.data_type = src_d.data_type();

    const bool ok = is_superset(get_max_cpu_isa(), isa)
@ -1,5 +1,5 @@
/*******************************************************************************
 * Copyright 2019-2024 Intel Corporation
 * Copyright 2019-2025 Intel Corporation
 * Copyright 2024-2025 FUJITSU LIMITED
 * Copyright 2021-2025 Arm Ltd. and affiliates
 *
@ -51,30 +51,8 @@ namespace {
using namespace dnnl::impl::data_type;
using namespace dnnl::impl::cpu::matmul;

// Some compilers do not allow guarding implementations with macros
// in the impl list.
#ifdef DNNL_EXPERIMENTAL_SPARSE

#define CPU_INSTANCE_SPARSE(...) \
    impl_list_item_t( \
            impl_list_item_t::type_deduction_helper_t<__VA_ARGS__::pd_t>()),

#if DNNL_X64
#define CPU_INSTANCE_SPARSE_X64(...) \
    impl_list_item_t( \
            impl_list_item_t::type_deduction_helper_t<__VA_ARGS__::pd_t>()),
#else
#define CPU_INSTANCE_SPARSE_X64(...)
#endif

#else
#define CPU_INSTANCE_SPARSE(...)
#define CPU_INSTANCE_SPARSE_X64(...)
#endif

// clang-format off
constexpr impl_list_item_t impl_list[] = REG_MATMUL_P({

        CPU_INSTANCE_AARCH64(brgemm_matmul_t<sve_512>)
        CPU_INSTANCE_AARCH64_ACL(acl_lowp_matmul_sq_t)
        CPU_INSTANCE_AARCH64_ACL(acl_lowp_matmul_t)
@ -96,19 +74,14 @@ constexpr impl_list_item_t impl_list[] = REG_MATMUL_P({
        CPU_INSTANCE_AVX2(brgemm_matmul_t<avx2>)
        CPU_INSTANCE(ref_matmul_t)
        CPU_INSTANCE(ref_matmul_int8_t)
        // These implementations are enabled only when DNNL_EXPERIMENTAL_SPARSE
        // macro is defined.
        CPU_INSTANCE_SPARSE_X64(jit_uni_sparse_matmul_t)
        CPU_INSTANCE_SPARSE(ref_sparse_matmul_t)
        CPU_INSTANCE_X64(jit_uni_sparse_matmul_t)
        CPU_INSTANCE(ref_sparse_matmul_t)
        /* eol */
        nullptr,
});
// clang-format on
} // namespace

#undef CPU_INSTANCE_SPARSE
#undef CPU_INSTANCE_SPARSE_X64

const impl_list_item_t *get_matmul_impl_list(const matmul_desc_t *desc) {
    UNUSED(desc);
    return impl_list;
@ -1,5 +1,5 @@
/*******************************************************************************
 * Copyright 2020-2024 Intel Corporation
 * Copyright 2020-2025 Intel Corporation
 * Copyright 2023 Arm Ltd. and affiliates
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
@ -99,24 +99,6 @@ extern const impl_list_map_t &comp_s8_s8_impl_list_map();

// clang-format off

// Some compilers do not allow guarding implementations with macros
// in the impl list.
#ifdef DNNL_EXPERIMENTAL_SPARSE

#if DNNL_X64
#define REG_SPARSE_SR_X64(idt, ifmt, odt, ofmt) \
    impl_list_item_t(impl_list_item_t::reorder_type_deduction_helper_t< \
            simple_sparse_reorder_t<idt, \
                    std::remove_const<decltype(ifmt)>::type, ifmt, odt, \
                    std::remove_const<decltype(ofmt)>::type, ofmt>::pd_t>()),
#else
#define REG_SPARSE_SR_X64(...)
#endif

#else
#define REG_SPARSE_SR_X64(...)
#endif

#define REG_SR(idt, ifmt, odt, ofmt, ...) \
    impl_list_item_t(impl_list_item_t::reorder_type_deduction_helper_t< \
            simple_reorder_t<idt, ifmt, odt, ofmt, __VA_ARGS__>::pd_t>()),
@ -1,5 +1,5 @@
/*******************************************************************************
 * Copyright 2020-2024 Intel Corporation
 * Copyright 2020-2025 Intel Corporation
 * Copyright 2022 FUJITSU LIMITED
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
@ -46,7 +46,7 @@ const impl_list_map_t &regular_f32_s8_impl_list_map() {

        REG_SR(f32, any, s8, any, fmt_order::any, spec::reference)

        REG_SPARSE_SR_X64(f32, any, s8, any)
        DNNL_X64_ONLY(CPU_REORDER_INSTANCE(simple_sparse_reorder_t<f32, impl::format_tag_t, any, s8, impl::format_tag_t, any>))

        nullptr,
    }},
@ -1,5 +1,5 @@
/*******************************************************************************
 * Copyright 2020-2024 Intel Corporation
 * Copyright 2020-2025 Intel Corporation
 * Copyright 2022 FUJITSU LIMITED
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
@ -66,7 +66,7 @@ const impl_list_map_t &regular_s8_impl_list_map() {
        REG_SR(s8, any, s8, any, fmt_order::any, spec::reference)
        REG_SR(s8, any, u8, any, fmt_order::any, spec::reference)

        REG_SPARSE_SR_X64(s8, any, s8, any)
        DNNL_X64_ONLY(CPU_REORDER_INSTANCE(simple_sparse_reorder_t<s8, impl::format_tag_t, any, s8, impl::format_tag_t, any>))

        nullptr,
    }},
@ -63,6 +63,8 @@ status_t simple_layer_normalization_fwd_t::pd_t::init(engine_t *engine) {
    // plain format, last logical dim is last physical
    VDISPATCH_LNORM(src_d.blocking_desc().strides[ndims() - 1] == 1,
            VERBOSE_BLOCKING_FAIL, "bad stride value");
    VDISPATCH_LNORM(impl::is_dense_format_kind({src_md(), dst_md()}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    CHECK(fill_compatible_stats_md(*src_md(), reordered_stat_md_));

@ -264,6 +266,9 @@ status_t simple_layer_normalization_bwd_t::pd_t::init(engine_t *engine) {
    // plain format, last logical dim is last physical
    VDISPATCH_LNORM(src_d.blocking_desc().strides[ndims() - 1] == 1,
            VERBOSE_BLOCKING_FAIL, "bad stride value");
    VDISPATCH_LNORM(impl::is_dense_format_kind(
                            {src_md(), diff_src_md(), dst_md(), diff_dst_md()}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    CHECK(fill_compatible_stats_md(*src_md(), reordered_stat_md_));

@ -164,6 +164,10 @@ status_t brdgmm_dw_convolution_fwd_t::pd_t::init(engine_t *engine) {
    const memory_desc_wrapper dst_d(&dst_md_);
    const memory_desc_wrapper bias_d(&bias_md_);

    VDISPATCH_CONV(
            impl::is_dense_format_kind({src_md(), weights_md(), dst_md()}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    // Big int (> INT_MAX) values are unsupported and jcp fields may overflow
    // TODO: change data type of jcp fields to size_t
    VDISPATCH_CONV_IC(!has_large_size(cd, src_d, weights_d, dst_d),
@ -52,6 +52,10 @@ status_t brgemm_1x1_convolution_fwd_t<isa>::pd_t::init(engine_t *engine) {
    const auto dst_type = dst_md(0)->data_type;
    const bool is_int8 = one_of(src_type, u8, s8);

    VDISPATCH_CONV(
            impl::is_dense_format_kind({src_md(), weights_md(), dst_md()}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    using skip_mask_t = primitive_attr_t::skip_mask_t;
    auto skip_mask = skip_mask_t::post_ops | skip_mask_t::sum_dt
            | skip_mask_t::zero_points | skip_mask_t::fpmath_mode;
@ -384,6 +384,9 @@ status_t brgemm_convolution_fwd_t<isa>::pd_t::init(engine_t *engine) {
            VERBOSE_UNSUPPORTED_POSTOP);
    VDISPATCH_CONV(zero_points_ok(), VERBOSE_UNSUPPORTED_ZP_CFG);
    VDISPATCH_CONV(arg_scales_ok(), VERBOSE_UNSUPPORTED_SCALES_CFG);
    VDISPATCH_CONV(
            impl::is_dense_format_kind({src_md(0), weights_md(0), dst_md(0)}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    CHECK(brgemm_convolution_utils::init_conf(jcp_, isa, *desc(), src_md_,
            weights_md_, dst_md_, bias_md_, attr_, dnnl_get_max_threads()));
@ -1,5 +1,5 @@
/*******************************************************************************
 * Copyright 2022-2024 Intel Corporation
 * Copyright 2022-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@ -110,6 +110,9 @@ status_t brgemm_convolution_bwd_t<isa>::pd_t::init(engine_t *engine) {
            VERBOSE_BAD_ALGORITHM);
    VDISPATCH_CONV(!has_zero_dim_memory(), VERBOSE_EMPTY_TENSOR, "");
    VDISPATCH_CONV(attr()->has_default_values(), VERBOSE_UNSUPPORTED_ATTR);
    VDISPATCH_CONV(impl::is_dense_format_kind({src_md(0), diff_weights_md(0),
                           diff_weights_md(1), diff_dst_md(0), dst_md(0)}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    convolution_desc_t fwd_conv_d = convolution_desc_t();
    CHECK(fwd_conv_desc_create(&fwd_conv_d, desc()));
@ -114,6 +114,9 @@ status_t brgemm_convolution_bwd_strided_t<isa>::pd_t::init(engine_t *engine) {
            && everyone_is(f32, diff_src_type, diff_dst_type)
            && IMPLICATION(with_bias(), bias_md_.data_type == f32);

    VDISPATCH_CONV(
            impl::is_dense_format_kind({src_md(), weights_md(), dst_md()}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);
    VDISPATCH_CONV(is_bwd_d(), VERBOSE_BAD_PROPKIND);
    VDISPATCH_CONV(
            impl_supports_datatype(diff_src_type), VERBOSE_UNSUPPORTED_DT);
@ -58,6 +58,9 @@ status_t brgemm_convolution_bwd_weights_t::pd_t::init(engine_t *engine) {
            VERBOSE_BAD_ALGORITHM);
    VDISPATCH_CONV(!has_zero_dim_memory(), VERBOSE_EMPTY_TENSOR, "");
    VDISPATCH_CONV(attr()->has_default_values(), VERBOSE_UNSUPPORTED_ATTR);
    VDISPATCH_CONV(impl::is_dense_format_kind({src_md(0), diff_weights_md(0),
                           diff_weights_md(1), diff_dst_md(0)}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    auto scratchpad = scratchpad_registry().registrar();

@ -176,6 +176,10 @@ status_t brgemm_deconvolution_fwd_t<isa>::pd_t::init(engine_t *engine) {
    VDISPATCH_DECONVOLUTION(post_ops_ok(), VERBOSE_UNSUPPORTED_POSTOP);
    VDISPATCH_DECONVOLUTION(zero_points_ok(), VERBOSE_UNSUPPORTED_ZP_CFG);
    VDISPATCH_DECONVOLUTION(!has_zero_dim_memory(), VERBOSE_EMPTY_TENSOR, "");
    VDISPATCH_DECONVOLUTION(
            impl::is_dense_format_kind({src_md(0), diff_weights_md(0),
                    diff_weights_md(1), diff_dst_md(0), dst_md(0)}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    convolution_desc_t conv_d = convolution_desc_t();

@ -2382,6 +2382,8 @@ status_t jit_uni_batch_normalization_fwd_t<isa>::pd_t::init(engine_t *engine) {
    VDISPATCH_BNORM(
            memory_desc_wrapper(src_md()) == memory_desc_wrapper(dst_md()),
            VERBOSE_INCONSISTENT_MDS, "src", "dst");
    VDISPATCH_BNORM(impl::is_dense_format_kind({src_md(), dst_md()}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    // BN+Add+Relu fusion is not currently implemented
    VDISPATCH_BNORM(!fuse_norm_add_relu(), VERBOSE_UNSUPPORTED_FEATURE,
@ -2518,6 +2520,10 @@ status_t jit_uni_batch_normalization_bwd_t<isa>::pd_t::init(engine_t *engine) {
                    == memory_desc_wrapper(diff_dst_md()),
            VERBOSE_INCONSISTENT_MDS, "diff_src", "diff_dst");

    VDISPATCH_BNORM(impl::is_dense_format_kind(
                            {src_md(), diff_src_md(), dst_md(), diff_dst_md()}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    // BN+Add+Relu fusion is not currently implemented
    VDISPATCH_BNORM(!(fuse_norm_add_relu()), VERBOSE_UNSUPPORTED_FEATURE,
            "sum+relu post-ops configuration is not supported");
@ -719,6 +719,8 @@ status_t jit_uni_batch_normalization_s8_fwd_t<isa>::pd_t::init(
    VDISPATCH_BNORM(
            memory_desc_wrapper(src_md()) == memory_desc_wrapper(dst_md()),
            VERBOSE_INCONSISTENT_MDS, "src", "dst");
    VDISPATCH_BNORM(impl::is_dense_format_kind({src_md(), dst_md()}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    // BN+Add+Relu fusion is not currently implemented
    VDISPATCH_BNORM(!fuse_norm_add_relu(), VERBOSE_UNSUPPORTED_FEATURE,
@ -819,6 +819,8 @@ status_t jit_uni_group_normalization_fwd_t::pd_t::init(engine_t *engine) {
    VDISPATCH_GNORM(
            memory_desc_matches_one_of_tag(*dst_md(), ndhwc, nhwc, nwc, nc),
            VERBOSE_UNSUPPORTED_TAG_S, "dst");
    VDISPATCH_GNORM(impl::is_dense_format_kind({src_md(), dst_md()}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    // Instance Normalization is handled in a different implementation. This
    // implementation has some turns in the kernel that is done differently
@ -691,6 +691,8 @@ status_t jit_uni_instance_normalization_fwd_t::pd_t::init(engine_t *engine) {
    VDISPATCH_GNORM(
            memory_desc_matches_one_of_tag(*dst_md(), ndhwc, nhwc, nwc, nc),
            VERBOSE_UNSUPPORTED_TAG_S, "dst");
    VDISPATCH_GNORM(impl::is_dense_format_kind({src_md(), dst_md()}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    auto post_ops_ok = [&]() -> bool {
        const std::vector<injector::post_op_type> accepted_post_ops
@ -1232,6 +1232,8 @@ status_t jit_uni_layer_normalization_fwd_t::pd_t::init(engine_t *engine) {
    // plain format, last logical dim is last physical
    VDISPATCH_LNORM(src_d.blocking_desc().strides[ndims() - 1] == 1,
            VERBOSE_BLOCKING_FAIL, "bad stride value");
    VDISPATCH_LNORM(impl::is_dense_format_kind({src_md(), dst_md()}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    auto post_ops_ok = [&]() -> bool {
        const std::vector<injector::post_op_type> accepted_post_ops
@ -232,6 +232,9 @@ status_t jit_uni_ncsp_convolution_fwd_t::pd_t::init(engine_t *engine) {
    VDISPATCH_CONV(IMPLICATION(with_bias(), weights_md(1)->data_type == f32),
            VERBOSE_UNSUPPORTED_DT);
    VDISPATCH_CONV(mayiuse(avx512_core), VERBOSE_UNSUPPORTED_ISA);
    VDISPATCH_CONV(
            impl::is_dense_format_kind({src_md(), weights_md(), dst_md()}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    reduction_helper_ = reduction_helper_t(this);
    // TODO: Support attributes in matmul-based convolution.
@ -400,6 +403,9 @@ status_t jit_uni_ncsp_convolution_bwd_weights_t::pd_t::init(engine_t *engine) {
                    : data_type::f32),
            VERBOSE_UNSUPPORTED_DT);
    VDISPATCH_CONV(mayiuse(avx512_core), VERBOSE_UNSUPPORTED_ISA);
    VDISPATCH_CONV(impl::is_dense_format_kind(
                           {src_md(), diff_src_md(), dst_md(), diff_dst_md()}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    CHECK(init_convolution(engine));
    init_name();
@ -552,6 +558,10 @@ status_t jit_uni_ncsp_convolution_bwd_data_t::pd_t::init(engine_t *engine) {
                    diff_dst_md()->data_type, weights_md(0)->data_type),
            VERBOSE_UNSUPPORTED_DT);
    VDISPATCH_CONV(mayiuse(avx512_core), VERBOSE_UNSUPPORTED_ISA);
    VDISPATCH_CONV(
            impl::is_dense_format_kind({src_md(), diff_src_md(), weights_md(0),
                    weights_md(1), dst_md(), diff_dst_md()}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    if (one_of(data_type::bf16, diff_dst_md_.data_type, weights_md_.data_type)
            && !mayiuse(avx512_core_bf16))
@ -1,5 +1,5 @@
/*******************************************************************************
 * Copyright 2021-2024 Intel Corporation
 * Copyright 2021-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@ -75,6 +75,8 @@ status_t jit_uni_reduction_t::pd_t::init(engine_t *engine) {
            attr()->has_default_values(sm::post_ops), VERBOSE_UNSUPPORTED_ATTR);
    VDISPATCH_REDUCTION(attr_.set_default_formats(dst_md(0)) == status::success,
            VERBOSE_UNSUPPORTED_POSTOP);
    VDISPATCH_REDUCTION(impl::is_dense_format_kind({src_md(), dst_md()}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    const auto src_mdw = memory_desc_wrapper(src_md());
    const auto dst_mdw = memory_desc_wrapper(dst_md());
@ -1,5 +1,5 @@
/*******************************************************************************
 * Copyright 2020-2024 Intel Corporation
 * Copyright 2020-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@ -95,6 +95,8 @@ status_t jit_uni_resampling_fwd_t::pd_t::init(engine_t *engine) {
            VERBOSE_UNSUPPORTED_POSTOP);
    VDISPATCH_RESAMPLING(memory_desc_matches_tag(*dst_md(), conf_.src_tag),
            VERBOSE_UNSUPPORTED_TAG_S, "dst");
    VDISPATCH_RESAMPLING(impl::is_dense_format_kind({src_md(), dst_md()}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    conf_.alg = desc()->alg_kind;
    conf_.c = C();
@ -2478,6 +2478,8 @@ status_t jit_uni_tbb_batch_normalization_fwd_t<isa>::pd_t::init(
    VDISPATCH_BNORM(
            memory_desc_wrapper(src_md()) == memory_desc_wrapper(dst_md()),
            VERBOSE_INCONSISTENT_MDS, "src", "dst");
    VDISPATCH_BNORM(impl::is_dense_format_kind({src_md(), dst_md()}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    // BN+Add+Relu fusion is not currently implemented
    VDISPATCH_BNORM(!(fuse_norm_add_relu()), VERBOSE_UNSUPPORTED_FEATURE,
@ -2609,6 +2611,9 @@ status_t jit_uni_tbb_batch_normalization_bwd_t<isa>::pd_t::init(
    VDISPATCH_BNORM(memory_desc_wrapper(diff_src_md())
                    == memory_desc_wrapper(diff_dst_md()),
            VERBOSE_INCONSISTENT_MDS, "diff_src", "diff_dst");
    VDISPATCH_BNORM(impl::is_dense_format_kind(
                            {src_md(), diff_src_md(), dst_md(), diff_dst_md()}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    // BN+Add+Relu fusion is not currently implemented
    VDISPATCH_BNORM(!(fuse_norm_add_relu()), VERBOSE_UNSUPPORTED_FEATURE,
@ -1418,6 +1418,9 @@ status_t jit_uni_x8s8s32x_deconvolution_fwd_t<isa>::pd_t::init(
                    | skip_mask_t::post_ops | skip_mask_t::zero_points),
            VERBOSE_UNSUPPORTED_ATTR);
    VDISPATCH_DECONVOLUTION(attr_scales_ok(), VERBOSE_UNSUPPORTED_SCALES_CFG);
    VDISPATCH_DECONVOLUTION(impl::is_dense_format_kind({src_md(0),
                                    weights_md(0), weights_md(1), dst_md(0)}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    CHECK(jit_uni_x8s8s32x_deconv_fwd_kernel<isa>::init_conf(jcp_, *desc(),
            src_md_, weights_md_, dst_md_, with_bias(), bias_md_, attr_,
@ -1,5 +1,5 @@
/*******************************************************************************
 * Copyright 2017-2024 Intel Corporation
 * Copyright 2017-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@ -55,6 +55,8 @@ status_t jit_avx512_common_lrn_fwd_t<d_type>::pd_t::init(engine_t *engine) {
    VDISPATCH_LRN(attr()->has_default_values(), VERBOSE_UNSUPPORTED_ATTR);
    VDISPATCH_LRN(set_default_formats_common(), VERBOSE_UNSUPPORTED_TAG);
    VDISPATCH_LRN(src_d == dst_d, VERBOSE_INCONSISTENT_MDS, "src", "dst");
    VDISPATCH_LRN(impl::is_dense_format_kind({src_md(), dst_md()}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    const auto fmt_tag
            = src_d.matches_one_of_tag(format_tag::nhwc, format_tag::nChw16c);
@ -120,6 +122,9 @@ status_t jit_avx512_common_lrn_bwd_t<d_type>::pd_t::init(engine_t *engine) {
            src_d == diff_dst_d, VERBOSE_INCONSISTENT_MDS, "src", "diff_dst");
    VDISPATCH_LRN(diff_dst_d == diff_src_d, VERBOSE_INCONSISTENT_MDS,
            "diff_src", "diff_dst");
    VDISPATCH_LRN(impl::is_dense_format_kind(
                          {src_md(), diff_src_md(), diff_dst_md()}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    const dims_t ws_dims = {MB(), C(), H(), 2 * W()};
    const auto fmt_tag
@ -1,5 +1,5 @@
/*******************************************************************************
 * Copyright 2016-2024 Intel Corporation
 * Copyright 2016-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@ -191,6 +191,8 @@ status_t jit_uni_lrn_fwd_t<isa, d_type>::pd_t::init(engine_t *engine) {
                    && src_d.dims()[1] >= 2 * VECTOR_LENGTH,
            "src has inconsistent dimensions with vector length");
    VDISPATCH_LRN(desc()->lrn_beta == 0.75, VERBOSE_BAD_PARAM, "lrn_beta");
    VDISPATCH_LRN(impl::is_dense_format_kind({src_md(), dst_md()}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    dat_tag_ = memory_desc_matches_one_of_tag(
            *src_md(), nChw16c, nChw8c, nchw, nhwc);
@ -375,6 +377,9 @@ status_t jit_uni_lrn_bwd_t<isa, d_type>::pd_t::init(engine_t *engine) {
                    && src_d.dims()[1] >= 2 * VECTOR_LENGTH),
            "src has inconsistent dimensions with vector length");
    VDISPATCH_LRN(desc()->lrn_beta == 0.75, VERBOSE_BAD_PARAM, "lrn_beta");
    VDISPATCH_LRN(impl::is_dense_format_kind(
                          {src_md(), diff_src_md(), diff_dst_md()}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    dat_tag_ = memory_desc_matches_one_of_tag(
            *src_md(), nChw16c, nChw8c, nchw, nhwc);
@ -65,6 +65,10 @@ status_t jit_uni_shuffle_t<isa>::pd_t::init(engine_t *engine) {
    VDISPATCH_SHUFFLE(axis() == 1, VERBOSE_BAD_AXIS);
    VDISPATCH_SHUFFLE(set_default_formats_common(), VERBOSE_UNSUPPORTED_TAG);
    VDISPATCH_SHUFFLE(src_d == dst_d, VERBOSE_INCONSISTENT_MDS, "src", "dst");
    VDISPATCH_SHUFFLE(
            impl::is_dense_format_kind({is_fwd() ? src_md() : diff_src_md(),
                    is_fwd() ? dst_md() : diff_dst_md()}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    conf_.isa = isa;
    if (isa == avx) conf_.isa = mayiuse(avx2) ? avx2 : avx;
@ -49,6 +49,8 @@ status_t cross_engine_reorder_t::pd_t::init(impl::engine_t *engine,
            VERBOSE_BAD_ENGINE_KIND);
    VDISPATCH_REORDER(attr_ok(), VERBOSE_UNSUPPORTED_ATTR);
    VDISPATCH_REORDER(extra_ok(true), VERBOSE_UNSUPPORTED_MD_FLAG, "extra_ok");
    VDISPATCH_REORDER(impl::is_dense_format_kind({src_md(), dst_md()}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    memory_desc_wrapper src_mdw(src_md());
    memory_desc_wrapper dst_mdw(dst_md());
@ -48,6 +48,8 @@ status_t ref_group_normalization_fwd_t::pd_t::init(impl::engine_t *engine) {
            "scale / shift data type must be data_type::f32");
    VDISPATCH_GNORM(
            sycl_post_ops_t::post_ops_ok(attr()), VERBOSE_UNSUPPORTED_POSTOP);
    VDISPATCH_GNORM(impl::is_dense_format_kind({src_md(), dst_md()}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    const auto &dims = src_mdw.dims();
    const auto num_groups = desc()->groups;
@ -126,6 +128,9 @@ status_t ref_group_normalization_bwd_t::pd_t::init(impl::engine_t *engine) {
            VERBOSE_UNSUPPORTED_DT);
    VDISPATCH_GNORM(utils::one_of(diff_dst_mdw.data_type(), f32, bf16, f16),
            VERBOSE_UNSUPPORTED_DT);
    VDISPATCH_GNORM(impl::is_dense_format_kind(
                            {src_md(), diff_src_md(), diff_dst_md()}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    auto device = utils::downcast<const impl::xpu::sycl::engine_impl_t *>(
                          engine->impl())
@ -1,5 +1,5 @@
/*******************************************************************************
 * Copyright 2024 Intel Corporation
 * Copyright 2024-2025 Intel Corporation
 * Copyright 2024 Codeplay Software Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
@ -190,6 +190,9 @@ status_t ref_inner_product_fwd_t::pd_t::init(impl::engine_t *engine) {
            "weight memory descriptor is not a plain memory format");
    VDISPATCH_INNER_PRODUCT(dst_wrapper.is_plain(),
            "destination memory descriptor is not a plain memory format");
    VDISPATCH_INNER_PRODUCT(
            impl::is_dense_format_kind({src_md(), weights_md(), dst_md()}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    // if anything contains a zero dimension, return success as this will be converted
    // to a no-op
@ -340,6 +343,9 @@ status_t ref_inner_product_bwd_data_t::pd_t::init(impl::engine_t *engine) {
            src_wrapper.is_plain(), "Blocked memory format is not supported");
    VDISPATCH_INNER_PRODUCT(
            dst_wrapper.is_plain(), "Blocked memory format is not supported");
    VDISPATCH_INNER_PRODUCT(impl::is_dense_format_kind({diff_src_md(),
                                    weights_md(), diff_dst_md()}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    if (src_wrapper.has_zero_dim() || wei_wrapper.has_zero_dim()
            || dst_wrapper.has_zero_dim()) {
@ -472,6 +478,9 @@ status_t ref_inner_product_bwd_weights_t::pd_t::init(impl::engine_t *engine) {
            wei_wrapper.is_plain(), "blocked memory format is not supported");
    VDISPATCH_INNER_PRODUCT(
            dst_wrapper.is_plain(), "blocked memory format is not supported");
    VDISPATCH_INNER_PRODUCT(
            impl::is_dense_format_kind({src_md(), weights_md(), dst_md()}),
            VERBOSE_UNSUPPORTED_SPARSE_CFG);

    format_tag_t wei_format_tag = format_tag::ab;
    format_tag_t dst_format_tag = format_tag::ab;
@ -1,5 +1,5 @@
/*******************************************************************************
 * Copyright 2019-2024 Intel Corporation
 * Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@ -29,7 +29,6 @@
using namespace dnnl::impl;
using namespace dnnl::impl::xpu::ocl;

#ifdef DNNL_EXPERIMENTAL_SPARSE
status_t dnnl_ocl_interop_memory_create_v2(memory_t **memory,
        const memory_desc_t *md, engine_t *engine, memory_kind_t memory_kind,
        int nhandles, void **handles) {
@ -83,7 +82,6 @@ status_t dnnl_ocl_interop_memory_create_v2(memory_t **memory,
    return safe_ptr_assign(
            *memory, new memory_t(engine, md, std::move(mem_storages)));
}
#endif

status_t dnnl_ocl_interop_memory_create(memory_t **memory,
        const memory_desc_t *md, engine_t *engine, memory_kind_t memory_kind,
@ -1,5 +1,5 @@
/*******************************************************************************
 * Copyright 2020-2024 Intel Corporation
 * Copyright 2020-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@ -35,7 +35,6 @@ using dnnl::impl::status_t;
using ::sycl::context;
using ::sycl::get_pointer_type;

#ifdef DNNL_EXPERIMENTAL_SPARSE
status_t dnnl_sycl_interop_memory_create_v2(memory_t **memory,
        const memory_desc_t *md, engine_t *engine, memory_kind_t memory_kind,
        int nhandles, void **handles) {
@ -94,7 +93,6 @@ status_t dnnl_sycl_interop_memory_create_v2(memory_t **memory,
    return safe_ptr_assign(
            *memory, new memory_t(engine, md, std::move(mem_storages)));
}
#endif

status_t dnnl_sycl_interop_memory_create(memory_t **memory,
        const memory_desc_t *md, engine_t *engine, memory_kind_t memory_kind,
@ -170,10 +170,6 @@ foreach(driver ${all_drivers})
            register_all_tests(cpu "${driver}" "${test_files_smoke}")
        endif()
    elseif(DNNL_TEST_SET_COVERAGE EQUAL DNNL_TEST_SET_CI)
        if(NOT DNNL_EXPERIMENTAL_SPARSE)
            list(REMOVE_ITEM test_files_ci "test_matmul_sparse_ci")
        endif()

        # gpu_ci files may happen if cpu coverage can not be used on gpu
        # Filter out gpu_ci inputs from ci
        foreach(test_file ${test_files_gpu_ci})
@ -192,11 +188,6 @@ foreach(driver ${all_drivers})
            register_all_tests(cpu "${driver}" "${test_files_ci}")
        endif()
    elseif(DNNL_TEST_SET_COVERAGE EQUAL DNNL_TEST_SET_NIGHTLY)
        if(NOT DNNL_EXPERIMENTAL_SPARSE)
            list(REMOVE_ITEM test_files_cpu "test_matmul_sparse")
            list(REMOVE_ITEM test_files_gpu "test_matmul_sparse_gpu")
        endif()

        ## Filter out gpu, large cpu and invalid inputs from cpu
        foreach(test_file ${test_files_large_cpu} ${test_files_gpu_ci}
                ${test_files_gpu} ${test_files_ci} ${test_files_smoke})
@ -833,7 +833,6 @@ std::ostream &operator<<(std::ostream &s, const attr_t &attr) {
    return s;
}

#ifdef DNNL_EXPERIMENTAL_SPARSE
std::ostream &operator<<(std::ostream &s, dnnl_sparse_encoding_t se) {
    s << sparse_encoding2str(se);
    return s;
@ -861,7 +860,6 @@ std::ostream &operator<<(
    }
    return s;
}
#endif

std::ostream &operator<<(std::ostream &s, memory_kind_ext_t memory_kind) {
    switch (memory_kind) {
@ -1714,7 +1712,6 @@ void update_cpu_ref_attrs(attr_t &attr, dnnl_data_type_t dst_dt) {
    }
}

#ifdef DNNL_EXPERIMENTAL_SPARSE
int sparse_options_t::from_str(const std::string &s) {
    *this = sparse_options_t();
    if (s.empty()) return OK;
@ -1759,4 +1756,3 @@ int sparse_options_t::from_str(const std::string &s) {
    static const int expected_num_options = 3;
    return options_count == expected_num_options ? OK : FAIL;
}
#endif
@ -482,7 +482,6 @@ struct isa_hints_t {

using policy_t = attr_t::policy_t;

#ifdef DNNL_EXPERIMENTAL_SPARSE
struct sparse_options_t {
    static constexpr dnnl_sparse_encoding_t def_encoding
            = dnnl_sparse_encoding_undef;
@ -552,8 +551,6 @@ private:

std::ostream &operator<<(
        std::ostream &s, const sparse_options_t &sparse_options);
#endif

std::ostream &operator<<(std::ostream &s, const policy_t &policy);
std::ostream &operator<<(
        std::ostream &s, const attr_t::zero_points_t &zero_points);
@ -1,5 +1,5 @@
/*******************************************************************************
 * Copyright 2017-2024 Intel Corporation
 * Copyright 2017-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@ -25,9 +25,7 @@
#include "oneapi/dnnl/dnnl.h"

dnnl_data_type_t str2dt(const char *str);
#ifdef DNNL_EXPERIMENTAL_SPARSE
dnnl_sparse_encoding_t str2sparse_encoding(const char *str);
#endif
dnnl_format_tag_t str2fmt_tag(const char *str);

/* status */
@ -40,9 +38,7 @@ const char *dt2str(dnnl_data_type_t dt);
const char *fmt_tag2str(dnnl_format_tag_t tag);

/* encoding */
#ifdef DNNL_EXPERIMENTAL_SPARSE
const char *sparse_encoding2str(dnnl_sparse_encoding_t encoding);
#endif

/* engine kind */
const char *engine_kind2str(dnnl_engine_kind_t kind);
@ -60,7 +60,6 @@ dnnl_data_type_t str2dt(const char *str) {
    return dnnl_data_type_undef;
}

#ifdef DNNL_EXPERIMENTAL_SPARSE
dnnl_sparse_encoding_t str2sparse_encoding(const char *str) {
#define CASE(_case) do { \
        if (!strcmp(STRINGIFY(_case), str) \
@ -78,7 +77,6 @@ dnnl_sparse_encoding_t str2sparse_encoding(const char *str) {
    return dnnl_sparse_encoding_undef;
}

#endif
dnnl_format_tag_t str2fmt_tag(const char *str) {
#define CASE(_case) do { \
        if (!strcmp(STRINGIFY(_case), str) \
@ -1731,11 +1729,9 @@ const char *fmt_tag2str(dnnl_format_tag_t tag) {
    return dnnl_fmt_tag2str(tag);
}

#ifdef DNNL_EXPERIMENTAL_SPARSE
const char *sparse_encoding2str(dnnl_sparse_encoding_t encoding) {
    return dnnl_sparse_encoding2str(encoding);
}
#endif

const char *engine_kind2str(dnnl_engine_kind_t kind) {
    return dnnl_engine_kind2str(kind);
@ -409,41 +409,25 @@ static int init_memory(
|
||||
|
||||
const int nhandles = query_md_num_handles(md);
|
||||
std::vector<void *> handles(nhandles);
|
||||
#ifdef DNNL_EXPERIMENTAL_SPARSE
|
||||
for (int i = 0; i < nhandles; i++)
|
||||
DNN_SAFE(dnnl_memory_get_data_handle_v2(mem, &handles[i], i), CRIT);
|
||||
#else
|
||||
DNN_SAFE(dnnl_memory_get_data_handle(mem, &handles[0]), CRIT);
|
||||
#endif
|
||||
|
||||
if (is_opencl) {
|
||||
#if DNNL_GPU_RUNTIME == DNNL_RUNTIME_OCL
|
||||
dnnl_ocl_interop_memory_kind_t mem_kind;
|
||||
DNN_SAFE(dnnl_ocl_interop_memory_get_memory_kind(mem, &mem_kind), CRIT);
|
||||
#ifdef DNNL_EXPERIMENTAL_SPARSE
|
||||
DNN_SAFE(dnnl_ocl_interop_memory_create_v2(ret, md, engine, mem_kind,
|
||||
(int)handles.size(), handles.data()),
|
||||
CRIT);
|
||||
#else
|
||||
DNN_SAFE(dnnl_ocl_interop_memory_create(
|
||||
ret, md, engine, mem_kind, handles[0]),
|
||||
CRIT);
|
||||
#endif
|
||||
#endif
|
||||
} else if (is_sycl) {
|
||||
#ifdef DNNL_WITH_SYCL
|
||||
dnnl_sycl_interop_memory_kind_t mem_kind;
|
||||
DNN_SAFE(
|
||||
dnnl_sycl_interop_memory_get_memory_kind(mem, &mem_kind), CRIT);
|
||||
#ifdef DNNL_EXPERIMENTAL_SPARSE
|
||||
DNN_SAFE(dnnl_sycl_interop_memory_create_v2(ret, md, engine, mem_kind,
|
||||
(int)handles.size(), handles.data()),
|
||||
CRIT);
|
||||
#else
|
||||
DNN_SAFE(dnnl_sycl_interop_memory_create(
|
||||
ret, md, engine, mem_kind, handles[0]),
|
||||
CRIT);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -463,19 +447,10 @@ void dnn_mem_t::map() const {
|
||||
const int nhandles = query_md_num_handles(md_);
|
||||
mapped_ptrs_.resize(nhandles);
|
||||
for (int i = 0; i < nhandles; i++) {
|
||||
#ifdef DNNL_EXPERIMENTAL_SPARSE
|
||||
auto st = dnnl_memory_map_data_v2(mem, &mapped_ptrs_[i], i);
|
||||
#else
|
||||
auto st = dnnl_memory_map_data(mem, &mapped_ptrs_[i]);
|
||||
#endif
|
||||
if (st != dnnl_success) {
|
||||
for (int j = 0; j < i; j++) {
|
||||
#ifdef DNNL_EXPERIMENTAL_SPARSE
|
||||
for (int j = 0; j < i; j++)
|
||||
DNN_SAFE_V(dnnl_memory_unmap_data_v2(mem, mapped_ptrs_[i], i));
|
||||
#else
|
||||
DNN_SAFE_V(dnnl_memory_unmap_data(mem, mapped_ptrs_[i]));
|
||||
#endif
|
||||
}
|
||||
DNN_SAFE_V(st);
|
||||
}
|
||||
}
|
||||
@ -489,11 +464,7 @@ void dnn_mem_t::unmap() const {
|
||||
auto mem = m_padded_ ? m_padded_ : m_;
|
||||
const int nhandles = query_md_num_handles(md_);
|
||||
for (int i = 0; i < nhandles; i++) {
|
||||
#ifdef DNNL_EXPERIMENTAL_SPARSE
|
||||
DNN_SAFE_V(dnnl_memory_unmap_data_v2(mem, mapped_ptrs_[i], i));
|
||||
#else
|
||||
DNN_SAFE_V(dnnl_memory_unmap_data(mem, mapped_ptrs_[i]));
|
||||
#endif
|
||||
mapped_ptrs_[i] = nullptr;
|
||||
}
|
||||
}
|
||||
@ -594,10 +565,8 @@ size_t dnn_mem_t::pad_memory_size(
dnnl_memory_desc_t dnn_mem_t::pad_memory_desc(const_dnnl_memory_desc_t md,
dnnl_engine_kind_t engine_kind, bool *was_padded) {
if (was_padded) *was_padded = false;
#ifdef DNNL_EXPERIMENTAL_SPARSE
// TODO: add padded memory descriptor support for sparse memory.
if (query_md_format_kind(md) == dnnl_format_kind_sparse) return nullptr;
#endif
size_t old_sz = dnnl_memory_desc_get_size(md);
if (old_sz == 0 || !has_bench_mode_bit(mode_bit_t::corr)
|| engine_kind == dnnl_cpu)

@ -642,7 +611,6 @@ benchdnn_dnnl_wrapper_t<dnnl_memory_desc_t> dnn_mem_t::init_md(int ndims,
return md;
}

#ifdef DNNL_EXPERIMENTAL_SPARSE
benchdnn_dnnl_wrapper_t<dnnl_memory_desc_t> dnn_mem_t::init_csr_md(int ndims,
const dnnl_dims_t dims, dnnl_data_type_t data_type, dnnl_dim_t nnz,
dnnl_data_type_t indices_dt, dnnl_data_type_t pointers_dt) {

@ -669,24 +637,17 @@ benchdnn_dnnl_wrapper_t<dnnl_memory_desc_t> dnn_mem_t::init_sparse_packed_md(
&md, ndims, dims, data_type, nnz));
return md;
}
#endif

int dnn_mem_t::initialize_memory_create_sycl(const handle_info_t &handle_info) {
#ifdef DNNL_WITH_SYCL
if (handle_info.is_host_ptr) {
// Ignore memory_kind with host pointers and force USM.
#ifdef DNNL_EXPERIMENTAL_SPARSE
const int nhandles = query_md_num_handles(md_);
std::vector<void *> handles(nhandles, handle_info.ptr);
DNN_SAFE(dnnl_sycl_interop_memory_create_v2(&m_, md_, engine_,
dnnl_sycl_interop_usm, (int)handles.size(),
handles.data()),
CRIT);
#else
DNN_SAFE(dnnl_sycl_interop_memory_create(&m_, md_, engine_,
dnnl_sycl_interop_usm, handle_info.ptr),
CRIT);
#endif
return OK;
}

@ -700,18 +661,12 @@ int dnn_mem_t::initialize_memory_create_sycl(const handle_info_t &handle_info) {
= (memory_kind == memory_kind_ext_t::usm
? dnnl_sycl_interop_usm
: dnnl_sycl_interop_buffer);
#ifdef DNNL_EXPERIMENTAL_SPARSE
const int nhandles = query_md_num_handles(md_);
std::vector<void *> handles(nhandles, handle_info.ptr);
DNN_SAFE(dnnl_sycl_interop_memory_create_v2(&m_padded_, md_padded,
engine_, mem_kind, (int)handles.size(),
handles.data()),
CRIT);
#else
DNN_SAFE(dnnl_sycl_interop_memory_create(&m_padded_, md_padded,
engine_, mem_kind, handle_info.ptr),
CRIT);
#endif
SAFE(init_memory(&m_, md_, m_padded_), CRIT);
break;
}

@ -726,11 +681,7 @@ int dnn_mem_t::initialize_memory_create_sycl(const handle_info_t &handle_info) {

const int nhandles = query_md_num_handles(md_);
for (int i = 0; i < nhandles; i++) {
#ifdef DNNL_EXPERIMENTAL_SPARSE
size_t sz = dnnl_memory_desc_get_size_v2(md_padded, i);
#else
size_t sz = dnnl_memory_desc_get_size(md_padded);
#endif
if (memory_kind == memory_kind_ext_t::usm_device) {
data_.push_back(::sycl::malloc_device(sz, dev, ctx));
} else {

@ -742,16 +693,10 @@ int dnn_mem_t::initialize_memory_create_sycl(const handle_info_t &handle_info) {
DNN_SAFE(dnnl_out_of_memory, CRIT);
}
}
#ifdef DNNL_EXPERIMENTAL_SPARSE
DNN_SAFE(dnnl_sycl_interop_memory_create_v2(&m_padded_, md_padded,
engine_, dnnl_sycl_interop_usm, (int)data_.size(),
data_.data()),
CRIT);
#else
DNN_SAFE(dnnl_sycl_interop_memory_create(&m_padded_, md_padded,
engine_, dnnl_sycl_interop_usm, data_[0]),
CRIT);
#endif
SAFE(init_memory(&m_, md_, m_padded_), CRIT);
break;
}

@ -770,18 +715,12 @@ int dnn_mem_t::initialize_memory_create_opencl(
#if DNNL_GPU_RUNTIME == DNNL_RUNTIME_OCL
if (handle_info.is_host_ptr) {
// Ignore memory_kind with host pointers and force USM.
#ifdef DNNL_EXPERIMENTAL_SPARSE
const int nhandles = query_md_num_handles(md_);
std::vector<void *> handles(nhandles, handle_info.ptr);
DNN_SAFE(dnnl_ocl_interop_memory_create_v2(&m_, md_, engine_,
dnnl_ocl_interop_usm, (int)handles.size(),
handles.data()),
CRIT);
#else
DNN_SAFE(dnnl_ocl_interop_memory_create(&m_, md_, engine_,
dnnl_ocl_interop_usm, handle_info.ptr),
CRIT);
#endif
return OK;
}

@ -797,18 +736,12 @@ int dnn_mem_t::initialize_memory_create_opencl(
= (memory_kind == memory_kind_ext_t::usm
? dnnl_ocl_interop_usm
: dnnl_ocl_interop_buffer);
#ifdef DNNL_EXPERIMENTAL_SPARSE
const int nhandles = query_md_num_handles(md_);
std::vector<void *> handles(nhandles, handle_info.ptr);
DNN_SAFE(dnnl_ocl_interop_memory_create_v2(&m_padded_, md_padded,
engine_, mem_kind, (int)handles.size(),
handles.data()),
CRIT);
#else
DNN_SAFE(dnnl_ocl_interop_memory_create(&m_padded_, md_padded,
engine_, mem_kind, handle_info.ptr),
CRIT);
#endif
SAFE(init_memory(&m_, md_, m_padded_), CRIT);
break;
}

@ -818,11 +751,7 @@ int dnn_mem_t::initialize_memory_create_opencl(

const int nhandles = query_md_num_handles(md_);
for (int i = 0; i < nhandles; i++) {
#ifdef DNNL_EXPERIMENTAL_SPARSE
size_t sz = dnnl_memory_desc_get_size_v2(md_padded, i);
#else
size_t sz = dnnl_memory_desc_get_size(md_padded);
#endif
if (memory_kind == memory_kind_ext_t::usm_device) {
data_.push_back(dnnl::impl::xpu::ocl::usm::malloc_device(
engine_, sz));

@ -837,16 +766,10 @@ int dnn_mem_t::initialize_memory_create_opencl(
DNN_SAFE(dnnl_out_of_memory, CRIT);
}
}
#ifdef DNNL_EXPERIMENTAL_SPARSE
DNN_SAFE(dnnl_ocl_interop_memory_create_v2(&m_padded_, md_padded,
engine_, dnnl_ocl_interop_usm, (int)data_.size(),
data_.data()),
CRIT);
#else
DNN_SAFE(dnnl_ocl_interop_memory_create(&m_padded_, md_padded,
engine_, dnnl_ocl_interop_usm, data_[0]),
CRIT);
#endif
SAFE(init_memory(&m_, md_, m_padded_), CRIT);
break;
}

@ -876,11 +799,7 @@ int dnn_mem_t::initialize_memory_create(const handle_info_t &handle_info) {

const int nhandles = query_md_num_handles(md_);
for (int i = 0; i < nhandles; i++) {
#ifdef DNNL_EXPERIMENTAL_SPARSE
size_t sz = dnnl_memory_desc_get_size_v2(md_, i);
#else
size_t sz = dnnl_memory_desc_get_size(md_);
#endif
data_.push_back(zmalloc(sz, alignment));
}
if (std::any_of(

@ -889,13 +808,9 @@ int dnn_mem_t::initialize_memory_create(const handle_info_t &handle_info) {
zfree(p);
DNN_SAFE(dnnl_out_of_memory, CRIT);
}
#ifdef DNNL_EXPERIMENTAL_SPARSE
DNN_SAFE(dnnl_memory_create_v2(
&m_, md_, engine_, (int)data_.size(), data_.data()),
CRIT);
#else
DNN_SAFE(dnnl_memory_create(&m_, md_, engine_, data_[0]), CRIT);
#endif

} else if (is_sycl) {
SAFE(initialize_memory_create_sycl(handle_info), CRIT);

@ -905,13 +820,9 @@ int dnn_mem_t::initialize_memory_create(const handle_info_t &handle_info) {
is_data_owner_ = false;
const int nhandles = query_md_num_handles(md_);
std::vector<void *> handles(nhandles, handle_info.ptr);
#ifdef DNNL_EXPERIMENTAL_SPARSE
DNN_SAFE(dnnl_memory_create_v2(&m_, md_, engine_, (int)handles.size(),
handles.data()),
CRIT);
#else
DNN_SAFE(dnnl_memory_create(&m_, md_, engine_, handles[0]), CRIT);
#endif
}
return OK;
}

@ -940,11 +851,7 @@ int dnn_mem_t::initialize(

const int nhandles = query_md_num_handles(md_);
for (int i = 0; i < nhandles; i++) {
#ifdef DNNL_EXPERIMENTAL_SPARSE
size_t sz = dnnl_memory_desc_get_size_v2(md_, i);
#else
size_t sz = dnnl_memory_desc_get_size(md_);
#endif
if (is_canary_protected_) sz = pad_memory_size(sz, engine_kind_);
// Do not fill a memory if its size is zero. Moreover, memset
// expects defined pointer, nullptr is not allowed.
@ -175,7 +175,6 @@ struct dnn_mem_t {
static benchdnn_dnnl_wrapper_t<dnnl_memory_desc_t> init_md(int ndims,
const dnnl_dims_t dims, dnnl_data_type_t data_type,
const std::string &tag, const dims_t &strides_ = {});
#ifdef DNNL_EXPERIMENTAL_SPARSE
// Initializes memory descriptor for CSR encoding.
static benchdnn_dnnl_wrapper_t<dnnl_memory_desc_t> init_csr_md(int ndims,
const dnnl_dims_t dims, dnnl_data_type_t data_type, dnnl_dim_t nnz,

@ -188,7 +187,6 @@ struct dnn_mem_t {
static benchdnn_dnnl_wrapper_t<dnnl_memory_desc_t> init_sparse_packed_md(
int ndims, const dnnl_dims_t dims, dnnl_data_type_t data_type,
dnnl_dim_t nnz);
#endif

/* fields */
dnnl_memory_desc_t md_ {};
@ -34,9 +34,7 @@ where *matmul-knobs* are:
tensors with option values other than `0`, a correspondent memory
format tag must be specified.
- `--encoding=STRING` - sparse encodings and sparsity. No encodings are set by
default. Refer to [encodings](knobs_encoding.md) for details. This
is an experimental feature that must be enabled via a build time
CMake option `DNNL_EXPERIMENTAL_SPARSE`.
default. Refer to [encodings](knobs_encoding.md) for details.
- `--match=REGEX` -- skip problems not matching the regular expression in
`REGEX`. By default no pattern is applied (run everything).
Note: Windows may interpret only string arguments surrounded by
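Note: with the build-time guard removed, `--encoding` is documented as always available. For illustration only, an invocation along the lines of `--matmul --encoding=csr+0.99:: 10x30:30x20` would request a CSR-encoded source tensor with roughly 99% zeros while weights and destination stay dense; the exact grammar and the accepted encodings are defined in knobs_encoding.md, not here.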
@ -43,9 +43,7 @@ void check_correctness(
for_(const auto &i_stag : s.stag)
for_(const auto &i_wtag : s.wtag)
for_(const auto &i_dtag : s.dtag)
#ifdef DNNL_EXPERIMENTAL_SPARSE
for_(const auto &i_sparse_options : s.sparse_options)
#endif
for_(const auto &i_strides : s.strides)
for_(const auto &i_rt_dims_masks : s.rt_dims_masks)
for_(const auto &i_attr : s.attributes)

@ -54,10 +52,7 @@ void check_correctness(
for (const auto &i_bia_cfg : bia_cfg) {
const prb_t prb(s.prb_vdims, i_dt, i_stag, i_wtag, i_dtag, i_strides,
i_bia_cfg.first, i_bia_cfg.second, i_rt_dims_masks,
#ifdef DNNL_EXPERIMENTAL_SPARSE
i_sparse_options,
#endif
i_attr, i_ctx_init, i_ctx_exe, s.impl_filter);
i_sparse_options, i_attr, i_ctx_init, i_ctx_exe, s.impl_filter);
if (s.pattern && !match_regex(prb.str(), s.pattern)) return;

task_executor.submit(prb, s.perf_template, createit, checkit, doit);

@ -165,9 +160,7 @@ int bench(int argc, char **argv) {
|| parse_tag(s.stag, def.stag, argv[0], "stag")
|| parse_tag(s.wtag, def.wtag, argv[0], "wtag")
|| parse_tag(s.dtag, def.dtag, argv[0], "dtag")
#ifdef DNNL_EXPERIMENTAL_SPARSE
|| parse_encoding(s.sparse_options, argv[0], "encoding")
#endif
|| parse_strides(s.strides, def.strides, argv[0], "strides")
|| parse_dt(s.bia_dt, def.bia_dt, argv[0], "bia-dt")
// TODO: remove this later
@ -51,7 +51,6 @@ benchdnn_dnnl_wrapper_t<dnnl_memory_desc_t> create_md(const prb_t *prb,
if (dt == dnnl_data_type_undef) dt = prb->src_dt();
const auto &src_rt_dims = get_runtime_dims(
prb->src_dims(), prb->src_runtime_dim_mask());
#ifdef DNNL_EXPERIMENTAL_SPARSE
auto src_encoding = prb->sparse_options.get_encoding(DNNL_ARG_SRC);
auto src_sparsity = prb->sparse_options.get_sparsity(DNNL_ARG_SRC);
if (src_encoding != dnnl_sparse_encoding_undef) {

@ -69,7 +68,6 @@ benchdnn_dnnl_wrapper_t<dnnl_memory_desc_t> create_md(const prb_t *prb,
default: assert(!"unsupported encoding"); return nullptr;
}
} else
#endif
return dnn_mem_t::init_md(prb->ndims, src_rt_dims.data(), dt,
prb->stag, prb->strides[STRIDES_SRC]);
}

@ -78,7 +76,6 @@ benchdnn_dnnl_wrapper_t<dnnl_memory_desc_t> create_md(const prb_t *prb,
if (dt == dnnl_data_type_undef) dt = prb->wei_dt();
const auto &weights_rt_dims = get_runtime_dims(
prb->weights_dims(), prb->weights_runtime_dim_mask());
#ifdef DNNL_EXPERIMENTAL_SPARSE
auto wei_encoding = prb->sparse_options.get_encoding(DNNL_ARG_WEIGHTS);
auto wei_sparsity = prb->sparse_options.get_sparsity(DNNL_ARG_WEIGHTS);

@ -100,7 +97,6 @@ benchdnn_dnnl_wrapper_t<dnnl_memory_desc_t> create_md(const prb_t *prb,
default: assert(!"unsupported encoding"); return nullptr;
}
} else
#endif
return dnn_mem_t::init_md(prb->ndims, weights_rt_dims.data(), dt,
prb->wtag, prb->strides[STRIDES_WEI]);
}

@ -180,13 +176,11 @@ int init_prim_ref(benchdnn_dnnl_wrapper_t<dnnl_primitive_t> &prim_ref,
if (is_cpu() && (prb->src_dt() == dnnl_f32 && prb->wei_dt() == dnnl_f32))
return OK;

#ifdef DNNL_EXPERIMENTAL_SPARSE
if (prb->sparse_options.get_encoding(DNNL_ARG_SRC)
!= dnnl_sparse_encoding_undef
|| prb->sparse_options.get_encoding(DNNL_ARG_WEIGHTS)
!= dnnl_sparse_encoding_undef)
return OK;
#endif

std::vector<std::vector<dnnl_data_type_t>> prim_ref_dt {
prb->dt, {dnnl_f32}};

@ -208,11 +202,8 @@ int init_prim_ref(benchdnn_dnnl_wrapper_t<dnnl_primitive_t> &prim_ref,
// modifying prb in place.
prb_t prb_cpu {*prb, prim_ref_dt_i, tag::any, tag::any, tag::any,
{vdims_t(STRIDES_SIZE)}, prim_ref_bia_dt_i, prb->bia_mask,
{0, 0, 0},
#ifdef DNNL_EXPERIMENTAL_SPARSE
sparse_options_t(),
#endif
cpu_attr, prb->ctx_init, prb->ctx_exe, prb->impl_filter};
{0, 0, 0}, sparse_options_t(), cpu_attr, prb->ctx_init,
prb->ctx_exe, prb->impl_filter};

auto st = init_prim_ref_common(prim_ref, &prb_cpu, res);
if (st == OK) return OK;

@ -222,7 +213,6 @@ int init_prim_ref(benchdnn_dnnl_wrapper_t<dnnl_primitive_t> &prim_ref,
return OK;
}

#ifdef DNNL_EXPERIMENTAL_SPARSE
// The main idea is to generate values and metadata directly without generating
// the dense matrix to avoid excessive memory consumption for large problem
// sizes.
@ -359,7 +349,6 @@ int fill_sparse_data(data_kind_t kind, const prb_t *prb, dnn_mem_t &mem_dt,

return OK;
}
#endif

int fill_data(data_kind_t kind, const prb_t *prb, const cfg_t &cfg,
dnn_mem_t &mem_dt, dnn_mem_t &mem_fp, res_t *res) {

@ -370,7 +359,6 @@ int fill_data(data_kind_t kind, const prb_t *prb, const cfg_t &cfg,
bool is_sparse_packed = false;
bool is_any_sparse = false;
std::vector<bool> nnz_mask;
#ifdef DNNL_EXPERIMENTAL_SPARSE
const auto sparse_encoding = prb->sparse_options.get_encoding(kind);
const bool is_sparse_csr_coo
= sparse_encoding == dnnl_csr || sparse_encoding == dnnl_coo;

@ -391,7 +379,6 @@ int fill_data(data_kind_t kind, const prb_t *prb, const cfg_t &cfg,
std::default_random_engine rng(nnz);
std::shuffle(nnz_mask.begin(), nnz_mask.end(), rng);
}
#endif

// Refer to modes documentation for filling principles.
// Note: sparse filling is more complex than a general one in a sense that

@ -489,7 +476,6 @@ void skip_unimplemented_prb(const prb_t *prb, res_t *res) {
prb->attr, res, dnnl_matmul, prb->src_dt(), prb->dst_dt());
skip_unimplemented_prelu_po(prb->attr, res, dnnl_matmul);

#ifdef DNNL_EXPERIMENTAL_SPARSE
if ((is_nvidia_gpu() || is_amd_gpu()) && !prb->sparse_options.is_def()) {
BENCHDNN_PRINT(2,
"[SKIP][%s:%d]: oneDNN doesn't support sparse matmul for "

@ -536,7 +522,6 @@ void skip_unimplemented_prb(const prb_t *prb, res_t *res) {
res->reason = skip_reason::case_not_supported;
return;
}
#endif

if (is_cpu()) {
const bool is_x8s8f16

@ -854,7 +839,6 @@ int init_ref_memory_args(dnn_mem_map_t &ref_mem_map, dnn_mem_map_t &mem_map,

auto &mem = entry.second; // `mem` is modified by filler (reorder).

#ifdef DNNL_EXPERIMENTAL_SPARSE
auto src_encoding = prb->sparse_options.get_encoding(DNNL_ARG_SRC);
auto wei_encoding = prb->sparse_options.get_encoding(DNNL_ARG_WEIGHTS);

@ -881,9 +865,7 @@ int init_ref_memory_args(dnn_mem_map_t &ref_mem_map, dnn_mem_map_t &mem_map,
auto wei_fp_d = create_md(prb, WEI);
ref_mem_map.emplace(exec_arg, dnn_mem_t(wei_fp_d, ref_engine));
}
} else
#endif
{
} else {
if (exec_arg == DNNL_ARG_WEIGHTS) {
// Switch the format tag from "ab" to "ba" but to handle batched
// cases, use strides instead.
@ -47,10 +47,8 @@ struct settings_t : public base_settings_t {

std::vector<std::vector<dnnl_data_type_t>> dt {{dnnl_f32}};
std::vector<std::string> stag {tag::any}, wtag {tag::any}, dtag {tag::any};
#ifdef DNNL_EXPERIMENTAL_SPARSE
std::vector<sparse_options_t> sparse_options {{DNNL_ARG_SRC,
sparse_options_t::def_encoding, sparse_options_t::def_sparsity}};
#endif
std::vector<vdims_t> strides {vdims_t(STRIDES_SIZE)};
std::vector<dnnl_data_type_t> bia_dt {dnnl_data_type_undef};
std::vector<int> bia_mask {2};

@ -76,11 +74,8 @@ struct prb_t : public prb_vdims_t {
prb_t(const settings_t &s)
: prb_t(s.prb_vdims, s.dt[0], s.stag[0], s.wtag[0], s.dtag[0],
s.strides[0], s.bia_dt[0], s.bia_mask[0], s.rt_dims_masks[0],
#ifdef DNNL_EXPERIMENTAL_SPARSE
s.sparse_options[0],
#endif
s.attributes.front(), s.ctx_init[0], s.ctx_exe[0],
s.impl_filter) {
s.sparse_options[0], s.attributes.front(), s.ctx_init[0],
s.ctx_exe[0], s.impl_filter) {
SAFE_V(s.has_single_setup() ? OK : FAIL);
}

@ -89,11 +84,9 @@ struct prb_t : public prb_vdims_t {
const std::string &dtag, const vdims_t &strides,
dnnl_data_type_t bia_dt, int bia_mask,
const std::vector<dims_mask_t> &rt_dims_masks,
#ifdef DNNL_EXPERIMENTAL_SPARSE
const sparse_options_t &sparse_options,
#endif
const attr_t &attr, const thr_ctx_t &ctx_init,
const thr_ctx_t &ctx_exe, const impl_filter_t &impl_filter)
const sparse_options_t &sparse_options, const attr_t &attr,
const thr_ctx_t &ctx_init, const thr_ctx_t &ctx_exe,
const impl_filter_t &impl_filter)
: prb_vdims_t(prb_vdims)
, dt(dt)
, stag(stag)

@ -103,9 +96,7 @@ struct prb_t : public prb_vdims_t {
, bia_dt(bia_dt)
, bia_mask(bia_mask)
, rt_dims_masks(rt_dims_masks)
#ifdef DNNL_EXPERIMENTAL_SPARSE
, sparse_options(sparse_options)
#endif
, attr(attr)
, ctx_init(ctx_init)
, ctx_exe(ctx_exe)

@ -144,9 +135,7 @@ struct prb_t : public prb_vdims_t {
dnnl_data_type_t bia_dt;
int bia_mask;
std::vector<dims_mask_t> rt_dims_masks;
#ifdef DNNL_EXPERIMENTAL_SPARSE
sparse_options_t sparse_options;
#endif

bool inplace = false; // Lacks placement, always considered `false`.
attr_t attr;
@ -76,9 +76,7 @@ std::string prb_t::set_repro_line() {

if (canonical || !has_default_dts) s << "--dt=" << dt << " ";
if (canonical || stag != def.stag[0]) s << "--stag=" << stag << " ";
#ifdef DNNL_EXPERIMENTAL_SPARSE
s << sparse_options;
#endif
if (canonical || wtag != def.wtag[0]) s << "--wtag=" << wtag << " ";
if (canonical || dtag != def.dtag[0]) s << "--dtag=" << dtag << " ";
if (canonical || strides != def.strides[0])
@ -203,8 +203,6 @@ void compute_ref_matmul(const prb_t *prb, const args_t &args) {
});
}

#ifdef DNNL_EXPERIMENTAL_SPARSE

void cvt_coo_indices_to_csr_pointers(const int32_t *indices, int32_t *pointers,
const int nnz, const int nrows) {
for (int i = 0; i < nnz; ++i) {
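Note: the function declared above converts COO row indices into CSR row pointers for the reference path. Below is a standalone sketch of the usual counting-plus-prefix-sum construction; it mirrors the loop visible in the hunk but is not the verbatim benchdnn body.

    #include <cstdint>
    #include <algorithm>

    // Builds CSR row pointers from COO row indices: count entries per row,
    // then take a running sum so pointers[r]..pointers[r+1] spans row r.
    void cvt_coo_indices_to_csr_pointers_sketch(const int32_t *indices,
            int32_t *pointers, const int nnz, const int nrows) {
        std::fill(pointers, pointers + nrows + 1, 0);
        for (int i = 0; i < nnz; ++i)
            pointers[indices[i] + 1]++;
        for (int i = 0; i < nrows; ++i)
            pointers[i + 1] += pointers[i];
    }

For example, indices {0, 0, 2} with nrows = 3 yield pointers {0, 2, 2, 3}.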
@ -310,7 +308,6 @@ void compute_ref_sparse_matmul(const prb_t *prb, const args_t &args) {
});
}
}
#endif

void compute_ref(
const prb_t *prb, const args_t &args, dnnl_primitive_t prim_ref) {

@ -319,7 +316,6 @@ void compute_ref(
return;
}

#ifdef DNNL_EXPERIMENTAL_SPARSE
const auto src_encoding = prb->sparse_options.get_encoding(DNNL_ARG_SRC);
const auto wei_encoding
= prb->sparse_options.get_encoding(DNNL_ARG_WEIGHTS);

@ -330,9 +326,6 @@ void compute_ref(
} else {
compute_ref_matmul(prb, args);
}
#else
compute_ref_matmul(prb, args);
#endif
}

} // namespace matmul
@ -167,7 +167,6 @@ cold_cache_t::cold_cache_t(
for (size_t i = 0; i < n_buffers_; i++) {
cc_entry[i] = dnn_mem_t(orig_cc_mem_md, get_test_engine());

#ifdef DNNL_EXPERIMENTAL_SPARSE
// Sparse memories require this call to replicate the exact original
// data distribution because the data structure affects performance
// in a direct way.

@ -181,9 +180,7 @@ cold_cache_t::cold_cache_t(
i, __FILE__, __LINE__);
return;
}
} else
#endif
{
} else {
// Reorders are expensive. If there are multiple buffers to
// fill, simply rely on default memory initialization.
if (n_mem_pool_buffers > 100) continue;
@ -135,7 +135,6 @@ dnnl_engine_kind_t query_engine_kind(const dnnl_engine_t &engine) {
return engine_kind;
}

#ifdef DNNL_EXPERIMENTAL_SPARSE
dnnl_sparse_encoding_t query_md_sparse_encoding(const_dnnl_memory_desc_t md) {
dnnl_sparse_encoding_t encoding = dnnl_sparse_encoding_undef;
if (!md) return encoding;

@ -149,17 +148,12 @@ dnnl_dim_t query_md_nnz(const_dnnl_memory_desc_t md) {
dnnl_memory_desc_query_v2(md, dnnl_query_nnz_s64, 0, &nnz);
return nnz;
}
#endif

int query_md_num_handles(const_dnnl_memory_desc_t md) {
#ifdef DNNL_EXPERIMENTAL_SPARSE
int nhandles = 0;
if (!md) return nhandles;
dnnl_memory_desc_query_v2(md, dnnl_query_num_handles_s32, 0, &nhandles);
return nhandles;
#else
return 1;
#endif
}

int query_md_ndims(const_dnnl_memory_desc_t md) {
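Note: the helpers above wrap the index-aware query API. A compact sketch that pulls the same metadata directly; the queries and dnnl_memory_desc_get_size_v2 are the calls used in these hunks, while `md` being a sparse (CSR/COO) descriptor and the printing are assumptions of the example.

    #include <cstdio>
    #include "oneapi/dnnl/dnnl.h"

    // Prints how many buffers back a sparse memory descriptor, the non-zero
    // count, and each buffer's size in bytes.
    static void print_sparse_md_info(const_dnnl_memory_desc_t md) {
        int nhandles = 0;
        dnnl_memory_desc_query_v2(md, dnnl_query_num_handles_s32, 0, &nhandles);

        dnnl_dim_t nnz = 0;
        dnnl_memory_desc_query_v2(md, dnnl_query_nnz_s64, 0, &nnz);
        printf("buffers: %d, nnz: %lld\n", nhandles, (long long)nnz);

        for (int i = 0; i < nhandles; i++) {
            size_t sz = dnnl_memory_desc_get_size_v2(md, i);
            printf("  buffer %d: %zu bytes\n", i, sz);
        }
    }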
@ -188,11 +182,7 @@ dnnl_data_type_t query_md_data_type(
const_dnnl_memory_desc_t md, int buffer_index) {
dnnl_data_type_t dt = dnnl_data_type_undef;
if (!md) return dt;
#ifdef DNNL_EXPERIMENTAL_SPARSE
dnnl_memory_desc_query_v2(md, dnnl_query_data_type, buffer_index, &dt);
#else
dnnl_memory_desc_query(md, dnnl_query_data_type, &dt);
#endif
return dt;
}

@ -56,10 +56,8 @@ const_dnnl_primitive_desc_t query_pd(dnnl_primitive_t prim);

dnnl_engine_kind_t query_engine_kind(const dnnl_engine_t &engine);

#ifdef DNNL_EXPERIMENTAL_SPARSE
dnnl_sparse_encoding_t query_md_sparse_encoding(const_dnnl_memory_desc_t md);
dnnl_dim_t query_md_nnz(const_dnnl_memory_desc_t md);
#endif
int query_md_num_handles(const_dnnl_memory_desc_t md);
int query_md_ndims(const_dnnl_memory_desc_t md);
int query_md_inner_nblks(const_dnnl_memory_desc_t md);
@ -195,14 +195,12 @@ int fill_random_real_dense(dnn_mem_t &mem, dnn_mem_t &mem_ref, res_t *res,
// This function doesn't handle the predefined set yet.
assert(fill_cfg.predefined_set_.empty());

#ifdef DNNL_EXPERIMENTAL_SPARSE
// The `nelems()` function returns a product of dims/pdims regardless of
// whether the tensor is dense or sparse (this is by design). Because of
// that we need to adjust the `nelems` value for the sparse tensor as the
// number of elements to fill is equal to `nnz`.
if (mem_ref.format_kind() == dnnl_format_kind_sparse)
nelems = query_md_nnz(mem_ref.md_);
#endif

// Note: fill_cfg_t drives value distribution, but the final rounding is
// in compliance with the memory object the values are inserted. Depending

@ -290,7 +288,6 @@ int fill_random_real_dense(dnn_mem_t &mem, dnn_mem_t &mem_ref, res_t *res,
return OK;
}

#ifdef DNNL_EXPERIMENTAL_SPARSE
// Since a sparsity pattern affects performance, it's crucial to keep the
// pattern intact and only randomize tensor values. Thus, the function relies on
// an assumption that every sparse format contains three handles, where the

@ -313,17 +310,14 @@ int fill_random_real_sparse(const_dnnl_memory_t dnnl_memory, dnn_mem_t &mem,

return fill_random_real_dense(mem, mem_ref, res, fill_cfg);
}
#endif

int fill_random_real(dnn_mem_t &mem, dnn_mem_t &mem_ref, res_t *res,
const fill_cfg_t &fill_cfg, const_dnnl_memory_t dnnl_memory) {
#ifdef DNNL_EXPERIMENTAL_SPARSE
if (mem_ref.format_kind() == dnnl_format_kind_sparse) {
assert(dnnl_memory != nullptr);
return fill_random_real_sparse(
dnnl_memory, mem, mem_ref, res, fill_cfg);
}
#endif
return fill_random_real_dense(mem, mem_ref, res, fill_cfg);
}

@ -543,7 +543,6 @@ bool parse_tag(std::vector<std::string> &tag,
return true;
}

#ifdef DNNL_EXPERIMENTAL_SPARSE
bool parse_encoding(std::vector<sparse_options_t> &sparse_options,
const char *str, const std::string &option_name /* = "encoding"*/) {
static const std::string help

@ -562,7 +561,6 @@ bool parse_encoding(std::vector<sparse_options_t> &sparse_options,
return parse_vector_option(sparse_options, def, parse_sparse_options_func,
str, option_name, help);
}
#endif

bool parse_multi_tag(std::vector<std::vector<std::string>> &tag,
const std::vector<std::vector<std::string>> &def_tag, const char *str,
@ -218,12 +218,9 @@ bool parse_tag(std::vector<std::string> &tag,
const std::vector<std::string> &def_tag, const char *str,
const std::string &option_name = "tag");

#ifdef DNNL_EXPERIMENTAL_SPARSE
bool parse_encoding(std::vector<sparse_options_t> &sparse_options,
const char *str, const std::string &option_name = "encoding");

#endif

bool parse_multi_tag(std::vector<std::vector<std::string>> &tag,
const std::vector<std::vector<std::string>> &def_tag, const char *str,
const std::string &option_name = "stag");
@ -61,6 +61,7 @@ file(GLOB PRIM_TEST_CASES_SRC
test_iface_attr_quantization.cpp
test_iface_weights_format.cpp
test_iface_wino_convolution.cpp
test_iface_sparse.cpp
test_memory.cpp
test_sum.cpp
test_reorder.cpp

@ -95,10 +96,6 @@ file(GLOB PRIM_TEST_CASES_SRC
test_group_normalization.cpp
)

if(DNNL_EXPERIMENTAL_SPARSE)
list(APPEND PRIM_TEST_CASES_SRC test_iface_sparse.cpp)
endif()

if(DNNL_CPU_RUNTIME STREQUAL "NONE")
list(APPEND PRIM_TEST_CASES_SRC test_iface_gpu_only.cpp)
set_source_files_properties(test_iface_gpu_only.cpp PROPERTIES NO_ENGINE_PARAM true)
@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2019-2024 Intel Corporation
* Copyright 2019-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.

@ -213,7 +213,6 @@ HANDLE_EXCEPTIONS_FOR_TEST(ocl_memory_buffer_test_cpp_t, BufferMapUnmap) {
TEST_OCL_CHECK(clReleaseMemObject(ocl_mem));
}

#ifdef DNNL_EXPERIMENTAL_SPARSE
HANDLE_EXCEPTIONS_FOR_TEST(
ocl_memory_buffer_test_cpp_t, TestSparseMemoryCreation) {
engine eng(engine::kind::gpu, 0);

@ -331,6 +330,5 @@ HANDLE_EXCEPTIONS_FOR_TEST(
ASSERT_NO_THROW(coo_mem.unmap_data(mapped_row_indices, 1));
ASSERT_NO_THROW(coo_mem.unmap_data(mapped_col_indices, 2));
}
#endif

} // namespace dnnl

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2021-2024 Intel Corporation
* Copyright 2021-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.

@ -207,7 +207,6 @@ HANDLE_EXCEPTIONS_FOR_TEST(ocl_memory_usm_test_t, SharedMapUnmap) {
dnnl::impl::xpu::ocl::usm::free);
}

#ifdef DNNL_EXPERIMENTAL_SPARSE
HANDLE_EXCEPTIONS_FOR_TEST(ocl_memory_usm_test_t, TestSparseMemoryCreation) {
engine eng(engine::kind::gpu, 0);
const int nnz = 12;

@ -307,6 +306,5 @@ HANDLE_EXCEPTIONS_FOR_TEST(ocl_memory_usm_test_t, TestSparseMemoryMapUnmap) {
ASSERT_NO_THROW(coo_mem.unmap_data(mapped_row_indices, 1));
ASSERT_NO_THROW(coo_mem.unmap_data(mapped_col_indices, 2));
}
#endif

} // namespace dnnl
Some files were not shown because too many files have changed in this diff.