common: promote sparse functionality

This commit is contained in:
Stefan Palicki
2025-02-20 16:49:18 -08:00
parent 2200ab05a6
commit b85ca83faa
102 changed files with 426 additions and 917 deletions

View File

@ -13,7 +13,6 @@ if [[ "$ONEDNN_ACTION" == "configure" ]]; then
-DCMAKE_BUILD_TYPE=debug \
-DONEDNN_BUILD_GRAPH=ON \
-DDNNL_EXPERIMENTAL=ON \
-DDNNL_EXPERIMENTAL_SPARSE=ON \
-DDNNL_EXPERIMENTAL_PROFILING=ON \
-DDNNL_EXPERIMENTAL_UKERNEL=ON \
-DONEDNN_EXPERIMENTAL_LOGGING=ON \
@ -24,7 +23,7 @@ if [[ "$ONEDNN_ACTION" == "configure" ]]; then
set +x
elif [[ "$GITHUB_JOB" == "pr-format-tags" ]]; then
set -x
cmake -B../build -S. -DONEDNN_BUILD_GRAPH=OFF -DDNNL_EXPERIMENTAL_SPARSE=ON
cmake -B../build -S. -DONEDNN_BUILD_GRAPH=OFF
set +x
else
echo "Unknown linter job: $GITHUB_JOB"

View File

@ -1,5 +1,5 @@
#===============================================================================
# Copyright 2021-2024 Intel Corporation
# Copyright 2021-2025 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -34,7 +34,6 @@ endmacro()
set(COMPAT_CACHE_BOOL_VARS
"EXPERIMENTAL"
"EXPERIMENTAL_SPARSE"
"EXPERIMENTAL_UKERNEL"
"EXPERIMENTAL_LOGGING"
"VERBOSE"

View File

@ -203,11 +203,6 @@ option(DNNL_EXPERIMENTAL
using environment variables."
OFF) # disabled by default
option(DNNL_EXPERIMENTAL_SPARSE
"Enable experimental functionality for sparse domain. This option works
independently from DNNL_EXPERIMENTAL."
OFF) # disabled by default
option(DNNL_EXPERIMENTAL_UKERNEL
"Enable experimental functionality for ukernels. This option works
independently from DNNL_EXPERIMENTAL."

View File

@ -1,5 +1,5 @@
#===============================================================================
# Copyright 2016-2022 Intel Corporation
# Copyright 2016-2025 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -1962,7 +1962,7 @@ INCLUDE_FILE_PATTERNS =
# recursively expanded use the := operator instead of the = operator.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
PREDEFINED = DOXYGEN_SHOULD_SKIP_THIS DNNL_GPU_RUNTIME=DNNL_RUNTIME_OCL DNNL_WITH_SYCL DNNL_USE_SYCL_BUFFERS DNNL_EXPERIMENTAL_SPARSE DNNL_EXPERIMENTAL_UKERNEL DNNL_EXPERIMENTAL_LOGGING
PREDEFINED = DOXYGEN_SHOULD_SKIP_THIS DNNL_GPU_RUNTIME=DNNL_RUNTIME_OCL DNNL_WITH_SYCL DNNL_USE_SYCL_BUFFERS DNNL_EXPERIMENTAL_UKERNEL DNNL_EXPERIMENTAL_LOGGING
# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
# tag can be used to specify a list of macro names that should be expanded. The

View File

@ -27,249 +27,12 @@ Both kinds of experimental features can be enabled simultaneously.
| Build time option | Description |
|:-------------------------------------------|:-------------------------------------------------------------------|
| ONEDNN_EXPERIMENTAL_SPARSE | Enable experimental API and functionality for sparse domain. |
| ONEDNN_EXPERIMENTAL_UKERNEL | Enable experimental microkernel APIs and functionalities. |
| ONEDNN_EXPERIMENTAL_PROFILING | Enable experimental profiling API. |
| ONEDNN_EXPERIMENTAL_LOGGING | Enable experimental logging support for oneDNN verbose mode. |
## Features details
### ONEDNN_EXPERIMENTAL_SPARSE
This option extends the existing API and adds a new one to support sparse
functionality in oneDNN.
#### API
The main change is in oneDNN memory object semantics. Now, the memory object can
have multiple underlying buffers. In the case of regular dense computations, the
memory object always contains a single buffer. But in the case of sparse
computations, the memory object always contains one buffer for values and an
arbitrary number of additional buffers for metadata.
The underlying buffers are enumerated starting with 0, meaning that each buffer
has its own number. The buffer with values always has index 0.
In most cases, the API that works with underlying buffers takes a buffer index. The
exception is the API for creating a memory object. In that case, the API takes a vector
of buffers. The order of the buffers in the vector matters and should correspond to
the buffers' indices.
oneDNN also introduces a new format kind dnnl::memory::format_kind::sparse.
Sparse encoding (a.k.a. sparse format) is an enumeration type that specifies
how data is encoded. Currently, oneDNN supports Compressed Sparse Row (CSR),
Sorted Co-ordinate (COO) Sparse Format, and PACKED sparse encodings
(dnnl::memory::sparse_encoding::csr, dnnl::memory::sparse_encoding::coo,
dnnl::memory::sparse_encoding::packed) for CPU engine, and, only sorted
COO (Co-ordinate Sparse Format) for GPU engine.
The memory descriptor has dedicated static member functions for creating memory
descriptors for different sparse encodings.
Each encoding defines the number and meaning of the buffers.
| Sparse encoding | Buffers |
|:----------------|:---------------------------------------------------------------------------|
| CSR | 0 - values, 1 - indices, 2 - pointers |
| Sorted COO | 0 - values, 1 to *ndims* - indices (*ndims* - number of tensor dimensions) |
| PACKED | The meaning and content are unspecified |
The pseudocode below demonstrates how to create a memory object
for the CSR and COO sparse encodings and use the new API to work with the
underlying handles.
###### CSR Encoding:
~~~cpp
using namespace dnnl;
const memory::dim M = 4, N = 6;
const memory::dim nnz = 5;
const auto values_dt = memory::data_type::f32;
const auto indices_dt = memory::data_type::s32;
const auto pointers_dt = memory::data_type::s32;
// Create a memory descriptor for CSR sparse encoding.
const auto csr_md = memory::desc::csr(
{M, N}, // Dimensions
values_dt, // Data type of values
nnz, // Number of non-zero entries
indices_dt, // Data type of indices (metadata)
pointers_dt); // Data type of pointers (metadata)
// A sparse matrix represented in the CSR format.
std::vector<float> csr_values = {2.5f, 1.5f, 1.5f, 2.5f, 2.0f};
std::vector<int32_t> csr_indices = {0, 2, 0, 5, 1};
std::vector<int32_t> csr_pointers = {0, 1, 2, 4, 5, 5};
// Create a memory object for the given buffers with values and metadata.
memory csr_mem(csr_md, engine, {
csr_values.data(), // Buffer with values
csr_indices.data(), // Buffer with indices (metadata)
csr_pointers.data() // Buffer with pointers (metadata)
});
const auto values_sz = csr_mem.get_size(0);
const auto indices_sz = csr_mem.get_size(1);
const auto pointers_sz = csr_mem.get_size(2);
assert(values_sz == csr_values.size() * sizeof(float));
assert(indices_sz == csr_indices.size() * sizeof(int32_t));
assert(pointers_sz == csr_pointers.size() * sizeof(int32_t));
void *values_handle = csr_mem.get_data_handle(0);
void *indices_handle = csr_mem.get_data_handle(1);
void *pointers_handle = csr_mem.get_data_handle(2);
assert(values_handle == (void *)csr_values.data());
assert(indices_handle == (void *)csr_indices.data());
assert(pointers_handle == (void *)csr_pointers.data());
~~~
###### Sorted COO Encoding:
~~~cpp
using namespace dnnl;
const memory::dim M = 4, N = 6;
const memory::dim nnz = 5;
const auto values_dt = memory::data_type::f32;
const auto indices_dt = memory::data_type::s32;
// Create a memory descriptor for COO sparse encoding.
const auto coo_md = memory::desc::coo(
{M, N}, // Dimensions
values_dt, // Data type of values
nnz, // Number of non-zero entries
indices_dt); // Data type of indices (metadata)
// A sparse matrix represented in the COO format.
std::vector<float> coo_values = {2.5f, 1.5f, 1.5f, 2.5f, 2.0f};
std::vector<int32_t> coo_row_indices = {0, 1, 2, 2, 3};
std::vector<int32_t> coo_col_indices = {0, 2, 0, 5, 1};
// Create a memory object for the given buffers with values and metadata.
memory coo_mem(coo_md, engine, {
coo_values.data(), // Buffer with values
coo_row_indices.data(), // Buffer with row indices (metadata)
coo_col_indices.data() // Buffer with column indices (metadata)
});
const auto values_sz = coo_mem.get_size(0);
const auto indices_sz = coo_mem.get_size(1);
assert(values_sz == coo_values.size() * sizeof(float));
assert(indices_sz == coo_row_indices.size() * sizeof(int32_t));
assert(indices_sz == coo_col_indices.size() * sizeof(int32_t));
void *values_handle = coo_mem.get_data_handle(0);
void *row_indices_handle = coo_mem.get_data_handle(1);
void *col_indices_handle = coo_mem.get_data_handle(2);
assert(values_handle == (void *)coo_values.data());
assert(row_indices_handle == (void *)coo_row_indices.data());
assert(col_indices_handle == (void *)coo_col_indices.data());
~~~
A memory descriptor created for the sparse encoding PACKED cannot
be used to create a memory object. It can only be used to create
a primitive descriptor to query the actual memory descriptor
(similar to the format tag `any`).
#### Primitives
##### Matrix Multiplication
This option enables the matmul primitive that can work with
sparse input tensors.
###### CSR encoding
Supported only for the CPU engine. Only one of the input tensors can be sparse.
The output tensor is always dense.
The following data type combinations are supported:
| Values (src, weight, dst) | Indices |
|:----------------------------|:---------|
| f16, f16, f16 | s32 |
| f32, f32, f32 | s32 |
The following format tags are supported for dense input/output
tensors:
* ab
See the example [here](@ref cpu_matmul_csr_cpp).
Benchdnn can be used to test matmul with a CSR input tensor as follows:
`./benchdnn --matmul --encoding=csr+0.99:: --wtag=ab --dtag=ab 4x1000000:1000000x128`
For the case above, the number of non-zero elements for the source tensor is
calculated as max(4 * 1000000 * (1 - 0.99), 1).
###### COO encoding
Supported only for the CPU and GPU engines. Only one of the input tensors can
be sparse. The output tensor is always dense.
The following data type combinations are supported:
| Values (src, weight, dst) | Indices |
|:----------------------------|:---------|
| f16, f16, f16 | s32 |
| f32, f32, f32 | s32 |
The following format tags are supported for dense weights tensor:
* ab
* ba
The following format tags are supported for dense destination tensor:
* ab
See the example [here](@ref cpu_matmul_coo_cpp).
Benchdnn can be used to test matmul with a COO input tensor as follows:
`./benchdnn --matmul --encoding=coo+0.99:: --wtag=ab --dtag=ab 4x1000000:1000000x128`
For the case above, the number of non-zero elements for the source tensor is
calculated as max(4 * 1000000 * (1 - 0.99), 1).
###### PACKED encoding
Only the weights tensor is allowed to be sparse. The other tensors
are always dense.
In general, it is expected that all matmul related functionality (e.g. post-ops,
scales, zero-points, etc) that is supported for the dense weights should
also work for the sparse weights.
Currently, matmul has the following limitations for the PACKED encoding:
* Supported only for the CPU engine
* Only Intel Advanced Matrix Extensions (Intel AMX) instruction set
architecture (ISA) is supported
* Only `s8` data type for the weights is supported
* Only 1 batch dimension is supported
See the example [here](@ref cpu_matmul_weights_compression_cpp).
Benchdnn can be used to test matmul with the PACKED weights tensor as follows:
`./benchdnn --matmul --dt=s8:s8:s32 --encoding=:packed+0.99: 3x512x1024:1x1024x512`
For the case above, the number of non-zero elements for the weights tensor is
calculated as max(1024 * 512 * (1 - 0.99), 1).
##### Reorder
Currently, there is only one reorder for packing a dense tensor, i.e. converting
a dense tensor that is in `ab` format to a sparse tensor that is encoded with
the `PACKED` encoding.
In general, it is expected that all reorder-related functionality
(e.g. scales, zero-points, etc) that is supported for the dense
destination tensor should also work for the sparse one.
#### Common Limitations
* The interoperability API to get/set data handles is not supported. Use the
runtime agnostic API to do that.
* Sparse memory and memory descriptor can only be used with the Matrix
Multiplication and Reorder primitives.
### ONEDNN_EXPERIMENTAL_UKERNEL
This option enables a new set of CPU-only APIs to support block-level

121
doc/advanced/sparsity.md Normal file
View File

@ -0,0 +1,121 @@
Sparse memory formats {#dev_guide_sparsity}
===============================================
# API
oneDNN supports the format kind dnnl::memory::format_kind::sparse to describe sparse tensors.
Sparse encoding (a.k.a. sparse format) is an enumeration type that specifies
how data is encoded. Currently, oneDNN supports Compressed Sparse Row (CSR),
Sorted Co-ordinate (COO) Sparse Format, and PACKED sparse encodings
(dnnl::memory::sparse_encoding::csr, dnnl::memory::sparse_encoding::coo,
dnnl::memory::sparse_encoding::packed) for the CPU engine, and only sorted
COO (Co-ordinate Sparse Format) for GPU engine.
The memory descriptor has dedicated static member functions for creating memory
descriptors for different sparse encodings.
Each encoding defines the number and meaning of the buffers.
| Sparse encoding | Buffers |
|:----------------|:---------------------------------------------------------------------------|
| CSR | 0 - values, 1 - indices, 2 - pointers |
| Sorted COO | 0 - values, 1 to *ndims* - indices (*ndims* - number of tensor dimensions) |
| PACKED | The meaning and content are unspecified |
The pseudocode below demonstrates how to create a memory object
for the CSR and COO sparse encodings and use the new API to work with the
underlying handles.
# CSR Encoding:
~~~cpp
using namespace dnnl;
const memory::dim M = 4, N = 6;
const memory::dim nnz = 5;
const auto values_dt = memory::data_type::f32;
const auto indices_dt = memory::data_type::s32;
const auto pointers_dt = memory::data_type::s32;
// Create a memory descriptor for CSR sparse encoding.
const auto csr_md = memory::desc::csr(
{M, N}, // Dimensions
values_dt, // Data type of values
nnz, // Number of non-zero entries
indices_dt, // Data type of indices (metadata)
pointers_dt); // Data type of pointers (metadata)
// A sparse matrix represented in the CSR format.
std::vector<float> csr_values = {2.5f, 1.5f, 1.5f, 2.5f, 2.0f};
std::vector<int32_t> csr_indices = {0, 2, 0, 5, 1};
std::vector<int32_t> csr_pointers = {0, 1, 2, 4, 5, 5};
// Create a memory object for the given buffers with values and metadata.
memory csr_mem(csr_md, engine, {
csr_values.data(), // Buffer with values
csr_indices.data(), // Buffer with indices (metadata)
csr_pointers.data() // Buffer with pointers (metadata)
});
const auto values_sz = csr_mem.get_size(0);
const auto indices_sz = csr_mem.get_size(1);
const auto pointers_sz = csr_mem.get_size(2);
assert(values_sz == csr_values.size() * sizeof(float));
assert(indices_sz == csr_indices.size() * sizeof(int32_t));
assert(pointers_sz == csr_pointers.size() * sizeof(int32_t));
void *values_handle = csr_mem.get_data_handle(0);
void *indices_handle = csr_mem.get_data_handle(1);
void *pointers_handle = csr_mem.get_data_handle(2);
assert(values_handle == (void *)csr_values.data());
assert(indices_handle == (void *)csr_indices.data());
assert(pointers_handle == (void *)csr_pointers.data());
~~~
# Sorted COO Encoding:
~~~cpp
using namespace dnnl;
const memory::dim M = 4, N = 6;
const memory::dim nnz = 5;
const auto values_dt = memory::data_type::f32;
const auto indices_dt = memory::data_type::s32;
// Create a memory descriptor for COO sparse encoding.
const auto coo_md = memory::desc::coo(
{M, N}, // Dimensions
values_dt, // Data type of values
nnz, // Number of non-zero entries
indices_dt); // Data type of indices (metadata)
// A sparse matrix represented in the COO format.
std::vector<float> coo_values = {2.5f, 1.5f, 1.5f, 2.5f, 2.0f};
std::vector<int32_t> coo_row_indices = {0, 1, 2, 2, 3};
std::vector<int32_t> coo_col_indices = {0, 2, 0, 5, 1};
// Create a memory object for the given buffers with values and metadata.
memory coo_mem(coo_md, engine, {
coo_values.data(), // Buffer with values
coo_row_indices.data(), // Buffer with row indices (metadata)
coo_col_indices.data() // Buffer with column indices (metadata)
});
const auto values_sz = coo_mem.get_size(0);
const auto indices_sz = coo_mem.get_size(1);
assert(values_sz == coo_values.size() * sizeof(float));
assert(indices_sz == coo_row_indices.size() * sizeof(int32_t));
assert(indices_sz == coo_col_indices.size() * sizeof(int32_t));
void *values_handle = coo_mem.get_data_handle(0);
void *row_indices_handle = coo_mem.get_data_handle(1);
void *col_indices_handle = coo_mem.get_data_handle(2);
assert(values_handle == (void *)coo_values.data());
assert(row_indices_handle == (void *)coo_row_indices.data());
assert(col_indices_handle == (void *)coo_col_indices.data());
~~~
A memory descriptor created for the sparse encoding PACKED cannot
be used to create a memory object. It can only be used to create
a primitive descriptor to query the actual memory descriptor
(similar to the format tag `any`).

View File

@ -171,6 +171,87 @@ memory buffer that shares its shape with the destination buffer).
@note Please check tutorials below to see run-time attributes in use.
### Sparsity
#### CSR encoding
Supported only for the CPU engine. Only one of the input tensors can be sparse.
The output tensor is always dense.
The following data type combinations are supported:
| Values (src, weight, dst) | Indices |
|:----------------------------|:---------|
| f16, f16, f16 | s32 |
| f32, f32, f32 | s32 |
The following format tags are supported for dense input/output
tensors:
* ab
See the example [here](@ref cpu_matmul_csr_cpp).
Benchdnn can be used to test matmul with a CSR input tensor as follows:
`./benchdnn --matmul --encoding=csr+0.99:: --wtag=ab --dtag=ab 4x1000000:1000000x128`
For the case above, the number of non-zero elements for the source tensor is
calculated as max(4 * 1000000 * (1 - 0.99), 1).
#### COO encoding
Supported only for the CPU and GPU engines. Only one of the input tensors can
be sparse. The output tensor is always dense.
The following data type combinations are supported:
| Values (src, weight, dst) | Indices |
|:----------------------------|:---------|
| f16, f16, f16 | s32 |
| f32, f32, f32 | s32 |
The following format tags are supported for dense weights tensor:
* ab
* ba
The following format tags are supported for dense destination tensor:
* ab
See the example [here](@ref cpu_matmul_coo_cpp).
Benchdnn can be used to test matmul with a COO input tensor as follows:
`./benchdnn --matmul --encoding=coo+0.99:: --wtag=ab --dtag=ab 4x1000000:1000000x128`
For the case above, the number of non-zero elements for the source tensor is
calculated as max(4 * 1000000 * (1 - 0.99), 1).
#### PACKED encoding
Only the weights tensor is allowed to be sparse. The other tensors
are always dense.
In general, it is expected that all matmul related functionality (e.g. post-ops,
scales, zero-points, etc) that is supported for the dense weights should
also work for the sparse weights.
Currently, matmul has the following limitations for the PACKED encoding:
* Supported only for the CPU engine
* Only Intel Advanced Matrix Extensions (Intel AMX) instruction set
architecture (ISA) is supported
* Only `s8` data type for the weights is supported
* Only 1 batch dimension is supported
See the example [here](@ref cpu_matmul_weights_compression_cpp).
Benchdnn can be used to test matmul with the PACKED weights tensor as follows:
`./benchdnn --matmul --dt=s8:s8:s32 --encoding=:packed+0.99: 3x512x1024:1x1024x512`
For the case above, the number of non-zero elements for the weights tensor is
calculated as max(1024 * 512 * (1 - 0.99), 1).
Refer to [Sparsity Advanced Topic](@ref dev_guide_sparsity) page for more
information on sparse encoding.
## Implementation Limitations
1. Check @ref dev_guide_data_types.

View File

@ -128,6 +128,25 @@ would lead to the following operation:
multiplication of tensor values by a scale value. Using \f$scale_{dst}\f$
argument will lead to division of tensor values by a scale value.
### Sparsity
Currently, there is only one reorder for packing a dense tensor, i.e. converting
a dense tensor that is in `ab` format to a sparse tensor that is encoded with
the `PACKED` encoding.
In general, it is expected that all reorder-related functionality
(e.g. scales, zero-points, etc) that is supported for the dense
destination tensor should also work for the sparse one.
#### Common Limitations
* The interoperability API to get/set data handles is not supported. Use the
runtime agnostic API to do that.
* Sparse memory and memory descriptor can only be used with the Matrix
Multiplication and Reorder primitives.
Refer to [Sparsity Advanced Topic](@ref dev_guide_sparsity) page for more
information on sparse encoding.
## Implementation Limitations
1. Refer to @ref dev_guide_data_types for limitations related to data types

View File

@ -10,4 +10,5 @@ Advanced Topics
dev_guide_primitive_cache
dev_guide_persistent_cache
dev_guide_threadpool
dev_guide_sparsity
dev_guide_experimental

View File

@ -55,12 +55,6 @@ append_host_compiler_options(CMAKE_CXX_FLAGS "${DPCPP_CXX_NOWARN_FLAGS}")
file(GLOB_RECURSE sources *.cpp *.c)
file(GLOB_RECURSE headers *.hpp *.h)
if(NOT DNNL_EXPERIMENTAL_SPARSE)
list(REMOVE_ITEM sources ${CMAKE_CURRENT_SOURCE_DIR}/cpu_matmul_csr.cpp)
list(REMOVE_ITEM sources ${CMAKE_CURRENT_SOURCE_DIR}/cpu_matmul_coo.cpp)
list(REMOVE_ITEM sources ${CMAKE_CURRENT_SOURCE_DIR}/cpu_matmul_weights_compression.cpp)
endif()
if(NOT DNNL_EXPERIMENTAL_UKERNEL)
list(REMOVE_ITEM sources ${CMAKE_CURRENT_SOURCE_DIR}/ukernels/cpu_brgemm.cpp)
endif()

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2016-2024 Intel Corporation
* Copyright 2016-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -881,7 +881,6 @@ dnnl_status_t DNNL_API dnnl_memory_desc_create_with_tag(
dnnl_memory_desc_t *memory_desc, int ndims, const dnnl_dims_t dims,
dnnl_data_type_t data_type, dnnl_format_tag_t tag);
#ifdef DNNL_EXPERIMENTAL_SPARSE
/// Creates a memory descriptor for CSR encoding.
///
/// @param memory_desc Output memory descriptor.
@ -940,10 +939,10 @@ dnnl_status_t DNNL_API dnnl_memory_desc_create_with_coo_encoding(
/// @param nnz Number of non-zero entries.
/// @returns #dnnl_success on success and a status describing the error
/// otherwise.
/// @sa @ref dev_guide_sparsity
dnnl_status_t DNNL_API dnnl_memory_desc_create_with_packed_encoding(
dnnl_memory_desc_t *memory_desc, int ndims, const dnnl_dims_t dims,
dnnl_data_type_t data_type, dnnl_dim_t nnz);
#endif
/// Creates a memory descriptor for a region inside an area
/// described by an existing memory descriptor.
@ -1109,7 +1108,6 @@ dnnl_status_t DNNL_API dnnl_memory_desc_permute_axes(
dnnl_status_t DNNL_API dnnl_memory_desc_query(
const_dnnl_memory_desc_t memory_desc, dnnl_query_t what, void *result);
#ifdef DNNL_EXPERIMENTAL_SPARSE
/// Queries a memory descriptor for various pieces of information. This version
/// support additional queries #dnnl_query_sparse_encoding, #dnnl_query_nnz_s64
/// #dnnl_query_num_handles_s32 and #dnnl_query_data_type for a particular
@ -1164,10 +1162,10 @@ dnnl_status_t DNNL_API dnnl_memory_desc_query(
/// it must be a @c dnnl_dims_t** if querying for a strides.
/// @returns #dnnl_success on success and a status describing the error
/// otherwise.
/// @sa @ref dev_guide_sparsity
dnnl_status_t DNNL_API dnnl_memory_desc_query_v2(
const_dnnl_memory_desc_t memory_desc, dnnl_query_t what, int index,
void *result);
#endif
/// Compares two memory descriptors.
///
@ -1188,7 +1186,6 @@ int DNNL_API dnnl_memory_desc_equal(
/// descriptor.
size_t DNNL_API dnnl_memory_desc_get_size(const_dnnl_memory_desc_t memory_desc);
#ifdef DNNL_EXPERIMENTAL_SPARSE
/// Returns the size of the data that corresponds to the given index.
///
/// @param memory_desc Memory descriptor.
@ -1197,7 +1194,6 @@ size_t DNNL_API dnnl_memory_desc_get_size(const_dnnl_memory_desc_t memory_desc);
/// @returns The number of bytes required for the requested data.
size_t DNNL_API dnnl_memory_desc_get_size_v2(
const_dnnl_memory_desc_t memory_desc, int index);
#endif
/// Returns the size of data type.
///
@ -1229,7 +1225,6 @@ dnnl_status_t DNNL_API dnnl_memory_create(dnnl_memory_t *memory,
const_dnnl_memory_desc_t memory_desc, dnnl_engine_t engine,
void *handle);
#ifdef DNNL_EXPERIMENTAL_SPARSE
/// Creates a memory object with multiple handles.
///
/// @param memory Output memory object.
@ -1250,7 +1245,6 @@ dnnl_status_t DNNL_API dnnl_memory_create(dnnl_memory_t *memory,
dnnl_status_t DNNL_API dnnl_memory_create_v2(dnnl_memory_t *memory,
const_dnnl_memory_desc_t memory_desc, dnnl_engine_t engine,
int nhandles, void **handles);
#endif
/// Returns the memory descriptor for a memory object.
///
@ -1296,7 +1290,6 @@ dnnl_status_t DNNL_API dnnl_memory_get_engine(
dnnl_status_t DNNL_API dnnl_memory_map_data(
const_dnnl_memory_t memory, void **mapped_ptr);
#ifdef DNNL_EXPERIMENTAL_SPARSE
/// Maps a memory object and returns a host-side pointer to a memory buffer
/// with a copy of its contents. The memory buffer corresponds to the given
/// index.
@ -1324,7 +1317,6 @@ dnnl_status_t DNNL_API dnnl_memory_map_data(
/// otherwise.
dnnl_status_t DNNL_API dnnl_memory_map_data_v2(
const_dnnl_memory_t memory, void **mapped_ptr, int index);
#endif
/// Unmaps a memory object and writes back any changes made to the previously
/// mapped memory buffer. The pointer to the mapped buffer must be obtained
@ -1343,7 +1335,6 @@ dnnl_status_t DNNL_API dnnl_memory_map_data_v2(
dnnl_status_t DNNL_API dnnl_memory_unmap_data(
const_dnnl_memory_t memory, void *mapped_ptr);
#ifdef DNNL_EXPERIMENTAL_SPARSE
/// Unmaps a memory object and writes back any changes made to the previously
/// mapped memory buffer. The pointer to the mapped buffer must be obtained
/// via the dnnl_memory_map_data() call. The buffer corresponds to the given
@ -1362,7 +1353,6 @@ dnnl_status_t DNNL_API dnnl_memory_unmap_data(
/// otherwise.
dnnl_status_t DNNL_API dnnl_memory_unmap_data_v2(
const_dnnl_memory_t memory, void *mapped_ptr, int index);
#endif
/// Returns memory object's data handle.
///
@ -1385,7 +1375,6 @@ dnnl_status_t DNNL_API dnnl_memory_get_data_handle(
dnnl_status_t DNNL_API dnnl_memory_set_data_handle(
dnnl_memory_t memory, void *handle);
#ifdef DNNL_EXPERIMENTAL_SPARSE
/// Returns an underlying memory buffer that corresponds to the given index.
///
/// @param memory Memory object.
@ -1409,7 +1398,6 @@ dnnl_status_t DNNL_API dnnl_memory_get_data_handle_v2(
/// otherwise.
dnnl_status_t DNNL_API dnnl_memory_set_data_handle_v2(
dnnl_memory_t memory, void *handle, int index);
#endif
/// Destroys a memory object.
///

View File

@ -738,14 +738,12 @@ enum class query {
inner_blks = dnnl_query_inner_blks,
/// vector of logical indices of the blocks
inner_idxs = dnnl_query_inner_idxs,
#ifdef DNNL_EXPERIMENTAL_SPARSE
/// Sparse encoding
sparse_encoding = dnnl_query_sparse_encoding,
/// Number of non-zero entries
nnz_s64 = dnnl_query_nnz_s64,
/// Number of buffers required for a memory descriptor
num_handles_s32 = dnnl_query_num_handles_s32,
#endif
};
/// Converts query enum value from C++ API to C API type.
@ -905,31 +903,28 @@ struct memory : public handle<dnnl_memory_t> {
/// A tensor in a generic format described by the stride and blocking
/// values in each dimension.
blocked = dnnl_blocked,
#ifdef DNNL_EXPERIMENTAL_SPARSE
/// Format kind for sparse tensors.
sparse = dnnl_format_kind_sparse,
#endif
/// A special format kind that indicates that tensor format is opaque.
opaque = dnnl_format_kind_opaque,
};
#ifdef DNNL_EXPERIMENTAL_SPARSE
/// Sparse encodings.
/// @sa @ref dev_guide_sparsity
enum class sparse_encoding {
/// Undefined sparse encoding kind, used for empty memory descriptors.
undef = dnnl_sparse_encoding_undef,
/// Compressed Sparse Row (CSR) encoding.
csr = dnnl_csr,
/// An encoding that is used for an opaque storage schema for
/// tensors with unstructured sparsity. A memory descriptor with the
/// packed encoding cannot be used to create a memory object. It can
/// only be used to create a primitive descriptor to query the
/// actual memory descriptor (similar to the format tag `any`).
packed = dnnl_packed,
/// Coordinate Sparse (COO) encoding.
coo = dnnl_coo,
/// Undefined sparse encoding kind, used for empty memory descriptors.
undef = dnnl_sparse_encoding_undef,
/// Compressed Sparse Row (CSR) encoding.
csr = dnnl_csr,
/// An encoding that is used for an opaque storage schema for
/// tensors with unstructured sparsity. A memory descriptor with the
/// packed encoding cannot be used to create a memory object. It can
/// only be used to create a primitive descriptor to query the
/// actual memory descriptor (similar to the format tag `any`).
packed = dnnl_packed,
/// Coordinate Sparse (COO) encoding.
coo = dnnl_coo,
};
#endif
/// Memory format tag specification.
///
@ -2823,7 +2818,7 @@ struct memory : public handle<dnnl_memory_t> {
"strides");
reset(md);
}
#ifdef DNNL_EXPERIMENTAL_SPARSE
/// Function for creating a memory descriptor for CSR sparse encoding.
///
/// The created memory descriptor will describe a memory object that
@ -2842,6 +2837,7 @@ struct memory : public handle<dnnl_memory_t> {
/// allowed to fail without throwing an exception. In this case a
/// zero memory descriptor will be constructed. This flag is
/// optional and defaults to false.
/// @sa @ref dev_guide_sparsity
static desc csr(const dims &adims, data_type adata_type, dim nnz,
data_type index_dt, data_type pointer_dt,
bool allow_empty = false) {
@ -2876,6 +2872,7 @@ struct memory : public handle<dnnl_memory_t> {
/// allowed to fail without throwing an exception. In this case a
/// zero memory descriptor will be constructed. This flag is
/// optional and defaults to false.
/// @sa @ref dev_guide_sparsity
static desc coo(const dims &adims, data_type adata_type, dim nnz,
data_type index_dt, bool allow_empty = false) {
validate_dims(adims);
@ -2910,6 +2907,7 @@ struct memory : public handle<dnnl_memory_t> {
/// allowed to fail without throwing an exception. In this case a
/// zero memory descriptor will be constructed. This flag is
/// optional and defaults to false.
/// @sa @ref dev_guide_sparsity
static desc packed(const dims &adims, data_type adata_type, dim nnz,
bool allow_empty = false) {
validate_dims(adims);
@ -2923,7 +2921,7 @@ struct memory : public handle<dnnl_memory_t> {
"sparse encoding");
return desc {md};
}
#endif
/// Construct a memory descriptor from a C API ::dnnl_memory_desc_t
/// handle. The resulting handle is not weak and the C handle will be
/// destroyed during the destruction of the C++ object.
@ -3146,7 +3144,6 @@ struct memory : public handle<dnnl_memory_t> {
return query_dims(query::inner_idxs);
}
#ifdef DNNL_EXPERIMENTAL_SPARSE
/// Returns number of handles.
///
/// @returns A number of handles.
@ -3170,6 +3167,7 @@ struct memory : public handle<dnnl_memory_t> {
/// Returns the sparse encoding of the memory descriptor.
///
/// @returns the sparse encoding kind.
/// @sa @ref dev_guide_sparsity
memory::sparse_encoding get_sparse_encoding() const {
dnnl_sparse_encoding_t sparse_encoding;
dnnl_status_t status = dnnl_memory_desc_query_v2(
@ -3186,14 +3184,6 @@ struct memory : public handle<dnnl_memory_t> {
memory::data_type get_data_type(int index = 0) const {
return query_data_type(query::data_type, index);
}
#else
/// Returns the data type of the memory descriptor.
///
/// @returns The data type.
memory::data_type get_data_type() const {
return query_data_type(query::data_type);
}
#endif
/// Returns the format kind of the memory descriptor.
///
@ -3213,7 +3203,6 @@ struct memory : public handle<dnnl_memory_t> {
/// @returns A copy of the dimensions vector.
memory::dims get_dims() const { return query_dims(query::dims); }
#ifdef DNNL_EXPERIMENTAL_SPARSE
/// Returns size of the memory descriptor in bytes.
/// @param index Data index. Defaults to 0.
/// @returns The number of bytes required to allocate a memory buffer
@ -3222,13 +3211,6 @@ struct memory : public handle<dnnl_memory_t> {
size_t get_size(int index = 0) const {
return dnnl_memory_desc_get_size_v2(get(), index);
}
#else
/// Returns size of the memory descriptor in bytes.
/// @returns The number of bytes required to allocate a memory buffer
/// for the memory object described by this memory descriptor
/// including the padding area.
size_t get_size() const { return dnnl_memory_desc_get_size(get()); }
#endif
/// Returns a binary blob associated with the given memory descriptor
/// @returns The memory descriptor blob associated with the memory descriptor
@ -3265,7 +3247,6 @@ struct memory : public handle<dnnl_memory_t> {
bool operator!=(const desc &other) const { return !operator==(other); }
private:
#ifdef DNNL_EXPERIMENTAL_SPARSE
memory::data_type query_data_type(query what, int index) const {
dnnl_data_type_t data_type;
dnnl_status_t status = dnnl_memory_desc_query_v2(
@ -3274,16 +3255,6 @@ struct memory : public handle<dnnl_memory_t> {
? static_cast<dnnl::memory::data_type>(data_type)
: dnnl::memory::data_type::undef;
}
#else
memory::data_type query_data_type(query what) const {
dnnl_data_type_t data_type;
dnnl_status_t status = dnnl_memory_desc_query(
get(), dnnl::convert_to_c(what), &data_type);
return status == dnnl_success
? static_cast<dnnl::memory::data_type>(data_type)
: dnnl::memory::data_type::undef;
}
#endif
int query_s32(query what) const {
int res;
@ -3314,7 +3285,6 @@ struct memory : public handle<dnnl_memory_t> {
/// absence of a parameter.
memory() = default;
#ifdef DNNL_EXPERIMENTAL_SPARSE
/// Constructs a memory object.
///
/// Unless @p handle is equal to #DNNL_MEMORY_NONE, the constructed memory
@ -3383,43 +3353,6 @@ struct memory : public handle<dnnl_memory_t> {
error::wrap_c_api(status, "could not create a memory object");
reset(result);
}
#else
/// Constructs a memory object.
///
/// Unless @p handle is equal to #DNNL_MEMORY_NONE, the constructed memory
/// object will have the underlying buffer set. In this case, the buffer
/// will be initialized as if #dnnl::memory::set_data_handle() had been
/// called.
///
/// @sa memory::set_data_handle()
///
/// @param md Memory descriptor.
/// @param aengine Engine to store the data on.
/// @param handle Handle of the memory buffer to use.
/// - A pointer to the user-allocated buffer. In this case the library
/// doesn't own the buffer.
/// - The #DNNL_MEMORY_ALLOCATE special value. Instructs the library to
/// allocate the buffer for the memory object. In this case the
/// library owns the buffer.
/// - #DNNL_MEMORY_NONE to create dnnl::memory without an underlying
/// buffer.
memory(const desc &md, const engine &aengine, void *handle) {
dnnl_memory_t result;
error::wrap_c_api(
dnnl_memory_create(&result, md.get(), aengine.get(), handle),
"could not create a memory object");
reset(result);
}
/// Constructs a memory object.
///
/// The underlying buffer for the memory will be allocated by the library.
///
/// @param md Memory descriptor.
/// @param aengine Engine to store the data on.
memory(const desc &md, const engine &aengine)
: memory(md, aengine, DNNL_MEMORY_ALLOCATE) {}
#endif
/// Returns the associated memory descriptor.
desc get_desc() const {
@ -3440,7 +3373,6 @@ struct memory : public handle<dnnl_memory_t> {
return engine(c_engine, true);
}
#ifdef DNNL_EXPERIMENTAL_SPARSE
/// Returns an underlying memory buffer that corresponds to the given index.
///
/// On the CPU engine, or when using USM, this is a pointer to the
@ -3511,73 +3443,6 @@ struct memory : public handle<dnnl_memory_t> {
error::wrap_c_api(dnnl_memory_unmap_data_v2(get(), mapped_ptr, index),
"could not unmap memory object data");
}
#else
/// Returns the underlying memory buffer.
///
/// On the CPU engine, or when using USM, this is a pointer to the
/// allocated memory.
void *get_data_handle() const {
void *handle;
error::wrap_c_api(dnnl_memory_get_data_handle(get(), &handle),
"could not get a native handle from a memory object");
return handle;
}
/// Sets the underlying memory buffer.
///
/// @param handle Memory buffer to use. On the CPU engine or when USM is
/// used, the memory buffer is a pointer to the actual data. For OpenCL
/// it is a cl_mem. It must have at least
/// #dnnl::memory::desc::get_size() bytes allocated.
void set_data_handle(void *handle) const {
error::wrap_c_api(dnnl_memory_set_data_handle(get(), handle),
"could not set native handle of a memory object");
}
/// Maps a memory object and returns a host-side pointer to a memory
/// buffer with a copy of its contents.
///
/// Mapping enables read/write directly from/to the memory contents for
/// engines that do not support direct memory access.
///
/// Mapping is an exclusive operation - a memory object cannot be used in
/// other operations until it is unmapped via #dnnl::memory::unmap_data()
/// call.
///
/// @note
/// Any primitives working with the memory should be completed before
/// the memory is mapped. Use #dnnl::stream::wait() to synchronize the
/// corresponding execution stream.
///
/// @note
/// The map_data and unmap_data functions are provided mainly for
/// debug and testing purposes and their performance may be suboptimal.
///
/// @tparam T Data type to return a pointer to.
/// @returns Pointer to the mapped memory.
template <typename T = void>
T *map_data() const {
void *mapped_ptr;
error::wrap_c_api(dnnl_memory_map_data(get(), &mapped_ptr),
"could not map memory object data");
return static_cast<T *>(mapped_ptr);
}
/// Unmaps a memory object and writes back any changes made to the
/// previously mapped memory buffer.
///
/// @note
/// The map_data and unmap_data functions are provided mainly for
/// debug and testing purposes and their performance may be
/// suboptimal.
///
/// @param mapped_ptr A pointer previously returned by
/// #dnnl::memory::map_data().
void unmap_data(void *mapped_ptr) const {
error::wrap_c_api(dnnl_memory_unmap_data(get(), mapped_ptr),
"could not unmap memory object data");
}
#endif
static dnnl_data_type_t convert_to_c(data_type adata_type) {
return static_cast<dnnl_data_type_t>(adata_type);

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2019-2024 Intel Corporation
* Copyright 2019-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -163,9 +163,6 @@
// When defined, experimental features are enabled.
#cmakedefine DNNL_EXPERIMENTAL
// When defined, experimental functionality for sparse domain is enabled.
#cmakedefine DNNL_EXPERIMENTAL_SPARSE
// When defined, experimental functionality for ukernels is enabled.
#cmakedefine DNNL_EXPERIMENTAL_UKERNEL

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2018-2024 Intel Corporation
* Copyright 2018-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -37,9 +37,7 @@ const char DNNL_API *dnnl_dt2str(dnnl_data_type_t v);
const char DNNL_API *dnnl_fpmath_mode2str(dnnl_fpmath_mode_t v);
const char DNNL_API *dnnl_accumulation_mode2str(dnnl_accumulation_mode_t v);
const char DNNL_API *dnnl_engine_kind2str(dnnl_engine_kind_t v);
#ifdef DNNL_EXPERIMENTAL_SPARSE
const char DNNL_API *dnnl_sparse_encoding2str(dnnl_sparse_encoding_t v);
#endif
const char DNNL_API *dnnl_fmt_tag2str(dnnl_format_tag_t v);
const char DNNL_API *dnnl_prop_kind2str(dnnl_prop_kind_t v);
const char DNNL_API *dnnl_prim_kind2str(dnnl_primitive_kind_t v);

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2020-2024 Intel Corporation
* Copyright 2020-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -75,7 +75,6 @@ dnnl_status_t DNNL_API dnnl_ocl_interop_memory_create(dnnl_memory_t *memory,
const_dnnl_memory_desc_t memory_desc, dnnl_engine_t engine,
dnnl_ocl_interop_memory_kind_t memory_kind, void *handle);
#ifdef DNNL_EXPERIMENTAL_SPARSE
/// Creates a memory object with multiple handles.
///
/// @param memory Output memory object.
@ -102,7 +101,6 @@ dnnl_status_t DNNL_API dnnl_ocl_interop_memory_create_v2(dnnl_memory_t *memory,
const_dnnl_memory_desc_t memory_desc, dnnl_engine_t engine,
dnnl_ocl_interop_memory_kind_t memory_kind, int nhandles,
void **handles);
#endif
/// Returns the memory allocation kind associated with a memory object.
///

View File

@ -236,7 +236,6 @@ inline memory_kind get_memory_kind(const memory &amemory) {
return static_cast<memory_kind>(ckind);
}
#ifdef DNNL_EXPERIMENTAL_SPARSE
/// Creates a memory object with multiple handles.
///
/// @param memory_desc Memory descriptor.
@ -340,61 +339,6 @@ inline memory make_memory(const memory::desc &memory_desc,
const engine &aengine, cl_mem mem_object) {
return make_memory(memory_desc, aengine, std::vector<cl_mem> {mem_object});
}
#else
/// Creates a memory object.
///
/// Unless @p handle is equal to DNNL_MEMORY_NONE or DNNL_MEMORY_ALLOCATE, the
/// constructed memory object will have the underlying buffer set. In this
/// case, the buffer will be initialized as if:
/// - dnnl::memory::set_data_handle() had been called, if @p memory_kind is
/// equal to dnnl::ocl_interop::memory_kind::usm, or
/// - dnnl::ocl_interop::set_mem_object() has been called, if @p memory_kind is
/// equal to dnnl::ocl_interop::memory_kind::buffer.
///
/// @param memory_desc Memory descriptor.
/// @param aengine Engine to use.
/// @param kind Memory allocation kind to specify the type of handle.
/// @param handle Handle of the memory buffer to use as an underlying storage.
/// - A USM pointer to the user-allocated buffer. In this case the library
/// doesn't own the buffer. Requires @p memory_kind to be equal to
/// dnnl::ocl_interop::memory_kind::usm.
/// - An OpenCL buffer. In this case the library doesn't own the buffer.
/// Requires @p memory_kind be equal to be equal to
/// dnnl::ocl_interop::memory_kind::buffer.
/// - The DNNL_MEMORY_ALLOCATE special value. Instructs the library to
/// allocate the buffer that corresponds to the memory allocation kind
/// @p memory_kind for the memory object. In this case the library
/// owns the buffer.
/// - The DNNL_MEMORY_NONE specific value. Instructs the library to
/// create memory object without an underlying buffer.
///
/// @returns Created memory object.
inline memory make_memory(const memory::desc &memory_desc,
const engine &aengine, memory_kind kind,
void *handle = DNNL_MEMORY_ALLOCATE) {
dnnl_memory_t c_memory;
error::wrap_c_api(
dnnl_ocl_interop_memory_create(&c_memory, memory_desc.get(),
aengine.get(), convert_to_c(kind), handle),
"could not create a memory");
return memory(c_memory);
}
/// Constructs a memory object from an OpenCL buffer.
///
/// @param memory_desc Memory descriptor.
/// @param aengine Engine to use.
/// @param mem_object An OpenCL buffer to use.
///
/// @returns Created memory object.
inline memory make_memory(const memory::desc &memory_desc,
const engine &aengine, cl_mem mem_object) {
memory amemory(memory_desc, aengine, DNNL_MEMORY_NONE);
set_mem_object(amemory, mem_object);
return amemory;
}
#endif
/// Executes computations specified by the primitive in a specified stream and
/// returns a SYCL event.

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2020-2024 Intel Corporation
* Copyright 2020-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -95,7 +95,6 @@ dnnl_status_t DNNL_API dnnl_sycl_interop_memory_create(dnnl_memory_t *memory,
const_dnnl_memory_desc_t memory_desc, dnnl_engine_t engine,
dnnl_sycl_interop_memory_kind_t memory_kind, void *handle);
#ifdef DNNL_EXPERIMENTAL_SPARSE
/// Creates a memory object with multiple handles.
///
/// @param memory Output memory object.
@ -123,7 +122,6 @@ dnnl_status_t DNNL_API dnnl_sycl_interop_memory_create_v2(dnnl_memory_t *memory,
const_dnnl_memory_desc_t memory_desc, dnnl_engine_t engine,
dnnl_sycl_interop_memory_kind_t memory_kind, int nhandles,
void **handles);
#endif
/// Returns the memory allocation kind associated with a memory object.
///

View File

@ -206,7 +206,6 @@ inline memory_kind get_memory_kind(const memory &amemory) {
return static_cast<memory_kind>(ckind);
}
#ifdef DNNL_EXPERIMENTAL_SPARSE
/// Creates a memory object with multiple handles.
///
/// @param memory_desc Memory descriptor.
@ -281,47 +280,6 @@ inline memory make_memory(const memory::desc &memory_desc,
return make_memory(
memory_desc, aengine, kind, std::vector<void *> {handle});
}
#else
/// Creates a memory object.
///
/// Unless @p handle is equal to DNNL_MEMORY_NONE or DNNL_MEMORY_ALLOCATE, the
/// constructed memory object will have the underlying buffer set. In this
/// case, the buffer will be initialized as if:
/// - dnnl::memory::set_data_handle() had been called, if @p memory_kind is
/// equal to dnnl::sycl_interop::memory_kind::usm, or
/// - dnnl::sycl_interop::set_buffer() has been called, if @p memory_kind is
/// equal to dnnl::sycl_interop::memory_kind::buffer.
///
/// @param memory_desc Memory descriptor.
/// @param aengine Engine to use.
/// @param kind Memory allocation kind to specify the type of handle.
/// @param handle Handle of the memory buffer to use as an underlying storage.
/// - A USM pointer to the user-allocated buffer. In this case the library
/// doesn't own the buffer. Requires @p memory_kind to be equal to
/// dnnl::sycl_interop::memory_kind::usm.
/// - A pointer to SYCL buffer. In this case the library doesn't own the
/// buffer. Requires @p memory_kind be equal to be equal to
/// dnnl::sycl_interop::memory_kind::buffer.
/// - The DNNL_MEMORY_ALLOCATE special value. Instructs the library to
/// allocate the buffer that corresponds to the memory allocation kind
/// @p memory_kind for the memory object. In this case the library
/// owns the buffer.
/// - The DNNL_MEMORY_NONE specific value. Instructs the library to
/// create memory object without an underlying buffer.
///
/// @returns Created memory object.
inline memory make_memory(const memory::desc &memory_desc,
const engine &aengine, memory_kind kind,
void *handle = DNNL_MEMORY_ALLOCATE) {
dnnl_memory_t c_memory;
error::wrap_c_api(
dnnl_sycl_interop_memory_create(&c_memory, memory_desc.get(),
aengine.get(), convert_to_c(kind), handle),
"could not create a memory");
return memory(c_memory);
}
#endif
/// Constructs a memory object from a SYCL buffer.
///

View File

@ -52,18 +52,16 @@ typedef enum {
dnnl_blocked,
/// A special format kind that indicates that tensor format is opaque.
dnnl_format_kind_opaque,
#ifdef DNNL_EXPERIMENTAL_SPARSE
/// Format kind for sparse tensors.
dnnl_format_kind_sparse,
#endif
/// Parameter to allow internal only format kinds without undefined
/// behavior. This parameter is chosen to be valid for so long as
/// sizeof(int) >= 2.
dnnl_format_kind_max = 0x7fff,
} dnnl_format_kind_t;
#ifdef DNNL_EXPERIMENTAL_SPARSE
/// Sparse encodings.
/// @sa @ref dev_guide_sparsity
typedef enum {
/// Undefined sparse encoding kind, used for empty memory descriptors.
dnnl_sparse_encoding_undef = 0,
@ -78,7 +76,6 @@ typedef enum {
/// Coordinate Sparse Encoding (COO).
dnnl_coo,
} dnnl_sparse_encoding_t;
#endif
#ifdef DNNL_EXPERIMENTAL_PROFILING
/// Profiling data kind.
@ -2831,12 +2828,9 @@ typedef enum {
dnnl_query_inner_nblks_s32, ///< number of innermost blocks
dnnl_query_inner_blks, ///< vector of sizes of the innermost blocks
dnnl_query_inner_idxs, ///< vector of logical indices of the blocks
#ifdef DNNL_EXPERIMENTAL_SPARSE
dnnl_query_sparse_encoding, ///< Sparse encoding
dnnl_query_nnz_s64, ///< Number of non-zero entries
dnnl_query_num_handles_s32, ///< Number of buffers required for a memory
/// descriptor
#endif
dnnl_query_num_handles_s32, ///< Number of buffers required for a memory descriptor
// Max value to prevent UB for internal use only dnnl_query_t
dnnl_query_max = 0x7fff,
} dnnl_query_t;

View File

@ -10,8 +10,7 @@ should be executed to re-generate the debug header and relevant source code.
```sh
# Generate dnnl_config.h
# -DDNNL_EXPERIMENTAL_SPARSE=ON is required to preserve sparse-specific symbols
$ (mkdir -p build && cd build && cmake -DONEDNN_BUILD_GRAPH=OFF -DDNNL_EXPERIMENTAL_SPARSE=ON ..)
$ (mkdir -p build && cd build && cmake -DONEDNN_BUILD_GRAPH=OFF ..)
# Generate types.xml
# CastXML can be found at https://github.com/CastXML/CastXML

View File

@ -103,9 +103,7 @@ const char *dt2str(dnnl_data_type_t dt);
const char *fmt_tag2str(dnnl_format_tag_t tag);
/* encoding */
#ifdef DNNL_EXPERIMENTAL_SPARSE
const char *sparse_encoding2str(dnnl_sparse_encoding_t encoding);
#endif
/* engine kind */
const char *engine_kind2str(dnnl_engine_kind_t kind);
@ -155,11 +153,9 @@ const char *fmt_tag2str(dnnl_format_tag_t tag) {
return dnnl_fmt_tag2str(tag);
}
#ifdef DNNL_EXPERIMENTAL_SPARSE
const char *sparse_encoding2str(dnnl_sparse_encoding_t encoding) {
return dnnl_sparse_encoding2str(encoding);
}
#endif
const char *engine_kind2str(dnnl_engine_kind_t kind) {
return dnnl_engine_kind2str(kind);
@ -307,31 +303,15 @@ def generate(ifile, banners):
continue
values = [v_value.attrib["name"] for v_value in v_enum.findall("EnumValue")]
if enum in ["dnnl_sparse_encoding_t"]:
h_body += "#ifdef DNNL_EXPERIMENTAL_SPARSE\n"
s_body += "#ifdef DNNL_EXPERIMENTAL_SPARSE\n"
h_body += func_to_str_decl(enum, is_header=True) + ";\n"
s_body += func_to_str(enum, values) + "\n"
if enum in ["dnnl_sparse_encoding_t"]:
h_body += "#endif\n"
s_body += "#endif\n"
if enum in ["dnnl_format_tag_t", "dnnl_data_type_t", "dnnl_sparse_encoding_t"]:
if enum in ["dnnl_sparse_encoding_t"]:
h_benchdnn_body += "#ifdef DNNL_EXPERIMENTAL_SPARSE\n"
s_benchdnn_body += "#ifdef DNNL_EXPERIMENTAL_SPARSE\n"
h_benchdnn_body += (
str_to_func_decl(enum, is_header=True, is_dnnl=False) + ";\n"
)
s_benchdnn_body += str_to_func(enum, values, is_dnnl=False) + "\n"
if enum in ["dnnl_sparse_encoding_t"]:
h_benchdnn_body += "#endif\n"
s_benchdnn_body += "#endif\n"
bodies = [
header(h_body),
source(s_body),
@ -349,8 +329,7 @@ def usage():
Generates oneDNN debug header and source files with enum to string mapping.
Input types.xml file can be obtained with CastXML[1]:
$ castxml --castxml-cc-gnu-c clang --castxml-output=1 \\
-DDNNL_EXPERIMENTAL_SPARSE -Iinclude -Ibuild/include \\
include/oneapi/dnnl/dnnl_types.h -o types.xml
-Iinclude -Ibuild/include include/oneapi/dnnl/dnnl_types.h -o types.xml
[1] https://github.com/CastXML/CastXML"""
% sys.argv[0]

View File

@ -76,10 +76,6 @@ if(DNNL_EXPERIMENTAL)
message(STATUS "Experimental features are enabled")
endif()
if(DNNL_EXPERIMENTAL_SPARSE)
message(STATUS "Experimental functionality for sparse domain is enabled")
endif()
if(DNNL_EXPERIMENTAL_UKERNEL)
if(DNNL_TARGET_ARCH STREQUAL "X64" OR DNNL_TARGET_ARCH STREQUAL "AARCH64")
message(STATUS "Experimental functionality for ukernels is enabled")

View File

@ -206,7 +206,6 @@ const rounding_mode_t environment = dnnl_rounding_mode_environment;
const rounding_mode_t stochastic = dnnl_rounding_mode_stochastic;
} // namespace rounding_mode
#ifdef DNNL_EXPERIMENTAL_SPARSE
using sparse_encoding_t = dnnl_sparse_encoding_t;
namespace sparse_encoding {
const sparse_encoding_t undef = dnnl_sparse_encoding_undef;
@ -214,16 +213,6 @@ const sparse_encoding_t csr = dnnl_csr;
const sparse_encoding_t coo = dnnl_coo;
const sparse_encoding_t packed = dnnl_packed;
} // namespace sparse_encoding
#else
// Declare dummy values to avoid guarding internal implementation.
using sparse_encoding_t = int;
namespace sparse_encoding {
const sparse_encoding_t undef = 0;
const sparse_encoding_t csr = 1;
const sparse_encoding_t packed = 2;
const sparse_encoding_t coo = 3;
} // namespace sparse_encoding
#endif
using format_kind_t = dnnl_format_kind_t;
namespace format_kind {
@ -231,11 +220,7 @@ const format_kind_t undef = dnnl_format_kind_undef;
const format_kind_t any = dnnl_format_kind_any;
const format_kind_t blocked = dnnl_blocked;
const format_kind_t opaque = dnnl_format_kind_opaque;
#ifdef DNNL_EXPERIMENTAL_SPARSE
const format_kind_t sparse = dnnl_format_kind_sparse;
#else
const format_kind_t sparse = static_cast<format_kind_t>(4);
#endif
// Internal only format kinds.
const format_kind_t internal_only_start = (format_kind_t)(1 << 8);
@ -2054,15 +2039,9 @@ const query_t inner_nblks_s32 = dnnl_query_inner_nblks_s32;
const query_t inner_blks = dnnl_query_inner_blks;
const query_t inner_idxs = dnnl_query_inner_idxs;
#ifdef DNNL_EXPERIMENTAL_SPARSE
const query_t sparse_encoding = dnnl_query_sparse_encoding;
const query_t nnz_s64 = dnnl_query_nnz_s64;
const query_t num_handles_s32 = dnnl_query_num_handles_s32;
#else
const query_t sparse_encoding = static_cast<query_t>(266);
const query_t nnz_s64 = static_cast<query_t>(267);
const query_t num_handles_s32 = static_cast<query_t>(268);
#endif
// Internal only query kinds.
const query_t internal_only_start = (query_t)(1 << 12);

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2019-2023 Intel Corporation
* Copyright 2019-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -46,9 +46,7 @@ const char *dnnl_fmt_kind2str(dnnl_format_kind_t v) {
if (v == dnnl_format_kind_undef) return "undef";
if (v == dnnl_format_kind_any) return "any";
if (v == dnnl_blocked) return "blocked";
#ifdef DNNL_EXPERIMENTAL_SPARSE
if (v == dnnl_format_kind_sparse) return "sparse";
#endif
if (v == format_kind::wino || v == format_kind::rnn_packed
|| v == format_kind::cublaslt_blocked)
return "opaque";

View File

@ -94,7 +94,6 @@ const char *dnnl_engine_kind2str(dnnl_engine_kind_t v) {
return "unknown engine_kind";
}
#ifdef DNNL_EXPERIMENTAL_SPARSE
const char *dnnl_sparse_encoding2str(dnnl_sparse_encoding_t v) {
if (v == dnnl_sparse_encoding_undef) return "undef";
if (v == dnnl_csr) return "csr";
@ -104,7 +103,6 @@ const char *dnnl_sparse_encoding2str(dnnl_sparse_encoding_t v) {
return "unknown sparse_encoding";
}
#endif
const char *dnnl_fmt_tag2str(dnnl_format_tag_t v) {
if (v == dnnl_format_tag_undef) return "undef";
if (v == dnnl_format_tag_any) return "any";

View File

@ -91,7 +91,6 @@ dnnl_memory::dnnl_memory(dnnl::impl::engine_t *engine,
this->reset_memory_storage(std::move(memory_storage));
}
#ifdef DNNL_EXPERIMENTAL_SPARSE
dnnl_memory::dnnl_memory(dnnl::impl::engine_t *engine,
const dnnl::impl::memory_desc_t *md,
std::vector<std::unique_ptr<dnnl::impl::memory_storage_t>>
@ -100,7 +99,6 @@ dnnl_memory::dnnl_memory(dnnl::impl::engine_t *engine,
, md_(*md)
, memory_storages_(std::move(memory_storages))
, counter_(1) {}
#endif
status_t dnnl_memory::set_data_handle(void *handle, int index) const {
using namespace dnnl::impl;
@ -170,7 +168,6 @@ status_t dnnl_memory_create(memory_t **memory, const memory_desc_t *md,
return success;
}
#ifdef DNNL_EXPERIMENTAL_SPARSE
status_t dnnl_memory_create_v2(memory_t **memory, const memory_desc_t *md,
engine_t *engine, int nhandles, void **handles) {
const bool args_ok = !any_null(memory, engine, handles) && nhandles > 0;
@ -213,7 +210,6 @@ status_t dnnl_memory_create_v2(memory_t **memory, const memory_desc_t *md,
*memory = _memory;
return success;
}
#endif
status_t dnnl_memory_get_memory_desc(
const memory_t *memory, const memory_desc_t **md) {

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2018-2024 Intel Corporation
* Copyright 2018-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -55,12 +55,10 @@ struct dnnl_memory : public dnnl::impl::c_compatible {
dnnl_memory(dnnl::impl::engine_t *engine,
const dnnl::impl::memory_desc_t *md,
std::unique_ptr<dnnl::impl::memory_storage_t> &&memory_storage);
#ifdef DNNL_EXPERIMENTAL_SPARSE
dnnl_memory(dnnl::impl::engine_t *engine,
const dnnl::impl::memory_desc_t *md,
std::vector<std::unique_ptr<dnnl::impl::memory_storage_t>>
&&memory_storage);
#endif
/** returns memory's engine */
dnnl::impl::engine_t *engine() const { return engine_; }

View File

@ -755,7 +755,6 @@ status_t dnnl_memory_desc_query(
return status::success;
}
#ifdef DNNL_EXPERIMENTAL_SPARSE
status_t dnnl_memory_desc_query_v2(
const memory_desc_t *md, query_t what, int index, void *result) {
if (any_null(md, result)) return invalid_arguments;
@ -801,7 +800,6 @@ status_t dnnl_memory_desc_query_v2(
}
return status::success;
}
#endif
status_t dnnl_memory_desc_destroy(memory_desc_t *memory_desc) {
delete memory_desc;

View File

@ -174,7 +174,6 @@ size_t get_md_hash(const memory_desc_t &md) {
seed, md.format_desc.rnn_packed_desc.offset_compensation);
seed = hash_combine(seed, md.format_desc.rnn_packed_desc.size);
break;
#ifdef DNNL_EXPERIMENTAL_SPARSE
case format_kind::sparse:
seed = hash_combine(seed,
static_cast<size_t>(md.format_desc.sparse_desc.encoding));
@ -185,7 +184,6 @@ size_t get_md_hash(const memory_desc_t &md) {
// User cannot initialize `packed_desc` therefore `packed_desc`
// is always zero initialized.
break;
#endif
default: assert(!"unknown format_kind");
}

View File

@ -998,10 +998,8 @@ inline bool operator==(const sdpa_desc_t &lhs, const sdpa_desc_t &rhs) {
inline bool is_dense_format_kind(
const std::vector<const memory_desc_t *> &mds) {
#ifdef DNNL_EXPERIMENTAL_SPARSE
for (const auto *md : mds)
if (md->format_kind == format_kind::sparse) return false;
#endif
return true;
}

View File

@ -131,13 +131,6 @@ void print_header() noexcept {
verbose_printf("info,GPU convolution v2 is %s\n",
experimental::use_gpu_conv_v2() ? "enabled" : "disabled");
#endif
#ifdef DNNL_EXPERIMENTAL_SPARSE
verbose_printf(
"info,experimental functionality for sparse domain is "
"enabled\n");
#endif
verbose_printf(
"primitive,info,template:%soperation,engine,primitive,"
"implementation,prop_kind,memory_descriptors,attributes,"
@ -354,12 +347,10 @@ std::ostream &operator<<(std::ostream &ss, format_kind_t format_kind) {
return ss;
}
#ifdef DNNL_EXPERIMENTAL_SPARSE
std::ostream &operator<<(std::ostream &ss, sparse_encoding_t encoding) {
ss << dnnl_sparse_encoding2str(encoding);
return ss;
}
#endif
std::string normalization_flags2str(unsigned flags) {
std::string s;

View File

@ -32,6 +32,9 @@ namespace aarch64 {
status_t acl_binary_t::pd_t::init(engine_t *engine) {
using namespace acl_utils;
if (!impl::is_dense_format_kind({src_md(0), src_md(1), dst_md()}))
return status::unimplemented;
// Only support f16/f32/s32 for now
data_type_t ddt = dst_md(0)->data_type;
if (!utils::one_of(ddt, data_type::f16, data_type::f32, data_type::s32))

View File

@ -50,7 +50,8 @@ status_t acl_depthwise_convolution_fwd_t::pd_t::init(engine_t *engine) {
primitive_attr_t::skip_mask_t::post_ops, f32);
bool ok = is_fwd() && set_default_alg_kind(alg_kind::convolution_direct)
&& utils::one_of(true, is_fp16_ok, is_fp32_ok)
&& !has_zero_dim_memory();
&& !has_zero_dim_memory()
&& impl::is_dense_format_kind({src_md(), weights_md(), dst_md()});
if (!ok) return status::unimplemented;
if (weights_md_.ndims != 5) return status::unimplemented;

View File

@ -73,8 +73,9 @@ status_t acl_eltwise_fwd_t::pd_t::init(engine_t *engine) {
bool ok = is_fwd() && one_of(src_d.data_type(), f32, f16, s32, s8)
&& !has_zero_dim_memory() && attr()->has_default_values()
&& set_default_formats_common() && src_d.is_dense()
&& src_d == memory_desc_wrapper(dst_md());
&& set_default_formats_common()
&& src_d == memory_desc_wrapper(dst_md())
&& impl::is_dense_format_kind({src_md(), dst_md()});
if (!ok) return status::unimplemented;
// Workaround for the inaccuracies caused by

View File

@ -58,7 +58,8 @@ status_t acl_gemm_convolution_fwd_t<src_t, wei_t, dst_t, bia_t>::pd_t::init(
&& expect_data_types(src_t, wei_t, bia_t, dst_t, undef)
&& !has_zero_dim_memory()
&& attr()->has_default_values(
smask_t::post_ops | smask_t::fpmath_mode, dst_t);
smask_t::post_ops | smask_t::fpmath_mode, dst_t)
&& impl::is_dense_format_kind({src_md(), weights_md(), dst_md()});
if (!ok) return status::unimplemented;
if (weights_md_.ndims != 4) return status::unimplemented;

View File

@ -103,7 +103,8 @@ status_t acl_indirect_gemm_convolution_fwd_t::pd_t::init(engine_t *engine) {
smask_t::post_ops | smask_t::fpmath_mode, f32);
bool ok = is_fwd() && set_default_alg_kind(alg_kind::convolution_direct)
&& utils::one_of(true, is_fp16_ok, is_bf16_ok, is_fp32_ok)
&& !has_zero_dim_memory();
&& !has_zero_dim_memory()
&& impl::is_dense_format_kind({src_md(), weights_md(), dst_md()});
if (!ok) return status::unimplemented;
CHECK(init_conf());

View File

@ -34,7 +34,8 @@ status_t acl_softmax_fwd_t::pd_t::init(engine_t *engine) {
&& *src_md() == *dst_md()
&& utils::one_of(
src_md()->data_type, data_type::f32, data_type::f16)
&& attr()->has_default_values();
&& attr()->has_default_values()
&& impl::is_dense_format_kind({src_md(), dst_md()});
if (!ok) return status::unimplemented;
// Get memory desc to find sizes and dims

View File

@ -117,7 +117,8 @@ status_t brdgmm_dw_convolution_fwd_t<isa>::pd_t::init(engine_t *engine) {
one_of(bia_type, data_type::undef, f32, s32, s8, u8))
&& IMPLICATION(!is_int8,
one_of(bia_type, data_type::undef, src_type, dst_type))
&& attr()->has_default_values(skip_mask) && !has_zero_dim_memory();
&& attr()->has_default_values(skip_mask) && !has_zero_dim_memory()
&& impl::is_dense_format_kind({src_md(), weights_md(), dst_md()});
if (!ok) { return status::unimplemented; }
auto &jcp = jcp_;

View File

@ -67,7 +67,8 @@ status_t brgemm_1x1_convolution_fwd_t<isa>::pd_t::init(engine_t *engine) {
one_of(bias_md_.data_type, data_type::undef, f32, src_type))
&& attr()->has_default_values(skip_mask, dst_type)
&& attr()->post_ops_.check_sum_consistency(dst_type, is_int8)
&& !has_zero_dim_memory() && zero_points_ok() && arg_scales_ok();
&& !has_zero_dim_memory() && zero_points_ok() && arg_scales_ok()
&& impl::is_dense_format_kind({src_md(), weights_md(), dst_md()});
if (!ok) return status::unimplemented;
CHECK(brgemm_convolution_utils::init_1x1_conf(jcp_, isa, *desc(), src_md_,

View File

@ -322,7 +322,8 @@ status_t brgemm_convolution_fwd_t<isa>::pd_t::init(engine_t *engine) {
one_of(bias_md_.data_type, data_type::undef, f32, src_type))
&& attr()->has_default_values(skip_mask, dst_type)
&& attr()->post_ops_.check_sum_consistency(dst_type, is_int8)
&& !has_zero_dim_memory() && zero_points_ok() && arg_scales_ok();
&& !has_zero_dim_memory() && zero_points_ok() && arg_scales_ok()
&& impl::is_dense_format_kind({src_md(), weights_md(), dst_md()});
if (!ok) return status::unimplemented;
CHECK(brgemm_convolution_utils::init_conf(jcp_, isa, *desc(), src_md_,

View File

@ -108,6 +108,10 @@ status_t brgemm_convolution_bwd_t<isa>::pd_t::init(engine_t *engine) {
VERBOSE_BAD_ALGORITHM);
VDISPATCH_CONV(!has_zero_dim_memory(), VERBOSE_EMPTY_TENSOR, "");
VDISPATCH_CONV(attr()->has_default_values(), VERBOSE_UNSUPPORTED_ATTR);
VDISPATCH_CONV(
impl::is_dense_format_kind({src_md(), diff_src_md(), weights_md(0),
weights_md(1), dst_md(), diff_dst_md()}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
convolution_desc_t fwd_conv_d = convolution_desc_t();
CHECK(fwd_conv_desc_create(&fwd_conv_d, desc()));

View File

@ -2288,7 +2288,8 @@ status_t jit_uni_batch_normalization_fwd_t<isa>::pd_t::init(engine_t *engine) {
&& (attr()->has_default_values()
|| with_relu_post_op(is_training()))
&& set_default_formats_common()
&& memory_desc_wrapper(src_md()) == memory_desc_wrapper(dst_md());
&& memory_desc_wrapper(src_md()) == memory_desc_wrapper(dst_md())
&& impl::is_dense_format_kind({src_md(), dst_md()});
if (!ok) return status::unimplemented;
// BN+Add+Relu fusion is not currently implemented
@ -2394,7 +2395,9 @@ status_t jit_uni_batch_normalization_bwd_t<isa>::pd_t::init(engine_t *engine) {
&& check_scale_shift_data_type() && attr()->has_default_values()
&& set_default_formats_common()
&& memory_desc_wrapper(diff_src_md())
== memory_desc_wrapper(diff_dst_md());
== memory_desc_wrapper(diff_dst_md())
&& impl::is_dense_format_kind(
{src_md(), diff_src_md(), dst_md(), diff_dst_md()});
if (!ok) return status::unimplemented;
// BN+Add+Relu fusion is not currently implemented

View File

@ -419,7 +419,8 @@ status_t jit_uni_batch_normalization_s8_fwd_t<isa>::pd_t::init(
&& memory_desc_matches_tag(*src_md(), desired_fmt_tag)
&& (attr()->has_default_values() || this->with_relu_post_op(false))
&& set_default_formats_common()
&& memory_desc_wrapper(src_md()) == memory_desc_wrapper(dst_md());
&& memory_desc_wrapper(src_md()) == memory_desc_wrapper(dst_md())
&& impl::is_dense_format_kind({src_md(), dst_md()});
if (!ok) return status::unimplemented;
// BN+Add+Relu fusion is not currently implemented

View File

@ -354,7 +354,7 @@ status_t jit_uni_eltwise_int_fwd_t<isa, d_type>::pd_t::init(engine_t *engine) {
&& utils::one_of(desc()->alg_kind, alg_kind::eltwise_relu,
alg_kind::eltwise_linear)
&& !has_zero_dim_memory()
&& memory_desc_wrapper(src_md()).is_dense(true)
&& impl::is_dense_format_kind({src_md(), dst_md()})
&& attr()->has_default_values() && set_default_formats_common()
&& memory_desc_wrapper(src_md()) == memory_desc_wrapper(dst_md());

View File

@ -64,6 +64,7 @@ status_t acl_lowp_matmul_t::pd_t::init(engine_t *engine) {
VDISPATCH_MATMUL(attr()->has_default_values(smask_t::scales
| smask_t::zero_points | smask_t::post_ops),
"only scale, zero point and post-ops attrs supported");
VDISPATCH_MATMUL(is_dense_format_kind(), VERBOSE_UNSUPPORTED_SPARSE_CFG);
static const std::vector<int> supported_args {
DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST};

View File

@ -51,6 +51,7 @@ status_t acl_lowp_matmul_sq_t::pd_t::init(engine_t *engine) {
VDISPATCH_MATMUL(attr()->has_default_values(smask_t::scales
| smask_t::zero_points | smask_t::post_ops),
"only scale, zero point and post-ops attrs supported");
VDISPATCH_MATMUL(is_dense_format_kind(), VERBOSE_UNSUPPORTED_SPARSE_CFG);
static const std::vector<int> supported_args {
DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST};

View File

@ -636,6 +636,7 @@ status_t jit_int8_matmul_t::pd_t::init(engine_t *engine) {
VDISPATCH_MATMUL(
no_runtime_dims_or_strides, VERBOSE_RUNTIMEDIM_UNSUPPORTED);
VDISPATCH_MATMUL(is_dense_format_kind(), VERBOSE_UNSUPPORTED_SPARSE_CFG);
bool is_s8_wei = utils::everyone_is(s8, wei_type);
bool is_u8 = utils::everyone_is(u8, src_type, wei_type);

View File

@ -40,6 +40,10 @@ status_t jit_uni_shuffle_t<isa>::pd_t::init(engine_t *engine) {
const memory_desc_wrapper src_d(is_fwd() ? src_md() : diff_src_md());
const memory_desc_wrapper dst_d(is_fwd() ? dst_md() : diff_dst_md());
if (!impl::is_dense_format_kind({is_fwd() ? src_md() : diff_src_md(),
is_fwd() ? dst_md() : diff_dst_md()}))
return status::unimplemented;
conf_.data_type = src_d.data_type();
const bool ok = is_superset(get_max_cpu_isa(), isa)

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2019-2024 Intel Corporation
* Copyright 2019-2025 Intel Corporation
* Copyright 2024-2025 FUJITSU LIMITED
* Copyright 2021-2025 Arm Ltd. and affiliates
*
@ -51,30 +51,8 @@ namespace {
using namespace dnnl::impl::data_type;
using namespace dnnl::impl::cpu::matmul;
// Some compilers do not allow guarding implementations with macros
// in the impl list.
#ifdef DNNL_EXPERIMENTAL_SPARSE
#define CPU_INSTANCE_SPARSE(...) \
impl_list_item_t( \
impl_list_item_t::type_deduction_helper_t<__VA_ARGS__::pd_t>()),
#if DNNL_X64
#define CPU_INSTANCE_SPARSE_X64(...) \
impl_list_item_t( \
impl_list_item_t::type_deduction_helper_t<__VA_ARGS__::pd_t>()),
#else
#define CPU_INSTANCE_SPARSE_X64(...)
#endif
#else
#define CPU_INSTANCE_SPARSE(...)
#define CPU_INSTANCE_SPARSE_X64(...)
#endif
// clang-format off
constexpr impl_list_item_t impl_list[] = REG_MATMUL_P({
CPU_INSTANCE_AARCH64(brgemm_matmul_t<sve_512>)
CPU_INSTANCE_AARCH64_ACL(acl_lowp_matmul_sq_t)
CPU_INSTANCE_AARCH64_ACL(acl_lowp_matmul_t)
@ -96,19 +74,14 @@ constexpr impl_list_item_t impl_list[] = REG_MATMUL_P({
CPU_INSTANCE_AVX2(brgemm_matmul_t<avx2>)
CPU_INSTANCE(ref_matmul_t)
CPU_INSTANCE(ref_matmul_int8_t)
// These implementations are enabled only when DNNL_EXPERIMENTAL_SPARSE
// macro is defined.
CPU_INSTANCE_SPARSE_X64(jit_uni_sparse_matmul_t)
CPU_INSTANCE_SPARSE(ref_sparse_matmul_t)
CPU_INSTANCE_X64(jit_uni_sparse_matmul_t)
CPU_INSTANCE(ref_sparse_matmul_t)
/* eol */
nullptr,
});
// clang-format on
} // namespace
#undef CPU_INSTANCE_SPARSE
#undef CPU_INSTANCE_SPARSE_X64
const impl_list_item_t *get_matmul_impl_list(const matmul_desc_t *desc) {
UNUSED(desc);
return impl_list;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2020-2024 Intel Corporation
* Copyright 2020-2025 Intel Corporation
* Copyright 2023 Arm Ltd. and affiliates
*
* Licensed under the Apache License, Version 2.0 (the "License");
@ -99,24 +99,6 @@ extern const impl_list_map_t &comp_s8_s8_impl_list_map();
// clang-format off
// Some compilers do not allow guarding implementations with macros
// in the impl list.
#ifdef DNNL_EXPERIMENTAL_SPARSE
#if DNNL_X64
#define REG_SPARSE_SR_X64(idt, ifmt, odt, ofmt) \
impl_list_item_t(impl_list_item_t::reorder_type_deduction_helper_t< \
simple_sparse_reorder_t<idt, \
std::remove_const<decltype(ifmt)>::type, ifmt, odt, \
std::remove_const<decltype(ofmt)>::type, ofmt>::pd_t>()),
#else
#define REG_SPARSE_SR_X64(...)
#endif
#else
#define REG_SPARSE_SR_X64(...)
#endif
#define REG_SR(idt, ifmt, odt, ofmt, ...) \
impl_list_item_t(impl_list_item_t::reorder_type_deduction_helper_t< \
simple_reorder_t<idt, ifmt, odt, ofmt, __VA_ARGS__>::pd_t>()),

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2020-2024 Intel Corporation
* Copyright 2020-2025 Intel Corporation
* Copyright 2022 FUJITSU LIMITED
*
* Licensed under the Apache License, Version 2.0 (the "License");
@ -46,7 +46,7 @@ const impl_list_map_t &regular_f32_s8_impl_list_map() {
REG_SR(f32, any, s8, any, fmt_order::any, spec::reference)
REG_SPARSE_SR_X64(f32, any, s8, any)
DNNL_X64_ONLY(CPU_REORDER_INSTANCE(simple_sparse_reorder_t<f32, impl::format_tag_t, any, s8, impl::format_tag_t, any>))
nullptr,
}},

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2020-2024 Intel Corporation
* Copyright 2020-2025 Intel Corporation
* Copyright 2022 FUJITSU LIMITED
*
* Licensed under the Apache License, Version 2.0 (the "License");
@ -66,7 +66,7 @@ const impl_list_map_t &regular_s8_impl_list_map() {
REG_SR(s8, any, s8, any, fmt_order::any, spec::reference)
REG_SR(s8, any, u8, any, fmt_order::any, spec::reference)
REG_SPARSE_SR_X64(s8, any, s8, any)
DNNL_X64_ONLY(CPU_REORDER_INSTANCE(simple_sparse_reorder_t<s8, impl::format_tag_t, any, s8, impl::format_tag_t, any>))
nullptr,
}},

View File

@ -63,6 +63,8 @@ status_t simple_layer_normalization_fwd_t::pd_t::init(engine_t *engine) {
// plain format, last logical dim is last physical
VDISPATCH_LNORM(src_d.blocking_desc().strides[ndims() - 1] == 1,
VERBOSE_BLOCKING_FAIL, "bad stride value");
VDISPATCH_LNORM(impl::is_dense_format_kind({src_md(), dst_md()}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
CHECK(fill_compatible_stats_md(*src_md(), reordered_stat_md_));
@ -264,6 +266,9 @@ status_t simple_layer_normalization_bwd_t::pd_t::init(engine_t *engine) {
// plain format, last logical dim is last physical
VDISPATCH_LNORM(src_d.blocking_desc().strides[ndims() - 1] == 1,
VERBOSE_BLOCKING_FAIL, "bad stride value");
VDISPATCH_LNORM(impl::is_dense_format_kind(
{src_md(), diff_src_md(), dst_md(), diff_dst_md()}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
CHECK(fill_compatible_stats_md(*src_md(), reordered_stat_md_));

View File

@ -164,6 +164,10 @@ status_t brdgmm_dw_convolution_fwd_t::pd_t::init(engine_t *engine) {
const memory_desc_wrapper dst_d(&dst_md_);
const memory_desc_wrapper bias_d(&bias_md_);
VDISPATCH_CONV(
impl::is_dense_format_kind({src_md(), weights_md(), dst_md()}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
// Big int (> INT_MAX) values are unsupported and jcp fields may overflow
// TODO: change data type of jcp fields to size_t
VDISPATCH_CONV_IC(!has_large_size(cd, src_d, weights_d, dst_d),

View File

@ -52,6 +52,10 @@ status_t brgemm_1x1_convolution_fwd_t<isa>::pd_t::init(engine_t *engine) {
const auto dst_type = dst_md(0)->data_type;
const bool is_int8 = one_of(src_type, u8, s8);
VDISPATCH_CONV(
impl::is_dense_format_kind({src_md(), weights_md(), dst_md()}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
using skip_mask_t = primitive_attr_t::skip_mask_t;
auto skip_mask = skip_mask_t::post_ops | skip_mask_t::sum_dt
| skip_mask_t::zero_points | skip_mask_t::fpmath_mode;

View File

@ -384,6 +384,9 @@ status_t brgemm_convolution_fwd_t<isa>::pd_t::init(engine_t *engine) {
VERBOSE_UNSUPPORTED_POSTOP);
VDISPATCH_CONV(zero_points_ok(), VERBOSE_UNSUPPORTED_ZP_CFG);
VDISPATCH_CONV(arg_scales_ok(), VERBOSE_UNSUPPORTED_SCALES_CFG);
VDISPATCH_CONV(
impl::is_dense_format_kind({src_md(0), weights_md(0), dst_md(0)}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
CHECK(brgemm_convolution_utils::init_conf(jcp_, isa, *desc(), src_md_,
weights_md_, dst_md_, bias_md_, attr_, dnnl_get_max_threads()));

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2022-2024 Intel Corporation
* Copyright 2022-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -110,6 +110,9 @@ status_t brgemm_convolution_bwd_t<isa>::pd_t::init(engine_t *engine) {
VERBOSE_BAD_ALGORITHM);
VDISPATCH_CONV(!has_zero_dim_memory(), VERBOSE_EMPTY_TENSOR, "");
VDISPATCH_CONV(attr()->has_default_values(), VERBOSE_UNSUPPORTED_ATTR);
VDISPATCH_CONV(impl::is_dense_format_kind({src_md(0), diff_weights_md(0),
diff_weights_md(1), diff_dst_md(0), dst_md(0)}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
convolution_desc_t fwd_conv_d = convolution_desc_t();
CHECK(fwd_conv_desc_create(&fwd_conv_d, desc()));

View File

@ -114,6 +114,9 @@ status_t brgemm_convolution_bwd_strided_t<isa>::pd_t::init(engine_t *engine) {
&& everyone_is(f32, diff_src_type, diff_dst_type)
&& IMPLICATION(with_bias(), bias_md_.data_type == f32);
VDISPATCH_CONV(
impl::is_dense_format_kind({src_md(), weights_md(), dst_md()}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
VDISPATCH_CONV(is_bwd_d(), VERBOSE_BAD_PROPKIND);
VDISPATCH_CONV(
impl_supports_datatype(diff_src_type), VERBOSE_UNSUPPORTED_DT);

View File

@ -58,6 +58,9 @@ status_t brgemm_convolution_bwd_weights_t::pd_t::init(engine_t *engine) {
VERBOSE_BAD_ALGORITHM);
VDISPATCH_CONV(!has_zero_dim_memory(), VERBOSE_EMPTY_TENSOR, "");
VDISPATCH_CONV(attr()->has_default_values(), VERBOSE_UNSUPPORTED_ATTR);
VDISPATCH_CONV(impl::is_dense_format_kind({src_md(0), diff_weights_md(0),
diff_weights_md(1), diff_dst_md(0)}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
auto scratchpad = scratchpad_registry().registrar();

View File

@ -176,6 +176,10 @@ status_t brgemm_deconvolution_fwd_t<isa>::pd_t::init(engine_t *engine) {
VDISPATCH_DECONVOLUTION(post_ops_ok(), VERBOSE_UNSUPPORTED_POSTOP);
VDISPATCH_DECONVOLUTION(zero_points_ok(), VERBOSE_UNSUPPORTED_ZP_CFG);
VDISPATCH_DECONVOLUTION(!has_zero_dim_memory(), VERBOSE_EMPTY_TENSOR, "");
VDISPATCH_DECONVOLUTION(
impl::is_dense_format_kind({src_md(0), diff_weights_md(0),
diff_weights_md(1), diff_dst_md(0), dst_md(0)}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
convolution_desc_t conv_d = convolution_desc_t();

View File

@ -2382,6 +2382,8 @@ status_t jit_uni_batch_normalization_fwd_t<isa>::pd_t::init(engine_t *engine) {
VDISPATCH_BNORM(
memory_desc_wrapper(src_md()) == memory_desc_wrapper(dst_md()),
VERBOSE_INCONSISTENT_MDS, "src", "dst");
VDISPATCH_BNORM(impl::is_dense_format_kind({src_md(), dst_md()}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
// BN+Add+Relu fusion is not currently implemented
VDISPATCH_BNORM(!fuse_norm_add_relu(), VERBOSE_UNSUPPORTED_FEATURE,
@ -2518,6 +2520,10 @@ status_t jit_uni_batch_normalization_bwd_t<isa>::pd_t::init(engine_t *engine) {
== memory_desc_wrapper(diff_dst_md()),
VERBOSE_INCONSISTENT_MDS, "diff_src", "diff_dst");
VDISPATCH_BNORM(impl::is_dense_format_kind(
{src_md(), diff_src_md(), dst_md(), diff_dst_md()}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
// BN+Add+Relu fusion is not currently implemented
VDISPATCH_BNORM(!(fuse_norm_add_relu()), VERBOSE_UNSUPPORTED_FEATURE,
"sum+relu post-ops configuration is not supported");

View File

@ -719,6 +719,8 @@ status_t jit_uni_batch_normalization_s8_fwd_t<isa>::pd_t::init(
VDISPATCH_BNORM(
memory_desc_wrapper(src_md()) == memory_desc_wrapper(dst_md()),
VERBOSE_INCONSISTENT_MDS, "src", "dst");
VDISPATCH_BNORM(impl::is_dense_format_kind({src_md(), dst_md()}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
// BN+Add+Relu fusion is not currently implemented
VDISPATCH_BNORM(!fuse_norm_add_relu(), VERBOSE_UNSUPPORTED_FEATURE,

View File

@ -819,6 +819,8 @@ status_t jit_uni_group_normalization_fwd_t::pd_t::init(engine_t *engine) {
VDISPATCH_GNORM(
memory_desc_matches_one_of_tag(*dst_md(), ndhwc, nhwc, nwc, nc),
VERBOSE_UNSUPPORTED_TAG_S, "dst");
VDISPATCH_GNORM(impl::is_dense_format_kind({src_md(), dst_md()}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
// Instance Normalization is handled in a different implementation. This
// implementation has some turns in the kernel that is done differently

View File

@ -691,6 +691,8 @@ status_t jit_uni_instance_normalization_fwd_t::pd_t::init(engine_t *engine) {
VDISPATCH_GNORM(
memory_desc_matches_one_of_tag(*dst_md(), ndhwc, nhwc, nwc, nc),
VERBOSE_UNSUPPORTED_TAG_S, "dst");
VDISPATCH_GNORM(impl::is_dense_format_kind({src_md(), dst_md()}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
auto post_ops_ok = [&]() -> bool {
const std::vector<injector::post_op_type> accepted_post_ops

View File

@ -1232,6 +1232,8 @@ status_t jit_uni_layer_normalization_fwd_t::pd_t::init(engine_t *engine) {
// plain format, last logical dim is last physical
VDISPATCH_LNORM(src_d.blocking_desc().strides[ndims() - 1] == 1,
VERBOSE_BLOCKING_FAIL, "bad stride value");
VDISPATCH_LNORM(impl::is_dense_format_kind({src_md(), dst_md()}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
auto post_ops_ok = [&]() -> bool {
const std::vector<injector::post_op_type> accepted_post_ops

View File

@ -232,6 +232,9 @@ status_t jit_uni_ncsp_convolution_fwd_t::pd_t::init(engine_t *engine) {
VDISPATCH_CONV(IMPLICATION(with_bias(), weights_md(1)->data_type == f32),
VERBOSE_UNSUPPORTED_DT);
VDISPATCH_CONV(mayiuse(avx512_core), VERBOSE_UNSUPPORTED_ISA);
VDISPATCH_CONV(
impl::is_dense_format_kind({src_md(), weights_md(), dst_md()}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
reduction_helper_ = reduction_helper_t(this);
// TODO: Support attributes in matmul-based convolution.
@ -400,6 +403,9 @@ status_t jit_uni_ncsp_convolution_bwd_weights_t::pd_t::init(engine_t *engine) {
: data_type::f32),
VERBOSE_UNSUPPORTED_DT);
VDISPATCH_CONV(mayiuse(avx512_core), VERBOSE_UNSUPPORTED_ISA);
VDISPATCH_CONV(impl::is_dense_format_kind(
{src_md(), diff_src_md(), dst_md(), diff_dst_md()}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
CHECK(init_convolution(engine));
init_name();
@ -552,6 +558,10 @@ status_t jit_uni_ncsp_convolution_bwd_data_t::pd_t::init(engine_t *engine) {
diff_dst_md()->data_type, weights_md(0)->data_type),
VERBOSE_UNSUPPORTED_DT);
VDISPATCH_CONV(mayiuse(avx512_core), VERBOSE_UNSUPPORTED_ISA);
VDISPATCH_CONV(
impl::is_dense_format_kind({src_md(), diff_src_md(), weights_md(0),
weights_md(1), dst_md(), diff_dst_md()}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
if (one_of(data_type::bf16, diff_dst_md_.data_type, weights_md_.data_type)
&& !mayiuse(avx512_core_bf16))

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2021-2024 Intel Corporation
* Copyright 2021-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -75,6 +75,8 @@ status_t jit_uni_reduction_t::pd_t::init(engine_t *engine) {
attr()->has_default_values(sm::post_ops), VERBOSE_UNSUPPORTED_ATTR);
VDISPATCH_REDUCTION(attr_.set_default_formats(dst_md(0)) == status::success,
VERBOSE_UNSUPPORTED_POSTOP);
VDISPATCH_REDUCTION(impl::is_dense_format_kind({src_md(), dst_md()}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
const auto src_mdw = memory_desc_wrapper(src_md());
const auto dst_mdw = memory_desc_wrapper(dst_md());

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2020-2024 Intel Corporation
* Copyright 2020-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -95,6 +95,8 @@ status_t jit_uni_resampling_fwd_t::pd_t::init(engine_t *engine) {
VERBOSE_UNSUPPORTED_POSTOP);
VDISPATCH_RESAMPLING(memory_desc_matches_tag(*dst_md(), conf_.src_tag),
VERBOSE_UNSUPPORTED_TAG_S, "dst");
VDISPATCH_RESAMPLING(impl::is_dense_format_kind({src_md(), dst_md()}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
conf_.alg = desc()->alg_kind;
conf_.c = C();

View File

@ -2478,6 +2478,8 @@ status_t jit_uni_tbb_batch_normalization_fwd_t<isa>::pd_t::init(
VDISPATCH_BNORM(
memory_desc_wrapper(src_md()) == memory_desc_wrapper(dst_md()),
VERBOSE_INCONSISTENT_MDS, "src", "dst");
VDISPATCH_BNORM(impl::is_dense_format_kind({src_md(), dst_md()}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
// BN+Add+Relu fusion is not currently implemented
VDISPATCH_BNORM(!(fuse_norm_add_relu()), VERBOSE_UNSUPPORTED_FEATURE,
@ -2609,6 +2611,9 @@ status_t jit_uni_tbb_batch_normalization_bwd_t<isa>::pd_t::init(
VDISPATCH_BNORM(memory_desc_wrapper(diff_src_md())
== memory_desc_wrapper(diff_dst_md()),
VERBOSE_INCONSISTENT_MDS, "diff_src", "diff_dst");
VDISPATCH_BNORM(impl::is_dense_format_kind(
{src_md(), diff_src_md(), dst_md(), diff_dst_md()}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
// BN+Add+Relu fusion is not currently implemented
VDISPATCH_BNORM(!(fuse_norm_add_relu()), VERBOSE_UNSUPPORTED_FEATURE,

View File

@ -1418,6 +1418,9 @@ status_t jit_uni_x8s8s32x_deconvolution_fwd_t<isa>::pd_t::init(
| skip_mask_t::post_ops | skip_mask_t::zero_points),
VERBOSE_UNSUPPORTED_ATTR);
VDISPATCH_DECONVOLUTION(attr_scales_ok(), VERBOSE_UNSUPPORTED_SCALES_CFG);
VDISPATCH_DECONVOLUTION(impl::is_dense_format_kind({src_md(0),
weights_md(0), weights_md(1), dst_md(0)}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
CHECK(jit_uni_x8s8s32x_deconv_fwd_kernel<isa>::init_conf(jcp_, *desc(),
src_md_, weights_md_, dst_md_, with_bias(), bias_md_, attr_,

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2017-2024 Intel Corporation
* Copyright 2017-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -55,6 +55,8 @@ status_t jit_avx512_common_lrn_fwd_t<d_type>::pd_t::init(engine_t *engine) {
VDISPATCH_LRN(attr()->has_default_values(), VERBOSE_UNSUPPORTED_ATTR);
VDISPATCH_LRN(set_default_formats_common(), VERBOSE_UNSUPPORTED_TAG);
VDISPATCH_LRN(src_d == dst_d, VERBOSE_INCONSISTENT_MDS, "src", "dst");
VDISPATCH_LRN(impl::is_dense_format_kind({src_md(), dst_md()}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
const auto fmt_tag
= src_d.matches_one_of_tag(format_tag::nhwc, format_tag::nChw16c);
@ -120,6 +122,9 @@ status_t jit_avx512_common_lrn_bwd_t<d_type>::pd_t::init(engine_t *engine) {
src_d == diff_dst_d, VERBOSE_INCONSISTENT_MDS, "src", "diff_dst");
VDISPATCH_LRN(diff_dst_d == diff_src_d, VERBOSE_INCONSISTENT_MDS,
"diff_src", "diff_dst");
VDISPATCH_LRN(impl::is_dense_format_kind(
{src_md(), diff_src_md(), diff_dst_md()}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
const dims_t ws_dims = {MB(), C(), H(), 2 * W()};
const auto fmt_tag

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2016-2024 Intel Corporation
* Copyright 2016-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -191,6 +191,8 @@ status_t jit_uni_lrn_fwd_t<isa, d_type>::pd_t::init(engine_t *engine) {
&& src_d.dims()[1] >= 2 * VECTOR_LENGTH,
"src has inconsistent dimensions with vector length");
VDISPATCH_LRN(desc()->lrn_beta == 0.75, VERBOSE_BAD_PARAM, "lrn_beta");
VDISPATCH_LRN(impl::is_dense_format_kind({src_md(), dst_md()}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
dat_tag_ = memory_desc_matches_one_of_tag(
*src_md(), nChw16c, nChw8c, nchw, nhwc);
@ -375,6 +377,9 @@ status_t jit_uni_lrn_bwd_t<isa, d_type>::pd_t::init(engine_t *engine) {
&& src_d.dims()[1] >= 2 * VECTOR_LENGTH),
"src has inconsistent dimensions with vector length");
VDISPATCH_LRN(desc()->lrn_beta == 0.75, VERBOSE_BAD_PARAM, "lrn_beta");
VDISPATCH_LRN(impl::is_dense_format_kind(
{src_md(), diff_src_md(), diff_dst_md()}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
dat_tag_ = memory_desc_matches_one_of_tag(
*src_md(), nChw16c, nChw8c, nchw, nhwc);

View File

@ -65,6 +65,10 @@ status_t jit_uni_shuffle_t<isa>::pd_t::init(engine_t *engine) {
VDISPATCH_SHUFFLE(axis() == 1, VERBOSE_BAD_AXIS);
VDISPATCH_SHUFFLE(set_default_formats_common(), VERBOSE_UNSUPPORTED_TAG);
VDISPATCH_SHUFFLE(src_d == dst_d, VERBOSE_INCONSISTENT_MDS, "src", "dst");
VDISPATCH_SHUFFLE(
impl::is_dense_format_kind({is_fwd() ? src_md() : diff_src_md(),
is_fwd() ? dst_md() : diff_dst_md()}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
conf_.isa = isa;
if (isa == avx) conf_.isa = mayiuse(avx2) ? avx2 : avx;

View File

@ -49,6 +49,8 @@ status_t cross_engine_reorder_t::pd_t::init(impl::engine_t *engine,
VERBOSE_BAD_ENGINE_KIND);
VDISPATCH_REORDER(attr_ok(), VERBOSE_UNSUPPORTED_ATTR);
VDISPATCH_REORDER(extra_ok(true), VERBOSE_UNSUPPORTED_MD_FLAG, "extra_ok");
VDISPATCH_REORDER(impl::is_dense_format_kind({src_md(), dst_md()}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
memory_desc_wrapper src_mdw(src_md());
memory_desc_wrapper dst_mdw(dst_md());

View File

@ -48,6 +48,8 @@ status_t ref_group_normalization_fwd_t::pd_t::init(impl::engine_t *engine) {
"scale / shift data type must be data_type::f32");
VDISPATCH_GNORM(
sycl_post_ops_t::post_ops_ok(attr()), VERBOSE_UNSUPPORTED_POSTOP);
VDISPATCH_GNORM(impl::is_dense_format_kind({src_md(), dst_md()}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
const auto &dims = src_mdw.dims();
const auto num_groups = desc()->groups;
@ -126,6 +128,9 @@ status_t ref_group_normalization_bwd_t::pd_t::init(impl::engine_t *engine) {
VERBOSE_UNSUPPORTED_DT);
VDISPATCH_GNORM(utils::one_of(diff_dst_mdw.data_type(), f32, bf16, f16),
VERBOSE_UNSUPPORTED_DT);
VDISPATCH_GNORM(impl::is_dense_format_kind(
{src_md(), diff_src_md(), diff_dst_md()}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
auto device = utils::downcast<const impl::xpu::sycl::engine_impl_t *>(
engine->impl())

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2024 Intel Corporation
* Copyright 2024-2025 Intel Corporation
* Copyright 2024 Codeplay Software Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
@ -190,6 +190,9 @@ status_t ref_inner_product_fwd_t::pd_t::init(impl::engine_t *engine) {
"weight memory descriptor is not a plain memory format");
VDISPATCH_INNER_PRODUCT(dst_wrapper.is_plain(),
"destination memory descriptor is not a plain memory format");
VDISPATCH_INNER_PRODUCT(
impl::is_dense_format_kind({src_md(), weights_md(), dst_md()}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
// if anything contains a zero dimension, return success as this will be converted
// to a no-op
@ -340,6 +343,9 @@ status_t ref_inner_product_bwd_data_t::pd_t::init(impl::engine_t *engine) {
src_wrapper.is_plain(), "Blocked memory format is not supported");
VDISPATCH_INNER_PRODUCT(
dst_wrapper.is_plain(), "Blocked memory format is not supported");
VDISPATCH_INNER_PRODUCT(impl::is_dense_format_kind({diff_src_md(),
weights_md(), diff_dst_md()}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
if (src_wrapper.has_zero_dim() || wei_wrapper.has_zero_dim()
|| dst_wrapper.has_zero_dim()) {
@ -472,6 +478,9 @@ status_t ref_inner_product_bwd_weights_t::pd_t::init(impl::engine_t *engine) {
wei_wrapper.is_plain(), "blocked memory format is not supported");
VDISPATCH_INNER_PRODUCT(
dst_wrapper.is_plain(), "blocked memory format is not supported");
VDISPATCH_INNER_PRODUCT(
impl::is_dense_format_kind({src_md(), weights_md(), dst_md()}),
VERBOSE_UNSUPPORTED_SPARSE_CFG);
format_tag_t wei_format_tag = format_tag::ab;
format_tag_t dst_format_tag = format_tag::ab;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2019-2024 Intel Corporation
* Copyright 2019-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -29,7 +29,6 @@
using namespace dnnl::impl;
using namespace dnnl::impl::xpu::ocl;
#ifdef DNNL_EXPERIMENTAL_SPARSE
status_t dnnl_ocl_interop_memory_create_v2(memory_t **memory,
const memory_desc_t *md, engine_t *engine, memory_kind_t memory_kind,
int nhandles, void **handles) {
@ -83,7 +82,6 @@ status_t dnnl_ocl_interop_memory_create_v2(memory_t **memory,
return safe_ptr_assign(
*memory, new memory_t(engine, md, std::move(mem_storages)));
}
#endif
status_t dnnl_ocl_interop_memory_create(memory_t **memory,
const memory_desc_t *md, engine_t *engine, memory_kind_t memory_kind,

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2020-2024 Intel Corporation
* Copyright 2020-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -35,7 +35,6 @@ using dnnl::impl::status_t;
using ::sycl::context;
using ::sycl::get_pointer_type;
#ifdef DNNL_EXPERIMENTAL_SPARSE
status_t dnnl_sycl_interop_memory_create_v2(memory_t **memory,
const memory_desc_t *md, engine_t *engine, memory_kind_t memory_kind,
int nhandles, void **handles) {
@ -94,7 +93,6 @@ status_t dnnl_sycl_interop_memory_create_v2(memory_t **memory,
return safe_ptr_assign(
*memory, new memory_t(engine, md, std::move(mem_storages)));
}
#endif
status_t dnnl_sycl_interop_memory_create(memory_t **memory,
const memory_desc_t *md, engine_t *engine, memory_kind_t memory_kind,

View File

@ -170,10 +170,6 @@ foreach(driver ${all_drivers})
register_all_tests(cpu "${driver}" "${test_files_smoke}")
endif()
elseif(DNNL_TEST_SET_COVERAGE EQUAL DNNL_TEST_SET_CI)
if(NOT DNNL_EXPERIMENTAL_SPARSE)
list(REMOVE_ITEM test_files_ci "test_matmul_sparse_ci")
endif()
# gpu_ci files may happen if cpu coverage can not be used on gpu
# Filter out gpu_ci inputs from ci
foreach(test_file ${test_files_gpu_ci})
@ -192,11 +188,6 @@ foreach(driver ${all_drivers})
register_all_tests(cpu "${driver}" "${test_files_ci}")
endif()
elseif(DNNL_TEST_SET_COVERAGE EQUAL DNNL_TEST_SET_NIGHTLY)
if(NOT DNNL_EXPERIMENTAL_SPARSE)
list(REMOVE_ITEM test_files_cpu "test_matmul_sparse")
list(REMOVE_ITEM test_files_gpu "test_matmul_sparse_gpu")
endif()
## Filter out gpu, large cpu and invalid inputs from cpu
foreach(test_file ${test_files_large_cpu} ${test_files_gpu_ci}
${test_files_gpu} ${test_files_ci} ${test_files_smoke})

View File

@ -833,7 +833,6 @@ std::ostream &operator<<(std::ostream &s, const attr_t &attr) {
return s;
}
#ifdef DNNL_EXPERIMENTAL_SPARSE
std::ostream &operator<<(std::ostream &s, dnnl_sparse_encoding_t se) {
s << sparse_encoding2str(se);
return s;
@ -861,7 +860,6 @@ std::ostream &operator<<(
}
return s;
}
#endif
std::ostream &operator<<(std::ostream &s, memory_kind_ext_t memory_kind) {
switch (memory_kind) {
@ -1714,7 +1712,6 @@ void update_cpu_ref_attrs(attr_t &attr, dnnl_data_type_t dst_dt) {
}
}
#ifdef DNNL_EXPERIMENTAL_SPARSE
int sparse_options_t::from_str(const std::string &s) {
*this = sparse_options_t();
if (s.empty()) return OK;
@ -1759,4 +1756,3 @@ int sparse_options_t::from_str(const std::string &s) {
static const int expected_num_options = 3;
return options_count == expected_num_options ? OK : FAIL;
}
#endif

View File

@ -482,7 +482,6 @@ struct isa_hints_t {
using policy_t = attr_t::policy_t;
#ifdef DNNL_EXPERIMENTAL_SPARSE
struct sparse_options_t {
static constexpr dnnl_sparse_encoding_t def_encoding
= dnnl_sparse_encoding_undef;
@ -552,8 +551,6 @@ private:
std::ostream &operator<<(
std::ostream &s, const sparse_options_t &sparse_options);
#endif
std::ostream &operator<<(std::ostream &s, const policy_t &policy);
std::ostream &operator<<(
std::ostream &s, const attr_t::zero_points_t &zero_points);

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2017-2024 Intel Corporation
* Copyright 2017-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -25,9 +25,7 @@
#include "oneapi/dnnl/dnnl.h"
dnnl_data_type_t str2dt(const char *str);
#ifdef DNNL_EXPERIMENTAL_SPARSE
dnnl_sparse_encoding_t str2sparse_encoding(const char *str);
#endif
dnnl_format_tag_t str2fmt_tag(const char *str);
/* status */
@ -40,9 +38,7 @@ const char *dt2str(dnnl_data_type_t dt);
const char *fmt_tag2str(dnnl_format_tag_t tag);
/* encoding */
#ifdef DNNL_EXPERIMENTAL_SPARSE
const char *sparse_encoding2str(dnnl_sparse_encoding_t encoding);
#endif
/* engine kind */
const char *engine_kind2str(dnnl_engine_kind_t kind);

View File

@ -60,7 +60,6 @@ dnnl_data_type_t str2dt(const char *str) {
return dnnl_data_type_undef;
}
#ifdef DNNL_EXPERIMENTAL_SPARSE
dnnl_sparse_encoding_t str2sparse_encoding(const char *str) {
#define CASE(_case) do { \
if (!strcmp(STRINGIFY(_case), str) \
@ -78,7 +77,6 @@ dnnl_sparse_encoding_t str2sparse_encoding(const char *str) {
return dnnl_sparse_encoding_undef;
}
#endif
dnnl_format_tag_t str2fmt_tag(const char *str) {
#define CASE(_case) do { \
if (!strcmp(STRINGIFY(_case), str) \
@ -1731,11 +1729,9 @@ const char *fmt_tag2str(dnnl_format_tag_t tag) {
return dnnl_fmt_tag2str(tag);
}
#ifdef DNNL_EXPERIMENTAL_SPARSE
const char *sparse_encoding2str(dnnl_sparse_encoding_t encoding) {
return dnnl_sparse_encoding2str(encoding);
}
#endif
const char *engine_kind2str(dnnl_engine_kind_t kind) {
return dnnl_engine_kind2str(kind);

View File

@ -409,41 +409,25 @@ static int init_memory(
const int nhandles = query_md_num_handles(md);
std::vector<void *> handles(nhandles);
#ifdef DNNL_EXPERIMENTAL_SPARSE
for (int i = 0; i < nhandles; i++)
DNN_SAFE(dnnl_memory_get_data_handle_v2(mem, &handles[i], i), CRIT);
#else
DNN_SAFE(dnnl_memory_get_data_handle(mem, &handles[0]), CRIT);
#endif
if (is_opencl) {
#if DNNL_GPU_RUNTIME == DNNL_RUNTIME_OCL
dnnl_ocl_interop_memory_kind_t mem_kind;
DNN_SAFE(dnnl_ocl_interop_memory_get_memory_kind(mem, &mem_kind), CRIT);
#ifdef DNNL_EXPERIMENTAL_SPARSE
DNN_SAFE(dnnl_ocl_interop_memory_create_v2(ret, md, engine, mem_kind,
(int)handles.size(), handles.data()),
CRIT);
#else
DNN_SAFE(dnnl_ocl_interop_memory_create(
ret, md, engine, mem_kind, handles[0]),
CRIT);
#endif
#endif
} else if (is_sycl) {
#ifdef DNNL_WITH_SYCL
dnnl_sycl_interop_memory_kind_t mem_kind;
DNN_SAFE(
dnnl_sycl_interop_memory_get_memory_kind(mem, &mem_kind), CRIT);
#ifdef DNNL_EXPERIMENTAL_SPARSE
DNN_SAFE(dnnl_sycl_interop_memory_create_v2(ret, md, engine, mem_kind,
(int)handles.size(), handles.data()),
CRIT);
#else
DNN_SAFE(dnnl_sycl_interop_memory_create(
ret, md, engine, mem_kind, handles[0]),
CRIT);
#endif
#endif
}
@ -463,19 +447,10 @@ void dnn_mem_t::map() const {
const int nhandles = query_md_num_handles(md_);
mapped_ptrs_.resize(nhandles);
for (int i = 0; i < nhandles; i++) {
#ifdef DNNL_EXPERIMENTAL_SPARSE
auto st = dnnl_memory_map_data_v2(mem, &mapped_ptrs_[i], i);
#else
auto st = dnnl_memory_map_data(mem, &mapped_ptrs_[i]);
#endif
if (st != dnnl_success) {
for (int j = 0; j < i; j++) {
#ifdef DNNL_EXPERIMENTAL_SPARSE
for (int j = 0; j < i; j++)
DNN_SAFE_V(dnnl_memory_unmap_data_v2(mem, mapped_ptrs_[i], i));
#else
DNN_SAFE_V(dnnl_memory_unmap_data(mem, mapped_ptrs_[i]));
#endif
}
DNN_SAFE_V(st);
}
}
@ -489,11 +464,7 @@ void dnn_mem_t::unmap() const {
auto mem = m_padded_ ? m_padded_ : m_;
const int nhandles = query_md_num_handles(md_);
for (int i = 0; i < nhandles; i++) {
#ifdef DNNL_EXPERIMENTAL_SPARSE
DNN_SAFE_V(dnnl_memory_unmap_data_v2(mem, mapped_ptrs_[i], i));
#else
DNN_SAFE_V(dnnl_memory_unmap_data(mem, mapped_ptrs_[i]));
#endif
mapped_ptrs_[i] = nullptr;
}
}
@ -594,10 +565,8 @@ size_t dnn_mem_t::pad_memory_size(
dnnl_memory_desc_t dnn_mem_t::pad_memory_desc(const_dnnl_memory_desc_t md,
dnnl_engine_kind_t engine_kind, bool *was_padded) {
if (was_padded) *was_padded = false;
#ifdef DNNL_EXPERIMENTAL_SPARSE
// TODO: add padded memory descriptor support for sparse memory.
if (query_md_format_kind(md) == dnnl_format_kind_sparse) return nullptr;
#endif
size_t old_sz = dnnl_memory_desc_get_size(md);
if (old_sz == 0 || !has_bench_mode_bit(mode_bit_t::corr)
|| engine_kind == dnnl_cpu)
@ -642,7 +611,6 @@ benchdnn_dnnl_wrapper_t<dnnl_memory_desc_t> dnn_mem_t::init_md(int ndims,
return md;
}
#ifdef DNNL_EXPERIMENTAL_SPARSE
benchdnn_dnnl_wrapper_t<dnnl_memory_desc_t> dnn_mem_t::init_csr_md(int ndims,
const dnnl_dims_t dims, dnnl_data_type_t data_type, dnnl_dim_t nnz,
dnnl_data_type_t indices_dt, dnnl_data_type_t pointers_dt) {
@ -669,24 +637,17 @@ benchdnn_dnnl_wrapper_t<dnnl_memory_desc_t> dnn_mem_t::init_sparse_packed_md(
&md, ndims, dims, data_type, nnz));
return md;
}
#endif
int dnn_mem_t::initialize_memory_create_sycl(const handle_info_t &handle_info) {
#ifdef DNNL_WITH_SYCL
if (handle_info.is_host_ptr) {
// Ignore memory_kind with host pointers and force USM.
#ifdef DNNL_EXPERIMENTAL_SPARSE
const int nhandles = query_md_num_handles(md_);
std::vector<void *> handles(nhandles, handle_info.ptr);
DNN_SAFE(dnnl_sycl_interop_memory_create_v2(&m_, md_, engine_,
dnnl_sycl_interop_usm, (int)handles.size(),
handles.data()),
CRIT);
#else
DNN_SAFE(dnnl_sycl_interop_memory_create(&m_, md_, engine_,
dnnl_sycl_interop_usm, handle_info.ptr),
CRIT);
#endif
return OK;
}
@ -700,18 +661,12 @@ int dnn_mem_t::initialize_memory_create_sycl(const handle_info_t &handle_info) {
= (memory_kind == memory_kind_ext_t::usm
? dnnl_sycl_interop_usm
: dnnl_sycl_interop_buffer);
#ifdef DNNL_EXPERIMENTAL_SPARSE
const int nhandles = query_md_num_handles(md_);
std::vector<void *> handles(nhandles, handle_info.ptr);
DNN_SAFE(dnnl_sycl_interop_memory_create_v2(&m_padded_, md_padded,
engine_, mem_kind, (int)handles.size(),
handles.data()),
CRIT);
#else
DNN_SAFE(dnnl_sycl_interop_memory_create(&m_padded_, md_padded,
engine_, mem_kind, handle_info.ptr),
CRIT);
#endif
SAFE(init_memory(&m_, md_, m_padded_), CRIT);
break;
}
@ -726,11 +681,7 @@ int dnn_mem_t::initialize_memory_create_sycl(const handle_info_t &handle_info) {
const int nhandles = query_md_num_handles(md_);
for (int i = 0; i < nhandles; i++) {
#ifdef DNNL_EXPERIMENTAL_SPARSE
size_t sz = dnnl_memory_desc_get_size_v2(md_padded, i);
#else
size_t sz = dnnl_memory_desc_get_size(md_padded);
#endif
if (memory_kind == memory_kind_ext_t::usm_device) {
data_.push_back(::sycl::malloc_device(sz, dev, ctx));
} else {
@ -742,16 +693,10 @@ int dnn_mem_t::initialize_memory_create_sycl(const handle_info_t &handle_info) {
DNN_SAFE(dnnl_out_of_memory, CRIT);
}
}
#ifdef DNNL_EXPERIMENTAL_SPARSE
DNN_SAFE(dnnl_sycl_interop_memory_create_v2(&m_padded_, md_padded,
engine_, dnnl_sycl_interop_usm, (int)data_.size(),
data_.data()),
CRIT);
#else
DNN_SAFE(dnnl_sycl_interop_memory_create(&m_padded_, md_padded,
engine_, dnnl_sycl_interop_usm, data_[0]),
CRIT);
#endif
SAFE(init_memory(&m_, md_, m_padded_), CRIT);
break;
}
@ -770,18 +715,12 @@ int dnn_mem_t::initialize_memory_create_opencl(
#if DNNL_GPU_RUNTIME == DNNL_RUNTIME_OCL
if (handle_info.is_host_ptr) {
// Ignore memory_kind with host pointers and force USM.
#ifdef DNNL_EXPERIMENTAL_SPARSE
const int nhandles = query_md_num_handles(md_);
std::vector<void *> handles(nhandles, handle_info.ptr);
DNN_SAFE(dnnl_ocl_interop_memory_create_v2(&m_, md_, engine_,
dnnl_ocl_interop_usm, (int)handles.size(),
handles.data()),
CRIT);
#else
DNN_SAFE(dnnl_ocl_interop_memory_create(&m_, md_, engine_,
dnnl_ocl_interop_usm, handle_info.ptr),
CRIT);
#endif
return OK;
}
@ -797,18 +736,12 @@ int dnn_mem_t::initialize_memory_create_opencl(
= (memory_kind == memory_kind_ext_t::usm
? dnnl_ocl_interop_usm
: dnnl_ocl_interop_buffer);
#ifdef DNNL_EXPERIMENTAL_SPARSE
const int nhandles = query_md_num_handles(md_);
std::vector<void *> handles(nhandles, handle_info.ptr);
DNN_SAFE(dnnl_ocl_interop_memory_create_v2(&m_padded_, md_padded,
engine_, mem_kind, (int)handles.size(),
handles.data()),
CRIT);
#else
DNN_SAFE(dnnl_ocl_interop_memory_create(&m_padded_, md_padded,
engine_, mem_kind, handle_info.ptr),
CRIT);
#endif
SAFE(init_memory(&m_, md_, m_padded_), CRIT);
break;
}
@ -818,11 +751,7 @@ int dnn_mem_t::initialize_memory_create_opencl(
const int nhandles = query_md_num_handles(md_);
for (int i = 0; i < nhandles; i++) {
#ifdef DNNL_EXPERIMENTAL_SPARSE
size_t sz = dnnl_memory_desc_get_size_v2(md_padded, i);
#else
size_t sz = dnnl_memory_desc_get_size(md_padded);
#endif
if (memory_kind == memory_kind_ext_t::usm_device) {
data_.push_back(dnnl::impl::xpu::ocl::usm::malloc_device(
engine_, sz));
@ -837,16 +766,10 @@ int dnn_mem_t::initialize_memory_create_opencl(
DNN_SAFE(dnnl_out_of_memory, CRIT);
}
}
#ifdef DNNL_EXPERIMENTAL_SPARSE
DNN_SAFE(dnnl_ocl_interop_memory_create_v2(&m_padded_, md_padded,
engine_, dnnl_ocl_interop_usm, (int)data_.size(),
data_.data()),
CRIT);
#else
DNN_SAFE(dnnl_ocl_interop_memory_create(&m_padded_, md_padded,
engine_, dnnl_ocl_interop_usm, data_[0]),
CRIT);
#endif
SAFE(init_memory(&m_, md_, m_padded_), CRIT);
break;
}
@ -876,11 +799,7 @@ int dnn_mem_t::initialize_memory_create(const handle_info_t &handle_info) {
const int nhandles = query_md_num_handles(md_);
for (int i = 0; i < nhandles; i++) {
#ifdef DNNL_EXPERIMENTAL_SPARSE
size_t sz = dnnl_memory_desc_get_size_v2(md_, i);
#else
size_t sz = dnnl_memory_desc_get_size(md_);
#endif
data_.push_back(zmalloc(sz, alignment));
}
if (std::any_of(
@ -889,13 +808,9 @@ int dnn_mem_t::initialize_memory_create(const handle_info_t &handle_info) {
zfree(p);
DNN_SAFE(dnnl_out_of_memory, CRIT);
}
#ifdef DNNL_EXPERIMENTAL_SPARSE
DNN_SAFE(dnnl_memory_create_v2(
&m_, md_, engine_, (int)data_.size(), data_.data()),
CRIT);
#else
DNN_SAFE(dnnl_memory_create(&m_, md_, engine_, data_[0]), CRIT);
#endif
} else if (is_sycl) {
SAFE(initialize_memory_create_sycl(handle_info), CRIT);
@ -905,13 +820,9 @@ int dnn_mem_t::initialize_memory_create(const handle_info_t &handle_info) {
is_data_owner_ = false;
const int nhandles = query_md_num_handles(md_);
std::vector<void *> handles(nhandles, handle_info.ptr);
#ifdef DNNL_EXPERIMENTAL_SPARSE
DNN_SAFE(dnnl_memory_create_v2(&m_, md_, engine_, (int)handles.size(),
handles.data()),
CRIT);
#else
DNN_SAFE(dnnl_memory_create(&m_, md_, engine_, handles[0]), CRIT);
#endif
}
return OK;
}
@ -940,11 +851,7 @@ int dnn_mem_t::initialize(
const int nhandles = query_md_num_handles(md_);
for (int i = 0; i < nhandles; i++) {
#ifdef DNNL_EXPERIMENTAL_SPARSE
size_t sz = dnnl_memory_desc_get_size_v2(md_, i);
#else
size_t sz = dnnl_memory_desc_get_size(md_);
#endif
if (is_canary_protected_) sz = pad_memory_size(sz, engine_kind_);
// Do not fill a memory if its size is zero. Moreover, memset
// expects defined pointer, nullptr is not allowed.

View File

@ -175,7 +175,6 @@ struct dnn_mem_t {
static benchdnn_dnnl_wrapper_t<dnnl_memory_desc_t> init_md(int ndims,
const dnnl_dims_t dims, dnnl_data_type_t data_type,
const std::string &tag, const dims_t &strides_ = {});
#ifdef DNNL_EXPERIMENTAL_SPARSE
// Initializes memory descriptor for CSR encoding.
static benchdnn_dnnl_wrapper_t<dnnl_memory_desc_t> init_csr_md(int ndims,
const dnnl_dims_t dims, dnnl_data_type_t data_type, dnnl_dim_t nnz,
@ -188,7 +187,6 @@ struct dnn_mem_t {
static benchdnn_dnnl_wrapper_t<dnnl_memory_desc_t> init_sparse_packed_md(
int ndims, const dnnl_dims_t dims, dnnl_data_type_t data_type,
dnnl_dim_t nnz);
#endif
/* fields */
dnnl_memory_desc_t md_ {};

View File

@ -34,9 +34,7 @@ where *matmul-knobs* are:
tensors with option values other than `0`, a correspondent memory
format tag must be specified.
- `--encoding=STRING` - sparse encodings and sparsity. No encodings are set by
default. Refer to [encodings](knobs_encoding.md) for details. This
is an experimental feature that must be enabled via a build time
CMake option `DNNL_EXPERIMENTAL_SPARSE`.
default. Refer to [encodings](knobs_encoding.md) for details.
- `--match=REGEX` -- skip problems not matching the regular expression in
`REGEX`. By default no pattern is applied (run everything).
Note: Windows may interpret only string arguments surrounded by

View File

@ -43,9 +43,7 @@ void check_correctness(
for_(const auto &i_stag : s.stag)
for_(const auto &i_wtag : s.wtag)
for_(const auto &i_dtag : s.dtag)
#ifdef DNNL_EXPERIMENTAL_SPARSE
for_(const auto &i_sparse_options : s.sparse_options)
#endif
for_(const auto &i_strides : s.strides)
for_(const auto &i_rt_dims_masks : s.rt_dims_masks)
for_(const auto &i_attr : s.attributes)
@ -54,10 +52,7 @@ void check_correctness(
for (const auto &i_bia_cfg : bia_cfg) {
const prb_t prb(s.prb_vdims, i_dt, i_stag, i_wtag, i_dtag, i_strides,
i_bia_cfg.first, i_bia_cfg.second, i_rt_dims_masks,
#ifdef DNNL_EXPERIMENTAL_SPARSE
i_sparse_options,
#endif
i_attr, i_ctx_init, i_ctx_exe, s.impl_filter);
i_sparse_options, i_attr, i_ctx_init, i_ctx_exe, s.impl_filter);
if (s.pattern && !match_regex(prb.str(), s.pattern)) return;
task_executor.submit(prb, s.perf_template, createit, checkit, doit);
@ -165,9 +160,7 @@ int bench(int argc, char **argv) {
|| parse_tag(s.stag, def.stag, argv[0], "stag")
|| parse_tag(s.wtag, def.wtag, argv[0], "wtag")
|| parse_tag(s.dtag, def.dtag, argv[0], "dtag")
#ifdef DNNL_EXPERIMENTAL_SPARSE
|| parse_encoding(s.sparse_options, argv[0], "encoding")
#endif
|| parse_strides(s.strides, def.strides, argv[0], "strides")
|| parse_dt(s.bia_dt, def.bia_dt, argv[0], "bia-dt")
// TODO: remove this later

View File

@ -51,7 +51,6 @@ benchdnn_dnnl_wrapper_t<dnnl_memory_desc_t> create_md(const prb_t *prb,
if (dt == dnnl_data_type_undef) dt = prb->src_dt();
const auto &src_rt_dims = get_runtime_dims(
prb->src_dims(), prb->src_runtime_dim_mask());
#ifdef DNNL_EXPERIMENTAL_SPARSE
auto src_encoding = prb->sparse_options.get_encoding(DNNL_ARG_SRC);
auto src_sparsity = prb->sparse_options.get_sparsity(DNNL_ARG_SRC);
if (src_encoding != dnnl_sparse_encoding_undef) {
@ -69,7 +68,6 @@ benchdnn_dnnl_wrapper_t<dnnl_memory_desc_t> create_md(const prb_t *prb,
default: assert(!"unsupported encoding"); return nullptr;
}
} else
#endif
return dnn_mem_t::init_md(prb->ndims, src_rt_dims.data(), dt,
prb->stag, prb->strides[STRIDES_SRC]);
}
@ -78,7 +76,6 @@ benchdnn_dnnl_wrapper_t<dnnl_memory_desc_t> create_md(const prb_t *prb,
if (dt == dnnl_data_type_undef) dt = prb->wei_dt();
const auto &weights_rt_dims = get_runtime_dims(
prb->weights_dims(), prb->weights_runtime_dim_mask());
#ifdef DNNL_EXPERIMENTAL_SPARSE
auto wei_encoding = prb->sparse_options.get_encoding(DNNL_ARG_WEIGHTS);
auto wei_sparsity = prb->sparse_options.get_sparsity(DNNL_ARG_WEIGHTS);
@ -100,7 +97,6 @@ benchdnn_dnnl_wrapper_t<dnnl_memory_desc_t> create_md(const prb_t *prb,
default: assert(!"unsupported encoding"); return nullptr;
}
} else
#endif
return dnn_mem_t::init_md(prb->ndims, weights_rt_dims.data(), dt,
prb->wtag, prb->strides[STRIDES_WEI]);
}
@ -180,13 +176,11 @@ int init_prim_ref(benchdnn_dnnl_wrapper_t<dnnl_primitive_t> &prim_ref,
if (is_cpu() && (prb->src_dt() == dnnl_f32 && prb->wei_dt() == dnnl_f32))
return OK;
#ifdef DNNL_EXPERIMENTAL_SPARSE
if (prb->sparse_options.get_encoding(DNNL_ARG_SRC)
!= dnnl_sparse_encoding_undef
|| prb->sparse_options.get_encoding(DNNL_ARG_WEIGHTS)
!= dnnl_sparse_encoding_undef)
return OK;
#endif
std::vector<std::vector<dnnl_data_type_t>> prim_ref_dt {
prb->dt, {dnnl_f32}};
@ -208,11 +202,8 @@ int init_prim_ref(benchdnn_dnnl_wrapper_t<dnnl_primitive_t> &prim_ref,
// modifying prb in place.
prb_t prb_cpu {*prb, prim_ref_dt_i, tag::any, tag::any, tag::any,
{vdims_t(STRIDES_SIZE)}, prim_ref_bia_dt_i, prb->bia_mask,
{0, 0, 0},
#ifdef DNNL_EXPERIMENTAL_SPARSE
sparse_options_t(),
#endif
cpu_attr, prb->ctx_init, prb->ctx_exe, prb->impl_filter};
{0, 0, 0}, sparse_options_t(), cpu_attr, prb->ctx_init,
prb->ctx_exe, prb->impl_filter};
auto st = init_prim_ref_common(prim_ref, &prb_cpu, res);
if (st == OK) return OK;
@ -222,7 +213,6 @@ int init_prim_ref(benchdnn_dnnl_wrapper_t<dnnl_primitive_t> &prim_ref,
return OK;
}
#ifdef DNNL_EXPERIMENTAL_SPARSE
// The main idea is to generate values and metadata directly without generating
// the dense matrix to avoid excessive memory consumption for large problem
// sizes.
@ -359,7 +349,6 @@ int fill_sparse_data(data_kind_t kind, const prb_t *prb, dnn_mem_t &mem_dt,
return OK;
}
#endif
int fill_data(data_kind_t kind, const prb_t *prb, const cfg_t &cfg,
dnn_mem_t &mem_dt, dnn_mem_t &mem_fp, res_t *res) {
@ -370,7 +359,6 @@ int fill_data(data_kind_t kind, const prb_t *prb, const cfg_t &cfg,
bool is_sparse_packed = false;
bool is_any_sparse = false;
std::vector<bool> nnz_mask;
#ifdef DNNL_EXPERIMENTAL_SPARSE
const auto sparse_encoding = prb->sparse_options.get_encoding(kind);
const bool is_sparse_csr_coo
= sparse_encoding == dnnl_csr || sparse_encoding == dnnl_coo;
@ -391,7 +379,6 @@ int fill_data(data_kind_t kind, const prb_t *prb, const cfg_t &cfg,
std::default_random_engine rng(nnz);
std::shuffle(nnz_mask.begin(), nnz_mask.end(), rng);
}
#endif
// Refer to modes documentation for filling principles.
// Note: sparse filling is more complex than a general one in a sense that
@ -489,7 +476,6 @@ void skip_unimplemented_prb(const prb_t *prb, res_t *res) {
prb->attr, res, dnnl_matmul, prb->src_dt(), prb->dst_dt());
skip_unimplemented_prelu_po(prb->attr, res, dnnl_matmul);
#ifdef DNNL_EXPERIMENTAL_SPARSE
if ((is_nvidia_gpu() || is_amd_gpu()) && !prb->sparse_options.is_def()) {
BENCHDNN_PRINT(2,
"[SKIP][%s:%d]: oneDNN doesn't support sparse matmul for "
@ -536,7 +522,6 @@ void skip_unimplemented_prb(const prb_t *prb, res_t *res) {
res->reason = skip_reason::case_not_supported;
return;
}
#endif
if (is_cpu()) {
const bool is_x8s8f16
@ -854,7 +839,6 @@ int init_ref_memory_args(dnn_mem_map_t &ref_mem_map, dnn_mem_map_t &mem_map,
auto &mem = entry.second; // `mem` is modified by filler (reorder).
#ifdef DNNL_EXPERIMENTAL_SPARSE
auto src_encoding = prb->sparse_options.get_encoding(DNNL_ARG_SRC);
auto wei_encoding = prb->sparse_options.get_encoding(DNNL_ARG_WEIGHTS);
@ -881,9 +865,7 @@ int init_ref_memory_args(dnn_mem_map_t &ref_mem_map, dnn_mem_map_t &mem_map,
auto wei_fp_d = create_md(prb, WEI);
ref_mem_map.emplace(exec_arg, dnn_mem_t(wei_fp_d, ref_engine));
}
} else
#endif
{
} else {
if (exec_arg == DNNL_ARG_WEIGHTS) {
// Switch the format tag from "ab" to "ba" but to handle batched
// cases, use strides instead.

View File

@ -47,10 +47,8 @@ struct settings_t : public base_settings_t {
std::vector<std::vector<dnnl_data_type_t>> dt {{dnnl_f32}};
std::vector<std::string> stag {tag::any}, wtag {tag::any}, dtag {tag::any};
#ifdef DNNL_EXPERIMENTAL_SPARSE
std::vector<sparse_options_t> sparse_options {{DNNL_ARG_SRC,
sparse_options_t::def_encoding, sparse_options_t::def_sparsity}};
#endif
std::vector<vdims_t> strides {vdims_t(STRIDES_SIZE)};
std::vector<dnnl_data_type_t> bia_dt {dnnl_data_type_undef};
std::vector<int> bia_mask {2};
@ -76,11 +74,8 @@ struct prb_t : public prb_vdims_t {
prb_t(const settings_t &s)
: prb_t(s.prb_vdims, s.dt[0], s.stag[0], s.wtag[0], s.dtag[0],
s.strides[0], s.bia_dt[0], s.bia_mask[0], s.rt_dims_masks[0],
#ifdef DNNL_EXPERIMENTAL_SPARSE
s.sparse_options[0],
#endif
s.attributes.front(), s.ctx_init[0], s.ctx_exe[0],
s.impl_filter) {
s.sparse_options[0], s.attributes.front(), s.ctx_init[0],
s.ctx_exe[0], s.impl_filter) {
SAFE_V(s.has_single_setup() ? OK : FAIL);
}
@ -89,11 +84,9 @@ struct prb_t : public prb_vdims_t {
const std::string &dtag, const vdims_t &strides,
dnnl_data_type_t bia_dt, int bia_mask,
const std::vector<dims_mask_t> &rt_dims_masks,
#ifdef DNNL_EXPERIMENTAL_SPARSE
const sparse_options_t &sparse_options,
#endif
const attr_t &attr, const thr_ctx_t &ctx_init,
const thr_ctx_t &ctx_exe, const impl_filter_t &impl_filter)
const sparse_options_t &sparse_options, const attr_t &attr,
const thr_ctx_t &ctx_init, const thr_ctx_t &ctx_exe,
const impl_filter_t &impl_filter)
: prb_vdims_t(prb_vdims)
, dt(dt)
, stag(stag)
@ -103,9 +96,7 @@ struct prb_t : public prb_vdims_t {
, bia_dt(bia_dt)
, bia_mask(bia_mask)
, rt_dims_masks(rt_dims_masks)
#ifdef DNNL_EXPERIMENTAL_SPARSE
, sparse_options(sparse_options)
#endif
, attr(attr)
, ctx_init(ctx_init)
, ctx_exe(ctx_exe)
@ -144,9 +135,7 @@ struct prb_t : public prb_vdims_t {
dnnl_data_type_t bia_dt;
int bia_mask;
std::vector<dims_mask_t> rt_dims_masks;
#ifdef DNNL_EXPERIMENTAL_SPARSE
sparse_options_t sparse_options;
#endif
bool inplace = false; // Lacks placement, always considered `false`.
attr_t attr;

View File

@ -76,9 +76,7 @@ std::string prb_t::set_repro_line() {
if (canonical || !has_default_dts) s << "--dt=" << dt << " ";
if (canonical || stag != def.stag[0]) s << "--stag=" << stag << " ";
#ifdef DNNL_EXPERIMENTAL_SPARSE
s << sparse_options;
#endif
if (canonical || wtag != def.wtag[0]) s << "--wtag=" << wtag << " ";
if (canonical || dtag != def.dtag[0]) s << "--dtag=" << dtag << " ";
if (canonical || strides != def.strides[0])

View File

@ -203,8 +203,6 @@ void compute_ref_matmul(const prb_t *prb, const args_t &args) {
});
}
#ifdef DNNL_EXPERIMENTAL_SPARSE
void cvt_coo_indices_to_csr_pointers(const int32_t *indices, int32_t *pointers,
const int nnz, const int nrows) {
for (int i = 0; i < nnz; ++i) {
@ -310,7 +308,6 @@ void compute_ref_sparse_matmul(const prb_t *prb, const args_t &args) {
});
}
}
#endif
void compute_ref(
const prb_t *prb, const args_t &args, dnnl_primitive_t prim_ref) {
@ -319,7 +316,6 @@ void compute_ref(
return;
}
#ifdef DNNL_EXPERIMENTAL_SPARSE
const auto src_encoding = prb->sparse_options.get_encoding(DNNL_ARG_SRC);
const auto wei_encoding
= prb->sparse_options.get_encoding(DNNL_ARG_WEIGHTS);
@ -330,9 +326,6 @@ void compute_ref(
} else {
compute_ref_matmul(prb, args);
}
#else
compute_ref_matmul(prb, args);
#endif
}
} // namespace matmul

View File

@ -167,7 +167,6 @@ cold_cache_t::cold_cache_t(
for (size_t i = 0; i < n_buffers_; i++) {
cc_entry[i] = dnn_mem_t(orig_cc_mem_md, get_test_engine());
#ifdef DNNL_EXPERIMENTAL_SPARSE
// Sparse memories require this call to replicate the exact original
// data distribution because the data structure affects performance
// in a direct way.
@ -181,9 +180,7 @@ cold_cache_t::cold_cache_t(
i, __FILE__, __LINE__);
return;
}
} else
#endif
{
} else {
// Reorders are expensive. If there are multiple buffers to
// fill, simply rely on default memory initialization.
if (n_mem_pool_buffers > 100) continue;

View File

@ -135,7 +135,6 @@ dnnl_engine_kind_t query_engine_kind(const dnnl_engine_t &engine) {
return engine_kind;
}
#ifdef DNNL_EXPERIMENTAL_SPARSE
dnnl_sparse_encoding_t query_md_sparse_encoding(const_dnnl_memory_desc_t md) {
dnnl_sparse_encoding_t encoding = dnnl_sparse_encoding_undef;
if (!md) return encoding;
@ -149,17 +148,12 @@ dnnl_dim_t query_md_nnz(const_dnnl_memory_desc_t md) {
dnnl_memory_desc_query_v2(md, dnnl_query_nnz_s64, 0, &nnz);
return nnz;
}
#endif
int query_md_num_handles(const_dnnl_memory_desc_t md) {
#ifdef DNNL_EXPERIMENTAL_SPARSE
int nhandles = 0;
if (!md) return nhandles;
dnnl_memory_desc_query_v2(md, dnnl_query_num_handles_s32, 0, &nhandles);
return nhandles;
#else
return 1;
#endif
}
int query_md_ndims(const_dnnl_memory_desc_t md) {
@ -188,11 +182,7 @@ dnnl_data_type_t query_md_data_type(
const_dnnl_memory_desc_t md, int buffer_index) {
dnnl_data_type_t dt = dnnl_data_type_undef;
if (!md) return dt;
#ifdef DNNL_EXPERIMENTAL_SPARSE
dnnl_memory_desc_query_v2(md, dnnl_query_data_type, buffer_index, &dt);
#else
dnnl_memory_desc_query(md, dnnl_query_data_type, &dt);
#endif
return dt;
}

View File

@ -56,10 +56,8 @@ const_dnnl_primitive_desc_t query_pd(dnnl_primitive_t prim);
dnnl_engine_kind_t query_engine_kind(const dnnl_engine_t &engine);
#ifdef DNNL_EXPERIMENTAL_SPARSE
dnnl_sparse_encoding_t query_md_sparse_encoding(const_dnnl_memory_desc_t md);
dnnl_dim_t query_md_nnz(const_dnnl_memory_desc_t md);
#endif
int query_md_num_handles(const_dnnl_memory_desc_t md);
int query_md_ndims(const_dnnl_memory_desc_t md);
int query_md_inner_nblks(const_dnnl_memory_desc_t md);

View File

@ -195,14 +195,12 @@ int fill_random_real_dense(dnn_mem_t &mem, dnn_mem_t &mem_ref, res_t *res,
// This function doesn't handle the predefined set yet.
assert(fill_cfg.predefined_set_.empty());
#ifdef DNNL_EXPERIMENTAL_SPARSE
// The `nelems()` function returns a product of dims/pdims regardless of
// whether the tensor is dense or sparse (this is by design). Because of
// that we need to adjust the `nelems` value for the sparse tensor as the
// number of elements to fill is equal to `nnz`.
if (mem_ref.format_kind() == dnnl_format_kind_sparse)
nelems = query_md_nnz(mem_ref.md_);
#endif
// Note: fill_cfg_t drives value distribution, but the final rounding is
// in compliance with the memory object the values are inserted. Depending
@ -290,7 +288,6 @@ int fill_random_real_dense(dnn_mem_t &mem, dnn_mem_t &mem_ref, res_t *res,
return OK;
}
#ifdef DNNL_EXPERIMENTAL_SPARSE
// Since a sparsity pattern affects performance, it's crucial to keep the
// pattern intact and only randomize tensor values. Thus, the function relies on
// an assumption that every sparse format contains three handles, where the
@ -313,17 +310,14 @@ int fill_random_real_sparse(const_dnnl_memory_t dnnl_memory, dnn_mem_t &mem,
return fill_random_real_dense(mem, mem_ref, res, fill_cfg);
}
#endif
int fill_random_real(dnn_mem_t &mem, dnn_mem_t &mem_ref, res_t *res,
const fill_cfg_t &fill_cfg, const_dnnl_memory_t dnnl_memory) {
#ifdef DNNL_EXPERIMENTAL_SPARSE
if (mem_ref.format_kind() == dnnl_format_kind_sparse) {
assert(dnnl_memory != nullptr);
return fill_random_real_sparse(
dnnl_memory, mem, mem_ref, res, fill_cfg);
}
#endif
return fill_random_real_dense(mem, mem_ref, res, fill_cfg);
}

View File

@ -543,7 +543,6 @@ bool parse_tag(std::vector<std::string> &tag,
return true;
}
#ifdef DNNL_EXPERIMENTAL_SPARSE
bool parse_encoding(std::vector<sparse_options_t> &sparse_options,
const char *str, const std::string &option_name /* = "encoding"*/) {
static const std::string help
@ -562,7 +561,6 @@ bool parse_encoding(std::vector<sparse_options_t> &sparse_options,
return parse_vector_option(sparse_options, def, parse_sparse_options_func,
str, option_name, help);
}
#endif
bool parse_multi_tag(std::vector<std::vector<std::string>> &tag,
const std::vector<std::vector<std::string>> &def_tag, const char *str,

View File

@ -218,12 +218,9 @@ bool parse_tag(std::vector<std::string> &tag,
const std::vector<std::string> &def_tag, const char *str,
const std::string &option_name = "tag");
#ifdef DNNL_EXPERIMENTAL_SPARSE
bool parse_encoding(std::vector<sparse_options_t> &sparse_options,
const char *str, const std::string &option_name = "encoding");
#endif
bool parse_multi_tag(std::vector<std::vector<std::string>> &tag,
const std::vector<std::vector<std::string>> &def_tag, const char *str,
const std::string &option_name = "stag");

View File

@ -61,6 +61,7 @@ file(GLOB PRIM_TEST_CASES_SRC
test_iface_attr_quantization.cpp
test_iface_weights_format.cpp
test_iface_wino_convolution.cpp
test_iface_sparse.cpp
test_memory.cpp
test_sum.cpp
test_reorder.cpp
@ -95,10 +96,6 @@ file(GLOB PRIM_TEST_CASES_SRC
test_group_normalization.cpp
)
if(DNNL_EXPERIMENTAL_SPARSE)
list(APPEND PRIM_TEST_CASES_SRC test_iface_sparse.cpp)
endif()
if(DNNL_CPU_RUNTIME STREQUAL "NONE")
list(APPEND PRIM_TEST_CASES_SRC test_iface_gpu_only.cpp)
set_source_files_properties(test_iface_gpu_only.cpp PROPERTIES NO_ENGINE_PARAM true)

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2019-2024 Intel Corporation
* Copyright 2019-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -213,7 +213,6 @@ HANDLE_EXCEPTIONS_FOR_TEST(ocl_memory_buffer_test_cpp_t, BufferMapUnmap) {
TEST_OCL_CHECK(clReleaseMemObject(ocl_mem));
}
#ifdef DNNL_EXPERIMENTAL_SPARSE
HANDLE_EXCEPTIONS_FOR_TEST(
ocl_memory_buffer_test_cpp_t, TestSparseMemoryCreation) {
engine eng(engine::kind::gpu, 0);
@ -331,6 +330,5 @@ HANDLE_EXCEPTIONS_FOR_TEST(
ASSERT_NO_THROW(coo_mem.unmap_data(mapped_row_indices, 1));
ASSERT_NO_THROW(coo_mem.unmap_data(mapped_col_indices, 2));
}
#endif
} // namespace dnnl

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2021-2024 Intel Corporation
* Copyright 2021-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -207,7 +207,6 @@ HANDLE_EXCEPTIONS_FOR_TEST(ocl_memory_usm_test_t, SharedMapUnmap) {
dnnl::impl::xpu::ocl::usm::free);
}
#ifdef DNNL_EXPERIMENTAL_SPARSE
HANDLE_EXCEPTIONS_FOR_TEST(ocl_memory_usm_test_t, TestSparseMemoryCreation) {
engine eng(engine::kind::gpu, 0);
const int nnz = 12;
@ -307,6 +306,5 @@ HANDLE_EXCEPTIONS_FOR_TEST(ocl_memory_usm_test_t, TestSparseMemoryMapUnmap) {
ASSERT_NO_THROW(coo_mem.unmap_data(mapped_row_indices, 1));
ASSERT_NO_THROW(coo_mem.unmap_data(mapped_col_indices, 2));
}
#endif
} // namespace dnnl

Some files were not shown because too many files have changed in this diff Show More