doc, examples: add brief descriptions and update orphans.rst

2025-10-20 10:03:50 +08:00 · 2025-10-08 16:30:52 -07:00
parent b0d9796c0f
commit afdeb6acfb
59 changed files with 374 additions and 145 deletions
--- a/doc/rst/orphans.rst
+++ b/doc/rst/orphans.rst
@ -139,3 +139,61 @@ Orphans
    page_weights_decompression_matmul_cpp.rst
    page_matmul_with_host_scalar_scale_cpp.rst
    page_matmul_f8_quantization_cpp.rst
    page_augru_example_cpp_brief.rst
    page_batch_normalization_example_cpp_brief.rst
    page_binary_example_cpp_brief.rst
    page_bnorm_u8_via_binary_postops_cpp_brief.rst
    page_cnn_inference_f32_cpp_brief.rst
    page_cnn_inference_int8_cpp_brief.rst
    page_cnn_training_bf16_cpp_brief.rst
    page_cnn_training_f32_cpp_brief.rst
    page_concat_example_cpp_brief.rst
    page_convolution_example_cpp_brief.rst
    page_cpu_brgemm_example_cpp_brief.rst
    page_cpu_matmul_coo_cpp_brief.rst
    page_cpu_matmul_csr_cpp_brief.rst
    page_cpu_matmul_quantization_cpp_brief.rst
    page_cpu_matmul_weights_compression_cpp_brief.rst
    page_cpu_rnn_inference_f32_cpp_brief.rst
    page_cpu_rnn_inference_int8_cpp_brief.rst
    page_cpu_sgemm_and_matmul_cpp_brief.rst
    page_cross_engine_reorder_cpp_brief.rst
    page_deconvolution_example_cpp_brief.rst
    page_eltwise_example_cpp_brief.rst
    page_getting_started_cpp_brief.rst
    page_gpu_opencl_interop_cpp_brief.rst
    page_graph_cpu_getting_started_cpp_brief.rst
    page_graph_cpu_inference_int8_cpp_brief.rst
    page_graph_cpu_single_op_partition_cpp_brief.rst
    page_graph_gpu_opencl_getting_started_cpp_brief.rst
    page_graph_sycl_getting_started_cpp_brief.rst
    page_graph_sycl_single_op_partition_cpp_brief.rst
    page_group_normalization_example_cpp_brief.rst
    page_inference_int8_matmul_cpp_brief.rst
    page_inner_product_example_cpp_brief.rst
    page_layer_normalization_example_cpp_brief.rst
    page_lbr_gru_example_cpp_brief.rst
    page_lrn_example_cpp_brief.rst
    page_lstm_example_cpp_brief.rst
    page_matmul_example_cpp_brief.rst
    page_matmul_f8_quantization_cpp_brief.rst
    page_matmul_perf_cpp_brief.rst
    page_matmul_with_host_scalar_scale_cpp_brief.rst
    page_memory_format_propagation_cpp_brief.rst
    page_performance_profiling_cpp_brief.rst
    page_pooling_example_cpp_brief.rst
    page_prelu_example_cpp_brief.rst
    page_reduction_example_cpp_brief.rst
    page_reorder_example_cpp_brief.rst
    page_resampling_example_cpp_brief.rst
    page_rnn_training_f32_cpp_brief.rst
    page_shuffle_example_cpp_brief.rst
    page_softmax_example_cpp_brief.rst
    page_sum_example_cpp_brief.rst
    page_sycl_interop_buffer_cpp_brief.rst
    page_sycl_interop_usm_cpp_brief.rst
    page_vanilla_rnn_example_cpp_brief.rst
    page_weights_decompression_matmul_cpp_brief.rst
    page_cnn_inference_f32_c_brief.rst
    page_cpu_cnn_training_f32_c_brief.rst
    page_cross_engine_reorder_c_brief.rst
--- a/examples/bnorm_u8_via_binary_postops.cpp
+++ b/examples/bnorm_u8_via_binary_postops.cpp
@ -17,17 +17,19 @@
 /// @example bnorm_u8_via_binary_postops.cpp
 /// > Annotated version: @ref bnorm_u8_via_binary_postops_cpp
-/// @page bnorm_u8_via_binary_postops_cpp Bnorm u8 by binary post-ops example
+/// @page bnorm_u8_via_binary_postops_cpp_brief
-/// The example implements the Batch normalization u8 via the following
+/// @brief The example implements the Batch normalization u8 via the following
 /// operations: binary_sub(src, mean), binary_div(tmp_dst, variance),
 /// binary_mul(tmp_dst, scale), binary_add(tmp_dst, shift).
 /// @page bnorm_u8_via_binary_postops_cpp Bnorm u8 by binary post-ops example
 /// \copybrief bnorm_u8_via_binary_postops_cpp_brief
 ///
 /// Some key take-aways include:
-///
+/// - How tensors are implemented and submitted to primitives.
-/// * How tensors are implemented and submitted to primitives.
+/// - How primitives are created.
-/// * How primitives are created.
+/// - How to use multiple binary post operations.
-/// * How to use multiple binary post operations.
+/// - How to use different data types in binary.
 /// * How to use different data types in binary.
 ///
 /// @include bnorm_u8_via_binary_postops.cpp
--- a/examples/cnn_inference_f32.c
+++ b/examples/cnn_inference_f32.c
@ -17,9 +17,12 @@
 /// @example cnn_inference_f32.c
 /// > Annotated version: @ref cnn_inference_f32_c
-/// @page cnn_inference_f32_c CNN f32 inference example
+/// @page cnn_inference_f32_c_brief
-/// This C API example demonstrates how to build an AlexNet neural
+/// @brief This C API example demonstrates how to build an AlexNet neural
 /// network topology for forward-pass inference.
 /// @page cnn_inference_f32_c CNN f32 inference example
 /// \copybrief cnn_inference_f32_c_brief
 ///
 /// Some key take-aways include:
 ///
--- a/examples/cnn_inference_f32.cpp
+++ b/examples/cnn_inference_f32.cpp
@ -17,9 +17,12 @@
 /// @example cnn_inference_f32.cpp
 /// > Annotated version: @ref cnn_inference_f32_cpp
 /// @page cnn_inference_f32_cpp_brief
 /// @brief This C++ API example demonstrates how to build an AlexNet neural network
 /// topology for forward-pass inference.
 /// @page cnn_inference_f32_cpp CNN f32 inference example
-/// This C++ API example demonstrates how to build an AlexNet neural
+/// \copybrief cnn_inference_f32_cpp_brief
 /// network topology for forward-pass inference.
 ///
 /// > Example code: @ref cnn_inference_f32.cpp
 ///
--- a/examples/cnn_inference_int8.cpp
+++ b/examples/cnn_inference_int8.cpp
@ -17,9 +17,12 @@
 /// @example cnn_inference_int8.cpp
 /// > Annotated version: @ref cnn_inference_int8_cpp
 /// @page cnn_inference_int8_cpp_brief
 /// @brief This C++ API example demonstrates how to run AlexNet's conv3 and relu3 with
 /// int8 data type.
 /// @page cnn_inference_int8_cpp CNN int8 inference example
-/// This C++ API example demonstrates how to run AlexNet's conv3 and relu3
+/// \copybrief cnn_inference_int8_cpp_brief
 /// with int8 data type.
 ///
 /// > Example code: @ref cnn_inference_int8.cpp
--- a/examples/cnn_training_bf16.cpp
+++ b/examples/cnn_training_bf16.cpp
@ -17,11 +17,12 @@
 /// @example cnn_training_bf16.cpp
 /// > Annotated version: @ref cnn_training_bf16_cpp
 ///
-/// @page cnn_training_bf16_cpp CNN bf16 training example
+/// @page cnn_training_bf16_cpp_brief
-/// This C++ API example demonstrates how to build an AlexNet model training
+/// @brief This C++ API example demonstrates how to build an AlexNet model training
 /// using the bfloat16 data type.
-///
+
-/// The example implements a few layers from AlexNet model.
+/// @page cnn_training_bf16_cpp CNN bf16 training example
 /// \copybrief cnn_training_bf16_cpp_brief
 ///
 /// @include cnn_training_bf16.cpp
--- a/examples/cnn_training_f32.cpp
+++ b/examples/cnn_training_f32.cpp
@ -17,9 +17,11 @@
 /// @example cnn_training_f32.cpp
 /// > Annotated version: @ref cnn_training_f32_cpp
 ///
 /// @page cnn_training_f32_cpp_brief
 /// @brief This C++ API example demonstrates how to build an AlexNet model training.
 /// @page cnn_training_f32_cpp CNN f32 training example
-/// This C++ API example demonstrates how to build an AlexNet model training.
+/// \copybrief cnn_training_f32_cpp_brief
 /// The example implements a few layers from AlexNet model.
 ///
 /// @include cnn_training_f32.cpp
--- a/examples/cpu_cnn_training_f32.c
+++ b/examples/cpu_cnn_training_f32.c
@ -17,9 +17,12 @@
 /// @example cpu_cnn_training_f32.c
 /// > Annotated version: @ref cpu_cnn_training_f32_c
-/// @page cpu_cnn_training_f32_c CNN f32 training example
+/// @page cpu_cnn_training_f32_c_brief
-/// This C API example demonstrates how to build an AlexNet model training.
+/// @brief This C API example demonstrates how to build an AlexNet model training.
 /// The example implements a few layers from AlexNet model.
 /// @page cpu_cnn_training_f32_c CNN f32 training example
 /// \copybrief cpu_cnn_training_f32_c_brief
 ///
 /// @include cpu_cnn_training_f32.c
--- a/examples/cpu_matmul_coo.cpp
+++ b/examples/cpu_matmul_coo.cpp
@ -17,10 +17,13 @@
 /// @example cpu_matmul_coo.cpp
 /// > Annotated version: @ref cpu_matmul_coo_cpp
-/// @page cpu_matmul_coo_cpp MatMul Primitive with Sparse Memory in COO Format
+/// @page cpu_matmul_coo_cpp_brief
-/// This C++ API example demonstrates how to create and execute a
+/// @brief This C++ API example demonstrates how to create and execute a
 /// [MatMul](@ref dev_guide_matmul) primitive that uses a source tensor
 /// encoded with the COO sparse encoding.
 /// @page cpu_matmul_coo_cpp MatMul Primitive with Sparse Memory in COO Format
 /// \copybrief cpu_matmul_coo_cpp_brief
 ///
 /// @include cpu_matmul_coo.cpp
--- a/examples/cpu_matmul_csr.cpp
+++ b/examples/cpu_matmul_csr.cpp
@ -17,10 +17,13 @@
 /// @example cpu_matmul_csr.cpp
 /// > Annotated version: @ref cpu_matmul_csr_cpp
-/// @page cpu_matmul_csr_cpp MatMul Primitive with Sparse Memory in CSR Format
+/// @page cpu_matmul_csr_cpp_brief
-/// This C++ API example demonstrates how to create and execute a
+/// @brief This C++ API example demonstrates how to create and execute a
 /// [MatMul](@ref dev_guide_matmul) primitive that uses a source tensor
 /// encoded with the CSR sparse encoding.
 /// @page cpu_matmul_csr_cpp MatMul Primitive with Sparse Memory in CSR Format
 /// \copybrief cpu_matmul_csr_cpp_brief
 ///
 /// @include cpu_matmul_csr.cpp
--- a/examples/cpu_matmul_weights_compression.cpp
+++ b/examples/cpu_matmul_weights_compression.cpp
@ -17,10 +17,13 @@
 /// @example cpu_matmul_weights_compression.cpp
 /// > Annotated version: @ref cpu_matmul_weights_compression_cpp
-/// @page cpu_matmul_weights_compression_cpp MatMul Primitive Example
+/// @page cpu_matmul_weights_compression_cpp_brief
-/// This C++ API example demonstrates how to create and execute a
+/// @brief This C++ API example demonstrates how to create and execute a
 /// [MatMul](@ref dev_guide_matmul) primitive that uses a weights tensor
 /// encoded with the packed sparse encoding.
 /// @page cpu_matmul_weights_compression_cpp MatMul Primitive Example
 /// \copybrief cpu_matmul_weights_compression_cpp_brief
 ///
 /// @include cpu_matmul_weights_compression.cpp
--- a/examples/cpu_rnn_inference_f32.cpp
+++ b/examples/cpu_rnn_inference_f32.cpp
@ -17,8 +17,11 @@
 /// @example cpu_rnn_inference_f32.cpp
 /// > Annotated version: @ref cpu_rnn_inference_f32_cpp
-/// @page cpu_rnn_inference_f32_cpp RNN f32 inference example
+/// @page cpu_rnn_inference_f32_cpp_brief
-/// This C++ API example demonstrates how to build GNMT model inference.
+/// @brief This C++ API example demonstrates how to build GNMT model inference.
 /// @page cpu_rnn_inference_f32_cpp RNN f32 Inference Example
 /// \copybrief cpu_rnn_inference_f32_cpp_brief
 ///
 /// > Example code: @ref cpu_rnn_inference_f32.cpp
 ///
--- a/examples/cpu_rnn_inference_int8.cpp
+++ b/examples/cpu_rnn_inference_int8.cpp
@ -17,8 +17,11 @@
 /// @example cpu_rnn_inference_int8.cpp
 /// > Annotated version: @ref cpu_rnn_inference_int8_cpp
 /// @page cpu_rnn_inference_int8_cpp_brief
 /// @brief This C++ API example demonstrates how to build GNMT model inference.
 /// @page cpu_rnn_inference_int8_cpp RNN int8 inference example
-/// This C++ API example demonstrates how to build GNMT model inference.
+/// \copybrief cpu_rnn_inference_int8_cpp_brief
 ///
 /// > Example code: @ref cpu_rnn_inference_int8.cpp
 ///
--- a/examples/cross_engine_reorder.c
+++ b/examples/cross_engine_reorder.c
@ -17,9 +17,12 @@
 /// @example cross_engine_reorder.c
 /// > Annotated version: @ref cross_engine_reorder_c
-/// @page cross_engine_reorder_c Reorder between CPU and GPU engines
+/// @page cross_engine_reorder_c_brief
-/// This C API example demonstrates programming flow when reordering memory
+/// @brief This C API example demonstrates programming flow when reordering memory
 /// between CPU and GPU engines.
 /// @page cross_engine_reorder_c Reorder between CPU and GPU engines
 /// \copybrief cross_engine_reorder_c_brief
 ///
 /// @include cross_engine_reorder.c
--- a/examples/cross_engine_reorder.cpp
+++ b/examples/cross_engine_reorder.cpp
@ -17,9 +17,12 @@
 /// @example cross_engine_reorder.cpp
 /// > Annotated version: @ref cross_engine_reorder_cpp
-/// @page cross_engine_reorder_cpp Reorder between CPU and GPU engines
+/// @page cross_engine_reorder_cpp_brief
-/// This C++ API example demonstrates programming flow when reordering memory
+/// @brief This C++ API example demonstrates programming flow when reordering memory
 /// between CPU and GPU engines.
 /// @page cross_engine_reorder_cpp Reorder between CPU and GPU engines
 /// \copybrief cross_engine_reorder_cpp_brief
 ///
 /// > Example code: @ref cross_engine_reorder.cpp
 ///
--- a/examples/getting_started.cpp
+++ b/examples/getting_started.cpp
@ -29,10 +29,15 @@
 using namespace dnnl;
 /// @page getting_started_cpp_brief
 /// @brief This C++ API example demonstrates the basics of the oneDNN programming model.
 /// @page getting_started_cpp oneDNN API Basic Workflow Tutorial
 /// \copybrief getting_started_cpp_brief
 ///
 /// > Example code: @ref getting_started.cpp
 ///
-/// This C++ API example demonstrates the basics of the oneDNN programming model:
+/// Key concepts:
 /// - How to create oneDNN memory objects.
 ///   - How to get data from the user's buffer into a oneDNN memory object.
 ///   - How a tensor's logical dimensions and memory object formats relate.
--- a/examples/gpu_opencl_interop.cpp
+++ b/examples/gpu_opencl_interop.cpp
@ -17,9 +17,12 @@
 /// @example gpu_opencl_interop.cpp
 /// > Annotated version: @ref gpu_opencl_interop_cpp
-/// @page gpu_opencl_interop_cpp Getting started on GPU with OpenCL extensions API
+/// @page gpu_opencl_interop_cpp_brief
-/// This C++ API example demonstrates programming for Intel(R) Processor
+/// @brief This C++ API example demonstrates programming for Intel(R) Processor
 /// Graphics with OpenCL* extensions API in oneDNN.
 /// @page gpu_opencl_interop_cpp Getting started on GPU with OpenCL extensions API
 /// \copybrief gpu_opencl_interop_cpp_brief
 ///
 /// > Example code: @ref gpu_opencl_interop.cpp
 ///
--- a/examples/graph/cpu_getting_started.cpp
+++ b/examples/graph/cpu_getting_started.cpp
@ -17,9 +17,12 @@
 /// @example cpu_getting_started.cpp
 /// > Annotated version: @ref graph_cpu_getting_started_cpp
-/// @page graph_cpu_getting_started_cpp Getting started on CPU with Graph API
+/// @page graph_cpu_getting_started_cpp_brief
-/// This is an example to demonstrate how to build a simple graph and run it on
+/// @brief This is an example to demonstrate how to build a simple graph and run it on
 /// CPU.
 /// @page graph_cpu_getting_started_cpp Getting started on CPU with Graph API
 /// \copybrief graph_cpu_getting_started_cpp_brief
 ///
 /// > Example code: @ref cpu_getting_started.cpp
 ///
--- a/examples/graph/cpu_inference_int8.cpp
+++ b/examples/graph/cpu_inference_int8.cpp
@ -17,9 +17,12 @@
 /// @example cpu_inference_int8.cpp
 /// > Annotated version: @ref graph_cpu_inference_int8_cpp
 /// @page graph_cpu_inference_int8_cpp_brief
 /// @brief This is an example to demonstrate how to build an int8 graph with Graph API
 /// and run it on CPU.
 /// @page graph_cpu_inference_int8_cpp Convolution int8 inference example with Graph API
-/// This is an example to demonstrate how to build an int8 graph with Graph
+/// \copybrief graph_cpu_inference_int8_cpp_brief
 /// API and run it on CPU.
 ///
 /// > Example code: @ref cpu_inference_int8.cpp
 ///
--- a/examples/graph/cpu_single_op_partition.cpp
+++ b/examples/graph/cpu_single_op_partition.cpp
@ -17,8 +17,12 @@
 /// @example cpu_single_op_partition.cpp
 /// > Annotated version: @ref graph_cpu_single_op_partition_cpp
 /// @page graph_cpu_single_op_partition_cpp_brief
 /// @brief This is an example to demonstrate how to build a simple op graph and run it
 /// on CPU.
 /// @page graph_cpu_single_op_partition_cpp Single op partition on CPU
-/// This is an example to demonstrate how to build a simple op graph and run it on CPU.
+/// \copybrief graph_cpu_single_op_partition_cpp_brief
 ///
 /// > Example code: @ref cpu_single_op_partition.cpp
 ///
--- a/examples/graph/gpu_opencl_getting_started.cpp
+++ b/examples/graph/gpu_opencl_getting_started.cpp
@ -17,9 +17,12 @@
 /// @example gpu_opencl_getting_started.cpp
 /// > Annotated version: @ref graph_gpu_opencl_getting_started_cpp
-/// @page graph_gpu_opencl_getting_started_cpp Getting started with OpenCL extensions and Graph API
+/// @page graph_gpu_opencl_getting_started_cpp_brief
-/// This is an example to demonstrate how to build a simple graph and run on
+/// @brief This is an example to demonstrate how to build a simple graph and run it on
 /// the OpenCL GPU runtime.
 /// @page graph_gpu_opencl_getting_started_cpp Getting started with OpenCL extensions and Graph API
 /// \copybrief graph_gpu_opencl_getting_started_cpp_brief
 ///
 /// > Example code: @ref gpu_opencl_getting_started.cpp
 ///
--- a/examples/graph/sycl_getting_started.cpp
+++ b/examples/graph/sycl_getting_started.cpp
@ -17,9 +17,12 @@
 /// @example sycl_getting_started.cpp
 /// > Annotated version: @ref graph_sycl_getting_started_cpp
-/// @page graph_sycl_getting_started_cpp Getting started with SYCL extensions API and Graph API
+/// @page graph_sycl_getting_started_cpp_brief
-/// This is an example to demonstrate how to build a simple graph and run on
+/// @brief This is an example to demonstrate how to build a simple graph and run it on
 /// a SYCL device.
 /// @page graph_sycl_getting_started_cpp Getting started with SYCL extensions API and Graph API
 /// \copybrief graph_sycl_getting_started_cpp_brief
 ///
 /// > Example code: @ref sycl_getting_started.cpp
 ///
--- a/examples/graph/sycl_single_op_partition.cpp
+++ b/examples/graph/sycl_single_op_partition.cpp
@ -17,8 +17,12 @@
 /// @example sycl_single_op_partition.cpp
 /// > Annotated version: @ref graph_sycl_single_op_partition_cpp
 /// @page graph_sycl_single_op_partition_cpp_brief
 /// @brief This is an example to demonstrate how to build a simple op graph and run it
 /// on GPU.
 /// @page graph_sycl_single_op_partition_cpp Single op partition on GPU
-/// This is an example to demonstrate how to build a simple op graph and run it on gpu.
+/// \copybrief graph_sycl_single_op_partition_cpp_brief
 ///
 /// > Example code: @ref sycl_single_op_partition.cpp
 ///
--- a/examples/matmul_f8_quantization.cpp
+++ b/examples/matmul_f8_quantization.cpp
@ -16,10 +16,13 @@
 /// @example matmul_f8_quantization.cpp
 /// > Annotated version: @ref matmul_f8_quantization_cpp
-///
+
 /// @page matmul_f8_quantization_cpp_brief
 /// @brief C++ API example demonstrating how to use f8_e5m2 and f8_e4m3 data types for
 /// [MatMul](@ref dev_guide_matmul) with scaling for quantization.
 /// @page matmul_f8_quantization_cpp Matrix Multiplication with f8 Quantization
-/// C++ API example demonstrating how to use f8_e5m2 and f8_e4m3 data types
+/// \copybrief matmul_f8_quantization_cpp_brief
 /// for [MatMul](@ref dev_guide_matmul) with scaling for quantization.
 ///
 /// Specification of f8 Formats:
 /// - **f8_e5m2**: 1 sign + 5 exponent + 2 mantissa bits, max value is 57,344.
@ -27,8 +30,8 @@
 ///
 /// Concepts:
 /// - f8 quantization.
-///   - f8_e5m2 and f8_e4m3 data type conversion from f32 is done using [Reorder primitive](@ref dev_guide_reorder)
+///   - f8_e5m2 and f8_e4m3 data type conversion from f32 is done using
-///     with simple scaling factors.
+///     [Reorder primitive](@ref dev_guide_reorder) with simple scaling factors.
 /// - Matrix multiplication with f8 inputs and f32 output.
 ///   - Scaling is done using dnnl::primitive_attr::set_scales_mask().
 ///
--- a/examples/matmul_perf.cpp
+++ b/examples/matmul_perf.cpp
@ -17,9 +17,12 @@
 /// @example matmul_perf.cpp
 /// > Annotated version: @ref matmul_perf_cpp
-/// @page matmul_perf_cpp Matrix Multiplication Performance Example
+/// @page matmul_perf_cpp_brief
-/// This C++ example runs a simple matrix multiplication (matmul) performance
+/// @brief This C++ example runs a simple matrix multiplication (matmul) performance
 /// test using oneDNN.
 /// @page matmul_perf_cpp Matrix Multiplication Performance Example
 /// \copybrief matmul_perf_cpp_brief
 ///
 /// The workflow includes the following steps:
 ///   - Set up and execute a matmul operation with the specified engine kind
--- a/examples/matmul_with_host_scalar_scale.cpp
+++ b/examples/matmul_with_host_scalar_scale.cpp
@ -28,10 +28,13 @@ using namespace dnnl;
 /// @example matmul_with_host_scalar_scale.cpp
 /// > Annotated version: @ref matmul_with_host_scalar_scale_cpp
 /// @page matmul_with_host_scalar_scale_cpp_brief
 /// @brief This C++ API example demonstrates matrix multiplication (C = alpha * A * B)
 /// with a scalar scaling factor residing on the host.
 /// @page matmul_with_host_scalar_scale_cpp MatMul with Host Scalar Scale example
 /// \copybrief matmul_with_host_scalar_scale_cpp_brief
 ///
 /// This C++ API example demonstrates matrix multiplication (C = alpha * A * B)
 /// with a scalar scale factor using oneDNN.
 /// The workflow includes the following steps:
 /// - Initialize a oneDNN engine and stream for computation.
 /// - Allocate and initialize matrices A and B.
--- a/examples/memory_format_propagation.cpp
+++ b/examples/memory_format_propagation.cpp
@ -21,9 +21,12 @@
 #include <sstream>
 #include <string>
-/// @page memory_format_propagation_cpp Memory Format Propagation
+/// @page memory_format_propagation_cpp_brief
-/// This example demonstrates memory format propagation, which is critical for
+/// @brief This example demonstrates memory format propagation, which is critical for
 /// deep learning applications performance.
 /// @page memory_format_propagation_cpp Memory Format Propagation
 /// \copybrief memory_format_propagation_cpp_brief
 ///
 /// > Example code: @ref memory_format_propagation.cpp
 ///
--- a/examples/performance_profiling.cpp
+++ b/examples/performance_profiling.cpp
@ -17,9 +17,12 @@
 /// @example performance_profiling.cpp
 /// > Annotated version: @ref performance_profiling_cpp
-/// @page performance_profiling_cpp Performance Profiling Example
+/// @page performance_profiling_cpp_brief
-/// This example demonstrates the best practices for application performance
+/// @brief This example demonstrates the best practices for application performance
 /// optimizations with oneDNN.
 /// @page performance_profiling_cpp Performance Profiling Example
 /// \copybrief performance_profiling_cpp_brief
 ///
 /// > Example code: @ref performance_profiling.cpp
 ///
--- a/examples/primitives/augru.cpp
+++ b/examples/primitives/augru.cpp
@ -17,11 +17,12 @@
 /// @example augru.cpp
 /// > Annotated version: @ref augru_example_cpp
 /// @page augru_example_cpp_brief
 /// @brief This C++ API example demonstrates how to create and execute an
 /// [AUGRU RNN](@ref dev_guide_rnn) primitive in forward training propagation mode.
 /// @page augru_example_cpp AUGRU RNN Primitive Example
-///
+/// \copybrief augru_example_cpp_brief
 /// This C++ API example demonstrates how to create and execute an
 /// [AUGRU RNN](@ref dev_guide_rnn) primitive in forward training propagation
 /// mode.
 ///
 /// Key optimizations included in this example:
 /// - Creation of optimized memory format from the primitive descriptor.
--- a/examples/primitives/batch_normalization.cpp
+++ b/examples/primitives/batch_normalization.cpp
@ -17,11 +17,13 @@
 /// @example batch_normalization.cpp
 /// > Annotated version: @ref batch_normalization_example_cpp
 /// @page batch_normalization_example_cpp_brief
 /// @brief This C++ API example demonstrates how to create and execute a
 /// [Batch Normalization](@ref dev_guide_batch_normalization) primitive in forward
 /// training propagation mode.
 /// @page batch_normalization_example_cpp Batch Normalization Primitive Example
-///
+/// \copybrief batch_normalization_example_cpp_brief
 /// This C++ API example demonstrates how to create and execute a
 /// [Batch Normalization](@ref dev_guide_batch_normalization) primitive in
 /// forward training propagation mode.
 ///
 /// Key optimizations included in this example:
 /// - In-place primitive execution;
--- a/examples/primitives/binary.cpp
+++ b/examples/primitives/binary.cpp
@ -17,9 +17,12 @@
 /// @example binary.cpp
 /// > Annotated version: @ref binary_example_cpp
-/// @page binary_example_cpp Binary Primitive Example
+/// @page binary_example_cpp_brief
-/// This C++ API example demonstrates how to create and execute a
+/// @brief This C++ API example demonstrates how to create and execute a
 /// [Binary](@ref dev_guide_binary) primitive.
 /// @page binary_example_cpp Binary Primitive Example
 /// \copybrief binary_example_cpp_brief
 ///
 /// Key optimizations included in this example:
 /// - In-place primitive execution;
--- a/examples/primitives/concat.cpp
+++ b/examples/primitives/concat.cpp
@ -17,9 +17,12 @@
 /// @example concat.cpp
 /// > Annotated version: @ref concat_example_cpp
-/// @page concat_example_cpp Concat Primitive Example
+/// @page concat_example_cpp_brief
-/// This C++ API example demonstrates how to create and execute a
+/// @brief This C++ API example demonstrates how to create and execute a
 /// [Concat](@ref dev_guide_concat) primitive.
 /// @page concat_example_cpp Concat Primitive Example
 /// \copybrief concat_example_cpp_brief
 ///
 /// Key optimizations included in this example:
 /// - Identical source (src) memory formats.
--- a/examples/primitives/convolution.cpp
+++ b/examples/primitives/convolution.cpp
@ -17,10 +17,13 @@
 /// @example convolution.cpp
 /// > Annotated version: @ref convolution_example_cpp
-/// @page convolution_example_cpp Convolution Primitive Example
+/// @page convolution_example_cpp_brief
-/// This C++ API example demonstrates how to create and execute a
+/// @brief This C++ API example demonstrates how to create and execute a
 /// [Convolution](@ref dev_guide_convolution) primitive in forward propagation
 /// mode in two configurations - with and without groups.
 /// @page convolution_example_cpp Convolution Primitive Example
 /// \copybrief convolution_example_cpp_brief
 ///
 /// Key optimizations included in this example:
 /// - Creation of optimized memory format from the primitive descriptor;
--- a/examples/primitives/deconvolution.cpp
+++ b/examples/primitives/deconvolution.cpp
@ -17,10 +17,13 @@
 /// @example deconvolution.cpp
 /// > Annotated version: @ref deconvolution_example_cpp
-/// @page deconvolution_example_cpp Deconvolution Primitive Example
+/// @page deconvolution_example_cpp_brief
-/// This C++ API example demonstrates how to create and execute a
+/// @brief This C++ API example demonstrates how to create and execute a
 /// [Deconvolution](@ref dev_guide_convolution) primitive in forward propagation
 /// mode.
 /// @page deconvolution_example_cpp Deconvolution Primitive Example
 /// \copybrief deconvolution_example_cpp_brief
 ///
 /// Key optimizations included in this example:
 /// - Creation of optimized memory format from the primitive descriptor;
--- a/examples/primitives/eltwise.cpp
+++ b/examples/primitives/eltwise.cpp
@ -17,10 +17,13 @@
 /// @example eltwise.cpp
 /// > Annotated version: @ref eltwise_example_cpp
-/// @page eltwise_example_cpp Element-Wise Primitive Example
+/// @page eltwise_example_cpp_brief
-/// This C++ API example demonstrates how to create and execute an
+/// @brief This C++ API example demonstrates how to create and execute an
 /// [Element-wise](@ref dev_guide_eltwise) primitive in forward training
 /// propagation mode.
 /// @page eltwise_example_cpp Element-Wise Primitive Example
 /// \copybrief eltwise_example_cpp_brief
 ///
 /// @include eltwise.cpp
--- a/examples/primitives/group_normalization.cpp
+++ b/examples/primitives/group_normalization.cpp
@ -17,10 +17,13 @@
 /// @example group_normalization.cpp
 /// > Annotated version: @ref group_normalization_example_cpp
 /// @page group_normalization_example_cpp_brief
 /// @brief This C++ API example demonstrates how to create and execute a
 /// [Group Normalization](@ref dev_guide_group_normalization) primitive in forward
 /// training propagation mode.
 /// @page group_normalization_example_cpp Group Normalization Primitive Example
-/// This C++ API example demonstrates how to create and execute a
+/// \copybrief group_normalization_example_cpp_brief
 /// [Group Normalization](@ref dev_guide_group_normalization) primitive in
 /// forward training propagation mode.
 ///
 /// Key optimizations included in this example:
 /// - In-place primitive execution;
--- a/examples/primitives/inner_product.cpp
+++ b/examples/primitives/inner_product.cpp
@ -17,9 +17,12 @@
 /// @example inner_product.cpp
 /// > Annotated version: @ref inner_product_example_cpp
-/// @page inner_product_example_cpp Inner Product Primitive Example
+/// @page inner_product_example_cpp_brief
-/// This C++ API example demonstrates how to create and execute an
+/// @brief This C++ API example demonstrates how to create and execute an
 /// [Inner Product](@ref dev_guide_inner_product) primitive.
 /// @page inner_product_example_cpp Inner Product Primitive Example
 /// \copybrief inner_product_example_cpp_brief
 ///
 /// Key optimizations included in this example:
 /// - Primitive attributes with fused post-ops;
--- a/examples/primitives/layer_normalization.cpp
+++ b/examples/primitives/layer_normalization.cpp
@ -17,10 +17,13 @@
 /// @example layer_normalization.cpp
 /// > Annotated version: @ref layer_normalization_example_cpp
 /// @page layer_normalization_example_cpp_brief
 /// @brief This C++ API example demonstrates how to create and execute a
 /// [Layer normalization](@ref dev_guide_layer_normalization) primitive in forward
 /// propagation mode.
 /// @page layer_normalization_example_cpp Layer Normalization Primitive Example
-/// This C++ API example demonstrates how to create and execute a
+/// \copybrief layer_normalization_example_cpp_brief
 /// [Layer normalization](@ref dev_guide_layer_normalization) primitive in
 /// forward propagation mode.
 ///
 /// Key optimizations included in this example:
 /// - In-place primitive execution;
--- a/examples/primitives/lbr_gru.cpp
+++ b/examples/primitives/lbr_gru.cpp
@ -17,10 +17,13 @@
 /// @example lbr_gru.cpp
 /// > Annotated version: @ref lbr_gru_example_cpp
-/// @page lbr_gru_example_cpp Linear-Before-Reset GRU RNN Primitive Example
+/// @page lbr_gru_example_cpp_brief
-/// This C++ API example demonstrates how to create and execute a
+/// @brief This C++ API example demonstrates how to create and execute a
 /// [Linear-Before-Reset GRU RNN](@ref dev_guide_rnn) primitive in forward
 /// training propagation mode.
 /// @page lbr_gru_example_cpp Linear-Before-Reset GRU RNN Primitive Example
 /// \copybrief lbr_gru_example_cpp_brief
 ///
 /// Key optimizations included in this example:
 /// - Creation of optimized memory format from the primitive descriptor.
--- a/examples/primitives/lrn.cpp
+++ b/examples/primitives/lrn.cpp
@ -17,10 +17,13 @@
 /// @example lrn.cpp
 /// > Annotated version: @ref lrn_example_cpp
-/// @page lrn_example_cpp Local Response Normalization Primitive Example
+/// @page lrn_example_cpp_brief
-/// This C++ API demonstrates how to create and execute a
+/// @brief This C++ API example demonstrates how to create and execute a
 /// [Local response normalization](@ref dev_guide_lrn) primitive in forward
 /// training propagation mode.
 /// @page lrn_example_cpp Local Response Normalization Primitive Example
 /// \copybrief lrn_example_cpp_brief
 ///
 /// @include lrn.cpp
--- a/examples/primitives/lstm.cpp
+++ b/examples/primitives/lstm.cpp
@ -17,10 +17,12 @@
 /// @example lstm.cpp
 /// > Annotated version: @ref lstm_example_cpp
 /// @page lstm_example_cpp_brief
 /// @brief This C++ API example demonstrates how to create and execute an
 /// [LSTM RNN](@ref dev_guide_rnn) primitive in forward training propagation mode.
 /// @page lstm_example_cpp LSTM RNN Primitive Example
-/// This C++ API example demonstrates how to create and execute an
+/// \copybrief lstm_example_cpp_brief
 /// [LSTM RNN](@ref dev_guide_rnn) primitive in forward training propagation
 /// mode.
 ///
 /// Key optimizations included in this example:
 /// - Creation of optimized memory format from the primitive descriptor.
--- a/examples/primitives/matmul.cpp
+++ b/examples/primitives/matmul.cpp
@ -16,9 +16,12 @@
 /// @example matmul.cpp
-/// @page matmul_example_cpp Matmul Primitive Example
+/// @page matmul_example_cpp_brief
-/// This C++ API example demonstrates how to create and execute a
+/// @brief This C++ API example demonstrates how to create and execute a
 /// [MatMul](@ref dev_guide_matmul) primitive.
 /// @page matmul_example_cpp Matmul Primitive Example
 /// \copybrief matmul_example_cpp_brief
 ///
 /// Key optimizations included in this example:
 /// - Primitive attributes with fused post-ops.
--- a/examples/primitives/pooling.cpp
+++ b/examples/primitives/pooling.cpp
@ -17,10 +17,12 @@
 /// @example pooling.cpp
 /// > Annotated version: @ref pooling_example_cpp
 /// @page pooling_example_cpp_brief
 /// @brief This C++ API example demonstrates how to create and execute a
 /// [Pooling](@ref dev_guide_pooling) primitive in forward training propagation mode.
 /// @page pooling_example_cpp Pooling Primitive Example
-/// This C++ API example demonstrates how to create and execute a
+/// \copybrief pooling_example_cpp_brief
 /// [Pooling](@ref dev_guide_pooling) primitive in forward training propagation
 /// mode.
 ///
 /// @include pooling.cpp
--- a/examples/primitives/prelu.cpp
+++ b/examples/primitives/prelu.cpp
@ -17,10 +17,12 @@
 /// @example prelu.cpp
 /// > Annotated version: @ref prelu_example_cpp
 /// @page prelu_example_cpp_brief
 /// @brief This C++ API example demonstrates how to create and execute a
 /// [PReLU](@ref dev_guide_prelu) primitive in forward training propagation mode.
 /// @page prelu_example_cpp PReLU Primitive Example
-/// This C++ API example demonstrates how to create and execute an
+/// \copybrief prelu_example_cpp_brief
 /// [PReLU](@ref dev_guide_prelu) primitive in forward training
 /// propagation mode.
 ///
 /// @include prelu.cpp
--- a/examples/primitives/reduction.cpp
+++ b/examples/primitives/reduction.cpp
@ -17,10 +17,12 @@
 /// @example reduction.cpp
 /// > Annotated version: @ref reduction_example_cpp
-/// @page reduction_example_cpp Reduction Primitive Example
+/// @page reduction_example_cpp_brief
-/// This C++ API example demonstrates how to create and execute a
+/// @brief This C++ API example demonstrates how to create and execute a
 /// [Reduction](@ref dev_guide_reduction) primitive.
-///
+
 /// @page reduction_example_cpp Reduction Primitive Example
 /// \copybrief reduction_example_cpp_brief
 /// @include reduction.cpp
 #include <cmath>
--- a/examples/primitives/reorder.cpp
+++ b/examples/primitives/reorder.cpp
@ -17,9 +17,12 @@
 /// @example reorder.cpp
 /// > Annotated version: @ref reorder_example_cpp
-/// @page reorder_example_cpp Reorder Primitive Example
+/// @page reorder_example_cpp_brief
-/// This C++ API demonstrates how to create and execute a
+/// @brief This C++ API example demonstrates how to create and execute a
 /// [Reorder](@ref dev_guide_reorder) primitive.
 /// @page reorder_example_cpp Reorder Primitive Example
 /// \copybrief reorder_example_cpp_brief
 ///
 /// Key optimizations included in this example:
 /// - Primitive attributes for output scaling.
--- a/examples/primitives/resampling.cpp
+++ b/examples/primitives/resampling.cpp
@ -17,11 +17,13 @@
 /// @example resampling.cpp
 /// > Annotated version: @ref resampling_example_cpp
-/// @page resampling_example_cpp Resampling Primitive Example
+/// @page resampling_example_cpp_brief
-/// This C++ API example demonstrates how to create and execute a
+/// @brief This C++ API example demonstrates how to create and execute a
 /// [Resampling](@ref dev_guide_resampling) primitive in forward training
 /// propagation mode.
-///
+
 /// @page resampling_example_cpp Resampling Primitive Example
 /// \copybrief resampling_example_cpp_brief
 /// @include resampling.cpp
 #include <algorithm>
--- a/examples/primitives/shuffle.cpp
+++ b/examples/primitives/shuffle.cpp
@ -17,9 +17,12 @@
 /// @example shuffle.cpp
 /// > Annotated version: @ref shuffle_example_cpp
-/// @page shuffle_example_cpp Shuffle Primitive Example
+/// @page shuffle_example_cpp_brief
-/// This C++ API example demonstrates how to create and execute a
+/// @brief This C++ API example demonstrates how to create and execute a
 /// [Shuffle](@ref dev_guide_shuffle) primitive.
 /// @page shuffle_example_cpp Shuffle Primitive Example
 /// \copybrief shuffle_example_cpp_brief
 ///
 /// Key optimizations included in this example:
 /// - Shuffle along axis 1 (channels).
--- a/examples/primitives/softmax.cpp
+++ b/examples/primitives/softmax.cpp
@ -17,10 +17,12 @@
 /// @example softmax.cpp
 /// > Annotated version: @ref softmax_example_cpp
 /// @page softmax_example_cpp_brief
 /// @brief This C++ API example demonstrates how to create and execute a
 /// [Softmax](@ref dev_guide_softmax) primitive in forward training propagation mode.
 /// @page softmax_example_cpp Softmax Primitive Example
-/// This C++ API example demonstrates how to create and execute a
+/// \copybrief softmax_example_cpp_brief
 /// [Softmax](@ref dev_guide_softmax) primitive in forward training propagation
 /// mode.
 ///
 /// Key optimizations included in this example:
 /// - In-place primitive execution;
--- a/examples/primitives/sum.cpp
+++ b/examples/primitives/sum.cpp
@ -17,9 +17,12 @@
 /// @example sum.cpp
 /// > Annotated version: @ref sum_example_cpp
-/// @page sum_example_cpp Sum Primitive Example
+/// @page sum_example_cpp_brief
-/// This C++ API example demonstrates how to create and execute a
+/// @brief This C++ API example demonstrates how to create and execute a
 /// [Sum](@ref dev_guide_sum) primitive.
 /// @page sum_example_cpp Sum Primitive Example
 /// \copybrief sum_example_cpp_brief
 ///
 /// Key optimizations included in this example:
 /// - Identical memory formats for source (src) and destination (dst) tensors.
--- a/examples/primitives/vanilla_rnn.cpp
+++ b/examples/primitives/vanilla_rnn.cpp
@ -17,10 +17,12 @@
 /// @example vanilla_rnn.cpp
 /// > Annotated version: @ref vanilla_rnn_example_cpp
 /// @page vanilla_rnn_example_cpp_brief
 /// @brief This C++ API example demonstrates how to create and execute a
 /// [Vanilla RNN](@ref dev_guide_rnn) primitive in forward training propagation mode.
 /// @page vanilla_rnn_example_cpp Vanilla RNN Primitive Example
-/// This C++ API example demonstrates how to create and execute a
+/// \copybrief vanilla_rnn_example_cpp_brief
 /// [Vanilla RNN](@ref dev_guide_rnn) primitive in forward training propagation
 /// mode.
 ///
 /// Key optimizations included in this example:
 /// - Creation of optimized memory format from the primitive descriptor.
--- a/examples/rnn_training_f32.cpp
+++ b/examples/rnn_training_f32.cpp
@ -17,8 +17,11 @@
 /// @example rnn_training_f32.cpp
 /// > Annotated version: @ref rnn_training_f32_cpp
 /// @page rnn_training_f32_cpp_brief
 /// @brief This C++ API example demonstrates how to build GNMT model training.
 /// @page rnn_training_f32_cpp RNN f32 training example
-/// This C++ API example demonstrates how to build GNMT model training.
+/// \copybrief rnn_training_f32_cpp_brief
 ///
 /// @include rnn_training_f32.cpp
--- a/examples/sycl_interop_buffer.cpp
+++ b/examples/sycl_interop_buffer.cpp
@ -17,11 +17,15 @@
 /// @example  sycl_interop_buffer.cpp
 /// > Annotated version: @ref sycl_interop_buffer_cpp
-/// @page  sycl_interop_buffer_cpp Getting started on both CPU and GPU with SYCL extensions API
+/// @page sycl_interop_buffer_cpp_brief
 /// @brief This C++ API example demonstrates programming for Intel(R) Processor
 /// Graphics with SYCL extensions API in oneDNN.
 /// @page  sycl_interop_buffer_cpp Getting Started with SYCL Extensions API
 /// \copybrief sycl_interop_buffer_cpp_brief
 ///
 /// Full example text: @ref sycl_interop_buffer.cpp
 ///
 /// This C++ API example demonstrates programming for Intel(R) Processor
 /// Graphics with SYCL extensions API in oneDNN.
 /// The workflow includes the following steps:
 ///   - Create a GPU or CPU engine. It uses DPC++ as the runtime in this sample.
 ///   - Create a memory descriptor/object.
@ -146,8 +150,10 @@ void sycl_interop_buffer_tutorial(engine::kind engine_kind) {
    /// We are going to create a SYCL kernel that should initialize our data.
    /// To execute a SYCL kernel, we need a SYCL queue.
-    /// For simplicity we can construct a stream and extract the SYCL queue from it.
+    /// For simplicity we can construct a stream and extract the SYCL queue from
-    /// The kernel initializes the data by the `0, -1, 2, -3, ...` sequence: `data[i] = (-1)^i * i`.
+    /// it.
    /// The kernel initializes the data by the `0, -1, 2, -3, ...` sequence:
    /// `data[i] = (-1)^i * i`.
    /// @snippet sycl_interop_buffer.cpp sycl kernel exec
    // [sycl kernel exec]
    queue q = sycl_interop::get_queue(strm);
@ -185,8 +191,10 @@ void sycl_interop_buffer_tutorial(engine::kind engine_kind) {
    // [relu exec]
    ///
    ///@note
-    ///    With DPC++ runtime, both CPU and GPU have asynchronous execution; However, the user can
+    ///    With DPC++ runtime, both CPU and GPU have asynchronous execution;
-    ///    call dnnl::stream::wait() to synchronize the stream and ensure that all
+    ///    However, the user can
    ///    call dnnl::stream::wait() to synchronize the stream and ensure that
    ///    all
    ///    previously submitted primitives are completed.
    ///
@ -196,8 +204,10 @@ void sycl_interop_buffer_tutorial(engine::kind engine_kind) {
    /// Before running validation codes, we need to access the SYCL memory on
    /// the host.
    /// The simplest way to access the SYCL-backed memory on the host is to
-    /// construct a host accessor. Then we can directly read and write this data on the host.
+    /// construct a host accessor. Then we can directly read and write this data
-    /// However no any conflicting operations are allowed until the host accessor is destroyed.
+    /// on the host.
    /// However, no conflicting operations are allowed until the host
    /// accessor is destroyed.
    /// We can run validation codes on the host accordingly.
    /// @snippet sycl_interop_buffer.cpp Check the results
    // [Check the results]
--- a/examples/sycl_interop_usm.cpp
+++ b/examples/sycl_interop_usm.cpp
@ -17,10 +17,13 @@
 /// @example sycl_interop_usm.cpp
 /// > Annotated version: @ref sycl_interop_usm_cpp
-/// @page sycl_interop_usm_cpp SYCL USM example
+/// @page sycl_interop_usm_cpp_brief
-///
+/// @brief This C++ API example demonstrates programming for Intel(R) Processor
 /// Graphics with SYCL extensions API in oneDNN.
 /// @page sycl_interop_usm_cpp SYCL USM Example
 /// \copybrief sycl_interop_usm_cpp_brief
 ///
 /// The workflow includes the following steps:
 ///   - Create a GPU or CPU engine. It uses DPC++ as the runtime in this sample.
 ///   - Create a memory descriptor/object.
--- a/examples/tutorials/matmul/cpu_matmul_quantization.cpp
+++ b/examples/tutorials/matmul/cpu_matmul_quantization.cpp
@ -17,10 +17,13 @@
 /// @example cpu_matmul_quantization.cpp
 /// > Annotated version: @ref cpu_matmul_quantization_cpp
-/// @page cpu_matmul_quantization_cpp MatMul Tutorial: Quantization
+/// @page cpu_matmul_quantization_cpp_brief
-/// C++ API example demonstrating how one can perform reduced precision
+/// @brief C++ API example demonstrating how one can perform reduced precision
 /// matrix-matrix multiplication using [MatMul](@ref dev_guide_matmul) and the
 /// accuracy of the result compared to the floating point computations.
 /// @page cpu_matmul_quantization_cpp MatMul Tutorial: Quantization
 /// \copybrief cpu_matmul_quantization_cpp_brief
 ///
 /// Concepts:
 /// - **Static** and **dynamic** quantization
--- a/examples/tutorials/matmul/cpu_sgemm_and_matmul.cpp
+++ b/examples/tutorials/matmul/cpu_sgemm_and_matmul.cpp
@ -17,9 +17,12 @@
 /// @example cpu_sgemm_and_matmul.cpp
 /// > Annotated version: @ref cpu_sgemm_and_matmul_cpp
 /// @page cpu_sgemm_and_matmul_cpp_brief
 /// @brief C++ API example demonstrating [MatMul](@ref dev_guide_matmul) as a
 /// replacement for SGEMM functions.
 /// @page cpu_sgemm_and_matmul_cpp MatMul Tutorial: Comparison with SGEMM
-/// C++ API example demonstrating [MatMul](@ref dev_guide_matmul)
+/// \copybrief cpu_sgemm_and_matmul_cpp_brief
 /// as a replacement for SGEMM functions.
 ///
 /// Concepts:
 /// - Create primitive once, use multiple times
--- a/examples/tutorials/matmul/inference_int8_matmul.cpp
+++ b/examples/tutorials/matmul/inference_int8_matmul.cpp
@ -16,10 +16,13 @@
 /// @example inference_int8_matmul.cpp
 /// > Annotated version: @ref inference_int8_matmul_cpp
-///
+
-/// @page inference_int8_matmul_cpp MatMul Tutorial: INT8 Inference
+/// @page inference_int8_matmul_cpp_brief
-/// C++ API example demonstrating how one can use
+/// @brief C++ API example demonstrating how one can use
 /// [MatMul](@ref dev_guide_matmul) fused with ReLU in INT8 inference.
 /// @page inference_int8_matmul_cpp MatMul Tutorial: INT8 Inference
 /// \copybrief inference_int8_matmul_cpp_brief
 ///
 /// Concepts:
 /// - Asymmetric quantization
--- a/examples/tutorials/matmul/weights_decompression_matmul.cpp
+++ b/examples/tutorials/matmul/weights_decompression_matmul.cpp
@ -16,10 +16,13 @@
 /// @example weights_decompression_matmul.cpp
 /// > Annotated version: @ref weights_decompression_matmul_cpp
-///
+
-/// @page weights_decompression_matmul_cpp MatMul Tutorial: weights decompression
+/// @page weights_decompression_matmul_cpp_brief
-/// C++ API example demonstrating how one can use
+/// @brief C++ API example demonstrating how one can use
 /// [MatMul](@ref dev_guide_matmul) with compressed weights.
 /// @page weights_decompression_matmul_cpp MatMul Tutorial: Weights Decompression
 /// \copybrief weights_decompression_matmul_cpp_brief
 ///
 /// Concepts:
 /// - Asymmetric quantization
--- a/examples/ukernels/cpu_brgemm.cpp
+++ b/examples/ukernels/cpu_brgemm.cpp
@ -16,10 +16,13 @@
 /// @example cpu_brgemm.cpp
 /// > Annotated version: @ref cpu_brgemm_example_cpp
-///
+
-/// @page cpu_brgemm_example_cpp BRGeMM ukernel example
+/// @page cpu_brgemm_example_cpp_brief
-/// This C++ API example demonstrates how to create and execute a BRGeMM
+/// @brief This C++ API example demonstrates how to create and execute a BRGeMM
 /// ukernel.
 /// @page cpu_brgemm_example_cpp BRGeMM ukernel example
 /// \copybrief cpu_brgemm_example_cpp_brief
 ///
 /// @include cpu_brgemm.cpp