doc, examples: add brief descriptions and update orphans.rst
committed by Maria Zhukova
parent b0d9796c0f
commit afdeb6acfb
@@ -139,3 +139,61 @@ Orphans
page_weights_decompression_matmul_cpp.rst
page_matmul_with_host_scalar_scale_cpp.rst
page_matmul_f8_quantization_cpp.rst
page_augru_example_cpp_brief.rst
page_batch_normalization_example_cpp_brief.rst
page_binary_example_cpp_brief.rst
page_bnorm_u8_via_binary_postops_cpp_brief.rst
page_cnn_inference_f32_cpp_brief.rst
page_cnn_inference_int8_cpp_brief.rst
page_cnn_training_bf16_cpp_brief.rst
page_cnn_training_f32_cpp_brief.rst
page_concat_example_cpp_brief.rst
page_convolution_example_cpp_brief.rst
page_cpu_brgemm_example_cpp_brief.rst
page_cpu_matmul_coo_cpp_brief.rst
page_cpu_matmul_csr_cpp_brief.rst
page_cpu_matmul_quantization_cpp_brief.rst
page_cpu_matmul_weights_compression_cpp_brief.rst
page_cpu_rnn_inference_f32_cpp_brief.rst
page_cpu_rnn_inference_int8_cpp_brief.rst
page_cpu_sgemm_and_matmul_cpp_brief.rst
page_cross_engine_reorder_cpp_brief.rst
page_deconvolution_example_cpp_brief.rst
page_eltwise_example_cpp_brief.rst
page_getting_started_cpp_brief.rst
page_gpu_opencl_interop_cpp_brief.rst
page_graph_cpu_getting_started_cpp_brief.rst
page_graph_cpu_inference_int8_cpp_brief.rst
page_graph_cpu_single_op_partition_cpp_brief.rst
page_graph_gpu_opencl_getting_started_cpp_brief.rst
page_graph_sycl_getting_started_cpp_brief.rst
page_graph_sycl_single_op_partition_cpp_brief.rst
page_group_normalization_example_cpp_brief.rst
page_inference_int8_matmul_cpp_brief.rst
page_inner_product_example_cpp_brief.rst
page_layer_normalization_example_cpp_brief.rst
page_lbr_gru_example_cpp_brief.rst
page_lrn_example_cpp_brief.rst
page_lstm_example_cpp_brief.rst
page_matmul_example_cpp_brief.rst
page_matmul_f8_quantization_cpp_brief.rst
page_matmul_perf_cpp_brief.rst
page_matmul_with_host_scalar_scale_cpp_brief.rst
page_memory_format_propagation_cpp_brief.rst
page_performance_profiling_cpp_brief.rst
page_pooling_example_cpp_brief.rst
page_prelu_example_cpp_brief.rst
page_reduction_example_cpp_brief.rst
page_reorder_example_cpp_brief.rst
page_resampling_example_cpp_brief.rst
page_rnn_training_f32_cpp_brief.rst
page_shuffle_example_cpp_brief.rst
page_softmax_example_cpp_brief.rst
page_sum_example_cpp_brief.rst
page_sycl_interop_buffer_cpp_brief.rst
page_sycl_interop_usm_cpp_brief.rst
page_vanilla_rnn_example_cpp_brief.rst
page_weights_decompression_matmul_cpp_brief.rst
page_cnn_inference_f32_c_brief.rst
page_cpu_cnn_training_f32_c_brief.rst
page_cross_engine_reorder_c_brief.rst
@@ -17,17 +17,19 @@
/// @example bnorm_u8_via_binary_postops.cpp
/// > Annotated version: @ref bnorm_u8_via_binary_postops_cpp

/// @page bnorm_u8_via_binary_postops_cpp Bnorm u8 by binary post-ops example
/// The example implements the Batch normalization u8 via the following
/// @page bnorm_u8_via_binary_postops_cpp_brief
/// @brief The example implements the Batch normalization u8 via the following
/// operations: binary_sub(src, mean), binary_div(tmp_dst, variance),
/// binary_mul(tmp_dst, scale), binary_add(tmp_dst, shift).

/// @page bnorm_u8_via_binary_postops_cpp Bnorm u8 by binary post-ops example
/// \copybrief bnorm_u8_via_binary_postops_cpp_brief
///
/// Some key take-aways include:
///
/// * How tensors are implemented and submitted to primitives.
/// * How primitives are created.
/// * How to use multiple binary post operations.
/// * How to use different data types in binary.
/// - How tensors are implemented and submitted to primitives.
/// - How primitives are created.
/// - How to use multiple binary post operations.
/// - How to use different data types in binary.
///
/// @include bnorm_u8_via_binary_postops.cpp
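The four-step decomposition named in the brief maps onto a single binary primitive with three chained binary post-ops. Below is a minimal sketch under the oneDNN v3.x C++ API, assuming NHWC u8 input, per-channel f32 statistics/parameters, and f32 output (the shipped bnorm_u8_via_binary_postops.cpp additionally quantizes the result back to u8 and fills the tensors with real data):

```cpp
#include <cstdint>
#include <vector>
#include "oneapi/dnnl/dnnl.hpp"

int main() {
    using namespace dnnl;
    const memory::dim N = 1, C = 8, H = 4, W = 4;

    engine eng(engine::kind::cpu, 0);
    stream strm(eng);

    memory::desc src_md({N, C, H, W}, memory::data_type::u8, memory::format_tag::nhwc);
    memory::desc dst_md({N, C, H, W}, memory::data_type::f32, memory::format_tag::nhwc);
    // Per-channel statistics/parameters, broadcast over N, H, W.
    memory::desc par_md({1, C, 1, 1}, memory::data_type::f32, memory::format_tag::nhwc);

    std::vector<uint8_t> src(N * C * H * W, 1);
    std::vector<float> dst(N * C * H * W), mean(C, 0.f), var(C, 1.f),
            scale(C, 1.f), shift(C, 0.f);

    memory src_mem(src_md, eng, src.data()), dst_mem(dst_md, eng, dst.data());
    memory mean_mem(par_md, eng, mean.data()), var_mem(par_md, eng, var.data());
    memory scale_mem(par_md, eng, scale.data()), shift_mem(par_md, eng, shift.data());

    // binary_sub(src, mean) is the main op; div/mul/add are chained as post-ops.
    post_ops ops;
    ops.append_binary(algorithm::binary_div, par_md);
    ops.append_binary(algorithm::binary_mul, par_md);
    ops.append_binary(algorithm::binary_add, par_md);
    primitive_attr attr;
    attr.set_post_ops(ops);

    auto pd = binary::primitive_desc(
            eng, algorithm::binary_sub, src_md, par_md, dst_md, attr);
    binary(pd).execute(strm,
            {{DNNL_ARG_SRC_0, src_mem}, {DNNL_ARG_SRC_1, mean_mem},
                    {DNNL_ARG_DST, dst_mem},
                    {DNNL_ARG_ATTR_MULTIPLE_POST_OP(0) | DNNL_ARG_SRC_1, var_mem},
                    {DNNL_ARG_ATTR_MULTIPLE_POST_OP(1) | DNNL_ARG_SRC_1, scale_mem},
                    {DNNL_ARG_ATTR_MULTIPLE_POST_OP(2) | DNNL_ARG_SRC_1, shift_mem}});
    strm.wait();
    return 0;
}
```

Each post-op consumes an extra src1 tensor, passed at execution time through the DNNL_ARG_ATTR_MULTIPLE_POST_OP(index) argument slots, which is also how the annotated example wires its per-channel tensors.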
@@ -17,9 +17,12 @@
/// @example cnn_inference_f32.c
/// > Annotated version: @ref cnn_inference_f32_c

/// @page cnn_inference_f32_c CNN f32 inference example
/// This C API example demonstrates how to build an AlexNet neural
/// @page cnn_inference_f32_c_brief
/// @brief This C API example demonstrates how to build an AlexNet neural
/// network topology for forward-pass inference.

/// @page cnn_inference_f32_c CNN f32 inference example
/// \copybrief cnn_inference_f32_c_brief
///
/// Some key take-aways include:
///

@@ -17,9 +17,12 @@
/// @example cnn_inference_f32.cpp
/// > Annotated version: @ref cnn_inference_f32_cpp

/// @page cnn_inference_f32_cpp_brief
/// @brief This C++ API example demonstrates how to build an AlexNet neural network
/// topology for forward-pass inference.

/// @page cnn_inference_f32_cpp CNN f32 inference example
/// This C++ API example demonstrates how to build an AlexNet neural
/// network topology for forward-pass inference.
/// \copybrief cnn_inference_f32_cpp_brief
///
/// > Example code: @ref cnn_inference_f32.cpp
///

@@ -17,9 +17,12 @@
/// @example cnn_inference_int8.cpp
/// > Annotated version: @ref cnn_inference_int8_cpp

/// @page cnn_inference_int8_cpp_brief
/// @brief This C++ API example demonstrates how to run AlexNet's conv3 and relu3 with
/// int8 data type.

/// @page cnn_inference_int8_cpp CNN int8 inference example
/// This C++ API example demonstrates how to run AlexNet's conv3 and relu3
/// with int8 data type.
/// \copybrief cnn_inference_int8_cpp_brief
///
/// > Example code: @ref cnn_inference_int8.cpp

@@ -17,11 +17,12 @@
/// @example cnn_training_bf16.cpp
/// > Annotated version: @ref cnn_training_bf16_cpp
///
/// @page cnn_training_bf16_cpp CNN bf16 training example
/// This C++ API example demonstrates how to build an AlexNet model training
/// @page cnn_training_bf16_cpp_brief
/// @brief This C++ API example demonstrates how to build an AlexNet model training
/// using the bfloat16 data type.
///
/// The example implements a few layers from AlexNet model.

/// @page cnn_training_bf16_cpp CNN bf16 training example
/// \copybrief cnn_training_bf16_cpp_brief
///
/// @include cnn_training_bf16.cpp
@@ -17,9 +17,11 @@
/// @example cnn_training_f32.cpp
/// > Annotated version: @ref cnn_training_f32_cpp
///
/// @page cnn_training_f32_cpp_brief
/// @brief This C++ API example demonstrates how to build an AlexNet model training.

/// @page cnn_training_f32_cpp CNN f32 training example
/// This C++ API example demonstrates how to build an AlexNet model training.
/// The example implements a few layers from AlexNet model.
/// \copybrief cnn_training_f32_cpp_brief
///
/// @include cnn_training_f32.cpp

@@ -17,9 +17,12 @@
/// @example cpu_cnn_training_f32.c
/// > Annotated version: @ref cpu_cnn_training_f32_c

/// @page cpu_cnn_training_f32_c CNN f32 training example
/// This C API example demonstrates how to build an AlexNet model training.
/// @page cpu_cnn_training_f32_c_brief
/// @brief This C API example demonstrates how to build an AlexNet model training.
/// The example implements a few layers from AlexNet model.

/// @page cpu_cnn_training_f32_c CNN f32 training example
/// \copybrief cpu_cnn_training_f32_c_brief
///
/// @include cpu_cnn_training_f32.c

@@ -17,10 +17,13 @@
/// @example cpu_matmul_coo.cpp
/// > Annotated version: @ref cpu_matmul_coo_cpp

/// @page cpu_matmul_coo_cpp MatMul Primitive with Sparse Memory in COO Format
/// This C++ API example demonstrates how to create and execute a
/// @page cpu_matmul_coo_cpp_brief
/// @brief This C++ API example demonstrates how to create and execute a
/// [MatMul](@ref dev_guide_matmul) primitive that uses a source tensor
/// encoded with the COO sparse encoding.

/// @page cpu_matmul_coo_cpp MatMul Primitive with Sparse Memory in COO Format
/// \copybrief cpu_matmul_coo_cpp_brief
///
/// @include cpu_matmul_coo.cpp

@@ -17,10 +17,13 @@
/// @example cpu_matmul_csr.cpp
/// > Annotated version: @ref cpu_matmul_csr_cpp

/// @page cpu_matmul_csr_cpp MatMul Primitive with Sparse Memory in CSR Format
/// This C++ API example demonstrates how to create and execute a
/// @page cpu_matmul_csr_cpp_brief
/// @brief This C++ API example demonstrates how to create and execute a
/// [MatMul](@ref dev_guide_matmul) primitive that uses a source tensor
/// encoded with the CSR sparse encoding.

/// @page cpu_matmul_csr_cpp MatMul Primitive with Sparse Memory in CSR Format
/// \copybrief cpu_matmul_csr_cpp_brief
///
/// @include cpu_matmul_csr.cpp

@@ -17,10 +17,13 @@
/// @example cpu_matmul_weights_compression.cpp
/// > Annotated version: @ref cpu_matmul_weights_compression_cpp

/// @page cpu_matmul_weights_compression_cpp MatMul Primitive Example
/// This C++ API example demonstrates how to create and execute a
/// @page cpu_matmul_weights_compression_cpp_brief
/// @brief This C++ API example demonstrates how to create and execute a
/// [MatMul](@ref dev_guide_matmul) primitive that uses a weights tensor
/// encoded with the packed sparse encoding.

/// @page cpu_matmul_weights_compression_cpp MatMul Primitive Example
/// \copybrief cpu_matmul_weights_compression_cpp_brief
///
/// @include cpu_matmul_weights_compression.cpp
@@ -17,8 +17,11 @@
/// @example cpu_rnn_inference_f32.cpp
/// > Annotated version: @ref cpu_rnn_inference_f32_cpp

/// @page cpu_rnn_inference_f32_cpp RNN f32 inference example
/// This C++ API example demonstrates how to build GNMT model inference.
/// @page cpu_rnn_inference_f32_cpp_brief
/// @brief This C++ API example demonstrates how to build GNMT model inference.

/// @page cpu_rnn_inference_f32_cpp RNN f32 Inference Example
/// \copybrief cpu_rnn_inference_f32_cpp_brief
///
/// > Example code: @ref cpu_rnn_inference_f32.cpp
///

@@ -17,8 +17,11 @@
/// @example cpu_rnn_inference_int8.cpp
/// > Annotated version: @ref cpu_rnn_inference_int8_cpp

/// @page cpu_rnn_inference_int8_cpp_brief
/// @brief This C++ API example demonstrates how to build GNMT model inference.

/// @page cpu_rnn_inference_int8_cpp RNN int8 inference example
/// This C++ API example demonstrates how to build GNMT model inference.
/// \copybrief cpu_rnn_inference_int8_cpp_brief
///
/// > Example code: @ref cpu_rnn_inference_int8.cpp
///

@@ -17,9 +17,12 @@
/// @example cross_engine_reorder.c
/// > Annotated version: @ref cross_engine_reorder_c

/// @page cross_engine_reorder_c Reorder between CPU and GPU engines
/// This C API example demonstrates programming flow when reordering memory
/// @page cross_engine_reorder_c_brief
/// @brief This C API example demonstrates programming flow when reordering memory
/// between CPU and GPU engines.

/// @page cross_engine_reorder_c Reorder between CPU and GPU engines
/// \copybrief cross_engine_reorder_c_brief
///
/// @include cross_engine_reorder.c

@@ -17,9 +17,12 @@
/// @example cross_engine_reorder.cpp
/// > Annotated version: @ref cross_engine_reorder_cpp

/// @page cross_engine_reorder_cpp Reorder between CPU and GPU engines
/// This C++ API example demonstrates programming flow when reordering memory
/// @page cross_engine_reorder_cpp_brief
/// @brief This C++ API example demonstrates programming flow when reordering memory
/// between CPU and GPU engines.

/// @page cross_engine_reorder_cpp Reorder between CPU and GPU engines
/// \copybrief cross_engine_reorder_cpp_brief
///
/// > Example code: @ref cross_engine_reorder.cpp
///
@@ -29,10 +29,15 @@

using namespace dnnl;

/// @page getting_started_cpp_brief
/// @brief This C++ API example demonstrates the basics of the oneDNN programming model.

/// @page getting_started_cpp oneDNN API Basic Workflow Tutorial
/// \copybrief getting_started_cpp_brief
///
/// > Example code: @ref getting_started.cpp
///
/// This C++ API example demonstrates the basics of the oneDNN programming model:
/// Key concepts:
/// - How to create oneDNN memory objects.
/// - How to get data from the user's buffer into a oneDNN memory object.
/// - How a tensor's logical dimensions and memory object formats relate.
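The memory-related concepts listed above reduce to a handful of calls. A minimal sketch for a CPU engine follows (the tensor shape and data are made up for illustration; GPU engines need mapping or explicit copies instead of the raw-handle access shown here):

```cpp
#include <algorithm>
#include <numeric>
#include <vector>
#include "oneapi/dnnl/dnnl.hpp"

int main() {
    using namespace dnnl;
    engine eng(engine::kind::cpu, 0); // CPU engine, index 0

    // Logical dimensions (N, C, H, W) plus data type and format tag describe the tensor.
    memory::desc md({1, 3, 4, 4}, memory::data_type::f32, memory::format_tag::nchw);

    // A user-owned buffer with some data.
    std::vector<float> user_data(1 * 3 * 4 * 4);
    std::iota(user_data.begin(), user_data.end(), 0.f);

    // Option 1: wrap the user buffer directly (no copy on CPU).
    memory mem_wrapping_user_buffer(md, eng, user_data.data());

    // Option 2: let the library allocate the buffer and copy the data in.
    memory mem_owned_by_library(md, eng);
    std::copy(user_data.begin(), user_data.end(),
            static_cast<float *>(mem_owned_by_library.get_data_handle()));
    return 0;
}
```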
@@ -17,9 +17,12 @@
/// @example gpu_opencl_interop.cpp
/// > Annotated version: @ref gpu_opencl_interop_cpp

/// @page gpu_opencl_interop_cpp Getting started on GPU with OpenCL extensions API
/// This C++ API example demonstrates programming for Intel(R) Processor
/// @page gpu_opencl_interop_cpp_brief
/// @brief This C++ API example demonstrates programming for Intel(R) Processor
/// Graphics with OpenCL* extensions API in oneDNN.

/// @page gpu_opencl_interop_cpp Getting started on GPU with OpenCL extensions API
/// \copybrief gpu_opencl_interop_cpp_brief
///
/// > Example code: @ref gpu_opencl_interop.cpp
///

@@ -17,9 +17,12 @@
/// @example cpu_getting_started.cpp
/// > Annotated version: @ref graph_cpu_getting_started_cpp

/// @page graph_cpu_getting_started_cpp Getting started on CPU with Graph API
/// This is an example to demonstrate how to build a simple graph and run it on
/// @page graph_cpu_getting_started_cpp_brief
/// @brief This is an example to demonstrate how to build a simple graph and run it on
/// CPU.

/// @page graph_cpu_getting_started_cpp Getting started on CPU with Graph API
/// \copybrief graph_cpu_getting_started_cpp_brief
///
/// > Example code: @ref cpu_getting_started.cpp
///

@@ -17,9 +17,12 @@
/// @example cpu_inference_int8.cpp
/// Annotated version: @ref graph_cpu_inference_int8_cpp

/// @page graph_cpu_inference_int8_cpp_brief
/// @brief This is an example to demonstrate how to build an int8 graph with Graph API
/// and run it on CPU.

/// @page graph_cpu_inference_int8_cpp Convolution int8 inference example with Graph API
/// This is an example to demonstrate how to build an int8 graph with Graph
/// API and run it on CPU.
/// \copybrief graph_cpu_inference_int8_cpp_brief
///
/// > Example code: @ref cpu_inference_int8.cpp
///

@@ -17,8 +17,12 @@
/// @example cpu_single_op_partition.cpp
/// > Annotated version: @ref graph_cpu_single_op_partition_cpp

/// @page graph_cpu_single_op_partition_cpp_brief
/// @brief This is an example to demonstrate how to build a simple op graph and run it
/// on CPU.

/// @page graph_cpu_single_op_partition_cpp Single op partition on CPU
/// This is an example to demonstrate how to build a simple op graph and run it on CPU.
/// \copybrief graph_cpu_single_op_partition_cpp_brief
///
/// > Example code: @ref cpu_single_op_partition.cpp
///

@@ -17,9 +17,12 @@
/// @example gpu_opencl_getting_started.cpp
/// > Annotated version: @ref graph_gpu_opencl_getting_started_cpp

/// @page graph_gpu_opencl_getting_started_cpp Getting started with OpenCL extensions and Graph API
/// This is an example to demonstrate how to build a simple graph and run on
/// @page graph_gpu_opencl_getting_started_cpp_brief
/// @brief This is an example to demonstrate how to build a simple graph and run on
/// OpenCL GPU runtime.

/// @page graph_gpu_opencl_getting_started_cpp Getting started with OpenCL extensions and Graph API
/// \copybrief graph_gpu_opencl_getting_started_cpp_brief
///
/// > Example code: @ref gpu_opencl_getting_started.cpp
///

@@ -17,9 +17,12 @@
/// @example sycl_getting_started.cpp
/// > Annotated version: @ref graph_sycl_getting_started_cpp

/// @page graph_sycl_getting_started_cpp Getting started with SYCL extensions API and Graph API
/// This is an example to demonstrate how to build a simple graph and run on
/// @page graph_sycl_getting_started_cpp_brief
/// @brief This is an example to demonstrate how to build a simple graph and run on
/// SYCL device.

/// @page graph_sycl_getting_started_cpp Getting started with SYCL extensions API and Graph API
/// \copybrief graph_sycl_getting_started_cpp_brief
///
/// > Example code: @ref sycl_getting_started.cpp
///

@@ -17,8 +17,12 @@
/// @example sycl_single_op_partition.cpp
/// > Annotated version: @ref graph_sycl_single_op_partition_cpp

/// @page graph_sycl_single_op_partition_cpp_brief
/// @brief This is an example to demonstrate how to build a simple op graph and run it
/// on GPU.

/// @page graph_sycl_single_op_partition_cpp Single op partition on GPU
/// This is an example to demonstrate how to build a simple op graph and run it on gpu.
/// \copybrief graph_sycl_single_op_partition_cpp_brief
///
/// > Example code: @ref sycl_single_op_partition.cpp
///
@@ -16,10 +16,13 @@

/// @example matmul_f8_quantization.cpp
/// > Annotated version: @ref matmul_f8_quantization_cpp
///

/// @page matmul_f8_quantization_cpp_brief
/// @brief C++ API example demonstrating how to use f8_e5m2 and f8_e4m3 data types for
/// [MatMul](@ref dev_guide_matmul) with scaling for quantization.

/// @page matmul_f8_quantization_cpp Matrix Multiplication with f8 Quantization
/// C++ API example demonstrating how to use f8_e5m2 and f8_e4m3 data types
/// for [MatMul](@ref dev_guide_matmul) with scaling for quantization.
/// \copybrief matmul_f8_quantization_cpp_brief
///
/// Specification of f8 Formats:
/// - **f8_e5m2**: 1 sign + 5 exponent + 2 mantissa bits, max value is 57,344.

@@ -27,8 +30,8 @@
///
/// Concepts:
/// - f8 quantization.
/// - f8_e5m2 and f8_e4m3 data type conversion from f32 is done using [Reorder primitive](@ref dev_guide_reorder)
/// with simple scaling factors.
/// - f8_e5m2 and f8_e4m3 data type conversion from f32 is done using
/// [Reorder primitive](@ref dev_guide_reorder) with simple scaling factors.
/// - Matrix multiplication with f8 inputs and f32 output.
/// - Scaling is done using dnnl::primitive_attr::set_scales_mask().
///
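As a quick check of the f8_e5m2 bound quoted above, assuming the usual OCP FP8 convention (5 exponent bits, bias 15, largest non-special significand 1.11 in binary): max = 1.75 x 2^15 = 1.75 x 32,768 = 57,344. The analogous calculation for f8_e4m3 (4 exponent bits, bias 7, largest significand 1.110 in binary) gives max = 1.75 x 2^8 = 448.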
@@ -17,9 +17,12 @@
/// @example matmul_perf.cpp
/// > Annotated version: @ref matmul_perf_cpp

/// @page matmul_perf_cpp Matrix Multiplication Performance Example
/// This C++ example runs a simple matrix multiplication (matmul) performance
/// @page matmul_perf_cpp_brief
/// @brief This C++ example runs a simple matrix multiplication (matmul) performance
/// test using oneDNN.

/// @page matmul_perf_cpp Matrix Multiplication Performance Example
/// \copybrief matmul_perf_cpp_brief
///
/// The workflow includes following steps:
/// - Set up and execute a matmul operation with the specified engine kind

@@ -28,10 +28,13 @@ using namespace dnnl;
/// @example matmul_with_host_scalar_scale.cpp
/// > Annotated version: @ref matmul_with_host_scalar_scale_cpp

/// @page matmul_with_host_scalar_scale_cpp_brief
/// @brief This C++ API example demonstrates matrix multiplication (C = alpha * A * B)
/// with a scalar scaling factor residing on the host.

/// @page matmul_with_host_scalar_scale_cpp MatMul with Host Scalar Scale example
/// \copybrief matmul_with_host_scalar_scale_cpp_brief
///
/// This C++ API example demonstrates matrix multiplication (C = alpha * A * B)
/// with a scalar scale factor using oneDNN.
/// The workflow includes following steps:
/// - Initialize a oneDNN engine and stream for computation.
/// - Allocate and initialize matrices A and B.

@@ -21,9 +21,12 @@
#include <sstream>
#include <string>

/// @page memory_format_propagation_cpp Memory Format Propagation
/// This example demonstrates memory format propagation, which is critical for
/// @page memory_format_propagation_cpp_brief
/// @brief This example demonstrates memory format propagation, which is critical for
/// deep learning applications performance.

/// @page memory_format_propagation_cpp Memory Format Propagation
/// \copybrief memory_format_propagation_cpp_brief
///
/// > Example code: @ref memory_format_propagation.cpp
///

@@ -17,9 +17,12 @@
/// @example performance_profiling.cpp
/// > Annotated version: @ref performance_profiling_cpp

/// @page performance_profiling_cpp Performance Profiling Example
/// This example demonstrates the best practices for application performance
/// @page performance_profiling_cpp_brief
/// @brief This example demonstrates the best practices for application performance
/// optimizations with oneDNN.

/// @page performance_profiling_cpp Performance Profiling Example
/// \copybrief performance_profiling_cpp_brief
///
/// > Example code: @ref performance_profiling.cpp
///
@@ -17,11 +17,12 @@
/// @example augru.cpp
/// > Annotated version: @ref augru_example_cpp

/// @page augru_example_cpp_brief
/// @brief This C++ API example demonstrates how to create and execute an
/// [AUGRU RNN](@ref dev_guide_rnn) primitive in forward training propagation mode.

/// @page augru_example_cpp AUGRU RNN Primitive Example
///
/// This C++ API example demonstrates how to create and execute an
/// [AUGRU RNN](@ref dev_guide_rnn) primitive in forward training propagation
/// mode.
/// \copybrief augru_example_cpp_brief
///
/// Key optimizations included in this example:
/// - Creation of optimized memory format from the primitive descriptor.

@@ -17,11 +17,13 @@
/// @example batch_normalization.cpp
/// > Annotated version: @ref batch_normalization_example_cpp

/// @page batch_normalization_example_cpp_brief
/// @brief This C++ API example demonstrates how to create and execute a
/// [Batch Normalization](@ref dev_guide_batch_normalization) primitive in forward
/// training propagation mode.

/// @page batch_normalization_example_cpp Batch Normalization Primitive Example
///
/// This C++ API example demonstrates how to create and execute a
/// [Batch Normalization](@ref dev_guide_batch_normalization) primitive in
/// forward training propagation mode.
/// \copybrief batch_normalization_example_cpp_brief
///
/// Key optimizations included in this example:
/// - In-place primitive execution;

@@ -17,9 +17,12 @@
/// @example binary.cpp
/// > Annotated version: @ref binary_example_cpp

/// @page binary_example_cpp Binary Primitive Example
/// This C++ API example demonstrates how to create and execute a
/// @page binary_example_cpp_brief
/// @brief This C++ API example demonstrates how to create and execute a
/// [Binary](@ref dev_guide_binary) primitive.

/// @page binary_example_cpp Binary Primitive Example
/// \copybrief binary_example_cpp_brief
///
/// Key optimizations included in this example:
/// - In-place primitive execution;

@@ -17,9 +17,12 @@
/// @example concat.cpp
/// > Annotated version: @ref concat_example_cpp

/// @page concat_example_cpp Concat Primitive Example
/// This C++ API example demonstrates how to create and execute a
/// @page concat_example_cpp_brief
/// @brief This C++ API example demonstrates how to create and execute a
/// [Concat](@ref dev_guide_concat) primitive.

/// @page concat_example_cpp Concat Primitive Example
/// \copybrief concat_example_cpp_brief
///
/// Key optimizations included in this example:
/// - Identical source (src) memory formats.

@@ -17,10 +17,13 @@
/// @example convolution.cpp
/// > Annotated version: @ref convolution_example_cpp

/// @page convolution_example_cpp Convolution Primitive Example
/// This C++ API example demonstrates how to create and execute a
/// @page convolution_example_cpp_brief
/// @brief This C++ API example demonstrates how to create and execute a
/// [Convolution](@ref dev_guide_convolution) primitive in forward propagation
/// mode in two configurations - with and without groups.

/// @page convolution_example_cpp Convolution Primitive Example
/// \copybrief convolution_example_cpp_brief
///
/// Key optimizations included in this example:
/// - Creation of optimized memory format from the primitive descriptor;

@@ -17,10 +17,13 @@
/// @example deconvolution.cpp
/// > Annotated version: @ref deconvolution_example_cpp

/// @page deconvolution_example_cpp Deconvolution Primitive Example
/// This C++ API example demonstrates how to create and execute a
/// @page deconvolution_example_cpp_brief
/// @brief This C++ API example demonstrates how to create and execute a
/// [Deconvolution](@ref dev_guide_convolution) primitive in forward propagation
/// mode.

/// @page deconvolution_example_cpp Deconvolution Primitive Example
/// \copybrief deconvolution_example_cpp_brief
///
/// Key optimizations included in this example:
/// - Creation of optimized memory format from the primitive descriptor;

@@ -17,10 +17,13 @@
/// @example eltwise.cpp
/// > Annotated version: @ref eltwise_example_cpp

/// @page eltwise_example_cpp Element-Wise Primitive Example
/// This C++ API example demonstrates how to create and execute an
/// @page eltwise_example_cpp_brief
/// @brief This C++ API example demonstrates how to create and execute an
/// [Element-wise](@ref dev_guide_eltwise) primitive in forward training
/// propagation mode.

/// @page eltwise_example_cpp Element-Wise Primitive Example
/// \copybrief eltwise_example_cpp_brief
///
/// @include eltwise.cpp
@@ -17,10 +17,13 @@
/// @example group_normalization.cpp
/// > Annotated version: @ref group_normalization_example_cpp

/// @page group_normalization_example_cpp_brief
/// @brief This C++ API example demonstrates how to create and execute a
/// [Group Normalization](@ref dev_guide_group_normalization) primitive in forward
/// training propagation mode.

/// @page group_normalization_example_cpp Group Normalization Primitive Example
/// This C++ API example demonstrates how to create and execute a
/// [Group Normalization](@ref dev_guide_group_normalization) primitive in
/// forward training propagation mode.
/// \copybrief group_normalization_example_cpp_brief
///
/// Key optimizations included in this example:
/// - In-place primitive execution;

@@ -17,9 +17,12 @@
/// @example inner_product.cpp
/// > Annotated version: @ref inner_product_example_cpp

/// @page inner_product_example_cpp Inner Product Primitive Example
/// This C++ API example demonstrates how to create and execute an
/// @page inner_product_example_cpp_brief
/// @brief This C++ API example demonstrates how to create and execute an
/// [Inner Product](@ref dev_guide_inner_product) primitive.

/// @page inner_product_example_cpp Inner Product Primitive Example
/// \copybrief inner_product_example_cpp_brief
///
/// Key optimizations included in this example:
/// - Primitive attributes with fused post-ops;

@@ -17,10 +17,13 @@
/// @example layer_normalization.cpp
/// > Annotated version: @ref layer_normalization_example_cpp

/// @page layer_normalization_example_cpp_brief
/// @brief This C++ API example demonstrates how to create and execute a
/// [Layer normalization](@ref dev_guide_layer_normalization) primitive in forward
/// propagation mode.

/// @page layer_normalization_example_cpp Layer Normalization Primitive Example
/// This C++ API example demonstrates how to create and execute a
/// [Layer normalization](@ref dev_guide_layer_normalization) primitive in
/// forward propagation mode.
/// \copybrief layer_normalization_example_cpp_brief
///
/// Key optimizations included in this example:
/// - In-place primitive execution;

@@ -17,10 +17,13 @@
/// @example lbr_gru.cpp
/// > Annotated version: @ref lbr_gru_example_cpp

/// @page lbr_gru_example_cpp Linear-Before-Reset GRU RNN Primitive Example
/// This C++ API example demonstrates how to create and execute a
/// @page lbr_gru_example_cpp_brief
/// @brief This C++ API example demonstrates how to create and execute a
/// [Linear-Before-Reset GRU RNN](@ref dev_guide_rnn) primitive in forward
/// training propagation mode.

/// @page lbr_gru_example_cpp Linear-Before-Reset GRU RNN Primitive Example
/// \copybrief lbr_gru_example_cpp_brief
///
/// Key optimizations included in this example:
/// - Creation of optimized memory format from the primitive descriptor.

@@ -17,10 +17,13 @@
/// @example lrn.cpp
/// > Annotated version: @ref lrn_example_cpp

/// @page lrn_example_cpp Local Response Normalization Primitive Example
/// This C++ API demonstrates how to create and execute a
/// @page lrn_example_cpp_brief
/// @brief This C++ API demonstrates how to create and execute a
/// [Local response normalization](@ref dev_guide_lrn) primitive in forward
/// training propagation mode.

/// @page lrn_example_cpp Local Response Normalization Primitive Example
/// \copybrief lrn_example_cpp_brief
///
/// @include lrn.cpp

@@ -17,10 +17,12 @@
/// @example lstm.cpp
/// > Annotated version: @ref lstm_example_cpp

/// @page lstm_example_cpp_brief
/// @brief This C++ API example demonstrates how to create and execute an
/// [LSTM RNN](@ref dev_guide_rnn) primitive in forward training propagation mode.

/// @page lstm_example_cpp LSTM RNN Primitive Example
/// This C++ API example demonstrates how to create and execute an
/// [LSTM RNN](@ref dev_guide_rnn) primitive in forward training propagation
/// mode.
/// \copybrief lstm_example_cpp_brief
///
/// Key optimizations included in this example:
/// - Creation of optimized memory format from the primitive descriptor.

@@ -16,9 +16,12 @@

/// @example matmul.cpp

/// @page matmul_example_cpp Matmul Primitive Example
/// This C++ API example demonstrates how to create and execute a
/// @page matmul_example_cpp_brief
/// @brief This C++ API example demonstrates how to create and execute a
/// [MatMul](@ref dev_guide_matmul) primitive.

/// @page matmul_example_cpp Matmul Primitive Example
/// \copybrief matmul_example_cpp_brief
///
/// Key optimizations included in this example:
/// - Primitive attributes with fused post-ops.
@@ -17,10 +17,12 @@
/// @example pooling.cpp
/// > Annotated version: @ref pooling_example_cpp

/// @page pooling_example_cpp_brief
/// @brief This C++ API example demonstrates how to create and execute a
/// [Pooling](@ref dev_guide_pooling) primitive in forward training propagation mode.

/// @page pooling_example_cpp Pooling Primitive Example
/// This C++ API example demonstrates how to create and execute a
/// [Pooling](@ref dev_guide_pooling) primitive in forward training propagation
/// mode.
/// \copybrief pooling_example_cpp_brief
///
/// @include pooling.cpp

@@ -17,10 +17,12 @@
/// @example prelu.cpp
/// > Annotated version: @ref prelu_example_cpp

/// @page prelu_example_cpp_brief
/// @brief This C++ API example demonstrates how to create and execute an
/// [PReLU](@ref dev_guide_prelu) primitive in forward training propagation mode.

/// @page prelu_example_cpp Primitive Example
/// This C++ API example demonstrates how to create and execute an
/// [PReLU](@ref dev_guide_prelu) primitive in forward training
/// propagation mode.
/// \copybrief prelu_example_cpp_brief
///
/// @include prelu.cpp

@@ -17,10 +17,12 @@
/// @example reduction.cpp
/// > Annotated version: @ref reduction_example_cpp

/// @page reduction_example_cpp Reduction Primitive Example
/// This C++ API example demonstrates how to create and execute a
/// @page reduction_example_cpp_brief
/// @brief This C++ API example demonstrates how to create and execute a
/// [Reduction](@ref dev_guide_reduction) primitive.
///

/// @page reduction_example_cpp Reduction Primitive Example
/// \copybrief reduction_example_cpp_brief
/// @include reduction.cpp

#include <cmath>

@@ -17,9 +17,12 @@
/// @example reorder.cpp
/// > Annotated version: @ref reorder_example_cpp

/// @page reorder_example_cpp Reorder Primitive Example
/// This C++ API demonstrates how to create and execute a
/// @page reorder_example_cpp_brief
/// @brief This C++ API demonstrates how to create and execute a
/// [Reorder](@ref dev_guide_reorder) primitive.

/// @page reorder_example_cpp Reorder Primitive Example
/// \copybrief reorder_example_cpp_brief
///
/// Key optimizations included in this example:
/// - Primitive attributes for output scaling.

@@ -17,11 +17,13 @@
/// @example resampling.cpp
/// > Annotated version: @ref resampling_example_cpp

/// @page resampling_example_cpp Resampling Primitive Example
/// This C++ API example demonstrates how to create and execute a
/// @page resampling_example_cpp_brief
/// @brief This C++ API example demonstrates how to create and execute a
/// [Resampling](@ref dev_guide_resampling) primitive in forward training
/// propagation mode.
///

/// @page resampling_example_cpp Resampling Primitive Example
/// \copybrief resampling_example_cpp_brief
/// @include resampling.cpp

#include <algorithm>

@@ -17,9 +17,12 @@
/// @example shuffle.cpp
/// > Annotated version: @ref shuffle_example_cpp

/// @page shuffle_example_cpp Shuffle Primitive Example
/// This C++ API example demonstrates how to create and execute a
/// @page shuffle_example_cpp_brief
/// @brief This C++ API example demonstrates how to create and execute a
/// [Shuffle](@ref dev_guide_shuffle) primitive.

/// @page shuffle_example_cpp Shuffle Primitive Example
/// \copybrief shuffle_example_cpp_brief
///
/// Key optimizations included in this example:
/// - Shuffle along axis 1 (channels).

@@ -17,10 +17,12 @@
/// @example softmax.cpp
/// > Annotated version: @ref softmax_example_cpp

/// @page softmax_example_cpp_brief
/// @brief This C++ API example demonstrates how to create and execute a
/// [Softmax](@ref dev_guide_softmax) primitive in forward training propagation mode.

/// @page softmax_example_cpp Softmax Primitive Example
/// This C++ API example demonstrates how to create and execute a
/// [Softmax](@ref dev_guide_softmax) primitive in forward training propagation
/// mode.
/// \copybrief softmax_example_cpp_brief
///
/// Key optimizations included in this example:
/// - In-place primitive execution;
@@ -17,9 +17,12 @@
/// @example sum.cpp
/// > Annotated version: @ref sum_example_cpp

/// @page sum_example_cpp Sum Primitive Example
/// This C++ API example demonstrates how to create and execute a
/// @page sum_example_cpp_brief
/// @brief This C++ API example demonstrates how to create and execute a
/// [Sum](@ref dev_guide_sum) primitive.

/// @page sum_example_cpp Sum Primitive Example
/// \copybrief sum_example_cpp_brief
///
/// Key optimizations included in this example:
/// - Identical memory formats for source (src) and destination (dst) tensors.

@@ -17,10 +17,12 @@
/// @example vanilla_rnn.cpp
/// > Annotated version: @ref vanilla_rnn_example_cpp

/// @page vanilla_rnn_example_cpp_brief
/// @brief This C++ API example demonstrates how to create and execute a
/// [Vanilla RNN](@ref dev_guide_rnn) primitive in forward training propagation mode.

/// @page vanilla_rnn_example_cpp Vanilla RNN Primitive Example
/// This C++ API example demonstrates how to create and execute a
/// [Vanilla RNN](@ref dev_guide_rnn) primitive in forward training propagation
/// mode.
/// \copybrief vanilla_rnn_example_cpp_brief
///
/// Key optimizations included in this example:
/// - Creation of optimized memory format from the primitive descriptor.

@@ -17,8 +17,11 @@
/// @example rnn_training_f32.cpp
/// > Annotated version: @ref rnn_training_f32_cpp

/// @page rnn_training_f32_cpp_brief
/// @brief This C++ API example demonstrates how to build GNMT model training.

/// @page rnn_training_f32_cpp RNN f32 training example
/// This C++ API example demonstrates how to build GNMT model training.
/// \copybrief rnn_training_f32_cpp_brief
///
/// @include rnn_training_f32.cpp
@@ -17,11 +17,15 @@
/// @example sycl_interop_buffer.cpp
/// > Annotated version: @ref sycl_interop_buffer_cpp

/// @page sycl_interop_buffer_cpp Getting started on both CPU and GPU with SYCL extensions API
/// @page sycl_interop_buffer_cpp_brief
/// @brief This C++ API example demonstrates programming for Intel(R) Processor
/// Graphics with SYCL extensions API in oneDNN.

/// @page sycl_interop_buffer_cpp Getting Started with SYCL Extensions API
/// \copybrief sycl_interop_buffer_cpp_brief
///
/// Full example text: @ref sycl_interop_buffer.cpp
///
/// This C++ API example demonstrates programming for Intel(R) Processor
/// Graphics with SYCL extensions API in oneDNN.
/// The workflow includes following steps:
/// - Create a GPU or CPU engine. It uses DPC++ as the runtime in this sample.
/// - Create a memory descriptor/object.

@@ -146,8 +150,10 @@ void sycl_interop_buffer_tutorial(engine::kind engine_kind) {

/// We are going to create an SYCL kernel that should initialize our data.
/// To execute SYCL kernel we need a SYCL queue.
/// For simplicity we can construct a stream and extract the SYCL queue from it.
/// The kernel initializes the data by the `0, -1, 2, -3, ...` sequence: `data[i] = (-1)^i * i`.
/// For simplicity we can construct a stream and extract the SYCL queue from
/// it.
/// The kernel initializes the data by the `0, -1, 2, -3, ...` sequence:
/// `data[i] = (-1)^i * i`.
/// @snippet sycl_interop_buffer.cpp sycl kernel exec
// [sycl kernel exec]
queue q = sycl_interop::get_queue(strm);
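The `data[i] = (-1)^i * i` initialization described above can also be reproduced with a plain SYCL 2020 kernel; a minimal standalone sketch, independent of the dnnl::stream/queue interop used in the example:

```cpp
#include <sycl/sycl.hpp>

int main() {
    constexpr size_t N = 8;
    sycl::queue q; // the example instead extracts the queue from a dnnl::stream
    sycl::buffer<float, 1> buf{sycl::range<1>(N)};

    q.submit([&](sycl::handler &cgh) {
        sycl::accessor data(buf, cgh, sycl::write_only);
        // data[i] = (-1)^i * i, i.e. 0, -1, 2, -3, ...
        cgh.parallel_for(sycl::range<1>(N), [=](sycl::id<1> i) {
            float v = static_cast<float>(i[0]);
            data[i] = (i[0] % 2) ? -v : v;
        });
    });
    q.wait();
    return 0;
}
```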
@@ -185,8 +191,10 @@ void sycl_interop_buffer_tutorial(engine::kind engine_kind) {
// [relu exec]
///
///@note
/// With DPC++ runtime, both CPU and GPU have asynchronous execution; However, the user can
/// call dnnl::stream::wait() to synchronize the stream and ensure that all
/// With DPC++ runtime, both CPU and GPU have asynchronous execution;
/// However, the user can
/// call dnnl::stream::wait() to synchronize the stream and ensure that
/// all
/// previously submitted primitives are completed.
///

@@ -196,8 +204,10 @@ void sycl_interop_buffer_tutorial(engine::kind engine_kind) {
/// Before running validation codes, we need to access the SYCL memory on
/// the host.
/// The simplest way to access the SYCL-backed memory on the host is to
/// construct a host accessor. Then we can directly read and write this data on the host.
/// However no any conflicting operations are allowed until the host accessor is destroyed.
/// construct a host accessor. Then we can directly read and write this data
/// on the host.
/// However no any conflicting operations are allowed until the host
/// accessor is destroyed.
/// We can run validation codes on the host accordingly.
/// @snippet sycl_interop_buffer.cpp Check the results
// [Check the results]

@@ -17,10 +17,13 @@
/// @example sycl_interop_usm.cpp
/// > Annotated version: @ref sycl_interop_usm_cpp

/// @page sycl_interop_usm_cpp SYCL USM example
///
/// This C++ API example demonstrates programming for Intel(R) Processor
/// @page sycl_interop_usm_cpp_brief
/// @brief This C++ API example demonstrates programming for Intel(R) Processor
/// Graphics with SYCL extensions API in oneDNN.

/// @page sycl_interop_usm_cpp SYCL USM Example
/// \copybrief sycl_interop_usm_cpp_brief
///
/// The workflow includes following steps:
/// - Create a GPU or CPU engine. It uses DPC++ as the runtime in this sample.
/// - Create a memory descriptor/object.
@@ -17,10 +17,13 @@
/// @example cpu_matmul_quantization.cpp
/// > Annotated version: @ref cpu_matmul_quantization_cpp

/// @page cpu_matmul_quantization_cpp MatMul Tutorial: Quantization
/// C++ API example demonstrating how one can perform reduced precision
/// @page cpu_matmul_quantization_cpp_brief
/// @brief C++ API example demonstrating how one can perform reduced precision
/// matrix-matrix multiplication using [MatMul](@ref dev_guide_matmul) and the
/// accuracy of the result compared to the floating point computations.

/// @page cpu_matmul_quantization_cpp MatMul Tutorial: Quantization
/// \copybrief cpu_matmul_quantization_cpp_brief
///
/// Concepts:
/// - **Static** and **dynamic** quantization
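For reference, both flavors rest on the affine scheme used throughout the oneDNN int8 documentation: q = clamp(round(x / scale) + zero_point, q_min, q_max) on quantization, with x approximately equal to scale * (q - zero_point) on the way back. Static quantization fixes the scale (and zero point) ahead of time from expected data ranges, while dynamic quantization derives them from the actual tensor at runtime. As a made-up worked instance: with scale = 0.5 and zero_point = 128 for u8, x = 3.2 maps to q = round(3.2 / 0.5) + 128 = 134.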
@@ -17,9 +17,12 @@
/// @example cpu_sgemm_and_matmul.cpp
/// > Annotated version: @ref cpu_sgemm_and_matmul_cpp

/// @page cpu_sgemm_and_matmul_cpp_brief
/// @brief C++ API example demonstrating [MatMul](@ref dev_guide_matmul) as a
/// replacement for SGEMM functions.

/// @page cpu_sgemm_and_matmul_cpp MatMul Tutorial: Comparison with SGEMM
/// C++ API example demonstrating [MatMul](@ref dev_guide_matmul)
/// as a replacement for SGEMM functions.
/// \copybrief cpu_sgemm_and_matmul_cpp_brief
///
/// Concepts:
/// - Create primitive once, use multiple times

@@ -16,10 +16,13 @@

/// @example inference_int8_matmul.cpp
/// > Annotated version: @ref inference_int8_matmul_cpp
///
/// @page inference_int8_matmul_cpp MatMul Tutorial: INT8 Inference
/// C++ API example demonstrating how one can use

/// @page inference_int8_matmul_cpp_brief
/// @brief C++ API example demonstrating how one can use
/// [MatMul](@ref dev_guide_matmul) fused with ReLU in INT8 inference.

/// @page inference_int8_matmul_cpp MatMul Tutorial: INT8 Inference
/// \copybrief inference_int8_matmul_cpp_brief
///
/// Concepts:
/// - Asymmetric quantization

@@ -16,10 +16,13 @@

/// @example weights_decompression_matmul.cpp
/// > Annotated version: @ref weights_decompression_matmul_cpp
///
/// @page weights_decompression_matmul_cpp MatMul Tutorial: weights decompression
/// C++ API example demonstrating how one can use

/// @page weights_decompression_matmul_cpp_brief
/// @brief C++ API example demonstrating how one can use
/// [MatMul](@ref dev_guide_matmul) with compressed weights.

/// @page weights_decompression_matmul_cpp MatMul Tutorial: Weights Decompression
/// \copybrief weights_decompression_matmul_cpp_brief
///
/// Concepts:
/// - Asymmetric quantization

@@ -16,10 +16,13 @@

/// @example cpu_brgemm.cpp
/// > Annotated version: @ref cpu_brgemm_example_cpp
///
/// @page cpu_brgemm_example_cpp BRGeMM ukernel example
/// This C++ API example demonstrates how to create and execute a BRGeMM

/// @page cpu_brgemm_example_cpp_brief
/// @brief This C++ API example demonstrates how to create and execute a BRGeMM
/// ukernel.

/// @page cpu_brgemm_example_cpp BRGeMM ukernel example
/// \copybrief cpu_brgemm_example_cpp_brief
///
/// @include cpu_brgemm.cpp