mirror of
https://github.com/uxlfoundation/oneDNN.git
synced 2025-10-20 18:43:49 +08:00
271 lines
11 KiB
C++
271 lines
11 KiB
C++
/*******************************************************************************
|
|
* Copyright 2019-2025 Intel Corporation
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*******************************************************************************/
|
|
|
|
/// @example sycl_interop_buffer.cpp
|
|
/// > Annotated version: @ref sycl_interop_buffer_cpp
|
|
|
|
/// @page sycl_interop_buffer_cpp_brief
|
|
/// @brief This C++ API example demonstrates programming for Intel(R) Processor
|
|
/// Graphics with SYCL extensions API in oneDNN.
|
|
|
|
/// @page sycl_interop_buffer_cpp Getting Started with SYCL Extensions API
|
|
/// \copybrief sycl_interop_buffer_cpp_brief
|
|
///
|
|
/// Full example text: @ref sycl_interop_buffer.cpp
|
|
///
|
|
/// The workflow includes following steps:
|
|
/// - Create a GPU or CPU engine. It uses DPC++ as the runtime in this sample.
|
|
/// - Create a memory descriptor/object.
|
|
/// - Create a SYCL kernel for data initialization.
|
|
/// - Access a SYCL buffer via SYCL interoperability interface.
|
|
/// - Access a SYCL queue via SYCL interoperability interface.
|
|
/// - Execute a SYCL kernel with related SYCL queue and SYCL buffer
|
|
/// - Create primitives descriptor/primitive.
|
|
/// - Execute the primitive with the initialized memory.
|
|
/// - Validate the result through a host accessor.
|
|
///
|
|
/// @page sycl_interop_buffer_cpp
|
|
|
|
/// @section sycl_interop_buffer_cpp_headers Public headers
|
|
///
|
|
/// To start using oneDNN, we must first include the @ref dnnl.hpp
|
|
/// header file in the application. We also include sycl/sycl.hpp from DPC++ for
|
|
/// using SYCL APIs and @ref dnnl_debug.h, which contains some debugging
|
|
/// facilities such as returning a string representation
|
|
/// for common oneDNN C types.
|
|
/// All C++ API types and functions reside in the `dnnl` namespace, and
|
|
/// SYCL API types and functions reside in the `cl::sycl` namespace.
|
|
/// For simplicity of the example we import both namespaces.
|
|
/// @page sycl_interop_buffer_cpp
|
|
/// @snippet sycl_interop_buffer.cpp Prologue
|
|
// [Prologue]
|
|
|
|
#include "example_utils.hpp"
|
|
#include "oneapi/dnnl/dnnl.hpp"
|
|
#include "oneapi/dnnl/dnnl_debug.h"
|
|
#include "oneapi/dnnl/dnnl_sycl.hpp"
|
|
|
|
#if __has_include(<sycl/sycl.hpp>)
|
|
#include <sycl/sycl.hpp>
|
|
#else
|
|
#error "Unsupported compiler"
|
|
#endif
|
|
|
|
#include <cassert>
|
|
#include <iostream>
|
|
#include <numeric>
|
|
|
|
using namespace dnnl;
|
|
using namespace sycl;
|
|
// [Prologue]
|
|
|
|
class kernel_tag;
|
|
|
|
/// @page sycl_interop_buffer_cpp
|
|
/// @section sycl_interop_buffer_cpp_tutorial sycl_interop_buffer_tutorial() function
|
|
/// @page sycl_interop_buffer_cpp
|
|
void sycl_interop_buffer_tutorial(engine::kind engine_kind) {
|
|
|
|
/// @page sycl_interop_buffer_cpp
|
|
/// @subsection sycl_interop_buffer_cpp_sub1 Engine and stream
|
|
///
|
|
/// All oneDNN primitives and memory objects are attached to a
|
|
/// particular @ref dnnl::engine, which is an abstraction of a
|
|
/// computational device (see also @ref dev_guide_basic_concepts). The
|
|
/// primitives are created and optimized for the device to which they are
|
|
/// attached, and the memory objects refer to memory residing on the
|
|
/// corresponding device. In particular, that means neither memory objects
|
|
/// nor primitives that were created for one engine can be used on
|
|
/// another.
|
|
///
|
|
/// To create engines, we must specify the @ref dnnl::engine::kind
|
|
/// and the index of the device of the given kind. In this example we use
|
|
/// the first available GPU or CPU engine, so the index for the engine is 0.
|
|
/// This example assumes DPC++ being a runtime. In such case,
|
|
/// during engine creation, an SYCL context is also created and attaches
|
|
/// to the created engine.
|
|
///
|
|
/// @snippet sycl_interop_buffer.cpp Initialize engine
|
|
// [Initialize engine]
|
|
engine eng(engine_kind, 0);
|
|
// [Initialize engine]
|
|
|
|
/// In addition to an engine, all primitives require a @ref dnnl::stream
|
|
/// for the execution. The stream encapsulates an execution context and is
|
|
/// tied to a particular engine.
|
|
///
|
|
/// In this example, a stream is created.
|
|
/// This example assumes DPC++ being a runtime. During stream creation,
|
|
/// a SYCL queue is also created and attaches to this stream.
|
|
///
|
|
/// @snippet sycl_interop_buffer.cpp Initialize stream
|
|
// [Initialize stream]
|
|
dnnl::stream strm(eng);
|
|
// [Initialize stream]
|
|
|
|
/// @subsection sycl_interop_buffer_cpp_sub2 Wrapping data into oneDNN memory object
|
|
///
|
|
/// Next, we create a memory object. We need to specify dimensions of our
|
|
/// memory by passing a memory::dims object. Then we create a memory
|
|
/// descriptor with these dimensions, with the dnnl::memory::data_type::f32
|
|
/// data type, and with the dnnl::memory::format_tag::nchw memory format.
|
|
/// Finally, we construct a memory object and pass the memory descriptor.
|
|
/// The library allocates memory internally.
|
|
/// @snippet sycl_interop_buffer.cpp memory alloc
|
|
// [memory alloc]
|
|
memory::dims tz_dims = {2, 3, 4, 5};
|
|
const size_t N = std::accumulate(tz_dims.begin(), tz_dims.end(), (size_t)1,
|
|
std::multiplies<size_t>());
|
|
|
|
memory::desc mem_d(
|
|
tz_dims, memory::data_type::f32, memory::format_tag::nchw);
|
|
|
|
memory mem = sycl_interop::make_memory(
|
|
mem_d, eng, sycl_interop::memory_kind::buffer);
|
|
// [memory alloc]
|
|
|
|
/// @subsection sycl_interop_buffer_cpp_sub3 Initialize the data executing a custom SYCL kernel
|
|
///
|
|
/// The underlying SYCL buffer can be extracted from the memory object using
|
|
/// the interoperability interface:
|
|
/// `dnnl::sycl_interop_buffer::get_buffer<T>(const dnnl::memory)`.
|
|
///
|
|
/// @snippet sycl_interop_buffer.cpp get sycl buf
|
|
// [get sycl buf]
|
|
auto sycl_buf = sycl_interop::get_buffer<float>(mem);
|
|
// [get sycl buf]
|
|
|
|
/// We are going to create an SYCL kernel that should initialize our data.
|
|
/// To execute SYCL kernel we need a SYCL queue.
|
|
/// For simplicity we can construct a stream and extract the SYCL queue from
|
|
/// it.
|
|
/// The kernel initializes the data by the `0, -1, 2, -3, ...` sequence:
|
|
/// `data[i] = (-1)^i * i`.
|
|
/// @snippet sycl_interop_buffer.cpp sycl kernel exec
|
|
// [sycl kernel exec]
|
|
queue q = sycl_interop::get_queue(strm);
|
|
q.submit([&](handler &cgh) {
|
|
auto a = sycl_buf.get_access<access::mode::write>(cgh);
|
|
cgh.parallel_for<kernel_tag>(range<1>(N), [=](id<1> i) {
|
|
int idx = (int)i[0];
|
|
a[idx] = (idx % 2) ? -idx : idx;
|
|
});
|
|
});
|
|
// [sycl kernel exec]
|
|
|
|
/// @subsection sycl_interop_buffer_cpp_sub4 Create and execute a primitive
|
|
/// There are two steps to create an operation primitive in oneDNN:
|
|
/// 1. Create a primitive descriptor.
|
|
/// 2. Create a primitive.
|
|
///
|
|
/// Let's create the primitive to perform the ReLU (rectified linear unit)
|
|
/// operation: x = max(0, x). Primitive descriptors are attached to a
|
|
/// specific engine and represent some implementation for this engine.
|
|
/// A primitive object is a realization of a primitive descriptor,
|
|
/// and its construction is usually much "heavier".
|
|
/// @snippet sycl_interop_buffer.cpp relu creation
|
|
// [relu creation]
|
|
auto relu_pd = eltwise_forward::primitive_desc(eng, prop_kind::forward,
|
|
algorithm::eltwise_relu, mem_d, mem_d, 0.0f);
|
|
auto relu = eltwise_forward(relu_pd);
|
|
// [relu creation]
|
|
|
|
/// Next, execute the primitive.
|
|
/// @snippet sycl_interop_buffer.cpp relu exec
|
|
// [relu exec]
|
|
relu.execute(strm, {{DNNL_ARG_SRC, mem}, {DNNL_ARG_DST, mem}});
|
|
strm.wait();
|
|
// [relu exec]
|
|
///
|
|
///@note
|
|
/// With DPC++ runtime, both CPU and GPU have asynchronous execution;
|
|
/// However, the user can
|
|
/// call dnnl::stream::wait() to synchronize the stream and ensure that
|
|
/// all
|
|
/// previously submitted primitives are completed.
|
|
///
|
|
|
|
/// @page sycl_interop_buffer_cpp
|
|
/// @subsection sycl_interop_buffer_cpp_sub5 Validate the results
|
|
///
|
|
/// Before running validation codes, we need to access the SYCL memory on
|
|
/// the host.
|
|
/// The simplest way to access the SYCL-backed memory on the host is to
|
|
/// construct a host accessor. Then we can directly read and write this data
|
|
/// on the host.
|
|
/// However no any conflicting operations are allowed until the host
|
|
/// accessor is destroyed.
|
|
/// We can run validation codes on the host accordingly.
|
|
/// @snippet sycl_interop_buffer.cpp Check the results
|
|
// [Check the results]
|
|
auto host_acc = sycl_buf.get_host_access();
|
|
for (size_t i = 0; i < N; i++) {
|
|
float exp_value = (i % 2) ? 0.0f : i;
|
|
if (host_acc[i] != (float)exp_value)
|
|
throw std::string(
|
|
"Unexpected output, find a negative value after the ReLU "
|
|
"execution.");
|
|
}
|
|
// [Check the results]
|
|
}
|
|
|
|
/// @section sycl_interop_buffer_cpp_main main() function
|
|
///
|
|
/// We now just call everything we prepared earlier.
|
|
///
|
|
/// Because we are using the oneDNN C++ API, we use exceptions to handle
|
|
/// errors (see @ref dev_guide_c_and_cpp_apis). The oneDNN C++ API throws
|
|
/// exceptions of type @ref dnnl::error, which contains the error status
|
|
/// (of type @ref dnnl_status_t) and a human-readable error message accessible
|
|
/// through the regular `what()` method.
|
|
/// @page sycl_interop_buffer_cpp
|
|
/// @snippet sycl_interop_buffer.cpp Main
|
|
// [Main]
|
|
int main(int argc, char **argv) {
|
|
int exit_code = 0;
|
|
|
|
engine::kind engine_kind = parse_engine_kind(argc, argv);
|
|
try {
|
|
sycl_interop_buffer_tutorial(engine_kind);
|
|
} catch (dnnl::error &e) {
|
|
std::cout << "oneDNN error caught: " << std::endl
|
|
<< "\tStatus: " << dnnl_status2str(e.status) << std::endl
|
|
<< "\tMessage: " << e.what() << std::endl;
|
|
exit_code = 1;
|
|
} catch (std::string &e) {
|
|
std::cout << "Error in the example: " << e << "." << std::endl;
|
|
exit_code = 2;
|
|
} catch (exception &e) {
|
|
std::cout << "Error in the example: " << e.what() << "." << std::endl;
|
|
exit_code = 3;
|
|
}
|
|
|
|
std::cout << "Example " << (exit_code ? "failed" : "passed") << " on "
|
|
<< engine_kind2str_upper(engine_kind) << "." << std::endl;
|
|
finalize();
|
|
return exit_code;
|
|
}
|
|
// [Main]
|
|
/// <b></b>
|
|
///
|
|
/// Upon compiling and running the example, the output should be just:
|
|
///
|
|
/// ~~~
|
|
/// Example passed.
|
|
/// ~~~
|
|
///
|
|
/// @page sycl_interop_buffer_cpp
|