/*******************************************************************************
* Copyright 2021-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

#ifndef TEST_GEMM_DATA_PREPARATION_H
#define TEST_GEMM_DATA_PREPARATION_H

#include "test_gemm_params.hpp"

#include "dnnl_test_common.hpp"

namespace dnnl {

/*
 * To reduce the time spent in GEMM validation, the test matrices A, B, and C
 * are generated from sub-matrices (A', B', and C') of smaller size:
 * - A(M, K) <-> A'(M_test, K)
 * - B(K, N) <-> B'(K, N_test)
 * - C(M, N) <-> C'(M_test, N_test)
 *
 * The matrices A', B', and C' are generated randomly. Then:
 * - A(m, k) := A'(mapper_m[m], k),
 * - B(k, n) := B'(k, mapper_n[n]),
 * - C(m, n) := C'(mapper_m[m], mapper_n[n]);
 *
 * Here `mapper_x[]` is a surjection of {0, ..., X-1} onto {0, ..., X_test-1}.
 * For simplicity, mapper_x[x] = x for x in {0, ..., X_test-1}.
 *
 * This technique reduces the complexity of the validation code from
 * O(M * N * K) to O(M_test * N_test * K).
 *
 * X_test := min(X, X_test_max), where X_test_max is a prime number around 50.
 *
 * To make the test robust, the surjective functions mapper_m and mapper_n
 * should randomly map the elements {X_test, ..., X-1} onto {0, ..., X_test-1}.
 */
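
// Illustrative example of the mapping (values chosen for exposition only):
// with X = 100 and X_test_max = 47, X_test = 47; indices 0..46 map to
// themselves and every index x in {47, ..., 99} maps to some index in
// {0, ..., 46}, so row/column x of the full matrix is a copy of row/column
// mapper_x[x] of the randomly generated sub-matrix.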

static constexpr int M_test_max = 47;
static constexpr int N_test_max = 53;

/** Mapper:
 * a surjective function from {0, ..., dim-1} onto {0, ..., dim_test-1}.
 */
struct mapper_t {
    mapper_t(const mapper_t &other) = default;

    mapper_t(mapper_t &&other) noexcept
        : dim_(other.dim_)
        , dim_test_(other.dim_test_)
        , gen_(other.gen_)
        , gen_start_(other.gen_start_)
        , mapper_(std::move(other.mapper_)) {}

    mapper_t(int64_t dim, int64_t dim_test_max, int64_t gen = 7,
            int64_t gen_start = 13)
        : dim_(dim)
        , dim_test_((std::min)(dim, dim_test_max))
        , gen_(gen)
        , gen_start_(gen_start)
        , mapper_(dim) {
        for (int64_t d = 0; d < dim_test_; ++d)
            mapper_[d] = d;
        for (int64_t g = gen_start_ % dim_test_, d = dim_test_; d < dim_; ++d) {
            mapper_[d] = mapper_[g];
            g = g * gen_ % dim_test_;
        }
    }

    int64_t dim() const { return dim_; }
    int64_t dim_test() const { return dim_test_; }
    int64_t operator[](int64_t d) const { return mapper_[d]; }

private:
    const int64_t dim_;
    const int64_t dim_test_;
    const int64_t gen_, gen_start_;
    std::vector<int64_t> mapper_;
};
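
// Minimal usage sketch (illustrative only):
//
//     mapper_t mapper_m(1000, M_test_max); // dim = 1000, dim_test = 47
//     // mapper_m[i] == i for i < 47; mapper_m[i] is in [0, 47) for i >= 47.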

struct test_gemm_data_t {
    std::shared_ptr<test_memory> a_mem;
    std::shared_ptr<test_memory> b_mem;
    std::shared_ptr<test_memory> c_mem;
    std::shared_ptr<test_memory> c_ref_mem;
    std::shared_ptr<test_memory> oc_mem;
    std::shared_ptr<mapper_t> mapper_m;
    std::shared_ptr<mapper_t> mapper_n;
};

/** Prepares matrix A or B according to the dimension mapper.
 * The K dimension is always assumed to be columns, hence:
 * - A layout = A_is_transposed ? ROW_MAJOR : COL_MAJOR
 * - B layout = B_is_transposed ? COL_MAJOR : ROW_MAJOR
 */
template <typename data_t>
void prepare_matrix(const test_memory &M_mem, int64_t off_beg, layout_t layout,
        int64_t R, int64_t C, int64_t LD, const mapper_t &mapper) {
    auto M = map_memory<data_t>(M_mem);
    auto dt = data_traits_t<data_t>::data_type;
    bool is_fp = (false || dt == memory::data_type::f16
            || dt == memory::data_type::bf16 || dt == memory::data_type::f32);
    const data_t mean = (data_t)(is_fp ? 1.f : 4);
    const data_t var = (data_t)(is_fp ? 2e-1f : 3);

    ASSERT_EQ(R, mapper.dim());
    const int R_test = mapper.dim_test();

    if (layout == layout_t::COL_MAJOR) {
        dnnl::impl::parallel_nd(C, R_test, [&](int64_t c, int64_t r) {
            const int64_t off = c * LD + r;
            M[off_beg + off] = set_value<data_t>(off, mean, var, 1.);
        });
        if (R > R_test) {
            const int64_t R_rest = R - R_test;
            dnnl::impl::parallel_nd(C, R_rest, [&](int64_t c, int64_t r_) {
                const int64_t r = R_test + r_;
                const int64_t off = c * LD + r;
                const int64_t off0 = c * LD + mapper[r];
                M[off_beg + off] = M[off_beg + off0];
            });
        }
    } else {
        dnnl::impl::parallel_nd(R_test, C, [&](int64_t r, int64_t c) {
            const int64_t off = r * LD + c;
            M[off_beg + off] = set_value<data_t>(off, mean, var, 1.);
        });
        if (R > R_test) {
            const int64_t R_rest = R - R_test;
            dnnl::impl::parallel_nd(R_rest, C, [&](int64_t r_, int64_t c) {
                const int64_t r = R_test + r_;
                const int64_t off = r * LD + c;
                const int64_t off0 = mapper[r] * LD + c;
                M[off_beg + off] = M[off_beg + off0];
            });
        }
    }

    // To test if igemm row/col sum are correct when performing sign/zero
    // extensions.
    if (dt == memory::data_type::u8)
        M[off_beg] = data_t(UINT8_MAX);
    else if (dt == memory::data_type::s8)
        M[off_beg] = data_t(-64);
}
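
// Worked example (illustrative): when called for A with R = M, C = K, and
// LD = lda (as fill_matrices() does below), rows 0..M_test-1 receive random
// values and every remaining row r is a verbatim copy of row mapper[r], so a
// reference GEMM only needs the first M_test distinct rows of A.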

/** Extends columns of the matrix M according to the mapper_c */
template <typename data_t>
void extend_matrix_cols(const test_memory &M_mem, int64_t off, int64_t R,
        int64_t C, int64_t LD, const mapper_t &mapper_c) {
    auto M = map_memory<data_t>(M_mem);
    ASSERT_EQ(C, mapper_c.dim());
    const int64_t C_test = mapper_c.dim_test();
    if (C_test == C) return;

    dnnl::impl::parallel_nd(R, C - C_test, [&](int64_t r, int64_t c_) {
        const int64_t c = C_test + c_;
        const int64_t c0 = mapper_c[c];
        M[off + r * LD + c] = M[off + r * LD + c0];
    });
}

/** Extends rows of the matrix M according to the mapper_r */
template <typename data_t>
void extend_matrix_rows(const test_memory &M_mem, int64_t off, int64_t R,
        int64_t C, int64_t LD, const mapper_t &mapper_r) {
    auto M = map_memory<data_t>(M_mem);
    ASSERT_EQ(R, mapper_r.dim());
    const int64_t R_test = mapper_r.dim_test();
    if (R_test == R) return;

    dnnl::impl::parallel_nd(R - R_test, [&](int64_t r_) {
        const int64_t r = R_test + r_;
        const int64_t r0 = mapper_r[r];
        for (int64_t c = 0; c < C; ++c)
            M[off + r * LD + c] = M[off + r0 * LD + c];
    });
}

/** Extends matrix M according to the mapper_r and mapper_c */
template <typename data_t>
void extend_matrix(const test_memory &M_mem, int64_t off, int64_t R, int64_t C,
        int64_t LD, const mapper_t &mapper_r, const mapper_t &mapper_c) {
    ASSERT_EQ(R, mapper_r.dim());
    ASSERT_EQ(C, mapper_c.dim());
    extend_matrix_rows<data_t>(M_mem, off, R, C, LD, mapper_r);
    extend_matrix_cols<data_t>(M_mem, off, R, C, LD, mapper_c);
}

inline void get_matrix_size(
        const test_params_t &p, size_t &sizeA, size_t &sizeB, size_t &sizeC) {
    const bool tr_a = (p.transA == 'T' || p.transA == 't');
    const bool tr_b = (p.transB == 'T' || p.transB == 't');
    sizeA = tr_a ? p.lda * p.K : p.lda * p.M;
    sizeB = tr_b ? p.ldb * p.N : p.ldb * p.K;
    sizeC = p.ldc * p.M;
}
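
// Note (a sketch of the sizing logic, inferred from how prepare_matrix() and
// fill_matrices() address elements): each size is the leading dimension times
// the slow-varying extent, e.g. sizeA = p.lda * p.M for a non-transposed A
// (assuming p.lda >= p.K) and p.lda * p.K for a transposed one
// (assuming p.lda >= p.M).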

template <typename T>
inline memory::desc get_matrix_md(memory::dim n, memory::dim off) {
    return create_md(
            {n + off}, data_traits_t<T>::data_type, memory::format_tag::x);
}
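
// Each matrix is described as a flat 1-D buffer of n + off elements
// (memory::format_tag::x); `off` reserves room for the test offset
// (p.off.a, p.off.b, ...) that precedes the actual matrix data.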

template <typename a_dt, typename b_dt, typename c_dt>
void fill_matrices(const test_params_t &p, const mapper_t &mapper_m,
        const mapper_t &mapper_n, const test_memory &a_mem,
        const test_memory &b_mem, const test_memory &c_mem,
        const test_memory &c_ref_mem, const test_memory &oc_mem) {
    prepare_matrix<a_dt>(a_mem, p.off.a,
            p.tr_a() ? layout_t::COL_MAJOR : layout_t::ROW_MAJOR, p.M, p.K,
            p.lda, mapper_m);
    prepare_matrix<b_dt>(b_mem, p.off.b,
            p.tr_b() ? layout_t::ROW_MAJOR : layout_t::COL_MAJOR, p.N, p.K,
            p.ldb, mapper_n);

    fill_data<c_dt>(p.off.c + p.sizeC(), c_mem.get());
    extend_matrix<c_dt>(c_mem, p.off.c, p.M, p.N, p.ldc, mapper_m, mapper_n);
    {
        auto C = map_memory<c_dt>(c_mem);
        auto C_ref = map_memory<c_dt>(c_ref_mem);
        dnnl::impl::parallel_nd(p.sizeC(),
                [&](int64_t i) { C_ref[p.off.c + i] = C[p.off.c + i]; });
    }

    if (oc_mem.get_size() == 0) return;

    if (p.igemm_params.nonzero_oc) {
        fill_data<c_dt>(p.size_oc(), oc_mem.get(), (c_dt)1, (c_dt)0);
        if (p.oc_is_R()) {
            extend_matrix_cols<c_dt>(oc_mem, 0, 1, p.N, p.N, mapper_n);
        } else if (p.oc_is_C()) {
            extend_matrix_rows<c_dt>(oc_mem, 0, p.M, 1, 1, mapper_m);
        }
    } else {
        auto oc = map_memory<c_dt>(oc_mem);
        for (int64_t i = 0; i < p.size_oc(); i++)
            oc[i] = 0;
    }
}

template <typename a_dt, typename b_dt, typename c_dt>
void prepare_data_for_gemm_testing(
        const test_params_t &p, test_gemm_data_t &gemm_data, engine &eng) {
    size_t sizeA, sizeB, sizeC;
    get_matrix_size(p, sizeA, sizeB, sizeC);

    gemm_data.a_mem = std::make_shared<test_memory>(
            get_matrix_md<a_dt>(sizeA, p.off.a), eng);
    gemm_data.b_mem = std::make_shared<test_memory>(
            get_matrix_md<b_dt>(sizeB, p.off.b), eng);
    gemm_data.c_mem = std::make_shared<test_memory>(
            get_matrix_md<c_dt>(sizeC, p.off.c), eng);
    gemm_data.c_ref_mem = std::make_shared<test_memory>(
            get_matrix_md<c_dt>(sizeC, p.off.c), eng);
    gemm_data.oc_mem = std::make_shared<test_memory>(
            get_matrix_md<c_dt>(p.size_oc(), p.off.co), eng);

    gemm_data.mapper_m = std::make_shared<mapper_t>(p.M, M_test_max);
    gemm_data.mapper_n = std::make_shared<mapper_t>(p.N, N_test_max);

    fill_matrices<a_dt, b_dt, c_dt>(p, *gemm_data.mapper_m, *gemm_data.mapper_n,
            *gemm_data.a_mem, *gemm_data.b_mem, *gemm_data.c_mem,
            *gemm_data.c_ref_mem, *gemm_data.oc_mem);
}
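
// Minimal usage sketch (illustrative; assumes a populated test_params_t `p`
// and a valid engine `eng`):
//
//     test_gemm_data_t gemm_data;
//     prepare_data_for_gemm_testing<float, float, float>(p, gemm_data, eng);
//     // gemm_data.a_mem / b_mem / c_mem now hold the extended matrices and
//     // gemm_data.c_ref_mem holds a copy of C for reference validation.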

} // namespace dnnl

#endif