mirror of
https://github.com/uxlfoundation/oneDNN.git
synced 2025-10-20 18:43:49 +08:00
build: cmake: unsupported compilers cleanup
This commit is contained in:
committed by
Stefan Palicki
parent
c7827a8f3a
commit
664b22abd5
19
README.md
19
README.md
@ -295,9 +295,9 @@ Runtime-specific dependencies:
|
||||
|
||||
### Validated Configurations
|
||||
|
||||
CPU engine was validated on RedHat\* Enterprise Linux 8 with
|
||||
* GNU Compiler Collection 5.4, 6.1, 7.2, 8.1, 9.1, 11.1, 11.3
|
||||
* Clang\* 7.1, 8.0, 9.0, 14.0.6
|
||||
x86-64 CPU engine was validated on RedHat\* Enterprise Linux 8 with
|
||||
* GNU Compiler Collection 8.5, 9.5, 11.1, 11.3
|
||||
* Clang\* 11.0, 14.0.6
|
||||
* [Intel oneAPI DPC++/C++ Compiler] 2024.0
|
||||
|
||||
on Windows Server\* 2019 with
|
||||
@ -307,16 +307,19 @@ on Windows Server\* 2019 with
|
||||
on macOS 11 (Big Sur) with
|
||||
* Apple LLVM version 13.0
|
||||
|
||||
on Ubuntu 20.04 AArch64 with
|
||||
* GNU Compiler Collection 7.0, 8.0, 9.0, 10.0
|
||||
* Clang\* 9.0, 17.0
|
||||
AArch64 CPU engine was validated on Ubuntu 22.04 with
|
||||
* GNU Compiler Collection 10.0, 13.0
|
||||
* Clang\* 17.0
|
||||
* [Arm Compiler for Linux] 24.04
|
||||
* [Arm Compute Library (ACL)] built for armv8-a arch, latest stable version
|
||||
available at the time of release
|
||||
|
||||
on macOS 14 (Sonoma) with
|
||||
* Apple LLVM version 15.0
|
||||
|
||||
GPU engine was validated on Ubuntu\* 22.04 with
|
||||
* GNU Compiler Collection 7.2, 8.1, and 9.1
|
||||
* Clang 7.1, 8.0, 9.0
|
||||
* GNU Compiler Collection 8.5 and 9.5
|
||||
* Clang 11.0
|
||||
* [Intel oneAPI DPC++/C++ Compiler] 2024.0
|
||||
* [Intel Software for General Purpose GPU capabilities] latest stable version
|
||||
available at the time of release
|
||||
|
@ -1,5 +1,5 @@
|
||||
#===============================================================================
|
||||
# Copyright 2017-2024 Intel Corporation
|
||||
# Copyright 2017-2025 Intel Corporation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@ -52,13 +52,6 @@ if(NOT OpenMP_CXX_FOUND AND MSVC AND CMAKE_CXX_COMPILER_ID MATCHES "(Clang|Intel
|
||||
# The ICX driver doesn't link OpenMP library even if `/Qopenmp`
|
||||
# was specified.
|
||||
set(OpenMP_FLAGS "/Qopenmp -Xclang --dependent-lib=libiomp5md")
|
||||
else()
|
||||
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "10.0")
|
||||
# version < 10 can't pass cl-style `/openmp` flag
|
||||
set(OpenMP_FLAGS "-Xclang -fopenmp")
|
||||
# ... and requires explicit linking against omp library
|
||||
set(OpenMP_CXX_LIBRARIES "libomp.lib")
|
||||
endif()
|
||||
endif()
|
||||
set(OpenMP_C_FLAGS ${OpenMP_FLAGS})
|
||||
set(OpenMP_CXX_FLAGS ${OpenMP_FLAGS})
|
||||
|
@ -1,5 +1,5 @@
|
||||
#===============================================================================
|
||||
# Copyright 2017-2024 Intel Corporation
|
||||
# Copyright 2017-2025 Intel Corporation
|
||||
# Copyright 2021 FUJITSU LIMITED
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ -30,14 +30,10 @@ macro(sdl_unix_common_ccxx_flags var)
|
||||
append(${var} "-fPIC -Wformat -Wformat-security")
|
||||
endmacro()
|
||||
|
||||
macro(sdl_gnu_common_ccxx_flags var gnu_version)
|
||||
if(${gnu_version} VERSION_LESS 4.9)
|
||||
append(${var} "-fstack-protector-all")
|
||||
else()
|
||||
append(${var} "-fstack-protector-strong")
|
||||
if(NOT (${gnu_version} VERSION_LESS 8.0) AND (DNNL_TARGET_ARCH STREQUAL "X64"))
|
||||
append(${var} "-fcf-protection=full")
|
||||
endif()
|
||||
macro(sdl_gnu_common_ccxx_flags var)
|
||||
append(${var} "-fstack-protector-strong")
|
||||
if(DNNL_TARGET_ARCH STREQUAL "X64")
|
||||
append(${var} "-fcf-protection=full")
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
@ -65,7 +61,7 @@ if(UNIX)
|
||||
append(ONEDNN_SDL_COMPILER_FLAGS "-D_FORTIFY_SOURCE=2")
|
||||
endif()
|
||||
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
|
||||
sdl_gnu_common_ccxx_flags(ONEDNN_SDL_COMPILER_FLAGS CMAKE_CXX_COMPILER_VERSION)
|
||||
sdl_gnu_common_ccxx_flags(ONEDNN_SDL_COMPILER_FLAGS)
|
||||
sdl_gnu_src_ccxx_flags(CMAKE_SRC_CCXX_FLAGS)
|
||||
sdl_gnu_example_ccxx_flags(CMAKE_EXAMPLE_CCXX_FLAGS)
|
||||
elseif(CMAKE_CXX_COMPILER_ID MATCHES "(Apple)?[Cc]lang")
|
||||
|
@ -1,5 +1,5 @@
|
||||
#===============================================================================
|
||||
# Copyright 2019-2024 Intel Corporation
|
||||
# Copyright 2019-2025 Intel Corporation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@ -36,11 +36,7 @@ if("${DNNL_CODE_COVERAGE}" STREQUAL "GCOV")
|
||||
message(FATAL_ERROR "GCOV not found in path")
|
||||
endif()
|
||||
|
||||
if(CMAKE_CXX_COMPILER_ID MATCHES "(Apple)?[Cc]lang")
|
||||
if("${CMAKE_CXX_COMPILER_VERSION}" VERSION_LESS 3)
|
||||
message(FATAL_ERROR "Clang version must be 3.0.0 or greater! Aborting...")
|
||||
endif()
|
||||
elseif(NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
|
||||
if(NOT CMAKE_CXX_COMPILER_ID MATCHES "(Apple)?[Cc]lang|GNU")
|
||||
message(FATAL_ERROR "Unsupported compiler: ${CMAKE_CXX_COMPILER_ID}")
|
||||
endif()
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
#===============================================================================
|
||||
# Copyright 2021-2024 Intel Corporation
|
||||
# Copyright 2021-2025 Intel Corporation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@ -78,7 +78,7 @@ if(DPCPP_HOST_COMPILER_KIND MATCHES "^(GNU|CLANG)$")
|
||||
|
||||
if(DPCPP_HOST_COMPILER_KIND STREQUAL "GNU")
|
||||
platform_gnu_nowarn_ccxx_flags(DPCPP_CXX_NOWARN_FLAGS ${DPCPP_HOST_COMPILER_MAJOR_VER}.${DPCPP_HOST_COMPILER_MINOR_VER})
|
||||
sdl_gnu_common_ccxx_flags(DPCPP_HOST_COMPILER_OPTS DPCPP_HOST_COMPILER_VER)
|
||||
sdl_gnu_common_ccxx_flags(DPCPP_HOST_COMPILER_OPTS)
|
||||
sdl_gnu_src_ccxx_flags(DPCPP_SRC_CXX_FLAGS)
|
||||
sdl_gnu_example_ccxx_flags(DPCPP_EXAMPLE_CXX_FLAGS)
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
#===============================================================================
|
||||
# Copyright 2024 Intel Corporation
|
||||
# Copyright 2024-2025 Intel Corporation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@ -98,13 +98,13 @@ message(STATUS "Host compiler version: ${DPCPP_HOST_COMPILER_MAJOR_VER}.${DPCPP_
|
||||
|
||||
# Check the version of the provided host compiler.
|
||||
if(DPCPP_HOST_COMPILER_KIND STREQUAL "GNU")
|
||||
if((DPCPP_HOST_COMPILER_MAJOR_VER LESS 7) OR (DPCPP_HOST_COMPILER_MAJOR_VER EQUAL 7 AND DPCPP_HOST_COMPILER_MINOR_VER LESS 4))
|
||||
message(FATAL_ERROR "The minimum version of ${DPCPP_HOST_COMPILER_KIND} host compiler is 7.4.")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(DPCPP_HOST_COMPILER_KIND STREQUAL "CLANG")
|
||||
if(DPCPP_HOST_COMPILER_MAJOR_VER LESS 8)
|
||||
message(FATAL_ERROR "The minimum version of ${DPCPP_HOST_COMPILER_KIND} host compiler is 8.0.")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(DPCPP_HOST_COMPILER_KIND STREQUAL "CLANG")
|
||||
if(DPCPP_HOST_COMPILER_MAJOR_VER LESS 11)
|
||||
message(FATAL_ERROR "The minimum version of ${DPCPP_HOST_COMPILER_KIND} host compiler is 11.0.")
|
||||
endif()
|
||||
endif()
|
||||
|
@ -362,12 +362,6 @@ elseif(UNIX OR MINGW)
|
||||
append(CMAKE_CCXX_FLAGS "-Wno-ignored-attributes")
|
||||
endif()
|
||||
|
||||
# XXX: Suppress an erroneous warning of nested lambda visibility
|
||||
# exceeding that of the containing class (GCC Bugzilla - Bug 80947).
|
||||
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8 AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 6.0)
|
||||
append(CMAKE_CCXX_FLAGS "-Wno-attributes")
|
||||
endif()
|
||||
|
||||
if(DNNL_TARGET_ARCH STREQUAL "AARCH64")
|
||||
if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||
set(DEF_ARCH_OPT_FLAGS "-O3")
|
||||
@ -486,10 +480,5 @@ if (DNNL_TARGET_ARCH STREQUAL "RV64")
|
||||
message(STATUS "DNNL_RISCV_USE_RVV_INTRINSICS: ${DNNL_RISCV_USE_RVV_INTRINSICS}")
|
||||
endif()
|
||||
|
||||
# Old compiler versions do not support warnings available on newer compilers.
|
||||
if(CMAKE_CXX_COMPILER_ID MATCHES "(Apple)?[Cc]lang" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8.0.0)
|
||||
append(CMAKE_CCXX_FLAGS "-Wno-unknown-warning-option")
|
||||
endif()
|
||||
|
||||
append(CMAKE_C_FLAGS "${CMAKE_CCXX_FLAGS}")
|
||||
append(CMAKE_CXX_FLAGS "${CMAKE_CCXX_FLAGS}")
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright 2020-2024 Intel Corporation
|
||||
* Copyright 2020-2025 Intel Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -17,11 +17,6 @@
|
||||
#ifndef COMPILER_WORKAROUNDS_HPP
|
||||
#define COMPILER_WORKAROUNDS_HPP
|
||||
|
||||
#if (defined __GNUC__) && (!defined(__INTEL_COMPILER)) \
|
||||
&& (!defined(__INTEL_LLVM_COMPILER)) && (!defined(__clang__major__))
|
||||
#define NEED_GCC_WA_CHECK 1
|
||||
#endif
|
||||
|
||||
// Workaround 01: clang.
|
||||
//
|
||||
// Clang has an issue [1] with `#pragma omp simd` that might lead to segfault.
|
||||
@ -32,7 +27,7 @@
|
||||
// vectorization for clang altogether for now.
|
||||
//
|
||||
// [1] https://bugs.llvm.org/show_bug.cgi?id=48104
|
||||
#if (defined __clang_major__) && (__clang_major__ >= 6)
|
||||
#if (defined __clang_major__) && (__clang_major__ < 13)
|
||||
#define CLANG_WA_01_SAFE_TO_USE_OMP_SIMD 0
|
||||
#else
|
||||
#define CLANG_WA_01_SAFE_TO_USE_OMP_SIMD 1
|
||||
@ -40,40 +35,15 @@
|
||||
|
||||
// Workaround 02: clang.
|
||||
//
|
||||
// Clang 6+ generates incorrect code with OMP_SIMD in some particular cases.
|
||||
// Clang generates incorrect code with OMP_SIMD in some particular cases.
|
||||
// Unlike CLANG_WA_01_SAFE_TO_USE_OMP_SIMD, the issue happens even with -O3.
|
||||
#if (defined __clang_major__) && (__clang_major__ >= 6)
|
||||
#if (defined __clang_major__) && (__clang_major__ < 13)
|
||||
#define CLANG_WA_02_SAFE_TO_USE_OMP_SIMD 0
|
||||
#else
|
||||
#define CLANG_WA_02_SAFE_TO_USE_OMP_SIMD 1
|
||||
#endif
|
||||
|
||||
// Workaround 03: GCC
|
||||
//
|
||||
// For very large functions with too much control flow (i.e. if, switch, goto
|
||||
// statements), GCC 7 may struggle to perform optimizations based on tree
|
||||
// dominator (i.e. -ftree-dominator-opts, which is enabled with O1), thereby
|
||||
// producing an internal compiler error (ICE). Specifically, it seems that the
|
||||
// jump threading optimization is the culprit, which cannot be disabled on its
|
||||
// own. There is no reliable way to reproduce the ICE, therefore it is not clear
|
||||
// which __GNUC_MINOR__ version fixes the issue.
|
||||
#if (defined NEED_GCC_WA_CHECK) && (__GNUC__ == 7)
|
||||
#define GCC_WA_NO_TREE_DOMINATOR_OPTS 1
|
||||
#else
|
||||
#define GCC_WA_NO_TREE_DOMINATOR_OPTS 0
|
||||
#endif
|
||||
|
||||
// Workaround 05: GCC
|
||||
//
|
||||
// NOTE: inside lambda, type cast variables captured by reference using
|
||||
// either c-like "(type)var" or functional "type(var)" notation in order
|
||||
// to avoid gcc7 bug with c++14 standard
|
||||
// (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83204).
|
||||
#if (defined NEED_GCC_WA_CHECK) && (__GNUC__ <= 7)
|
||||
#define GCC_WA_LAMBDA_C_CAST
|
||||
#endif
|
||||
|
||||
// Workaround 05: c++17 vs c++20
|
||||
// Workaround 03: MSVC c++17 vs c++20
|
||||
//
|
||||
// C++17/20 are contradictory wrt capturing this and using default '=' capture.
|
||||
// - C++17 and before have to return a warning for the [=, this] capture as
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright 2017-2023 Intel Corporation
|
||||
* Copyright 2017-2025 Intel Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -192,18 +192,6 @@ inline int dnnl_get_current_num_threads() {
|
||||
#define PRAGMA_OMP_SIMD(...) PRAGMA_MACRO(CHAIN2(omp, simd __VA_ARGS__))
|
||||
#endif // defined(_MSC_VER) && !defined(__clang__) && !defined(__INTEL_COMPILER)
|
||||
|
||||
// process simdlen; it is supported for Clang >= 3.9; ICC >= 17.0; GCC >= 6.1
|
||||
// No support on Windows.
|
||||
#if (defined(__clang_major__) \
|
||||
&& (__clang_major__ < 3 \
|
||||
|| (__clang_major__ == 3 && __clang_minor__ < 9))) \
|
||||
|| (defined(__INTEL_COMPILER) && __INTEL_COMPILER < 1700) \
|
||||
|| (!defined(__INTEL_COMPILER) && !defined(__clang__) \
|
||||
&& (defined(_MSC_VER) || __GNUC__ < 6 \
|
||||
|| (__GNUC__ == 6 && __GNUC_MINOR__ < 1)))
|
||||
#define simdlen(x)
|
||||
#endif // long simdlen if
|
||||
|
||||
namespace dnnl {
|
||||
namespace impl {
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright 2024 Intel Corporation
|
||||
* Copyright 2024-2025 Intel Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -42,10 +42,7 @@ struct runtime_scales_t;
|
||||
const runtime_scales_t &default_runtime_scale();
|
||||
|
||||
struct runtime_scales_t : public c_compatible {
|
||||
// Clang-3.8.1 raises an error for a default initialization of a const
|
||||
// object. Const runtime_scales_t object is used as default_scales.
|
||||
// runtime_scales_t() = default;
|
||||
runtime_scales_t() {}
|
||||
runtime_scales_t() = default;
|
||||
|
||||
runtime_scales_t &operator=(const runtime_scales_t &rhs) {
|
||||
mask_ = rhs.mask_;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright 2019-2024 Intel Corporation
|
||||
* Copyright 2019-2025 Intel Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -271,11 +271,7 @@ status_t gemm_bf16_matmul_t<dst_type>::execute_ref(
|
||||
const dim_t acc_stride = gemm_based::get_scratchpad_block_elements(
|
||||
batch, M, N, use_single_gemm_call, nthr);
|
||||
|
||||
#ifdef GCC_WA_LAMBDA_C_CAST
|
||||
parallel(nthr, [= WA_THIS_COPY_CAPTURE, &st](int ithr, int nthr) {
|
||||
#else
|
||||
parallel(nthr, [&](int ithr, int nthr) {
|
||||
#endif
|
||||
size_t t_work_start {0}, t_work_end {0};
|
||||
balance211(work_amount, nthr, ithr, t_work_start, t_work_end);
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright 2019-2024 Intel Corporation
|
||||
* Copyright 2019-2025 Intel Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -297,11 +297,7 @@ status_t gemm_x8s8s32x_matmul_t::execute_ref(const exec_ctx_t &ctx) const {
|
||||
bool postops_in_matmul = need_post_processing(pd(), dst_zero_point_f32);
|
||||
assert(IMPLICATION(postops_in_matmul, params.has_pp_kernel_));
|
||||
|
||||
#ifdef GCC_WA_LAMBDA_C_CAST
|
||||
parallel(nthr, [= WA_THIS_COPY_CAPTURE, &st](int ithr, int nthr) {
|
||||
#else
|
||||
parallel(nthr, [&](int ithr, int nthr) {
|
||||
#endif
|
||||
size_t t_work_start {0}, t_work_end {0};
|
||||
balance211(work_amount, nthr, ithr, t_work_start, t_work_end);
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright 2020-2021 Intel Corporation
|
||||
* Copyright 2020-2025 Intel Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -66,7 +66,7 @@ protected:
|
||||
void copy_n(int unroll_n, Xbyak::Label &epilogue);
|
||||
void copy_ns(int unroll_n, Xbyak::Label &epilogue);
|
||||
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
private:
|
||||
static const int offset_a_ = 0, offset_b_ = 0;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright 2020-2021 Intel Corporation
|
||||
* Copyright 2020-2025 Intel Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -31,7 +31,7 @@ public:
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_amx_gemm_kern);
|
||||
|
||||
protected:
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
const int typea;
|
||||
const int typeb;
|
||||
const int typec;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright 2019-2024 Intel Corporation
|
||||
* Copyright 2019-2025 Intel Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -26,7 +26,7 @@ namespace x64 {
|
||||
|
||||
class jit_avx512_core_s16_48x8_copy_an_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_48x8_copy_an_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx512_core_s16_48x8_copy_an_kern();
|
||||
@ -34,7 +34,7 @@ public:
|
||||
|
||||
class jit_avx512_core_s16_48x8_copy_at_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_48x8_copy_at_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx512_core_s16_48x8_copy_at_kern();
|
||||
@ -42,7 +42,7 @@ public:
|
||||
|
||||
class jit_avx512_core_s16_48x8_copy_bn_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_48x8_copy_bn_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx512_core_s16_48x8_copy_bn_kern();
|
||||
@ -50,7 +50,7 @@ public:
|
||||
|
||||
class jit_avx512_core_s16_48x8_copy_bt_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_48x8_copy_bt_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx512_core_s16_48x8_copy_bt_kern();
|
||||
@ -58,7 +58,7 @@ public:
|
||||
|
||||
class jit_avx512_core_s16_24x8_copy_an_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_24x8_copy_an_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx512_core_s16_24x8_copy_an_kern();
|
||||
@ -66,7 +66,7 @@ public:
|
||||
|
||||
class jit_avx512_core_s16_24x8_copy_at_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_24x8_copy_at_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx512_core_s16_24x8_copy_at_kern();
|
||||
@ -74,7 +74,7 @@ public:
|
||||
|
||||
class jit_avx512_core_s16_24x8_copy_bn_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_24x8_copy_bn_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx512_core_s16_24x8_copy_bn_kern();
|
||||
@ -82,7 +82,7 @@ public:
|
||||
|
||||
class jit_avx512_core_s16_24x8_copy_bt_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_24x8_copy_bt_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx512_core_s16_24x8_copy_bt_kern();
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright 2019-2021 Intel Corporation
|
||||
* Copyright 2019-2025 Intel Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -58,7 +58,7 @@ protected:
|
||||
void innerloop(int unroll_m, int unroll_n);
|
||||
void outerloop(int unroll_x, int unroll_y, Xbyak::Label *&outerloop_label);
|
||||
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
private:
|
||||
static const int UNROLL_N_ = 8;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright 2020-2021 Intel Corporation
|
||||
* Copyright 2020-2025 Intel Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -52,7 +52,7 @@ protected:
|
||||
void outerloop(int unroll_y, Xbyak::Label *&cur_outerloop_label,
|
||||
Xbyak::Label *&outerloop_end_label);
|
||||
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
private:
|
||||
static const int UNROLL_M_ = 64;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright 2019-2024 Intel Corporation
|
||||
* Copyright 2019-2025 Intel Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -26,7 +26,7 @@ namespace x64 {
|
||||
|
||||
class jit_avx512_core_f32_copy_an_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_f32_copy_an_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx512_core_f32_copy_an_kern();
|
||||
@ -34,11 +34,10 @@ public:
|
||||
|
||||
class jit_avx512_core_f32_copy_at_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_f32_copy_at_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
void generate_part1(const Xbyak::Label &, const Xbyak::Label &,
|
||||
const Xbyak::Label &, const Xbyak::Label &) ATTRIBUTE_OPTIMIZE;
|
||||
void generate_part2(Xbyak::Label, Xbyak::Label, Xbyak::Label,
|
||||
Xbyak::Label) ATTRIBUTE_OPTIMIZE;
|
||||
const Xbyak::Label &, const Xbyak::Label &);
|
||||
void generate_part2(Xbyak::Label, Xbyak::Label, Xbyak::Label, Xbyak::Label);
|
||||
|
||||
public:
|
||||
jit_avx512_core_f32_copy_at_kern();
|
||||
@ -46,7 +45,7 @@ public:
|
||||
|
||||
class jit_avx512_core_f32_copy_bn_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_f32_copy_bn_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx512_core_f32_copy_bn_kern();
|
||||
@ -54,7 +53,7 @@ public:
|
||||
|
||||
class jit_avx512_core_f32_copy_bt_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_f32_copy_bt_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx512_core_f32_copy_bt_kern();
|
||||
@ -62,7 +61,7 @@ public:
|
||||
|
||||
class jit_avx2_f32_copy_an_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_f32_copy_an_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx2_f32_copy_an_kern();
|
||||
@ -70,7 +69,7 @@ public:
|
||||
|
||||
class jit_avx2_f32_copy_at_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_f32_copy_at_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx2_f32_copy_at_kern();
|
||||
@ -78,7 +77,7 @@ public:
|
||||
|
||||
class jit_avx2_f32_copy_bn_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_f32_copy_bn_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx2_f32_copy_bn_kern();
|
||||
@ -86,7 +85,7 @@ public:
|
||||
|
||||
class jit_avx2_f32_copy_bt_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_f32_copy_bt_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx2_f32_copy_bt_kern();
|
||||
@ -94,7 +93,7 @@ public:
|
||||
|
||||
class jit_avx_f32_copy_an_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_f32_copy_an_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx_f32_copy_an_kern();
|
||||
@ -102,7 +101,7 @@ public:
|
||||
|
||||
class jit_avx_f32_copy_at_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_f32_copy_at_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx_f32_copy_at_kern();
|
||||
@ -110,7 +109,7 @@ public:
|
||||
|
||||
class jit_avx_f32_copy_bn_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_f32_copy_bn_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx_f32_copy_bn_kern();
|
||||
@ -118,7 +117,7 @@ public:
|
||||
|
||||
class jit_avx_f32_copy_bt_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_f32_copy_bt_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx_f32_copy_bt_kern();
|
||||
@ -126,11 +125,10 @@ public:
|
||||
|
||||
class jit_avx_kernel_b0_sgemm_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_b0_sgemm_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
void generate_part1(const Xbyak::Label &, const Xbyak::Label &,
|
||||
const Xbyak::Label &, const Xbyak::Label &) ATTRIBUTE_OPTIMIZE;
|
||||
void generate_part2(Xbyak::Label, Xbyak::Label, Xbyak::Label,
|
||||
Xbyak::Label) ATTRIBUTE_OPTIMIZE;
|
||||
const Xbyak::Label &, const Xbyak::Label &);
|
||||
void generate_part2(Xbyak::Label, Xbyak::Label, Xbyak::Label, Xbyak::Label);
|
||||
|
||||
public:
|
||||
jit_avx_kernel_b0_sgemm_kern();
|
||||
@ -138,11 +136,10 @@ public:
|
||||
|
||||
class jit_avx_kernel_sgemm_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_sgemm_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate_part1(const Xbyak::Label &, const Xbyak::Label &,
|
||||
const Xbyak::Label &) ATTRIBUTE_OPTIMIZE;
|
||||
void generate_part2(
|
||||
Xbyak::Label &, Xbyak::Label &, Xbyak::Label &) ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
void generate_part1(
|
||||
const Xbyak::Label &, const Xbyak::Label &, const Xbyak::Label &);
|
||||
void generate_part2(Xbyak::Label &, Xbyak::Label &, Xbyak::Label &);
|
||||
|
||||
public:
|
||||
jit_avx_kernel_sgemm_kern();
|
||||
@ -150,7 +147,7 @@ public:
|
||||
|
||||
class jit_sse41_f32_copy_an_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_f32_copy_an_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_sse41_f32_copy_an_kern();
|
||||
@ -158,7 +155,7 @@ public:
|
||||
|
||||
class jit_sse41_f32_copy_at_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_f32_copy_at_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_sse41_f32_copy_at_kern();
|
||||
@ -166,7 +163,7 @@ public:
|
||||
|
||||
class jit_sse41_f32_copy_bn_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_f32_copy_bn_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_sse41_f32_copy_bn_kern();
|
||||
@ -174,7 +171,7 @@ public:
|
||||
|
||||
class jit_sse41_f32_copy_bt_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_f32_copy_bt_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_sse41_f32_copy_bt_kern();
|
||||
@ -182,7 +179,7 @@ public:
|
||||
|
||||
class jit_sse41_kernel_b0_sgemm_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_b0_sgemm_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_sse41_kernel_b0_sgemm_kern();
|
||||
@ -190,7 +187,7 @@ public:
|
||||
|
||||
class jit_sse41_kernel_sgemm_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_sgemm_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_sse41_kernel_sgemm_kern();
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright 2019-2024 Intel Corporation
|
||||
* Copyright 2019-2025 Intel Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -79,7 +79,7 @@ class jit_avx2_kernel_sgemm_kern : public jit_generator {
|
||||
void prefetchA_beforeFMA(int um, int un, int k_idx, int n_idx, int m_idx);
|
||||
void prefetchC_afterBload(int um, int un, int k_idx, int n_idx);
|
||||
void prefetchC_beforeKloop(int um);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
template <typename T_reg, typename T_desta, typename T_srca>
|
||||
void loadA_betweenFMAs(int um, int un, int k_idx, int n_idx, int m_idx,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright 2017-2024 Intel Corporation
|
||||
* Copyright 2017-2025 Intel Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -70,7 +70,7 @@ struct xbyak_gemm_t : public jit_generator {
|
||||
, hasBias(hasBias)
|
||||
, STACK_K_CAPACITY((STACK_CAPACITY - 256) / (SIZE * UNROLL_M)) {}
|
||||
|
||||
void generate() override ATTRIBUTE_OPTIMIZE {
|
||||
void generate() override {
|
||||
using namespace Xbyak;
|
||||
bool isBeta0 = (beta == 0.0);
|
||||
bool isBetaN = (!isBeta0 && beta != 1.0);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright 2020-2024 Intel Corporation
|
||||
* Copyright 2020-2025 Intel Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -49,7 +49,7 @@ struct xbyak_gemm_smalln_tn_t : public jit_generator {
|
||||
xbyak_gemm_smalln_tn_t(int N, float beta, float alpha)
|
||||
: jit_generator(jit_name()), N(N), beta(beta), alpha(alpha) {}
|
||||
|
||||
void generate() override ATTRIBUTE_OPTIMIZE {
|
||||
void generate() override {
|
||||
using namespace Xbyak;
|
||||
/**
|
||||
* numN = 1 : 16 rows of A, 1x16 accumulators
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright 2016-2024 Intel Corporation
|
||||
* Copyright 2016-2025 Intel Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -1966,7 +1966,7 @@ struct xbyak_gemm_t : public jit_generator {
|
||||
if (hasBias) { add(BIAS, unroll_m * SIZE); }
|
||||
}
|
||||
|
||||
void generate() override ATTRIBUTE_OPTIMIZE {
|
||||
void generate() override {
|
||||
assert(IMPLICATION(!is_avx2, mayiuse(avx)));
|
||||
|
||||
preamble();
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright 2019-2021 Intel Corporation
|
||||
* Copyright 2019-2025 Intel Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -40,7 +40,7 @@ protected:
|
||||
void innerloop(int unroll_m, int unroll_n);
|
||||
void outerloop(int unroll_x, int unroll_y, Xbyak::Label *&outerloop_label);
|
||||
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
private:
|
||||
static const int M_UNROLL_ = 16;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright 2021 Intel Corporation
|
||||
* Copyright 2021-2025 Intel Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -61,7 +61,7 @@ protected:
|
||||
Xbyak::Label *&cur_outerloop_label,
|
||||
Xbyak::Label *&outerloop_end_label);
|
||||
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
private:
|
||||
static const int max_um_vecs_ = 16;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright 2019-2021 Intel Corporation
|
||||
* Copyright 2019-2025 Intel Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -38,7 +38,7 @@ protected:
|
||||
void innerloop(int unroll_m, int unroll_n);
|
||||
void outerloop(int unroll_x, int unroll_y, Xbyak::Label *&outerloop_label);
|
||||
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
private:
|
||||
static const int M_UNROLL_ = 8;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright 2019-2024 Intel Corporation
|
||||
* Copyright 2019-2025 Intel Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -32,7 +32,7 @@ namespace x64 {
|
||||
|
||||
class jit_avx512_core_u8_copy_an_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_an_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx512_core_u8_copy_an_kern();
|
||||
@ -40,7 +40,7 @@ public:
|
||||
|
||||
class jit_avx512_core_u8_copy_at_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_at_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx512_core_u8_copy_at_kern();
|
||||
@ -48,7 +48,7 @@ public:
|
||||
|
||||
class jit_avx512_core_u8_copy_bn_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_bn_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
bool s8_case;
|
||||
|
||||
public:
|
||||
@ -57,7 +57,7 @@ public:
|
||||
|
||||
class jit_avx512_core_u8_copy_bt_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_bt_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
bool s8_case;
|
||||
|
||||
public:
|
||||
@ -66,7 +66,7 @@ public:
|
||||
|
||||
class jit_avx512_core_u8_copy_sum_an_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_sum_an_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx512_core_u8_copy_sum_an_kern();
|
||||
@ -74,7 +74,7 @@ public:
|
||||
|
||||
class jit_avx512_core_u8_copy_sum_at_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_sum_at_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx512_core_u8_copy_sum_at_kern();
|
||||
@ -82,7 +82,7 @@ public:
|
||||
|
||||
class jit_avx512_core_u8_copy_sum_bn_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_sum_bn_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
bool s8_case;
|
||||
|
||||
public:
|
||||
@ -91,7 +91,7 @@ public:
|
||||
|
||||
class jit_avx512_core_u8_copy_sum_bt_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_sum_bt_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
bool s8_case;
|
||||
|
||||
public:
|
||||
@ -100,7 +100,7 @@ public:
|
||||
|
||||
class jit_avx2_vnni_u8_copy_an_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_vnni_u8_copy_an_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx2_vnni_u8_copy_an_kern();
|
||||
@ -108,7 +108,7 @@ public:
|
||||
|
||||
class jit_avx2_vnni_u8_copy_at_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_vnni_u8_copy_at_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx2_vnni_u8_copy_at_kern();
|
||||
@ -116,7 +116,7 @@ public:
|
||||
|
||||
class jit_avx2_vnni_u8_copy_bn_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_vnni_u8_copy_bn_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx2_vnni_u8_copy_bn_kern();
|
||||
@ -124,7 +124,7 @@ public:
|
||||
|
||||
class jit_avx2_vnni_u8_copy_bt_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_vnni_u8_copy_bt_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx2_vnni_u8_copy_bt_kern();
|
||||
@ -132,7 +132,7 @@ public:
|
||||
|
||||
class jit_avx2_vnni_u8_copy_sum_an_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_vnni_u8_copy_sum_an_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx2_vnni_u8_copy_sum_an_kern();
|
||||
@ -140,7 +140,7 @@ public:
|
||||
|
||||
class jit_avx2_vnni_u8_copy_sum_at_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_vnni_u8_copy_sum_at_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx2_vnni_u8_copy_sum_at_kern();
|
||||
@ -148,7 +148,7 @@ public:
|
||||
|
||||
class jit_avx2_vnni_u8_copy_sum_bn_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_vnni_u8_copy_sum_bn_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx2_vnni_u8_copy_sum_bn_kern();
|
||||
@ -156,7 +156,7 @@ public:
|
||||
|
||||
class jit_avx2_vnni_u8_copy_sum_bt_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_vnni_u8_copy_sum_bt_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx2_vnni_u8_copy_sum_bt_kern();
|
||||
@ -164,7 +164,7 @@ public:
|
||||
|
||||
class jit_avx2_u8_copy_an_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_u8_copy_an_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx2_u8_copy_an_kern();
|
||||
@ -172,7 +172,7 @@ public:
|
||||
|
||||
class jit_avx2_u8_copy_at_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_u8_copy_at_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx2_u8_copy_at_kern();
|
||||
@ -180,7 +180,7 @@ public:
|
||||
|
||||
class jit_avx2_u8_copy_bn_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_u8_copy_bn_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx2_u8_copy_bn_kern();
|
||||
@ -188,7 +188,7 @@ public:
|
||||
|
||||
class jit_avx2_u8_copy_bt_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_u8_copy_bt_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx2_u8_copy_bt_kern();
|
||||
@ -196,7 +196,7 @@ public:
|
||||
|
||||
class jit_avx2_u8_copy_sum_an_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_u8_copy_sum_an_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx2_u8_copy_sum_an_kern();
|
||||
@ -204,7 +204,7 @@ public:
|
||||
|
||||
class jit_avx2_u8_copy_sum_at_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_u8_copy_sum_at_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx2_u8_copy_sum_at_kern();
|
||||
@ -212,7 +212,7 @@ public:
|
||||
|
||||
class jit_avx2_u8_copy_sum_bn_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_u8_copy_sum_bn_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx2_u8_copy_sum_bn_kern();
|
||||
@ -220,7 +220,7 @@ public:
|
||||
|
||||
class jit_avx2_u8_copy_sum_bt_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_u8_copy_sum_bt_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx2_u8_copy_sum_bt_kern();
|
||||
@ -228,7 +228,7 @@ public:
|
||||
|
||||
class jit_avx_u8_copy_an_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_u8_copy_an_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx_u8_copy_an_kern();
|
||||
@ -236,7 +236,7 @@ public:
|
||||
|
||||
class jit_avx_u8_copy_at_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_u8_copy_at_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx_u8_copy_at_kern();
|
||||
@ -244,7 +244,7 @@ public:
|
||||
|
||||
class jit_avx_u8_copy_bn_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_u8_copy_bn_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx_u8_copy_bn_kern();
|
||||
@ -252,7 +252,7 @@ public:
|
||||
|
||||
class jit_avx_u8_copy_bt_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_u8_copy_bt_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx_u8_copy_bt_kern();
|
||||
@ -260,7 +260,7 @@ public:
|
||||
|
||||
class jit_avx_u8_copy_sum_an_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_u8_copy_sum_an_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx_u8_copy_sum_an_kern();
|
||||
@ -268,7 +268,7 @@ public:
|
||||
|
||||
class jit_avx_u8_copy_sum_at_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_u8_copy_sum_at_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx_u8_copy_sum_at_kern();
|
||||
@ -276,7 +276,7 @@ public:
|
||||
|
||||
class jit_avx_u8_copy_sum_bn_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_u8_copy_sum_bn_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx_u8_copy_sum_bn_kern();
|
||||
@ -284,7 +284,7 @@ public:
|
||||
|
||||
class jit_avx_u8_copy_sum_bt_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_u8_copy_sum_bt_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx_u8_copy_sum_bt_kern();
|
||||
@ -292,7 +292,7 @@ public:
|
||||
|
||||
class jit_avx_kernel_b0_gemm_s8u8s32_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_b0_gemm_s8u8s32_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx_kernel_b0_gemm_s8u8s32_kern();
|
||||
@ -300,7 +300,7 @@ public:
|
||||
|
||||
class jit_avx_kernel_b0_b_gemm_s8u8s32_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_b0_b_gemm_s8u8s32_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx_kernel_b0_b_gemm_s8u8s32_kern();
|
||||
@ -308,7 +308,7 @@ public:
|
||||
|
||||
class jit_avx_kernel_b0_r_gemm_s8u8s32_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_b0_r_gemm_s8u8s32_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx_kernel_b0_r_gemm_s8u8s32_kern();
|
||||
@ -316,7 +316,7 @@ public:
|
||||
|
||||
class jit_avx_kernel_b0_c_gemm_s8u8s32_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_b0_c_gemm_s8u8s32_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx_kernel_b0_c_gemm_s8u8s32_kern();
|
||||
@ -324,7 +324,7 @@ public:
|
||||
|
||||
class jit_avx_kernel_gemm_s8u8s32_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_gemm_s8u8s32_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx_kernel_gemm_s8u8s32_kern();
|
||||
@ -332,7 +332,7 @@ public:
|
||||
|
||||
class jit_avx_kernel_b_gemm_s8u8s32_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_b_gemm_s8u8s32_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx_kernel_b_gemm_s8u8s32_kern();
|
||||
@ -340,7 +340,7 @@ public:
|
||||
|
||||
class jit_avx_kernel_r_gemm_s8u8s32_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_r_gemm_s8u8s32_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx_kernel_r_gemm_s8u8s32_kern();
|
||||
@ -348,7 +348,7 @@ public:
|
||||
|
||||
class jit_avx_kernel_c_gemm_s8u8s32_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_c_gemm_s8u8s32_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_avx_kernel_c_gemm_s8u8s32_kern();
|
||||
@ -356,7 +356,7 @@ public:
|
||||
|
||||
class jit_sse41_u8_copy_an_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_u8_copy_an_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_sse41_u8_copy_an_kern();
|
||||
@ -364,7 +364,7 @@ public:
|
||||
|
||||
class jit_sse41_u8_copy_at_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_u8_copy_at_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_sse41_u8_copy_at_kern();
|
||||
@ -372,7 +372,7 @@ public:
|
||||
|
||||
class jit_sse41_u8_copy_bn_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_u8_copy_bn_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_sse41_u8_copy_bn_kern();
|
||||
@ -380,7 +380,7 @@ public:
|
||||
|
||||
class jit_sse41_u8_copy_bt_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_u8_copy_bt_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_sse41_u8_copy_bt_kern();
|
||||
@ -388,7 +388,7 @@ public:
|
||||
|
||||
class jit_sse41_u8_copy_sum_an_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_u8_copy_sum_an_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_sse41_u8_copy_sum_an_kern();
|
||||
@ -396,7 +396,7 @@ public:
|
||||
|
||||
class jit_sse41_u8_copy_sum_at_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_u8_copy_sum_at_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_sse41_u8_copy_sum_at_kern();
|
||||
@ -404,7 +404,7 @@ public:
|
||||
|
||||
class jit_sse41_u8_copy_sum_bn_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_u8_copy_sum_bn_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_sse41_u8_copy_sum_bn_kern();
|
||||
@ -412,7 +412,7 @@ public:
|
||||
|
||||
class jit_sse41_u8_copy_sum_bt_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_u8_copy_sum_bt_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_sse41_u8_copy_sum_bt_kern();
|
||||
@ -420,7 +420,7 @@ public:
|
||||
|
||||
class jit_sse41_kernel_b0_gemm_s8u8s32_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_b0_gemm_s8u8s32_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_sse41_kernel_b0_gemm_s8u8s32_kern();
|
||||
@ -428,7 +428,7 @@ public:
|
||||
|
||||
class jit_sse41_kernel_b0_b_gemm_s8u8s32_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_b0_b_gemm_s8u8s32_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_sse41_kernel_b0_b_gemm_s8u8s32_kern();
|
||||
@ -436,7 +436,7 @@ public:
|
||||
|
||||
class jit_sse41_kernel_b0_r_gemm_s8u8s32_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_b0_r_gemm_s8u8s32_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_sse41_kernel_b0_r_gemm_s8u8s32_kern();
|
||||
@ -444,7 +444,7 @@ public:
|
||||
|
||||
class jit_sse41_kernel_b0_c_gemm_s8u8s32_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_b0_c_gemm_s8u8s32_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_sse41_kernel_b0_c_gemm_s8u8s32_kern();
|
||||
@ -452,7 +452,7 @@ public:
|
||||
|
||||
class jit_sse41_kernel_gemm_s8u8s32_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_gemm_s8u8s32_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_sse41_kernel_gemm_s8u8s32_kern();
|
||||
@ -460,7 +460,7 @@ public:
|
||||
|
||||
class jit_sse41_kernel_b_gemm_s8u8s32_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_b_gemm_s8u8s32_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_sse41_kernel_b_gemm_s8u8s32_kern();
|
||||
@ -468,7 +468,7 @@ public:
|
||||
|
||||
class jit_sse41_kernel_r_gemm_s8u8s32_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_r_gemm_s8u8s32_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_sse41_kernel_r_gemm_s8u8s32_kern();
|
||||
@ -476,7 +476,7 @@ public:
|
||||
|
||||
class jit_sse41_kernel_c_gemm_s8u8s32_kern : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_c_gemm_s8u8s32_kern);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
public:
|
||||
jit_sse41_kernel_c_gemm_s8u8s32_kern();
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright 2018-2021 Intel Corporation
|
||||
* Copyright 2018-2025 Intel Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -51,7 +51,7 @@ protected:
|
||||
void innerloop(int unroll_m, int unroll_n);
|
||||
void outerloop(int unroll_x, int unroll_y, Xbyak::Label *&outerloop_label);
|
||||
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
private:
|
||||
static const int IGEMM_UNROLL_N_ = 4;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright 2018-2021 Intel Corporation
|
||||
* Copyright 2018-2025 Intel Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -50,7 +50,7 @@ protected:
|
||||
void innerloop(int unroll_m, int unroll_n);
|
||||
void outerloop(int unroll_x, int unroll_y, Xbyak::Label *&outerloop_label);
|
||||
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
private:
|
||||
static const int IGEMM_UNROLL_M_ = 48;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright 2019-2024 Intel Corporation
|
||||
* Copyright 2019-2025 Intel Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -44,7 +44,7 @@ class jit_avx512_core_gemv_s8x8s32_kern : public jit_generator {
|
||||
void shuffle_and_add(
|
||||
Xbyak::Zmm, Xbyak::Zmm, Xbyak::Zmm, Xbyak::Zmm, Xbyak::Zmm);
|
||||
void update_c(int, Xbyak::Reg64, int, Xbyak::Opmask);
|
||||
void generate() override ATTRIBUTE_OPTIMIZE;
|
||||
void generate() override;
|
||||
|
||||
cpu_isa_t isa = isa_undef;
|
||||
ver_t ver = ver_t::undef;
|
||||
|
@ -39,12 +39,6 @@
|
||||
#define OFFSET_SHADOWSPACE 0x28
|
||||
#endif
|
||||
|
||||
#if GCC_WA_NO_TREE_DOMINATOR_OPTS
|
||||
#define ATTRIBUTE_OPTIMIZE __attribute__((optimize("no-tree-dominator-opts")))
|
||||
#else
|
||||
#define ATTRIBUTE_OPTIMIZE
|
||||
#endif
|
||||
|
||||
#define DECLARE_CPU_JIT_AUX_FUNCTIONS(gen_name) \
|
||||
const char *name() const override { return STRINGIFY(gen_name); } \
|
||||
const char *source_file() const override { return __FILE__; } \
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright 2019-2023 Intel Corporation
|
||||
* Copyright 2019-2025 Intel Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -436,13 +436,6 @@ status_t jit_xf16_sum_t<src_data_type, dst_data_type, isa>::execute(
|
||||
const dim_t num_blocks = nelems / num_elems_in_block;
|
||||
const dim_t tail = nelems % num_elems_in_block;
|
||||
|
||||
#if defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ == 8 \
|
||||
&& __GNUC_PATCHLEVEL__ == 3
|
||||
// GCC issues a false positive warning 'array subscript is above array bounds'
|
||||
// with gcc 4.8.3 + -march=native option, so disable it for now
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Warray-bounds"
|
||||
#endif
|
||||
parallel(0, [&](const int ithr, const int nthr) {
|
||||
dim_t start {0}, end {0};
|
||||
balance211(num_blocks, nthr, ithr, start, end);
|
||||
@ -477,10 +470,6 @@ status_t jit_xf16_sum_t<src_data_type, dst_data_type, isa>::execute(
|
||||
(*kernel_)(&arg);
|
||||
}
|
||||
});
|
||||
#if defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ == 8 \
|
||||
&& __GNUC_PATCHLEVEL__ == 3
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
return status::success;
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright 2021-2024 Intel Corporation
|
||||
* Copyright 2021-2025 Intel Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -25,12 +25,6 @@
|
||||
#include <vector>
|
||||
#include <initializer_list>
|
||||
|
||||
#if defined(__GNUC__) && __GNUC__ == 7
|
||||
// GCC 7.x issues a false positive warning 'array subscript is above array bounds'
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Warray-bounds"
|
||||
#endif
|
||||
|
||||
namespace dnnl {
|
||||
namespace impl {
|
||||
namespace gpu {
|
||||
@ -849,7 +843,3 @@ bank_conflict_allocation_t bank_conflict_allocation_t::create(
|
||||
} // namespace gpu
|
||||
} // namespace impl
|
||||
} // namespace dnnl
|
||||
|
||||
#if defined(__GNUC__) && __GNUC__ == 7
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright 2020-2024 Intel Corporation
|
||||
* Copyright 2020-2025 Intel Corporation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@ -825,7 +825,7 @@ SC_API void dnnl_brgemm_call(brgemm_kernel_info *brg_desc, const void *A,
|
||||
brgemm_batch_element_t *batch = (brgemm_batch_element_t *)_malloca(
|
||||
num * sizeof(brgemm_batch_element_t));
|
||||
#else
|
||||
#if SC_IS_DPCPP() || (defined(CLANGVERSION) && CLANGVERSION <= 3)
|
||||
#if SC_IS_DPCPP()
|
||||
std::unique_ptr<brgemm_batch_element_t[]> batch_v(
|
||||
new brgemm_batch_element_t[num]);
|
||||
brgemm_batch_element_t *batch = batch_v.get();
|
||||
@ -876,7 +876,7 @@ SC_API void dnnl_brgemm_call_postops(brgemm_kernel_info *brg_desc,
|
||||
brgemm_batch_element_t *batch = (brgemm_batch_element_t *)_malloca(
|
||||
num * sizeof(brgemm_batch_element_t));
|
||||
#else
|
||||
#if SC_IS_DPCPP() || (defined(CLANGVERSION) && CLANGVERSION <= 3)
|
||||
#if SC_IS_DPCPP()
|
||||
std::unique_ptr<brgemm_batch_element_t[]> batch_v(
|
||||
new brgemm_batch_element_t[num]);
|
||||
brgemm_batch_element_t *batch = batch_v.get();
|
||||
@ -930,7 +930,7 @@ SC_API void dnnl_brgemm_list_call(brgemm_kernel_info *brg_desc,
|
||||
brgemm_batch_element_t *batch = (brgemm_batch_element_t *)_malloca(
|
||||
batch_num * sizeof(brgemm_batch_element_t));
|
||||
#else
|
||||
#if SC_IS_DPCPP() || (defined(CLANGVERSION) && CLANGVERSION <= 3)
|
||||
#if SC_IS_DPCPP()
|
||||
std::unique_ptr<brgemm_batch_element_t[]> batch_v(
|
||||
new brgemm_batch_element_t[batch_num]);
|
||||
brgemm_batch_element_t *batch = batch_v.get();
|
||||
@ -995,7 +995,7 @@ SC_API void dnnl_brgemm_list_call_postops(brgemm_kernel_info *brg_desc,
|
||||
brgemm_batch_element_t *batch = (brgemm_batch_element_t *)_malloca(
|
||||
batch_num * sizeof(brgemm_batch_element_t));
|
||||
#else
|
||||
#if SC_IS_DPCPP() || (defined(CLANGVERSION) && CLANGVERSION <= 3)
|
||||
#if SC_IS_DPCPP()
|
||||
std::unique_ptr<brgemm_batch_element_t[]> batch_v(
|
||||
new brgemm_batch_element_t[batch_num]);
|
||||
brgemm_batch_element_t *batch = batch_v.get();
|
||||
@ -1087,7 +1087,7 @@ SC_API int dnnl_brgemm_init_update(const void *A, const void *B, void *C,
|
||||
brgemm_batch_element_t *batch = (brgemm_batch_element_t *)_malloca(
|
||||
batch_num * sizeof(brgemm_batch_element_t));
|
||||
#else
|
||||
#if SC_IS_DPCPP() || (defined(CLANGVERSION) && CLANGVERSION <= 3)
|
||||
#if SC_IS_DPCPP()
|
||||
std::unique_ptr<brgemm_batch_element_t[]> batch_v(
|
||||
new brgemm_batch_element_t[batch_num]);
|
||||
brgemm_batch_element_t *batch = batch_v.get();
|
||||
@ -1154,7 +1154,7 @@ SC_API int dnnl_brgemm_update(const void *A, const void *B, void *C, int num,
|
||||
brgemm_batch_element_t *batch = (brgemm_batch_element_t *)_malloca(
|
||||
batch_num * sizeof(brgemm_batch_element_t));
|
||||
#else
|
||||
#if SC_IS_DPCPP() || (defined(CLANGVERSION) && CLANGVERSION <= 3)
|
||||
#if SC_IS_DPCPP()
|
||||
std::unique_ptr<brgemm_batch_element_t[]> batch_v(
|
||||
new brgemm_batch_element_t[batch_num]);
|
||||
brgemm_batch_element_t *batch = batch_v.get();
|
||||
@ -1214,7 +1214,7 @@ static int dnnl_brgemm_list_update_func(const void **A_list,
|
||||
brgemm_batch_element_t *batch = (brgemm_batch_element_t *)_malloca(
|
||||
batch_num * sizeof(brgemm_batch_element_t));
|
||||
#else
|
||||
#if SC_IS_DPCPP() || (defined(CLANGVERSION) && CLANGVERSION <= 3)
|
||||
#if SC_IS_DPCPP()
|
||||
std::unique_ptr<brgemm_batch_element_t[]> batch_v(
|
||||
new brgemm_batch_element_t[batch_num]);
|
||||
brgemm_batch_element_t *batch = batch_v.get();
|
||||
|
Reference in New Issue
Block a user