build: cmake: unsupported compilers cleanup

This commit is contained in:
Palicki, Stefan
2025-01-02 15:35:03 -08:00
committed by Stefan Palicki
parent c7827a8f3a
commit 664b22abd5
33 changed files with 171 additions and 277 deletions

View File

@ -295,9 +295,9 @@ Runtime-specific dependencies:
### Validated Configurations
CPU engine was validated on RedHat\* Enterprise Linux 8 with
* GNU Compiler Collection 5.4, 6.1, 7.2, 8.1, 9.1, 11.1, 11.3
* Clang\* 7.1, 8.0, 9.0, 14.0.6
x86-64 CPU engine was validated on RedHat\* Enterprise Linux 8 with
* GNU Compiler Collection 8.5, 9.5, 11.1, 11.3
* Clang\* 11.0, 14.0.6
* [Intel oneAPI DPC++/C++ Compiler] 2024.0
on Windows Server\* 2019 with
@ -307,16 +307,19 @@ on Windows Server\* 2019 with
on macOS 11 (Big Sur) with
* Apple LLVM version 13.0
on Ubuntu 20.04 AArch64 with
* GNU Compiler Collection 7.0, 8.0, 9.0, 10.0
* Clang\* 9.0, 17.0
AArch64 CPU engine was validated on Ubuntu 22.04 with
* GNU Compiler Collection 10.0, 13.0
* Clang\* 17.0
* [Arm Compiler for Linux] 24.04
* [Arm Compute Library (ACL)] built for armv8-a arch, latest stable version
available at the time of release
on macOS 14 (Sonoma) with
* Apple LLVM version 15.0
GPU engine was validated on Ubuntu\* 22.04 with
* GNU Compiler Collection 7.2, 8.1, and 9.1
* Clang 7.1, 8.0, 9.0
* GNU Compiler Collection 8.5 and 9.5
* Clang\* 11.0
* [Intel oneAPI DPC++/C++ Compiler] 2024.0
* [Intel Software for General Purpose GPU capabilities] latest stable version
available at the time of release

View File

@ -1,5 +1,5 @@
#===============================================================================
# Copyright 2017-2024 Intel Corporation
# Copyright 2017-2025 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -52,13 +52,6 @@ if(NOT OpenMP_CXX_FOUND AND MSVC AND CMAKE_CXX_COMPILER_ID MATCHES "(Clang|Intel
# The ICX driver doesn't link OpenMP library even if `/Qopenmp`
# was specified.
set(OpenMP_FLAGS "/Qopenmp -Xclang --dependent-lib=libiomp5md")
else()
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "10.0")
# version < 10 can't pass cl-style `/openmp` flag
set(OpenMP_FLAGS "-Xclang -fopenmp")
# ... and requires explicit linking against omp library
set(OpenMP_CXX_LIBRARIES "libomp.lib")
endif()
endif()
set(OpenMP_C_FLAGS ${OpenMP_FLAGS})
set(OpenMP_CXX_FLAGS ${OpenMP_FLAGS})

View File

@ -1,5 +1,5 @@
#===============================================================================
# Copyright 2017-2024 Intel Corporation
# Copyright 2017-2025 Intel Corporation
# Copyright 2021 FUJITSU LIMITED
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -30,14 +30,10 @@ macro(sdl_unix_common_ccxx_flags var)
append(${var} "-fPIC -Wformat -Wformat-security")
endmacro()
macro(sdl_gnu_common_ccxx_flags var gnu_version)
if(${gnu_version} VERSION_LESS 4.9)
append(${var} "-fstack-protector-all")
else()
append(${var} "-fstack-protector-strong")
if(NOT (${gnu_version} VERSION_LESS 8.0) AND (DNNL_TARGET_ARCH STREQUAL "X64"))
append(${var} "-fcf-protection=full")
endif()
macro(sdl_gnu_common_ccxx_flags var)
append(${var} "-fstack-protector-strong")
if(DNNL_TARGET_ARCH STREQUAL "X64")
append(${var} "-fcf-protection=full")
endif()
endmacro()
@ -65,7 +61,7 @@ if(UNIX)
append(ONEDNN_SDL_COMPILER_FLAGS "-D_FORTIFY_SOURCE=2")
endif()
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
sdl_gnu_common_ccxx_flags(ONEDNN_SDL_COMPILER_FLAGS CMAKE_CXX_COMPILER_VERSION)
sdl_gnu_common_ccxx_flags(ONEDNN_SDL_COMPILER_FLAGS)
sdl_gnu_src_ccxx_flags(CMAKE_SRC_CCXX_FLAGS)
sdl_gnu_example_ccxx_flags(CMAKE_EXAMPLE_CCXX_FLAGS)
elseif(CMAKE_CXX_COMPILER_ID MATCHES "(Apple)?[Cc]lang")

View File

@ -1,5 +1,5 @@
#===============================================================================
# Copyright 2019-2024 Intel Corporation
# Copyright 2019-2025 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -36,11 +36,7 @@ if("${DNNL_CODE_COVERAGE}" STREQUAL "GCOV")
message(FATAL_ERROR "GCOV not found in path")
endif()
if(CMAKE_CXX_COMPILER_ID MATCHES "(Apple)?[Cc]lang")
if("${CMAKE_CXX_COMPILER_VERSION}" VERSION_LESS 3)
message(FATAL_ERROR "Clang version must be 3.0.0 or greater! Aborting...")
endif()
elseif(NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
if(NOT CMAKE_CXX_COMPILER_ID MATCHES "(Apple)?[Cc]lang|GNU")
message(FATAL_ERROR "Unsupported compiler: ${CMAKE_CXX_COMPILER_ID}")
endif()

View File

@ -1,5 +1,5 @@
#===============================================================================
# Copyright 2021-2024 Intel Corporation
# Copyright 2021-2025 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -78,7 +78,7 @@ if(DPCPP_HOST_COMPILER_KIND MATCHES "^(GNU|CLANG)$")
if(DPCPP_HOST_COMPILER_KIND STREQUAL "GNU")
platform_gnu_nowarn_ccxx_flags(DPCPP_CXX_NOWARN_FLAGS ${DPCPP_HOST_COMPILER_MAJOR_VER}.${DPCPP_HOST_COMPILER_MINOR_VER})
sdl_gnu_common_ccxx_flags(DPCPP_HOST_COMPILER_OPTS DPCPP_HOST_COMPILER_VER)
sdl_gnu_common_ccxx_flags(DPCPP_HOST_COMPILER_OPTS)
sdl_gnu_src_ccxx_flags(DPCPP_SRC_CXX_FLAGS)
sdl_gnu_example_ccxx_flags(DPCPP_EXAMPLE_CXX_FLAGS)

View File

@ -1,5 +1,5 @@
#===============================================================================
# Copyright 2024 Intel Corporation
# Copyright 2024-2025 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -98,13 +98,13 @@ message(STATUS "Host compiler version: ${DPCPP_HOST_COMPILER_MAJOR_VER}.${DPCPP_
# Check the version of the provided host compiler.
if(DPCPP_HOST_COMPILER_KIND STREQUAL "GNU")
if((DPCPP_HOST_COMPILER_MAJOR_VER LESS 7) OR (DPCPP_HOST_COMPILER_MAJOR_VER EQUAL 7 AND DPCPP_HOST_COMPILER_MINOR_VER LESS 4))
message(FATAL_ERROR "The minimum version of ${DPCPP_HOST_COMPILER_KIND} host compiler is 7.4.")
endif()
endif()
if(DPCPP_HOST_COMPILER_KIND STREQUAL "CLANG")
if(DPCPP_HOST_COMPILER_MAJOR_VER LESS 8)
message(FATAL_ERROR "The minimum version of ${DPCPP_HOST_COMPILER_KIND} host compiler is 8.0.")
endif()
endif()
if(DPCPP_HOST_COMPILER_KIND STREQUAL "CLANG")
if(DPCPP_HOST_COMPILER_MAJOR_VER LESS 11)
message(FATAL_ERROR "The minimum version of ${DPCPP_HOST_COMPILER_KIND} host compiler is 11.0.")
endif()
endif()

View File

@ -362,12 +362,6 @@ elseif(UNIX OR MINGW)
append(CMAKE_CCXX_FLAGS "-Wno-ignored-attributes")
endif()
# XXX: Suppress an erroneous warning of nested lambda visibility
# exceeding that of the containing class (GCC Bugzilla - Bug 80947).
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8 AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 6.0)
append(CMAKE_CCXX_FLAGS "-Wno-attributes")
endif()
if(DNNL_TARGET_ARCH STREQUAL "AARCH64")
if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
set(DEF_ARCH_OPT_FLAGS "-O3")
@ -486,10 +480,5 @@ if (DNNL_TARGET_ARCH STREQUAL "RV64")
message(STATUS "DNNL_RISCV_USE_RVV_INTRINSICS: ${DNNL_RISCV_USE_RVV_INTRINSICS}")
endif()
# Old compiler versions do not support warnings available on newer compilers.
if(CMAKE_CXX_COMPILER_ID MATCHES "(Apple)?[Cc]lang" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8.0.0)
append(CMAKE_CCXX_FLAGS "-Wno-unknown-warning-option")
endif()
append(CMAKE_C_FLAGS "${CMAKE_CCXX_FLAGS}")
append(CMAKE_CXX_FLAGS "${CMAKE_CCXX_FLAGS}")

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2020-2024 Intel Corporation
* Copyright 2020-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -17,11 +17,6 @@
#ifndef COMPILER_WORKAROUNDS_HPP
#define COMPILER_WORKAROUNDS_HPP
#if (defined __GNUC__) && (!defined(__INTEL_COMPILER)) \
&& (!defined(__INTEL_LLVM_COMPILER)) && (!defined(__clang__major__))
#define NEED_GCC_WA_CHECK 1
#endif
// Workaround 01: clang.
//
// Clang has an issue [1] with `#pragma omp simd` that might lead to segfault.
@ -32,7 +27,7 @@
// vectorization for clang altogether for now.
//
// [1] https://bugs.llvm.org/show_bug.cgi?id=48104
#if (defined __clang_major__) && (__clang_major__ >= 6)
#if (defined __clang_major__) && (__clang_major__ < 13)
#define CLANG_WA_01_SAFE_TO_USE_OMP_SIMD 0
#else
#define CLANG_WA_01_SAFE_TO_USE_OMP_SIMD 1
@ -40,40 +35,15 @@
// Workaround 02: clang.
//
// Clang 6+ generates incorrect code with OMP_SIMD in some particular cases.
// Clang generates incorrect code with OMP_SIMD in some particular cases.
// Unlike CLANG_WA_01_SAFE_TO_USE_OMP_SIMD, the issue happens even with -O3.
#if (defined __clang_major__) && (__clang_major__ >= 6)
#if (defined __clang_major__) && (__clang_major__ < 13)
#define CLANG_WA_02_SAFE_TO_USE_OMP_SIMD 0
#else
#define CLANG_WA_02_SAFE_TO_USE_OMP_SIMD 1
#endif
// Workaround 03: GCC
//
// For very large functions with too much control flow (i.e. if, switch, goto
// statements), GCC 7 may struggle to perform optimizations based on tree
// dominator (i.e. -ftree-dominator-opts, which is enabled with O1), thereby
// producing an internal compiler error (ICE). Specifically, it seems that the
// jump threading optimization is the culprit, which cannot be disabled on its
// own. There is no reliable way to reproduce the ICE, therefore it is not clear
// which __GCC_MINOR__ version fixes issue.
#if (defined NEED_GCC_WA_CHECK) && (__GNUC__ == 7)
#define GCC_WA_NO_TREE_DOMINATOR_OPTS 1
#else
#define GCC_WA_NO_TREE_DOMINATOR_OPTS 0
#endif
// Workaround 05: GCC
//
// NOTE: inside lambda, type cast variables captured by reference using
// either c-like "(type)var" or functional "type(var)" notation in order
// to avoid gcc7 bug with c++14 standard
// (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83204).
#if (defined NEED_GCC_WA_CHECK) && (__GNUC__ <= 7)
#define GCC_WA_LAMBDA_C_CAST
#endif
// Workaround 05: c++17 vs c++20
// Workaround 03: MSVC c++17 vs c++20
//
// C++17/20 are contradictory wrt capturing this and using default '=' capture.
// - C++17 and before have to return a warning for the [=, this] capture as

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2017-2023 Intel Corporation
* Copyright 2017-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -192,18 +192,6 @@ inline int dnnl_get_current_num_threads() {
#define PRAGMA_OMP_SIMD(...) PRAGMA_MACRO(CHAIN2(omp, simd __VA_ARGS__))
#endif // defined(_MSC_VER) && !defined(__clang__) && !defined(__INTEL_COMPILER)
// process simdlen; it is supported for Clang >= 3.9; ICC >= 17.0; GCC >= 6.1
// No support on Windows.
#if (defined(__clang_major__) \
&& (__clang_major__ < 3 \
|| (__clang_major__ == 3 && __clang_minor__ < 9))) \
|| (defined(__INTEL_COMPILER) && __INTEL_COMPILER < 1700) \
|| (!defined(__INTEL_COMPILER) && !defined(__clang__) \
&& (defined(_MSC_VER) || __GNUC__ < 6 \
|| (__GNUC__ == 6 && __GNUC_MINOR__ < 1)))
#define simdlen(x)
#endif // long simdlen if
namespace dnnl {
namespace impl {

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2024 Intel Corporation
* Copyright 2024-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -42,10 +42,7 @@ struct runtime_scales_t;
const runtime_scales_t &default_runtime_scale();
struct runtime_scales_t : public c_compatible {
// Clang-3.8.1 raises an error for a default initialization of a const
// object. Const runtime_scales_t object is used as default_scales.
// runtime_scales_t() = default;
runtime_scales_t() {}
runtime_scales_t() = default;
runtime_scales_t &operator=(const runtime_scales_t &rhs) {
mask_ = rhs.mask_;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2019-2024 Intel Corporation
* Copyright 2019-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -271,11 +271,7 @@ status_t gemm_bf16_matmul_t<dst_type>::execute_ref(
const dim_t acc_stride = gemm_based::get_scratchpad_block_elements(
batch, M, N, use_single_gemm_call, nthr);
#ifdef GCC_WA_LAMBDA_C_CAST
parallel(nthr, [= WA_THIS_COPY_CAPTURE, &st](int ithr, int nthr) {
#else
parallel(nthr, [&](int ithr, int nthr) {
#endif
size_t t_work_start {0}, t_work_end {0};
balance211(work_amount, nthr, ithr, t_work_start, t_work_end);

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2019-2024 Intel Corporation
* Copyright 2019-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -297,11 +297,7 @@ status_t gemm_x8s8s32x_matmul_t::execute_ref(const exec_ctx_t &ctx) const {
bool postops_in_matmul = need_post_processing(pd(), dst_zero_point_f32);
assert(IMPLICATION(postops_in_matmul, params.has_pp_kernel_));
#ifdef GCC_WA_LAMBDA_C_CAST
parallel(nthr, [= WA_THIS_COPY_CAPTURE, &st](int ithr, int nthr) {
#else
parallel(nthr, [&](int ithr, int nthr) {
#endif
size_t t_work_start {0}, t_work_end {0};
balance211(work_amount, nthr, ithr, t_work_start, t_work_end);

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2020-2021 Intel Corporation
* Copyright 2020-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -66,7 +66,7 @@ protected:
void copy_n(int unroll_n, Xbyak::Label &epilogue);
void copy_ns(int unroll_n, Xbyak::Label &epilogue);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
private:
static const int offset_a_ = 0, offset_b_ = 0;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2020-2021 Intel Corporation
* Copyright 2020-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -31,7 +31,7 @@ public:
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_amx_gemm_kern);
protected:
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
const int typea;
const int typeb;
const int typec;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2019-2024 Intel Corporation
* Copyright 2019-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -26,7 +26,7 @@ namespace x64 {
class jit_avx512_core_s16_48x8_copy_an_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_48x8_copy_an_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx512_core_s16_48x8_copy_an_kern();
@ -34,7 +34,7 @@ public:
class jit_avx512_core_s16_48x8_copy_at_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_48x8_copy_at_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx512_core_s16_48x8_copy_at_kern();
@ -42,7 +42,7 @@ public:
class jit_avx512_core_s16_48x8_copy_bn_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_48x8_copy_bn_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx512_core_s16_48x8_copy_bn_kern();
@ -50,7 +50,7 @@ public:
class jit_avx512_core_s16_48x8_copy_bt_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_48x8_copy_bt_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx512_core_s16_48x8_copy_bt_kern();
@ -58,7 +58,7 @@ public:
class jit_avx512_core_s16_24x8_copy_an_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_24x8_copy_an_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx512_core_s16_24x8_copy_an_kern();
@ -66,7 +66,7 @@ public:
class jit_avx512_core_s16_24x8_copy_at_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_24x8_copy_at_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx512_core_s16_24x8_copy_at_kern();
@ -74,7 +74,7 @@ public:
class jit_avx512_core_s16_24x8_copy_bn_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_24x8_copy_bn_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx512_core_s16_24x8_copy_bn_kern();
@ -82,7 +82,7 @@ public:
class jit_avx512_core_s16_24x8_copy_bt_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_24x8_copy_bt_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx512_core_s16_24x8_copy_bt_kern();

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2019-2021 Intel Corporation
* Copyright 2019-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -58,7 +58,7 @@ protected:
void innerloop(int unroll_m, int unroll_n);
void outerloop(int unroll_x, int unroll_y, Xbyak::Label *&outerloop_label);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
private:
static const int UNROLL_N_ = 8;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2020-2021 Intel Corporation
* Copyright 2020-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -52,7 +52,7 @@ protected:
void outerloop(int unroll_y, Xbyak::Label *&cur_outerloop_label,
Xbyak::Label *&outerloop_end_label);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
private:
static const int UNROLL_M_ = 64;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2019-2024 Intel Corporation
* Copyright 2019-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -26,7 +26,7 @@ namespace x64 {
class jit_avx512_core_f32_copy_an_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_f32_copy_an_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx512_core_f32_copy_an_kern();
@ -34,11 +34,10 @@ public:
class jit_avx512_core_f32_copy_at_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_f32_copy_at_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
void generate_part1(const Xbyak::Label &, const Xbyak::Label &,
const Xbyak::Label &, const Xbyak::Label &) ATTRIBUTE_OPTIMIZE;
void generate_part2(Xbyak::Label, Xbyak::Label, Xbyak::Label,
Xbyak::Label) ATTRIBUTE_OPTIMIZE;
const Xbyak::Label &, const Xbyak::Label &);
void generate_part2(Xbyak::Label, Xbyak::Label, Xbyak::Label, Xbyak::Label);
public:
jit_avx512_core_f32_copy_at_kern();
@ -46,7 +45,7 @@ public:
class jit_avx512_core_f32_copy_bn_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_f32_copy_bn_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx512_core_f32_copy_bn_kern();
@ -54,7 +53,7 @@ public:
class jit_avx512_core_f32_copy_bt_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_f32_copy_bt_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx512_core_f32_copy_bt_kern();
@ -62,7 +61,7 @@ public:
class jit_avx2_f32_copy_an_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_f32_copy_an_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx2_f32_copy_an_kern();
@ -70,7 +69,7 @@ public:
class jit_avx2_f32_copy_at_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_f32_copy_at_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx2_f32_copy_at_kern();
@ -78,7 +77,7 @@ public:
class jit_avx2_f32_copy_bn_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_f32_copy_bn_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx2_f32_copy_bn_kern();
@ -86,7 +85,7 @@ public:
class jit_avx2_f32_copy_bt_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_f32_copy_bt_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx2_f32_copy_bt_kern();
@ -94,7 +93,7 @@ public:
class jit_avx_f32_copy_an_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_f32_copy_an_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx_f32_copy_an_kern();
@ -102,7 +101,7 @@ public:
class jit_avx_f32_copy_at_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_f32_copy_at_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx_f32_copy_at_kern();
@ -110,7 +109,7 @@ public:
class jit_avx_f32_copy_bn_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_f32_copy_bn_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx_f32_copy_bn_kern();
@ -118,7 +117,7 @@ public:
class jit_avx_f32_copy_bt_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_f32_copy_bt_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx_f32_copy_bt_kern();
@ -126,11 +125,10 @@ public:
class jit_avx_kernel_b0_sgemm_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_b0_sgemm_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
void generate_part1(const Xbyak::Label &, const Xbyak::Label &,
const Xbyak::Label &, const Xbyak::Label &) ATTRIBUTE_OPTIMIZE;
void generate_part2(Xbyak::Label, Xbyak::Label, Xbyak::Label,
Xbyak::Label) ATTRIBUTE_OPTIMIZE;
const Xbyak::Label &, const Xbyak::Label &);
void generate_part2(Xbyak::Label, Xbyak::Label, Xbyak::Label, Xbyak::Label);
public:
jit_avx_kernel_b0_sgemm_kern();
@ -138,11 +136,10 @@ public:
class jit_avx_kernel_sgemm_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_sgemm_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate_part1(const Xbyak::Label &, const Xbyak::Label &,
const Xbyak::Label &) ATTRIBUTE_OPTIMIZE;
void generate_part2(
Xbyak::Label &, Xbyak::Label &, Xbyak::Label &) ATTRIBUTE_OPTIMIZE;
void generate() override;
void generate_part1(
const Xbyak::Label &, const Xbyak::Label &, const Xbyak::Label &);
void generate_part2(Xbyak::Label &, Xbyak::Label &, Xbyak::Label &);
public:
jit_avx_kernel_sgemm_kern();
@ -150,7 +147,7 @@ public:
class jit_sse41_f32_copy_an_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_f32_copy_an_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_sse41_f32_copy_an_kern();
@ -158,7 +155,7 @@ public:
class jit_sse41_f32_copy_at_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_f32_copy_at_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_sse41_f32_copy_at_kern();
@ -166,7 +163,7 @@ public:
class jit_sse41_f32_copy_bn_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_f32_copy_bn_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_sse41_f32_copy_bn_kern();
@ -174,7 +171,7 @@ public:
class jit_sse41_f32_copy_bt_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_f32_copy_bt_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_sse41_f32_copy_bt_kern();
@ -182,7 +179,7 @@ public:
class jit_sse41_kernel_b0_sgemm_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_b0_sgemm_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_sse41_kernel_b0_sgemm_kern();
@ -190,7 +187,7 @@ public:
class jit_sse41_kernel_sgemm_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_sgemm_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_sse41_kernel_sgemm_kern();

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2019-2024 Intel Corporation
* Copyright 2019-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -79,7 +79,7 @@ class jit_avx2_kernel_sgemm_kern : public jit_generator {
void prefetchA_beforeFMA(int um, int un, int k_idx, int n_idx, int m_idx);
void prefetchC_afterBload(int um, int un, int k_idx, int n_idx);
void prefetchC_beforeKloop(int um);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
template <typename T_reg, typename T_desta, typename T_srca>
void loadA_betweenFMAs(int um, int un, int k_idx, int n_idx, int m_idx,

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2017-2024 Intel Corporation
* Copyright 2017-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -70,7 +70,7 @@ struct xbyak_gemm_t : public jit_generator {
, hasBias(hasBias)
, STACK_K_CAPACITY((STACK_CAPACITY - 256) / (SIZE * UNROLL_M)) {}
void generate() override ATTRIBUTE_OPTIMIZE {
void generate() override {
using namespace Xbyak;
bool isBeta0 = (beta == 0.0);
bool isBetaN = (!isBeta0 && beta != 1.0);

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2020-2024 Intel Corporation
* Copyright 2020-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -49,7 +49,7 @@ struct xbyak_gemm_smalln_tn_t : public jit_generator {
xbyak_gemm_smalln_tn_t(int N, float beta, float alpha)
: jit_generator(jit_name()), N(N), beta(beta), alpha(alpha) {}
void generate() override ATTRIBUTE_OPTIMIZE {
void generate() override {
using namespace Xbyak;
/**
* numN = 1 : 16 rows of A, 1x16 accumulators

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2016-2024 Intel Corporation
* Copyright 2016-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -1966,7 +1966,7 @@ struct xbyak_gemm_t : public jit_generator {
if (hasBias) { add(BIAS, unroll_m * SIZE); }
}
void generate() override ATTRIBUTE_OPTIMIZE {
void generate() override {
assert(IMPLICATION(!is_avx2, mayiuse(avx)));
preamble();

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2019-2021 Intel Corporation
* Copyright 2019-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -40,7 +40,7 @@ protected:
void innerloop(int unroll_m, int unroll_n);
void outerloop(int unroll_x, int unroll_y, Xbyak::Label *&outerloop_label);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
private:
static const int M_UNROLL_ = 16;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2021 Intel Corporation
* Copyright 2021-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -61,7 +61,7 @@ protected:
Xbyak::Label *&cur_outerloop_label,
Xbyak::Label *&outerloop_end_label);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
private:
static const int max_um_vecs_ = 16;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2019-2021 Intel Corporation
* Copyright 2019-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -38,7 +38,7 @@ protected:
void innerloop(int unroll_m, int unroll_n);
void outerloop(int unroll_x, int unroll_y, Xbyak::Label *&outerloop_label);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
private:
static const int M_UNROLL_ = 8;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2019-2024 Intel Corporation
* Copyright 2019-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -32,7 +32,7 @@ namespace x64 {
class jit_avx512_core_u8_copy_an_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_an_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx512_core_u8_copy_an_kern();
@ -40,7 +40,7 @@ public:
class jit_avx512_core_u8_copy_at_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_at_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx512_core_u8_copy_at_kern();
@ -48,7 +48,7 @@ public:
class jit_avx512_core_u8_copy_bn_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_bn_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
bool s8_case;
public:
@ -57,7 +57,7 @@ public:
class jit_avx512_core_u8_copy_bt_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_bt_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
bool s8_case;
public:
@ -66,7 +66,7 @@ public:
class jit_avx512_core_u8_copy_sum_an_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_sum_an_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx512_core_u8_copy_sum_an_kern();
@ -74,7 +74,7 @@ public:
class jit_avx512_core_u8_copy_sum_at_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_sum_at_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx512_core_u8_copy_sum_at_kern();
@ -82,7 +82,7 @@ public:
class jit_avx512_core_u8_copy_sum_bn_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_sum_bn_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
bool s8_case;
public:
@ -91,7 +91,7 @@ public:
class jit_avx512_core_u8_copy_sum_bt_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_sum_bt_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
bool s8_case;
public:
@ -100,7 +100,7 @@ public:
class jit_avx2_vnni_u8_copy_an_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_vnni_u8_copy_an_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx2_vnni_u8_copy_an_kern();
@ -108,7 +108,7 @@ public:
class jit_avx2_vnni_u8_copy_at_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_vnni_u8_copy_at_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx2_vnni_u8_copy_at_kern();
@ -116,7 +116,7 @@ public:
class jit_avx2_vnni_u8_copy_bn_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_vnni_u8_copy_bn_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx2_vnni_u8_copy_bn_kern();
@ -124,7 +124,7 @@ public:
class jit_avx2_vnni_u8_copy_bt_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_vnni_u8_copy_bt_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx2_vnni_u8_copy_bt_kern();
@ -132,7 +132,7 @@ public:
class jit_avx2_vnni_u8_copy_sum_an_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_vnni_u8_copy_sum_an_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx2_vnni_u8_copy_sum_an_kern();
@ -140,7 +140,7 @@ public:
class jit_avx2_vnni_u8_copy_sum_at_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_vnni_u8_copy_sum_at_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx2_vnni_u8_copy_sum_at_kern();
@ -148,7 +148,7 @@ public:
class jit_avx2_vnni_u8_copy_sum_bn_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_vnni_u8_copy_sum_bn_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx2_vnni_u8_copy_sum_bn_kern();
@ -156,7 +156,7 @@ public:
class jit_avx2_vnni_u8_copy_sum_bt_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_vnni_u8_copy_sum_bt_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx2_vnni_u8_copy_sum_bt_kern();
@ -164,7 +164,7 @@ public:
class jit_avx2_u8_copy_an_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_u8_copy_an_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx2_u8_copy_an_kern();
@ -172,7 +172,7 @@ public:
class jit_avx2_u8_copy_at_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_u8_copy_at_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx2_u8_copy_at_kern();
@ -180,7 +180,7 @@ public:
class jit_avx2_u8_copy_bn_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_u8_copy_bn_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx2_u8_copy_bn_kern();
@ -188,7 +188,7 @@ public:
class jit_avx2_u8_copy_bt_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_u8_copy_bt_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx2_u8_copy_bt_kern();
@ -196,7 +196,7 @@ public:
class jit_avx2_u8_copy_sum_an_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_u8_copy_sum_an_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx2_u8_copy_sum_an_kern();
@ -204,7 +204,7 @@ public:
class jit_avx2_u8_copy_sum_at_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_u8_copy_sum_at_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx2_u8_copy_sum_at_kern();
@ -212,7 +212,7 @@ public:
class jit_avx2_u8_copy_sum_bn_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_u8_copy_sum_bn_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx2_u8_copy_sum_bn_kern();
@ -220,7 +220,7 @@ public:
class jit_avx2_u8_copy_sum_bt_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_u8_copy_sum_bt_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx2_u8_copy_sum_bt_kern();
@ -228,7 +228,7 @@ public:
class jit_avx_u8_copy_an_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_u8_copy_an_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx_u8_copy_an_kern();
@ -236,7 +236,7 @@ public:
class jit_avx_u8_copy_at_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_u8_copy_at_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx_u8_copy_at_kern();
@ -244,7 +244,7 @@ public:
class jit_avx_u8_copy_bn_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_u8_copy_bn_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx_u8_copy_bn_kern();
@ -252,7 +252,7 @@ public:
class jit_avx_u8_copy_bt_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_u8_copy_bt_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx_u8_copy_bt_kern();
@ -260,7 +260,7 @@ public:
class jit_avx_u8_copy_sum_an_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_u8_copy_sum_an_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx_u8_copy_sum_an_kern();
@ -268,7 +268,7 @@ public:
class jit_avx_u8_copy_sum_at_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_u8_copy_sum_at_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx_u8_copy_sum_at_kern();
@ -276,7 +276,7 @@ public:
class jit_avx_u8_copy_sum_bn_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_u8_copy_sum_bn_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx_u8_copy_sum_bn_kern();
@ -284,7 +284,7 @@ public:
class jit_avx_u8_copy_sum_bt_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_u8_copy_sum_bt_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx_u8_copy_sum_bt_kern();
@ -292,7 +292,7 @@ public:
class jit_avx_kernel_b0_gemm_s8u8s32_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_b0_gemm_s8u8s32_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx_kernel_b0_gemm_s8u8s32_kern();
@ -300,7 +300,7 @@ public:
class jit_avx_kernel_b0_b_gemm_s8u8s32_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_b0_b_gemm_s8u8s32_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx_kernel_b0_b_gemm_s8u8s32_kern();
@ -308,7 +308,7 @@ public:
class jit_avx_kernel_b0_r_gemm_s8u8s32_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_b0_r_gemm_s8u8s32_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx_kernel_b0_r_gemm_s8u8s32_kern();
@ -316,7 +316,7 @@ public:
class jit_avx_kernel_b0_c_gemm_s8u8s32_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_b0_c_gemm_s8u8s32_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx_kernel_b0_c_gemm_s8u8s32_kern();
@ -324,7 +324,7 @@ public:
class jit_avx_kernel_gemm_s8u8s32_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_gemm_s8u8s32_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx_kernel_gemm_s8u8s32_kern();
@ -332,7 +332,7 @@ public:
class jit_avx_kernel_b_gemm_s8u8s32_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_b_gemm_s8u8s32_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx_kernel_b_gemm_s8u8s32_kern();
@ -340,7 +340,7 @@ public:
class jit_avx_kernel_r_gemm_s8u8s32_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_r_gemm_s8u8s32_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx_kernel_r_gemm_s8u8s32_kern();
@ -348,7 +348,7 @@ public:
class jit_avx_kernel_c_gemm_s8u8s32_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_kernel_c_gemm_s8u8s32_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_avx_kernel_c_gemm_s8u8s32_kern();
@ -356,7 +356,7 @@ public:
class jit_sse41_u8_copy_an_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_u8_copy_an_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_sse41_u8_copy_an_kern();
@ -364,7 +364,7 @@ public:
class jit_sse41_u8_copy_at_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_u8_copy_at_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_sse41_u8_copy_at_kern();
@ -372,7 +372,7 @@ public:
class jit_sse41_u8_copy_bn_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_u8_copy_bn_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_sse41_u8_copy_bn_kern();
@ -380,7 +380,7 @@ public:
class jit_sse41_u8_copy_bt_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_u8_copy_bt_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_sse41_u8_copy_bt_kern();
@ -388,7 +388,7 @@ public:
class jit_sse41_u8_copy_sum_an_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_u8_copy_sum_an_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_sse41_u8_copy_sum_an_kern();
@ -396,7 +396,7 @@ public:
class jit_sse41_u8_copy_sum_at_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_u8_copy_sum_at_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_sse41_u8_copy_sum_at_kern();
@ -404,7 +404,7 @@ public:
class jit_sse41_u8_copy_sum_bn_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_u8_copy_sum_bn_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_sse41_u8_copy_sum_bn_kern();
@ -412,7 +412,7 @@ public:
class jit_sse41_u8_copy_sum_bt_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_u8_copy_sum_bt_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_sse41_u8_copy_sum_bt_kern();
@ -420,7 +420,7 @@ public:
class jit_sse41_kernel_b0_gemm_s8u8s32_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_b0_gemm_s8u8s32_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_sse41_kernel_b0_gemm_s8u8s32_kern();
@ -428,7 +428,7 @@ public:
class jit_sse41_kernel_b0_b_gemm_s8u8s32_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_b0_b_gemm_s8u8s32_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_sse41_kernel_b0_b_gemm_s8u8s32_kern();
@ -436,7 +436,7 @@ public:
class jit_sse41_kernel_b0_r_gemm_s8u8s32_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_b0_r_gemm_s8u8s32_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_sse41_kernel_b0_r_gemm_s8u8s32_kern();
@ -444,7 +444,7 @@ public:
class jit_sse41_kernel_b0_c_gemm_s8u8s32_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_b0_c_gemm_s8u8s32_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_sse41_kernel_b0_c_gemm_s8u8s32_kern();
@ -452,7 +452,7 @@ public:
class jit_sse41_kernel_gemm_s8u8s32_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_gemm_s8u8s32_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_sse41_kernel_gemm_s8u8s32_kern();
@ -460,7 +460,7 @@ public:
class jit_sse41_kernel_b_gemm_s8u8s32_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_b_gemm_s8u8s32_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_sse41_kernel_b_gemm_s8u8s32_kern();
@ -468,7 +468,7 @@ public:
class jit_sse41_kernel_r_gemm_s8u8s32_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_r_gemm_s8u8s32_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_sse41_kernel_r_gemm_s8u8s32_kern();
@ -476,7 +476,7 @@ public:
class jit_sse41_kernel_c_gemm_s8u8s32_kern : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse41_kernel_c_gemm_s8u8s32_kern);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
public:
jit_sse41_kernel_c_gemm_s8u8s32_kern();

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2018-2021 Intel Corporation
* Copyright 2018-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -51,7 +51,7 @@ protected:
void innerloop(int unroll_m, int unroll_n);
void outerloop(int unroll_x, int unroll_y, Xbyak::Label *&outerloop_label);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
private:
static const int IGEMM_UNROLL_N_ = 4;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2018-2021 Intel Corporation
* Copyright 2018-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -50,7 +50,7 @@ protected:
void innerloop(int unroll_m, int unroll_n);
void outerloop(int unroll_x, int unroll_y, Xbyak::Label *&outerloop_label);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
private:
static const int IGEMM_UNROLL_M_ = 48;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2019-2024 Intel Corporation
* Copyright 2019-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -44,7 +44,7 @@ class jit_avx512_core_gemv_s8x8s32_kern : public jit_generator {
void shuffle_and_add(
Xbyak::Zmm, Xbyak::Zmm, Xbyak::Zmm, Xbyak::Zmm, Xbyak::Zmm);
void update_c(int, Xbyak::Reg64, int, Xbyak::Opmask);
void generate() override ATTRIBUTE_OPTIMIZE;
void generate() override;
cpu_isa_t isa = isa_undef;
ver_t ver = ver_t::undef;

View File

@ -39,12 +39,6 @@
#define OFFSET_SHADOWSPACE 0x28
#endif
#if GCC_WA_NO_TREE_DOMINATOR_OPTS
#define ATTRIBUTE_OPTIMIZE __attribute__((optimize("no-tree-dominator-opts")))
#else
#define ATTRIBUTE_OPTIMIZE
#endif
#define DECLARE_CPU_JIT_AUX_FUNCTIONS(gen_name) \
const char *name() const override { return STRINGIFY(gen_name); } \
const char *source_file() const override { return __FILE__; } \

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2019-2023 Intel Corporation
* Copyright 2019-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -436,13 +436,6 @@ status_t jit_xf16_sum_t<src_data_type, dst_data_type, isa>::execute(
const dim_t num_blocks = nelems / num_elems_in_block;
const dim_t tail = nelems % num_elems_in_block;
#if defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ == 8 \
&& __GNUC_PATCHLEVEL__ == 3
// GCC issues a false positive warning 'array subscript is above array bounds'
// with gcc 4.8.3 + -march=native option, so disable it for now
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Warray-bounds"
#endif
parallel(0, [&](const int ithr, const int nthr) {
dim_t start {0}, end {0};
balance211(num_blocks, nthr, ithr, start, end);
@ -477,10 +470,6 @@ status_t jit_xf16_sum_t<src_data_type, dst_data_type, isa>::execute(
(*kernel_)(&arg);
}
});
#if defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ == 8 \
&& __GNUC_PATCHLEVEL__ == 3
#pragma GCC diagnostic pop
#endif
return status::success;
}

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2021-2024 Intel Corporation
* Copyright 2021-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -25,12 +25,6 @@
#include <vector>
#include <initializer_list>
#if defined(__GNUC__) && __GNUC__ == 7
// GCC 7.x issues a false positive warning 'array subscript is above array bounds'
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Warray-bounds"
#endif
namespace dnnl {
namespace impl {
namespace gpu {
@ -849,7 +843,3 @@ bank_conflict_allocation_t bank_conflict_allocation_t::create(
} // namespace gpu
} // namespace impl
} // namespace dnnl
#if defined(__GNUC__) && __GNUC__ == 7
#pragma GCC diagnostic pop
#endif

View File

@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2020-2024 Intel Corporation
* Copyright 2020-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -825,7 +825,7 @@ SC_API void dnnl_brgemm_call(brgemm_kernel_info *brg_desc, const void *A,
brgemm_batch_element_t *batch = (brgemm_batch_element_t *)_malloca(
num * sizeof(brgemm_batch_element_t));
#else
#if SC_IS_DPCPP() || (defined(CLANGVERSION) && CLANGVERSION <= 3)
#if SC_IS_DPCPP()
std::unique_ptr<brgemm_batch_element_t[]> batch_v(
new brgemm_batch_element_t[num]);
brgemm_batch_element_t *batch = batch_v.get();
@ -876,7 +876,7 @@ SC_API void dnnl_brgemm_call_postops(brgemm_kernel_info *brg_desc,
brgemm_batch_element_t *batch = (brgemm_batch_element_t *)_malloca(
num * sizeof(brgemm_batch_element_t));
#else
#if SC_IS_DPCPP() || (defined(CLANGVERSION) && CLANGVERSION <= 3)
#if SC_IS_DPCPP()
std::unique_ptr<brgemm_batch_element_t[]> batch_v(
new brgemm_batch_element_t[num]);
brgemm_batch_element_t *batch = batch_v.get();
@ -930,7 +930,7 @@ SC_API void dnnl_brgemm_list_call(brgemm_kernel_info *brg_desc,
brgemm_batch_element_t *batch = (brgemm_batch_element_t *)_malloca(
batch_num * sizeof(brgemm_batch_element_t));
#else
#if SC_IS_DPCPP() || (defined(CLANGVERSION) && CLANGVERSION <= 3)
#if SC_IS_DPCPP()
std::unique_ptr<brgemm_batch_element_t[]> batch_v(
new brgemm_batch_element_t[batch_num]);
brgemm_batch_element_t *batch = batch_v.get();
@ -995,7 +995,7 @@ SC_API void dnnl_brgemm_list_call_postops(brgemm_kernel_info *brg_desc,
brgemm_batch_element_t *batch = (brgemm_batch_element_t *)_malloca(
batch_num * sizeof(brgemm_batch_element_t));
#else
#if SC_IS_DPCPP() || (defined(CLANGVERSION) && CLANGVERSION <= 3)
#if SC_IS_DPCPP()
std::unique_ptr<brgemm_batch_element_t[]> batch_v(
new brgemm_batch_element_t[batch_num]);
brgemm_batch_element_t *batch = batch_v.get();
@ -1087,7 +1087,7 @@ SC_API int dnnl_brgemm_init_update(const void *A, const void *B, void *C,
brgemm_batch_element_t *batch = (brgemm_batch_element_t *)_malloca(
batch_num * sizeof(brgemm_batch_element_t));
#else
#if SC_IS_DPCPP() || (defined(CLANGVERSION) && CLANGVERSION <= 3)
#if SC_IS_DPCPP()
std::unique_ptr<brgemm_batch_element_t[]> batch_v(
new brgemm_batch_element_t[batch_num]);
brgemm_batch_element_t *batch = batch_v.get();
@ -1154,7 +1154,7 @@ SC_API int dnnl_brgemm_update(const void *A, const void *B, void *C, int num,
brgemm_batch_element_t *batch = (brgemm_batch_element_t *)_malloca(
batch_num * sizeof(brgemm_batch_element_t));
#else
#if SC_IS_DPCPP() || (defined(CLANGVERSION) && CLANGVERSION <= 3)
#if SC_IS_DPCPP()
std::unique_ptr<brgemm_batch_element_t[]> batch_v(
new brgemm_batch_element_t[batch_num]);
brgemm_batch_element_t *batch = batch_v.get();
@ -1214,7 +1214,7 @@ static int dnnl_brgemm_list_update_func(const void **A_list,
brgemm_batch_element_t *batch = (brgemm_batch_element_t *)_malloca(
batch_num * sizeof(brgemm_batch_element_t));
#else
#if SC_IS_DPCPP() || (defined(CLANGVERSION) && CLANGVERSION <= 3)
#if SC_IS_DPCPP()
std::unique_ptr<brgemm_batch_element_t[]> batch_v(
new brgemm_batch_element_t[batch_num]);
brgemm_batch_element_t *batch = batch_v.get();