diff --git a/aten/src/ATen/native/cpu/AtomicAddFloat.h b/aten/src/ATen/native/cpu/AtomicAddFloat.h index db96e1760de5..6ea873dbc517 100644 --- a/aten/src/ATen/native/cpu/AtomicAddFloat.h +++ b/aten/src/ATen/native/cpu/AtomicAddFloat.h @@ -1,14 +1,20 @@ #ifndef ATOMIC_ADD_FLOAT #define ATOMIC_ADD_FLOAT -#if (defined(__x86_64__) || defined(__i386__)) +#if (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__)) #include -#else -#define _mm_pause() #endif #include +#ifdef __aarch64__ +static __inline void _mm_pause() { + __asm__ __volatile__("yield;" : : : "memory"); +} +#elif !defined(__x86_64__) && !defined(__i386__) /* x86 keeps the _mm_pause supplied by the include above; only remaining targets get the no-op */ +#define _mm_pause() +#endif + static inline void cpu_atomic_add_float(float* dst, float fvalue) { typedef union { diff --git a/aten/src/ATen/native/quantized/cpu/kernels/QuantizedOpKernels.cpp b/aten/src/ATen/native/quantized/cpu/kernels/QuantizedOpKernels.cpp index b7d8a89f4349..3c6cb0b4b917 100644 --- a/aten/src/ATen/native/quantized/cpu/kernels/QuantizedOpKernels.cpp +++ b/aten/src/ATen/native/quantized/cpu/kernels/QuantizedOpKernels.cpp @@ -3790,8 +3790,8 @@ void quantize_tensor_per_channel_impl( // channels_last contig. // If axis = 0 and channels_last contig, implementation for channels // first (NCHW) works. 
- for (const auto b : c10::irange(batches)) { - for (const auto e : c10::irange(elements_per_channel)) { + for (const auto b C10_UNUSED : c10::irange(batches)) { + for (const auto e C10_UNUSED : c10::irange(elements_per_channel)) { uint32_t c = 0; while (c + 8 < channels) { const int16x8_t vzero_point = vld1q_s16(&zero_points_int16t[c]); @@ -3821,7 +3821,7 @@ void quantize_tensor_per_channel_impl( } } } else { - for (const auto b : c10::irange(batches)) { + for (const auto b C10_UNUSED : c10::irange(batches)) { for (const auto c : c10::irange(channels)) { uint32_t e = 0; const int16x8_t vzero_point = vdupq_n_s16(zero_points_int16t[c]); diff --git a/defs.bzl b/defs.bzl index a40ee4b9091d..1ccc35f1e839 100644 --- a/defs.bzl +++ b/defs.bzl @@ -1,10 +1,3 @@ -def get_sleef_arch_deps(): - return [ - ("x86_64", [ - "third-party//sleef:sleef", - ]), - ] - def get_blas_gomp_arch_deps(): return [ ("x86_64", [