mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
A number of OSS PRs were reverted because they introduced new signed-unsigned comparison warnings, which are treated as errors in some internal builds.
It is unclear how those selective rules are applied, but this PR removes `-Wno-sign-compare` from the PyTorch codebase.
The only tricky part in this PR is making sure that non-ASCII character detection works for both signed and unsigned chars here:
6e3d51b08a/torch/csrc/jit/serialization/python_print.cpp (L926)
Exclude several files from sign-compare if flash attention is used, due to the violation in cutlass, to be fixed by https://github.com/NVIDIA/cutlass/pull/869
Do not try to fix sign compare violations in caffe2 codebase
Pull Request resolved: https://github.com/pytorch/pytorch/pull/96723
Approved by: https://github.com/albanD
114 lines
2.5 KiB
C++
114 lines
2.5 KiB
C++
#include "caffe2/perfkernels/common.h"
|
|
|
|
#include <algorithm>
|
|
#include <cstdint>
|
|
#include <cmath>
|
|
|
|
namespace caffe2 {
|
|
|
|
namespace {

// Scalar reference implementation of the Box-Cox transform.
//
// Reads N * D consecutive elements from `data_ptr` and writes N * D consecutive
// elements to `output_ptr`. The element at position (i, j), with i in [0, N)
// and j in [0, D), uses the per-column parameters lambda1_ptr[j] and
// lambda2_ptr[j]:
//
//   shifted = max(x + lambda2, 1e-6)
//   y       = log(shifted)                                  if lambda1 == 0
//   y       = (1 / lambda1) * pow(shifted, lambda1)
//             - (1 / lambda1)                               otherwise
//
// Parameters:
//   N, D         - outer and inner extents of the iteration.
//   data_ptr     - input values (N * D elements are read).
//   lambda1_ptr  - D exponent parameters; declared __restrict.
//   lambda2_ptr  - D shift parameters; declared __restrict.
//   output_ptr   - destination (N * D elements are written).
//
// Nothing is read or written when N == 0 or D == 0.
template <typename T>
void BoxCoxNaive(
    std::size_t N,
    std::size_t D,
    const T* data_ptr,
    const T* __restrict lambda1_ptr,
    const T* __restrict lambda2_ptr,
    T* output_ptr) {
  // Lower clamp applied to the shifted input before log/pow are evaluated.
  constexpr T k_eps = static_cast<T>(1e-6);

  for (std::size_t i = 0; i < N; i++) {
    // The data/output cursors advance once per element, so they walk the
    // whole N * D range while the lambda index restarts at 0 for every i.
    for (std::size_t j = 0; j < D; j++, data_ptr++, output_ptr++) {
      const T lambda1_v = lambda1_ptr[j];
      const T lambda2_v = lambda2_ptr[j];
      const T shifted = std::max(*data_ptr + lambda2_v, k_eps);
      if (lambda1_v == 0) {
        *output_ptr = std::log(shifted);
      } else {
        const T inv_lambda1 = 1 / lambda1_v;
        // Named `powered` (not `pow`) so the local does not hide the function.
        const T powered = std::pow(shifted, lambda1_v);
        *output_ptr = inv_lambda1 * powered - inv_lambda1;
      }
    }
  }
}

} // namespace
|
|
|
|
#if defined(CAFFE2_PERF_WITH_AVX2) && defined(CAFFE2_PERF_USE_MKL)
namespace details {
// Declaration of the AVX2/FMA Box-Cox kernel. No definition is visible in this
// file, so it has to be supplied by another translation unit.
// NOTE(review): presumably defined in the AVX2-specific perfkernels source;
// confirm against the build.
template <typename T>
void compute_batch_box_cox__avx2_fma(
    std::size_t N,
    std::size_t D,
    std::size_t block_size,
    const T* data_ptr,
    const T* __restrict lambda1_ptr,
    const T* __restrict lambda2_ptr,
    T* output_ptr);

// Explicit instantiation declarations: the float and double specializations
// are not implicitly instantiated here and are expected to be found at link
// time.
extern template void compute_batch_box_cox__avx2_fma<float>(
    std::size_t N,
    std::size_t D,
    std::size_t block_size,
    const float* data_ptr,
    const float* __restrict lambda1_ptr,
    const float* __restrict lambda2_ptr,
    float* output_ptr);

extern template void compute_batch_box_cox__avx2_fma<double>(
    std::size_t N,
    std::size_t D,
    std::size_t block_size,
    const double* data_ptr,
    const double* __restrict lambda1_ptr,
    const double* __restrict lambda2_ptr,
    double* output_ptr);
} // namespace details
#endif
|
|
|
|
template <typename T>
|
|
void compute_batch_box_cox(
|
|
std::size_t N,
|
|
std::size_t D,
|
|
std::size_t block_size,
|
|
const T* data,
|
|
const T* lambda1_data,
|
|
const T* lambda2_data,
|
|
T* output_data) {
|
|
#ifdef CAFFE2_PERF_WITH_AVX2
|
|
AVX2_FMA_DO(
|
|
details::compute_batch_box_cox,
|
|
N,
|
|
D,
|
|
block_size,
|
|
data,
|
|
lambda1_data,
|
|
lambda2_data,
|
|
output_data);
|
|
#endif
|
|
BoxCoxNaive<T>(N, D, data, lambda1_data, lambda2_data, output_data);
|
|
}
|
|
|
|
// Explicit instantiation definition: emits compute_batch_box_cox<float> in
// this translation unit.
template void compute_batch_box_cox<float>(
    std::size_t N,
    std::size_t D,
    std::size_t block_size,
    const float* data,
    const float* lambda1_data,
    const float* lambda2_data,
    float* output_data);
|
|
|
|
// Explicit instantiation definition: emits compute_batch_box_cox<double> in
// this translation unit.
template void compute_batch_box_cox<double>(
    std::size_t N,
    std::size_t D,
    std::size_t block_size,
    const double* data,
    const double* lambda1_data,
    const double* lambda2_data,
    double* output_data);
|
|
|
|
} // namespace caffe2
|