C10_UNUSED to [[maybe_unused]] (#6357) (#138364)

Summary: Pull Request resolved: https://github.com/pytorch/executorch/pull/6357

Pull Request resolved: https://github.com/pytorch/pytorch/pull/138364
Approved by: https://github.com/Skylion007, https://github.com/eqy
Author: Richard Barnes
Date: 2024-10-19 13:17:43 +00:00
Committed by: PyTorch MergeBot
Parent: 2f6a70bfea
Commit: fddabc6e0b
139 changed files with 834 additions and 690 deletions
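
The change itself is mechanical: the c10-specific C10_UNUSED macro, a wrapper used to mark entities as deliberately unused, is replaced throughout by the standard C++17 [[maybe_unused]] attribute, which applies uniformly to function parameters, local variables, type aliases, and namespace-scope objects. A minimal before/after sketch follows, assuming a C++17 compiler; DEMO_UNUSED is an illustrative stand-in for C10_UNUSED, not c10's actual definition.

// Before/after sketch for an intentionally unused parameter (C++17).
// DEMO_UNUSED stands in for C10_UNUSED and is not c10's real definition.
#include <iostream>

#if defined(__GNUC__) || defined(__clang__)
#define DEMO_UNUSED __attribute__((__unused__))
#else
#define DEMO_UNUSED
#endif

// Old style: compiler-specific macro in the parameter list.
void log_old(const char* msg, DEMO_UNUSED bool sort_strides = false) {
  std::cout << msg << '\n'; // sort_strides is accepted but never read
}

// New style: standard attribute, no macro indirection.
void log_new(const char* msg, [[maybe_unused]] bool sort_strides = false) {
  std::cout << msg << '\n';
}

int main() {
  log_old("old style");
  log_new("new style");
  return 0;
}

The same substitution covers every hunk below; only the placement differs (parameter lists, loop variables, alias declarations, and macro bodies).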

View File

@ -68,7 +68,7 @@ struct strided_tensor_iter_fixed {
strided_tensor_iter_fixed(strided_tensor_iter_fixed&&) = default;
strided_tensor_iter_fixed(
Tensor& tensor,
C10_UNUSED bool sort_strides = false)
[[maybe_unused]] bool sort_strides = false)
: data_(tensor.data_ptr<T>()) {
std::memset(counter_, 0, sizeof(int64_t) * N);
if (tensor.dim() > 0) {

View File

@ -63,38 +63,38 @@ TORCH_API void record_kernel_function_dtype(std::string name);
} \
} while (0)
#define AT_PRIVATE_CASE_TYPE_USING_HINT(enum_type, HINT, ...) \
case enum_type: { \
AT_PRIVATE_CHECK_SELECTIVE_BUILD(enum_type); \
using HINT C10_UNUSED = c10::impl::ScalarTypeToCPPTypeT<enum_type>; \
return __VA_ARGS__(); \
#define AT_PRIVATE_CASE_TYPE_USING_HINT(enum_type, HINT, ...) \
case enum_type: { \
AT_PRIVATE_CHECK_SELECTIVE_BUILD(enum_type); \
using HINT [[maybe_unused]] = c10::impl::ScalarTypeToCPPTypeT<enum_type>; \
return __VA_ARGS__(); \
}
#define AT_DISPATCH_CASE(enum_type, ...) \
AT_PRIVATE_CASE_TYPE_USING_HINT(enum_type, scalar_t, __VA_ARGS__)
#define AT_DISPATCH_CASE_QINT(enum_type, scalar_type, ...) \
case enum_type: { \
AT_PRIVATE_CHECK_SELECTIVE_BUILD(enum_type); \
using scalar_t = scalar_type; \
using underlying_t C10_UNUSED = typename scalar_t::underlying; \
C10_UNUSED const auto& SCALAR_TYPE = enum_type; \
C10_UNUSED const auto& UNDERLYING_TYPE = toUnderlying(enum_type); \
return __VA_ARGS__(); \
#define AT_DISPATCH_CASE_QINT(enum_type, scalar_type, ...) \
case enum_type: { \
AT_PRIVATE_CHECK_SELECTIVE_BUILD(enum_type); \
using scalar_t = scalar_type; \
using underlying_t [[maybe_unused]] = typename scalar_t::underlying; \
[[maybe_unused]] const auto& SCALAR_TYPE = enum_type; \
[[maybe_unused]] const auto& UNDERLYING_TYPE = toUnderlying(enum_type); \
return __VA_ARGS__(); \
}
#define AT_QINT_SUB_BYTE_PRIVATE_CASE_TYPE( \
enum_type, scalar_type, bitwidth, qmin, qmax, ...) \
case enum_type: { \
AT_PRIVATE_CHECK_SELECTIVE_BUILD(enum_type); \
using scalar_t = scalar_type; \
using underlying_t C10_UNUSED = typename scalar_t::underlying; \
C10_UNUSED const auto& SCALAR_TYPE = enum_type; \
C10_UNUSED const auto& UNDERLYING_TYPE = toUnderlying(enum_type); \
C10_UNUSED int bit_width = bitwidth; \
C10_UNUSED int64_t quant_min = qmin; \
C10_UNUSED int64_t quant_max = qmax; \
return __VA_ARGS__(); \
#define AT_QINT_SUB_BYTE_PRIVATE_CASE_TYPE( \
enum_type, scalar_type, bitwidth, qmin, qmax, ...) \
case enum_type: { \
AT_PRIVATE_CHECK_SELECTIVE_BUILD(enum_type); \
using scalar_t = scalar_type; \
using underlying_t [[maybe_unused]] = typename scalar_t::underlying; \
[[maybe_unused]] const auto& SCALAR_TYPE = enum_type; \
[[maybe_unused]] const auto& UNDERLYING_TYPE = toUnderlying(enum_type); \
[[maybe_unused]] int bit_width = bitwidth; \
[[maybe_unused]] int64_t quant_min = qmin; \
[[maybe_unused]] int64_t quant_max = qmax; \
return __VA_ARGS__(); \
}
namespace detail {
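
The dispatch macros above attach the attribute both to the HINT type alias (using HINT [[maybe_unused]] = ...) and to locals such as SCALAR_TYPE that a given case body may never reference. Below is a stripped-down sketch of that shape, assuming C++17; the ScalarKind enum, DEMO_CASE macro, and dispatch() helper are illustrative stand-ins, not the AT_DISPATCH machinery.

// Stripped-down sketch of a dispatch-style macro whose helper alias and
// locals are marked [[maybe_unused]]; illustrative only.
#include <cstdint>
#include <iostream>

enum class ScalarKind { Int, Float };

#define DEMO_CASE(enum_value, cpp_type, ...)            \
  case enum_value: {                                    \
    using scalar_t [[maybe_unused]] = cpp_type;         \
    [[maybe_unused]] const auto& SELECTED = enum_value; \
    return __VA_ARGS__();                               \
  }

template <typename F>
double dispatch(ScalarKind kind, F&& body) {
  switch (kind) {
    DEMO_CASE(ScalarKind::Int, int64_t, body)
    DEMO_CASE(ScalarKind::Float, double, body)
  }
  return 0.0;
}

int main() {
  // This body ignores scalar_t and SELECTED; no unused-variable or
  // unused-local-typedef warnings are emitted for the expanded cases.
  std::cout << dispatch(ScalarKind::Float, [] { return 2.5; }) << '\n';
  return 0;
}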

View File

@ -638,7 +638,7 @@ void replace_(const ITensorListRef functional_tensor, ITensorListRef other) {
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(functional_tensor.size() == other.size());
auto functional_tensor_it = functional_tensor.begin();
auto other_it = other.begin();
for (C10_UNUSED const auto i : c10::irange(functional_tensor.size())) {
for ([[maybe_unused]] const auto i : c10::irange(functional_tensor.size())) {
replace_(*functional_tensor_it++, *other_it++);
}
}
@ -655,7 +655,7 @@ void propagate_xla_data(const ITensorListRef functional_tensor, ITensorListRef o
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(functional_tensor.size() == other.size());
auto functional_tensor_it = functional_tensor.begin();
auto other_it = other.begin();
for (C10_UNUSED const auto i : c10::irange(functional_tensor.size())) {
for ([[maybe_unused]] const auto i : c10::irange(functional_tensor.size())) {
propagate_xla_data(*functional_tensor_it++, *other_it++);
}
}
@ -670,7 +670,7 @@ void propagate_xla_data_direct(const ITensorListRef tensor,
ITensorListRef other) {
auto tensor_it = tensor.begin();
auto other_it = other.begin();
for (C10_UNUSED const auto i : c10::irange(tensor.size())) {
for ([[maybe_unused]] const auto i : c10::irange(tensor.size())) {
propagate_xla_data_direct(*tensor_it++, *other_it++);
}
}

View File

@ -205,7 +205,7 @@ struct CodeTemplate {
// or trailing newlines. It's the responsibility of the calling function
// to indent correctly in the context.
void emitIndent(std::ostream& out, size_t indent) const {
for (C10_UNUSED const auto i : c10::irange(indent)) {
for ([[maybe_unused]] const auto i : c10::irange(indent)) {
out << " ";
}
}
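
Most of the remaining hunks are this loop pattern: the value yielded by c10::irange only controls how many times the body runs and is never read, so it is now tagged [[maybe_unused]]. A self-contained sketch follows, assuming only standard C++17; the irange() helper is a stand-in for c10::irange, not its implementation.

// Minimal sketch of the unused-loop-index idiom (C++17).
// irange() here is an illustrative stand-in for c10::irange.
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <numeric>
#include <vector>

static std::vector<int64_t> irange(int64_t n) {
  std::vector<int64_t> v(static_cast<std::size_t>(n));
  std::iota(v.begin(), v.end(), int64_t{0});
  return v;
}

static void emit_indent(std::ostream& out, int64_t indent) {
  // The index i is never read; the attribute keeps -Wunused-variable quiet.
  for ([[maybe_unused]] const auto i : irange(indent)) {
    out << "  ";
  }
}

int main() {
  emit_indent(std::cout, 3);
  std::cout << "indented\n";
  return 0;
}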

View File

@ -153,7 +153,7 @@ static std::tuple<double, int> __printFormat(std::ostream& stream, const Tensor&
static void __printIndent(std::ostream &stream, int64_t indent)
{
for (C10_UNUSED const auto i : c10::irange(indent)) {
for ([[maybe_unused]] const auto i : c10::irange(indent)) {
stream << " ";
}
}

View File

@ -390,7 +390,8 @@ struct TORCH_API ClassType : public NamedType {
std::string doc_string = "",
std::vector<std::string> unresolved_class_attributes = {});
std::string annotation_str_impl(C10_UNUSED const TypePrinter& printer = nullptr) const override {
std::string annotation_str_impl(
[[maybe_unused]] const TypePrinter& printer = nullptr) const override {
const auto& n = name().value();
return n.qualifiedName();
}

View File

@ -376,8 +376,8 @@ DynamicTypePtr ivalue::TupleTypeFactory<c10::DynamicType>::fallback(
return nullptr;
}
TORCH_API TupleTypePtr
ivalue::TupleTypeFactory<TupleType>::fallback(C10_UNUSED const Type& type) {
TORCH_API TupleTypePtr ivalue::TupleTypeFactory<TupleType>::fallback(
[[maybe_unused]] const Type& type) {
#ifdef C10_MOBILE
return nullptr;
#else
@ -398,5 +398,4 @@ ivalue::TupleTypeFactory<TupleType>::fallback(C10_UNUSED const Type& type) {
#endif
}
} // namespace c10

View File

@ -88,7 +88,7 @@ struct TORCH_API EnumType : public NamedType {
cu_(std::move(cu)) {}
std::string annotation_str_impl(
C10_UNUSED const TypePrinter& printer = nullptr) const override {
[[maybe_unused]] const TypePrinter& printer = nullptr) const override {
const auto& n = name().value();
return n.qualifiedName();
}

View File

@ -56,7 +56,7 @@ struct TORCH_API Function {
virtual c10::intrusive_ptr<c10::ivalue::Future> runAsync(
Stack& /*stack*/,
// NOLINTNEXTLINE(performance-unnecessary-value-param)
C10_UNUSED TaskLauncher taskLauncher = at::launch) {
[[maybe_unused]] TaskLauncher taskLauncher = at::launch) {
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(false);
return {};
}

View File

@ -1278,7 +1278,8 @@ struct TORCH_API NumberType : public Type {
protected:
NumberType(TypeKind kind = TypeKind::NumberType) : Type(kind) {}
std::string annotation_str_impl(C10_UNUSED const TypePrinter& printer = nullptr) const override {
std::string annotation_str_impl(
[[maybe_unused]] const TypePrinter& printer = nullptr) const override {
return "number"; // technically not a valid python type, but
// we need to use it when parsing back in annotations
// for implicit conversions
@ -1305,7 +1306,8 @@ struct TORCH_API FloatType : public NumberType {
private:
FloatType() : NumberType(TypeKind::FloatType) {}
std::string annotation_str_impl(C10_UNUSED const TypePrinter& printer = nullptr) const override {
std::string annotation_str_impl(
[[maybe_unused]] const TypePrinter& printer = nullptr) const override {
return "float";
}
};
@ -1330,7 +1332,8 @@ struct TORCH_API ComplexType : public NumberType {
private:
ComplexType() : NumberType(TypeKind::ComplexType) {}
std::string annotation_str_impl(C10_UNUSED const TypePrinter& printer = nullptr) const override {
std::string annotation_str_impl(
[[maybe_unused]] const TypePrinter& printer = nullptr) const override {
return "complex";
}
};
@ -1419,7 +1422,8 @@ struct TORCH_API IntType : public NumberType {
private:
IntType() : NumberType(TypeKind::IntType) {}
std::string annotation_str_impl(C10_UNUSED const TypePrinter& printer = nullptr) const override {
std::string annotation_str_impl(
[[maybe_unused]] const TypePrinter& printer = nullptr) const override {
return "int";
}
};
@ -1453,7 +1457,8 @@ struct TORCH_API StringType : public Type {
// we only use "str" (not "string") in both FunctionSchema and script
return annotation_str();
}
std::string annotation_str_impl(C10_UNUSED const TypePrinter& printer = nullptr) const override {
std::string annotation_str_impl(
[[maybe_unused]] const TypePrinter& printer = nullptr) const override {
return "str";
}
static const TypeKind Kind = TypeKind::StringType;
@ -1473,7 +1478,8 @@ struct TORCH_API StorageType : public Type {
std::string str() const override {
return annotation_str();
}
std::string annotation_str_impl(C10_UNUSED const TypePrinter& printer = nullptr) const override {
std::string annotation_str_impl(
[[maybe_unused]] const TypePrinter& printer = nullptr) const override {
return "Storage";
}
static const TypeKind Kind = TypeKind::StorageType;
@ -1508,7 +1514,8 @@ struct TORCH_API FunctionType : public NamedType {
private:
FunctionType(torch::jit::Function* function);
std::string annotation_str_impl(C10_UNUSED const TypePrinter& printer = nullptr) const override {
std::string annotation_str_impl(
[[maybe_unused]] const TypePrinter& printer = nullptr) const override {
const auto& n = name().value();
return n.qualifiedName();
}
@ -2199,7 +2206,8 @@ struct TORCH_API InterfaceType : public NamedType {
const InterfaceType& rhs,
std::ostream* why_not);
std::string annotation_str_impl(C10_UNUSED const TypePrinter& printer = nullptr) const override {
std::string annotation_str_impl(
[[maybe_unused]] const TypePrinter& printer = nullptr) const override {
return name()->qualifiedName();
}

View File

@ -1121,7 +1121,7 @@ inline void convert(const src_T *src, dst_T *dst, int64_t n) {
#ifndef _MSC_VER
# pragma unroll
#endif
for (C10_UNUSED const auto i : c10::irange(n)) {
for ([[maybe_unused]] const auto i : c10::irange(n)) {
*dst = c10::convert<dst_T>(c10::load(src));
src++;
dst++;

View File

@ -157,18 +157,19 @@ constexpr const char* _cusolver_backend_suggestion = \
// See NOTE [ USE OF NVRTC AND DRIVER API ].
#if !defined(USE_ROCM)
#define AT_CUDA_DRIVER_CHECK(EXPR) \
do { \
CUresult __err = EXPR; \
if (__err != CUDA_SUCCESS) { \
const char* err_str; \
C10_UNUSED CUresult get_error_str_err = at::globalContext().getNVRTC().cuGetErrorString(__err, &err_str); \
if (get_error_str_err != CUDA_SUCCESS) { \
AT_ERROR("CUDA driver error: unknown error"); \
} else { \
AT_ERROR("CUDA driver error: ", err_str); \
} \
} \
#define AT_CUDA_DRIVER_CHECK(EXPR) \
do { \
CUresult __err = EXPR; \
if (__err != CUDA_SUCCESS) { \
const char* err_str; \
[[maybe_unused]] CUresult get_error_str_err = \
at::globalContext().getNVRTC().cuGetErrorString(__err, &err_str); \
if (get_error_str_err != CUDA_SUCCESS) { \
AT_ERROR("CUDA driver error: unknown error"); \
} else { \
AT_ERROR("CUDA driver error: ", err_str); \
} \
} \
} while (0)
#else

View File

@ -69,8 +69,12 @@ struct TORCH_API CUDAHooksInterface : AcceleratorHooksInterface {
TORCH_CHECK(false, "Cannot initialize CUDA without ATen_cuda library. ", CUDA_HELP);
}
virtual const Generator& getDefaultCUDAGenerator(C10_UNUSED DeviceIndex device_index = -1) const {
TORCH_CHECK(false, "Cannot get default CUDA generator without ATen_cuda library. ", CUDA_HELP);
virtual const Generator& getDefaultCUDAGenerator(
[[maybe_unused]] DeviceIndex device_index = -1) const {
TORCH_CHECK(
false,
"Cannot get default CUDA generator without ATen_cuda library. ",
CUDA_HELP);
}
Device getDeviceFromPtr(void* /*data*/) const override {

View File

@ -32,12 +32,15 @@ struct TORCH_API XPUHooksInterface : AcceleratorHooksInterface{
TORCH_CHECK(false, "Cannot get XPU global device index without ATen_xpu library.");
}
virtual Generator getXPUGenerator(C10_UNUSED DeviceIndex device_index = -1) const {
virtual Generator getXPUGenerator(
[[maybe_unused]] DeviceIndex device_index = -1) const {
TORCH_CHECK(false, "Cannot get XPU generator without ATen_xpu library.");
}
virtual const Generator& getDefaultXPUGenerator(C10_UNUSED DeviceIndex device_index = -1) const {
TORCH_CHECK(false, "Cannot get default XPU generator without ATen_xpu library.");
virtual const Generator& getDefaultXPUGenerator(
[[maybe_unused]] DeviceIndex device_index = -1) const {
TORCH_CHECK(
false, "Cannot get default XPU generator without ATen_xpu library.");
}
virtual DeviceIndex getNumGPUs() const {

View File

@ -135,7 +135,7 @@ static Tensor make_feature_noise(const Tensor& input) {
sizes.reserve(input.dim());
sizes.push_back(input_sizes[0]);
sizes.push_back(input_sizes[1]);
for (C10_UNUSED const auto i : c10::irange(2, input.dim())) {
for ([[maybe_unused]] const auto i : c10::irange(2, input.dim())) {
sizes.push_back(1);
}
// NB: THIS WAS CHANGED FROM THE ORIGINAL

View File

@ -1109,7 +1109,7 @@ void unpack_pivots_cpu_kernel(TensorIterator& iter, const int64_t dim_size, cons
auto* perm_ptr = data[0];
const auto* pivots_ptr = data[1];
for (C10_UNUSED const auto elem : c10::irange(nelems)) {
for ([[maybe_unused]] const auto elem : c10::irange(nelems)) {
// WARNING: linalg.lu_factor returns int32 pivots,
// this behavior could change in the future.
const auto perm_data = reinterpret_cast<int64_t*>(perm_ptr);

View File

@ -133,30 +133,50 @@ float bf16_dot_with_fp32_arith(
#endif
template <typename scalar_t>
bool scal_use_fast_path(C10_UNUSED int64_t n, C10_UNUSED int64_t incx) {
bool scal_use_fast_path(
[[maybe_unused]] int64_t n,
[[maybe_unused]] int64_t incx) {
return false;
}
template <typename scalar_t>
bool gemv_use_fast_path(C10_UNUSED char trans, C10_UNUSED int64_t m,
C10_UNUSED int64_t n, C10_UNUSED scalar_t alpha,
C10_UNUSED int64_t lda,
C10_UNUSED int64_t incx, C10_UNUSED scalar_t beta,
C10_UNUSED int64_t incy) {
bool gemv_use_fast_path(
[[maybe_unused]] char trans,
[[maybe_unused]] int64_t m,
[[maybe_unused]] int64_t n,
[[maybe_unused]] scalar_t alpha,
[[maybe_unused]] int64_t lda,
[[maybe_unused]] int64_t incx,
[[maybe_unused]] scalar_t beta,
[[maybe_unused]] int64_t incy) {
return false;
}
template <typename scalar_t>
void scal_fast_path(C10_UNUSED int *n, C10_UNUSED scalar_t *a, C10_UNUSED scalar_t *x, C10_UNUSED int *incx) {
TORCH_INTERNAL_ASSERT(false, "scal_fast_path shouldn't be called for this configuration");
void scal_fast_path(
[[maybe_unused]] int* n,
[[maybe_unused]] scalar_t* a,
[[maybe_unused]] scalar_t* x,
[[maybe_unused]] int* incx) {
TORCH_INTERNAL_ASSERT(
false, "scal_fast_path shouldn't be called for this configuration");
}
template <typename scalar_t>
void gemv_fast_path(C10_UNUSED const char *trans, C10_UNUSED const int *m, C10_UNUSED const int *n,
C10_UNUSED const scalar_t *alpha, C10_UNUSED const scalar_t *a, C10_UNUSED const int *lda,
C10_UNUSED const scalar_t *x, C10_UNUSED const int *incx, C10_UNUSED const scalar_t *beta,
C10_UNUSED scalar_t *y, C10_UNUSED const int *incy) {
TORCH_INTERNAL_ASSERT(false, "gemv_fast_path shouldn't be called for this configuration");
void gemv_fast_path(
[[maybe_unused]] const char* trans,
[[maybe_unused]] const int* m,
[[maybe_unused]] const int* n,
[[maybe_unused]] const scalar_t* alpha,
[[maybe_unused]] const scalar_t* a,
[[maybe_unused]] const int* lda,
[[maybe_unused]] const scalar_t* x,
[[maybe_unused]] const int* incx,
[[maybe_unused]] const scalar_t* beta,
[[maybe_unused]] scalar_t* y,
[[maybe_unused]] const int* incy) {
TORCH_INTERNAL_ASSERT(
false, "gemv_fast_path shouldn't be called for this configuration");
}
#define INSTANTIATE(scalar_t) \
@ -188,15 +208,32 @@ void scal_fast_path<float>(int *n, float *a, float *x, int *incx) {
}
template <>
bool gemv_use_fast_path<float>(C10_UNUSED char trans, int64_t m, int64_t n, C10_UNUSED float alpha, int64_t lda, int64_t incx, C10_UNUSED float beta, int64_t incy) {
bool gemv_use_fast_path<float>(
[[maybe_unused]] char trans,
int64_t m,
int64_t n,
[[maybe_unused]] float alpha,
int64_t lda,
int64_t incx,
[[maybe_unused]] float beta,
int64_t incy) {
auto intmax = std::numeric_limits<int>::max();
return (m <= intmax) && (n <= intmax) && (lda <= intmax) &&
(incx > 0) && (incx <= intmax) && (incy > 0) && (incy <= intmax);
}
template <>
bool gemv_use_fast_path<double>(C10_UNUSED char trans, int64_t m, int64_t n, C10_UNUSED double alpha, int64_t lda, int64_t incx, C10_UNUSED double beta, int64_t incy) {
return gemv_use_fast_path<float>(trans, m, n, (float)alpha, lda, incx, (float)beta, incy);
bool gemv_use_fast_path<double>(
[[maybe_unused]] char trans,
int64_t m,
int64_t n,
[[maybe_unused]] double alpha,
int64_t lda,
int64_t incx,
[[maybe_unused]] double beta,
int64_t incy) {
return gemv_use_fast_path<float>(
trans, m, n, (float)alpha, lda, incx, (float)beta, incy);
}
template <>
@ -220,38 +257,40 @@ INSTANTIATE(int);
INSTANTIATE(int64_t);
#if defined(__aarch64__) && !defined(C10_MOBILE)
template <>
bool scal_use_fast_path<at::Half>(C10_UNUSED int64_t n, C10_UNUSED int64_t incx) {
bool scal_use_fast_path<at::Half>(
[[maybe_unused]] int64_t n,
[[maybe_unused]] int64_t incx) {
return false;
}
template <>
bool gemv_use_fast_path<at::Half>(
C10_UNUSED char trans,
C10_UNUSED int64_t m,
C10_UNUSED int64_t n,
[[maybe_unused]] char trans,
[[maybe_unused]] int64_t m,
[[maybe_unused]] int64_t n,
at::Half alpha,
C10_UNUSED int64_t lda,
C10_UNUSED int64_t incx,
[[maybe_unused]] int64_t lda,
[[maybe_unused]] int64_t incx,
at::Half beta,
C10_UNUSED int64_t incy) {
[[maybe_unused]] int64_t incy) {
return incx == 1 && c10::detail::fp16_from_bits(alpha.x) == 1.0f &&
c10::detail::fp16_from_bits(beta.x) == 0.0f;
c10::detail::fp16_from_bits(beta.x) == 0.0f;
}
template <>
bool gemv_use_fast_path<at::BFloat16>(
C10_UNUSED char trans,
C10_UNUSED int64_t m,
C10_UNUSED int64_t n,
[[maybe_unused]] char trans,
[[maybe_unused]] int64_t m,
[[maybe_unused]] int64_t n,
at::BFloat16 alpha,
C10_UNUSED int64_t lda,
C10_UNUSED int64_t incx,
[[maybe_unused]] int64_t lda,
[[maybe_unused]] int64_t incx,
at::BFloat16 beta,
C10_UNUSED int64_t incy) {
return (trans == 'T' || trans == 't') && incx == 1 && alpha == 1.0 && beta == 0.0;
[[maybe_unused]] int64_t incy) {
return (trans == 'T' || trans == 't') && incx == 1 && alpha == 1.0 &&
beta == 0.0;
}
#ifdef __ARM_FEATURE_FP16_SCALAR_ARITHMETIC
static inline float16_t reduce(float16x4_t x) {
auto sum = vpadd_f16(x, x);

View File

@ -34,7 +34,7 @@ Tensor make_feature_noise(const Tensor& input) {
sizes.reserve(input.dim());
sizes.push_back(input_sizes[0]);
sizes.push_back(input_sizes[1]);
for (C10_UNUSED const auto i : c10::irange(2, input.dim())) {
for ([[maybe_unused]] const auto i : c10::irange(2, input.dim())) {
sizes.push_back(1);
}
return input.new_empty_symint(sizes);

View File

@ -13,9 +13,11 @@ static void invalid_mask(const Tensor & self, int64_t idx, const Tensor & mask,
" does not match the shape of the indexed tensor ", self.sizes(), " at index ", idx);
}
C10_UNUSED static std::vector<Tensor> expandTensors(const Tensor & self, IOptTensorListRef indices) {
// If indices come in as ByteTensor or BoolTensor (masks), expand them into the equivalent indexing by LongTensors
[[maybe_unused]] static std::vector<Tensor> expandTensors(
const Tensor& self,
IOptTensorListRef indices) {
// If indices come in as ByteTensor or BoolTensor (masks), expand them into
// the equivalent indexing by LongTensors
std::vector<Tensor> result;
for (const auto& index_opt : indices) {
if (!index_opt.has_value()) {
@ -48,7 +50,9 @@ C10_UNUSED static std::vector<Tensor> expandTensors(const Tensor & self, IOptTen
return result;
}
C10_UNUSED static void checkIndexTensorTypes(IOptTensorListRef indices, bool allow_int=false) {
[[maybe_unused]] static void checkIndexTensorTypes(
IOptTensorListRef indices,
bool allow_int = false) {
for (const auto& tensor : indices) {
if (tensor.has_value() && tensor->defined()) {
auto scalarType = tensor->scalar_type();
@ -83,7 +87,7 @@ inline torch::List<std::optional<Tensor>> toListOfOptionalTensors(ArrayRef<IValu
return result;
}
C10_UNUSED static bool hasContiguousSubspace(TensorList tl) {
[[maybe_unused]] static bool hasContiguousSubspace(TensorList tl) {
// true if all the non-null tensors are adjacent
auto isDefined = [](const Tensor & tensor){ return tensor.defined(); };
auto isNull = [](const Tensor & tensor){ return !tensor.defined(); };
@ -93,15 +97,15 @@ C10_UNUSED static bool hasContiguousSubspace(TensorList tl) {
return it == stop.base();
}
// Transposes the tensor and indices together so that all the non-null indices
// index the first k dimensions of the tensor. Returns the transposed tensor
// and the reordered indices. For example:
// transposeToFront(tensor, {nullptr, a, nullptr, b})
// returns
// tensor.permute([1, 3, 0, 2]), {a, b, nullptr, nullptr}
C10_UNUSED static std::tuple<Tensor, std::vector<Tensor>>
transposeToFront(const Tensor& self, TensorList indices) {
[[maybe_unused]] static std::tuple<Tensor, std::vector<Tensor>> transposeToFront(
const Tensor& self,
TensorList indices) {
std::vector<int64_t> dims;
std::vector<Tensor> transposedIndices;
dims.reserve(self.dim());

View File

@ -241,8 +241,9 @@ void batch_iterator_with_broadcasting(const Tensor& a, const Tensor& b, const fu
auto* b_batch_idx_ptr = data[0];
auto* a_batch_idx_ptr = data[1];
for (C10_UNUSED const auto elem : c10::irange(nelems)) {
auto b_curr_linear_batch_idx = *reinterpret_cast<int64_t*>(b_batch_idx_ptr);
for ([[maybe_unused]] const auto elem : c10::irange(nelems)) {
auto b_curr_linear_batch_idx =
*reinterpret_cast<int64_t*>(b_batch_idx_ptr);
auto a_curr_linear_batch_idx = *reinterpret_cast<int64_t*>(a_batch_idx_ptr);
check_if_copy_needed_for_a(a_curr_linear_batch_idx);

View File

@ -76,7 +76,7 @@ static void multilabel_margin_loss_forward_out_frame(
accscalar_t sum = 0;
for (C10_UNUSED const auto t : c10::irange(nframe)) {
for ([[maybe_unused]] const auto t : c10::irange(nframe)) {
sum += multilabel_margin_loss_forward_inner_sum_cpu(
input_data, target_data, is_target_data, dim);
@ -180,7 +180,7 @@ static void multilabel_margin_loss_backward_out_frame(
reduction == Reduction::Mean ? 1. / (nframe * dim) : 1. / dim);
scalar_t* grad_input_row_data = grad_input.mutable_data_ptr<scalar_t>();
for (C10_UNUSED const auto t : c10::irange(nframe)) {
for ([[maybe_unused]] const auto t : c10::irange(nframe)) {
for (const auto dt : c10::irange(dim)) {
int64_t target_idx = target_data[dt];
if (target_idx < 0) {

View File

@ -1204,22 +1204,30 @@ scalar_t calc_igamma(scalar_t a, scalar_t x) {
}
template <>
C10_UNUSED inline c10::BFloat16 calc_igamma<c10::BFloat16>(c10::BFloat16 a, c10::BFloat16 x) {
[[maybe_unused]] inline c10::BFloat16 calc_igamma<c10::BFloat16>(
c10::BFloat16 a,
c10::BFloat16 x) {
return calc_igamma<float>(float(a), float(x));
}
template <>
C10_UNUSED inline c10::Half calc_igamma<c10::Half>(c10::Half a, c10::Half x) {
[[maybe_unused]] inline c10::Half calc_igamma<c10::Half>(
c10::Half a,
c10::Half x) {
return calc_igamma<float>(float(a), float(x));
}
template <>
C10_UNUSED inline c10::BFloat16 calc_igammac<c10::BFloat16>(c10::BFloat16 a, c10::BFloat16 x) {
[[maybe_unused]] inline c10::BFloat16 calc_igammac<c10::BFloat16>(
c10::BFloat16 a,
c10::BFloat16 x) {
return calc_igammac<float>(float(a), float(x));
}
template <>
C10_UNUSED inline c10::Half calc_igammac<c10::Half>(c10::Half a, c10::Half x) {
[[maybe_unused]] inline c10::Half calc_igammac<c10::Half>(
c10::Half a,
c10::Half x) {
return calc_igammac<float>(float(a), float(x));
}
@ -1231,7 +1239,7 @@ inline T abs_impl(T v) {
}
template <>
C10_UNUSED inline uint8_t abs_impl(uint8_t v) {
[[maybe_unused]] inline uint8_t abs_impl(uint8_t v) {
return v;
}

View File

@ -188,7 +188,7 @@ std::tuple<Tensor, Tensor> _pad_packed_sequence(const Tensor& data, const Tensor
}
int64_t dec = prev_batch_size - batch_size;
if (dec > 0) {
for (C10_UNUSED const auto j : c10::irange(dec)) {
for ([[maybe_unused]] const auto j : c10::irange(dec)) {
(*lengths--) = i;
}
}

View File

@ -1889,7 +1889,8 @@ static DEFINE_QUANTIZED_RNN_CELL_DYNAMIC(quantized_rnn_tanh_cell_dynamic, simple
namespace {
C10_UNUSED static auto ensure_linear_params_registered = register_linear_params();
[[maybe_unused]] static auto ensure_linear_params_registered =
register_linear_params();
static auto cell_params_base_registry =
torch::selective_class_<CellParamsBase>("rnn", TORCH_SELECTIVE_CLASS("CellParamsBase"))

View File

@ -931,7 +931,7 @@ static inline Tensor diff_helper(const Tensor& self, int64_t n, int64_t dim) {
bool is_kBool = (self.dtype() == at::kBool);
n = n > self.sym_size(dim) ? self.sym_size(dim).guard_int(__FILE__, __LINE__) : n;
for (C10_UNUSED const auto i : c10::irange(n)) {
for ([[maybe_unused]] const auto i : c10::irange(n)) {
if (is_kBool) {
result = at::logical_xor(
at::narrow_symint(result, dim, 1, out_len),
@ -2255,7 +2255,7 @@ bool cpu_equal(const Tensor& self, const Tensor& other) {
return;
}
char* self_data = data[0];
for (C10_UNUSED const auto i : c10::irange(dim_size)) {
for ([[maybe_unused]] const auto i : c10::irange(dim_size)) {
if (isnan_(c10::load<scalar_t>(self_data))) {
result = false;
return;
@ -2282,7 +2282,7 @@ bool cpu_equal(const Tensor& self, const Tensor& other) {
}
char* self_data = data[0];
char* other_data = data[1];
for (C10_UNUSED const auto i : c10::irange(dim_size)) {
for ([[maybe_unused]] const auto i : c10::irange(dim_size)) {
if (c10::load<scalar_t>(self_data) != c10::load<scalar_t>(other_data)) {
result = false;
return;

View File

@ -207,9 +207,13 @@ inline TensorIterator make_reduction(
return TensorIterator::reduce_op(viewed_result, self.to(in_dtype));
}
C10_UNUSED inline TensorIterator make_reduction(
const char* name, Tensor& result, const Tensor& self,
at::OptionalIntArrayRef dim, bool keepdim, ScalarType out_dtype) {
[[maybe_unused]] inline TensorIterator make_reduction(
const char* name,
Tensor& result,
const Tensor& self,
at::OptionalIntArrayRef dim,
bool keepdim,
ScalarType out_dtype) {
// special case for type promotion in mixed precision, improves computational
// efficiency.
// not generalize this to common mismatched input/output types to avoid cross
@ -259,9 +263,14 @@ inline TensorIterator make_reduction(
return TensorIterator::reduce_op(viewed_result1, viewed_result2, self.to(dtype1));
}
C10_UNUSED inline TensorIterator make_reduction(
const char* name, Tensor& result1, Tensor& result2, const Tensor& self,
at::OptionalIntArrayRef dim, bool keepdim, ScalarType dtype) {
[[maybe_unused]] inline TensorIterator make_reduction(
const char* name,
Tensor& result1,
Tensor& result2,
const Tensor& self,
at::OptionalIntArrayRef dim,
bool keepdim,
ScalarType dtype) {
return make_reduction(name, result1, result2, self, dim, keepdim, dtype, dtype);
}
@ -313,9 +322,13 @@ inline std::vector<int64_t> get_zero_numel_tensor_size(
// This function should be called when you are reducing a zero-numel tensor and want to
// resize the output and return it. This function exists for resizing zero-numel
// tensors when the size of the reduction dimension is non-zero.
C10_UNUSED inline void zero_numel_tensor_resize(Tensor& result, Tensor& result_indices,
const Tensor& self, const int64_t dim,
const bool keepdim, const char *fn_name) {
[[maybe_unused]] inline void zero_numel_tensor_resize(
Tensor& result,
Tensor& result_indices,
const Tensor& self,
const int64_t dim,
const bool keepdim,
const char* fn_name) {
auto sizes = get_zero_numel_tensor_size(self, dim, keepdim, fn_name);
at::native::resize_output(result, sizes);
at::native::resize_output(result_indices, sizes);
@ -349,11 +362,11 @@ inline ScalarType get_dtype_from_result(Tensor& result, std::optional<ScalarType
namespace at::meta {
C10_UNUSED inline DimVector get_reduction_shape(
[[maybe_unused]] inline DimVector get_reduction_shape(
const Tensor& self,
IntArrayRef dims,
bool keepdim,
bool allow_empty_dims=false) {
bool allow_empty_dims = false) {
auto mask = native::make_dim_mask(dims, self.dim(), allow_empty_dims);
return native::shape_from_dim_mask(self, mask, keepdim);
}
@ -434,7 +447,7 @@ inline TensorIterator make_reduction(
return TensorIterator::reduce_op(viewed_result1, viewed_result2, self.to(dtype1));
}
C10_UNUSED inline TensorIterator make_reduction_from_out_ty(
[[maybe_unused]] inline TensorIterator make_reduction_from_out_ty(
const Tensor& self,
const Tensor& result,
OptionalIntArrayRef opt_dims,

View File

@ -2409,7 +2409,7 @@ Tensor& nonzero_out_cpu(const Tensor& self, Tensor& result) {
for (const auto i : c10::irange(n2)) {
const char* ptr = data[0] + i * strides[1];
for (C10_UNUSED const auto j : c10::irange(n1)) {
for ([[maybe_unused]] const auto j : c10::irange(n1)) {
const auto& val = c10::load<scalar_t>(ptr);
// If nonzero, write index
if (val != scalar_t(0)) {

View File

@ -50,7 +50,8 @@ const Tensor& value){
}
}
}
for (C10_UNUSED const auto i : c10::irange(num_ind, self.ndimension())) {
for ([[maybe_unused]] const auto i :
c10::irange(num_ind, self.ndimension())) {
mask = mask.unsqueeze(-1);
}
return std::make_tuple(true, mask);

View File

@ -1945,7 +1945,7 @@ Tensor index_select_sparse_cpu(const Tensor& self, int64_t dim, const Tensor& in
at::parallel_for(0, index_len, at::internal::GRAIN_SIZE, [&](int64_t start, int64_t end) {
const auto* src = ptr_index + start;
auto* dst = ptr_nneg_index + start;
for (C10_UNUSED const auto _ : c10::irange(start, end)) {
for ([[maybe_unused]] const auto _ : c10::irange(start, end)) {
auto idx = *src++;
if (idx < -size || idx >= size) {
// Mark self and dim as used if code is compiled with STRIP_ERROR_MESSAGES
@ -2051,36 +2051,42 @@ Tensor index_select_sparse_cpu(const Tensor& self, int64_t dim, const Tensor& in
const auto* ptr_sorted_start = ptr_sorted;
const auto* ptr_sorted_end = ptr_sorted + sorted_len;
at::parallel_for(0, n_threads_src, 1, [&](int64_t tid, C10_UNUSED int64_t _) {
const auto start = tid * chunk_size_src;
const auto end = std::min(start + chunk_size_src, src_len);
auto* ptr_tid_src_int_idx = src_int_idx.select(0, tid).data_ptr<int64_t>();
auto* ptr_tid_sorted_int_idx = sorted_int_idx.select(0, tid).data_ptr<int64_t>();
auto* ptr_tid_int_counts = int_counts.select(0, tid).data_ptr<int64_t>();
const auto* ptr_src = src.const_data_ptr<int64_t>() + start;
at::parallel_for(
0, n_threads_src, 1, [&](int64_t tid, [[maybe_unused]] int64_t _) {
const auto start = tid * chunk_size_src;
const auto end = std::min(start + chunk_size_src, src_len);
auto* ptr_tid_src_int_idx =
src_int_idx.select(0, tid).data_ptr<int64_t>();
auto* ptr_tid_sorted_int_idx =
sorted_int_idx.select(0, tid).data_ptr<int64_t>();
auto* ptr_tid_int_counts =
int_counts.select(0, tid).data_ptr<int64_t>();
const auto* ptr_src = src.const_data_ptr<int64_t>() + start;
for (const auto i : c10::irange(start, end)) {
const auto src_val = *ptr_src++;
const auto src_val_lb = std::lower_bound(ptr_sorted_start, ptr_sorted_end, src_val);
// We cannot just use *src_val_lb != src_val because when
// src_val_lb == ptr_sorted_end, dereferencing past-the-end value
// is not well-defined.
if (src_val_lb == ptr_sorted_end || *src_val_lb != src_val) {
++ptr_tid_src_int_idx;
++ptr_tid_sorted_int_idx;
++ptr_tid_int_counts;
continue;
for (const auto i : c10::irange(start, end)) {
const auto src_val = *ptr_src++;
const auto src_val_lb =
std::lower_bound(ptr_sorted_start, ptr_sorted_end, src_val);
// We cannot just use *src_val_lb != src_val because when
// src_val_lb == ptr_sorted_end, dereferencing past-the-end
// value is not well-defined.
if (src_val_lb == ptr_sorted_end || *src_val_lb != src_val) {
++ptr_tid_src_int_idx;
++ptr_tid_sorted_int_idx;
++ptr_tid_int_counts;
continue;
}
const auto src_val_ub =
std::upper_bound(ptr_sorted_start, ptr_sorted_end, src_val);
const int64_t count = src_val_ub - src_val_lb;
const int64_t j = src_val_lb - ptr_sorted_start;
*ptr_tid_src_int_idx++ = i;
*ptr_tid_sorted_int_idx++ = j;
*ptr_tid_int_counts++ = count;
}
const auto src_val_ub = std::upper_bound(ptr_sorted_start, ptr_sorted_end, src_val);
const int64_t count = src_val_ub - src_val_lb;
const int64_t j = src_val_lb - ptr_sorted_start;
*ptr_tid_src_int_idx++ = i;
*ptr_tid_sorted_int_idx++ = j;
*ptr_tid_int_counts++ = count;
}
});
});
}
const auto compressed_int_counts = int_counts.sum(-1);
@ -2111,29 +2117,35 @@ Tensor index_select_sparse_cpu(const Tensor& self, int64_t dim, const Tensor& in
const auto thread_offsets = compressed_int_counts.cumsum(0).sub_(compressed_int_counts);
const auto* ptr_sorted_idx = sorted_idx.const_data_ptr<int64_t>();
at::parallel_for(0, n_threads_src, 1, [&](int64_t tid, C10_UNUSED int64_t _) {
const auto start = tid * chunk_size_src;
const auto end = std::min(start + chunk_size_src, src_len);
const auto tid_offset = thread_offsets.const_data_ptr<int64_t>()[tid];
const auto* ptr_tid_src_int_idx = src_int_idx.select(0, tid).const_data_ptr<int64_t>();
const auto* ptr_tid_sorted_int_idx = sorted_int_idx.select(0, tid).const_data_ptr<int64_t>();
const auto* ptr_tid_int_counts = int_counts.select(0, tid).const_data_ptr<int64_t>();
auto* ptr_tid_selected_sorted = ptr_selected_sorted + tid_offset;
auto* ptr_tid_selected_src = ptr_selected_src + tid_offset;
at::parallel_for(
0, n_threads_src, 1, [&](int64_t tid, [[maybe_unused]] int64_t _) {
const auto start = tid * chunk_size_src;
const auto end = std::min(start + chunk_size_src, src_len);
const auto tid_offset =
thread_offsets.const_data_ptr<int64_t>()[tid];
const auto* ptr_tid_src_int_idx =
src_int_idx.select(0, tid).const_data_ptr<int64_t>();
const auto* ptr_tid_sorted_int_idx =
sorted_int_idx.select(0, tid).const_data_ptr<int64_t>();
const auto* ptr_tid_int_counts =
int_counts.select(0, tid).const_data_ptr<int64_t>();
auto* ptr_tid_selected_sorted = ptr_selected_sorted + tid_offset;
auto* ptr_tid_selected_src = ptr_selected_src + tid_offset;
for (C10_UNUSED const auto _ : c10::irange(start, end)) {
const auto count = *ptr_tid_int_counts++;
const auto i = *ptr_tid_src_int_idx++;
const auto j = *ptr_tid_sorted_int_idx++;
if (!count) continue;
for ([[maybe_unused]] const auto _ : c10::irange(start, end)) {
const auto count = *ptr_tid_int_counts++;
const auto i = *ptr_tid_src_int_idx++;
const auto j = *ptr_tid_sorted_int_idx++;
if (!count)
continue;
std::fill_n(ptr_tid_selected_src, count, i);
std::copy_n(ptr_sorted_idx + j, count, ptr_tid_selected_sorted);
std::fill_n(ptr_tid_selected_src, count, i);
std::copy_n(ptr_sorted_idx + j, count, ptr_tid_selected_sorted);
ptr_tid_selected_sorted += count;
ptr_tid_selected_src += count;
}
});
ptr_tid_selected_sorted += count;
ptr_tid_selected_src += count;
}
});
}
return search_in_dim_indices
@ -2192,7 +2204,7 @@ Tensor index_select_sparse_cpu(const Tensor& self, int64_t dim, const Tensor& in
else {
auto* ptr_counts = counts.data_ptr<int64_t>();
const auto* ptr_vals = t.const_data_ptr<int64_t>();
for (C10_UNUSED const auto _ : c10::irange(t.numel())) {
for ([[maybe_unused]] const auto _ : c10::irange(t.numel())) {
++ptr_counts[*ptr_vals++];
}
}
@ -2212,14 +2224,19 @@ Tensor index_select_sparse_cpu(const Tensor& self, int64_t dim, const Tensor& in
const auto run_in_parallel = (n_threads == 1);
auto counts_per_thread = at::zeros({n_threads, size}, idx.options());
at::parallel_for(0, n_threads, 1, [&](int64_t tid, C10_UNUSED int64_t _) {
const auto start = tid * chunk_size;
const auto end = std::min(start + chunk_size, idx_len);
const auto tid_idx = idx.slice(0, start, end);
auto tid_counts = counts_per_thread.select(0, tid);
get_counts(tid_counts, tid_idx, /*bins=*/size,
/*is_sorted=*/is_sorted, /*run_in_parallel=*/run_in_parallel);
});
at::parallel_for(
0, n_threads, 1, [&](int64_t tid, [[maybe_unused]] int64_t _) {
const auto start = tid * chunk_size;
const auto end = std::min(start + chunk_size, idx_len);
const auto tid_idx = idx.slice(0, start, end);
auto tid_counts = counts_per_thread.select(0, tid);
get_counts(
tid_counts,
tid_idx,
/*bins=*/size,
/*is_sorted=*/is_sorted,
/*run_in_parallel=*/run_in_parallel);
});
return counts_per_thread;
};
@ -2310,32 +2327,38 @@ Tensor index_select_sparse_cpu(const Tensor& self, int64_t dim, const Tensor& in
1, std::min<int64_t>((src_len + grain_size - 1) / grain_size, at::get_num_threads())
);
const auto chunk_size = (src_len + n_threads_src - 1) / n_threads_src;
at::parallel_for(0, n_threads_src, 1, [&](int64_t tid, C10_UNUSED int64_t _) {
const auto start = tid * chunk_size;
const auto end = std::min(start + chunk_size, src_len);
auto* ptr_src_tid = ptr_src + start;
const auto* ptr_src_counts_per_thread
= src_counts_per_thread.select(0, tid).const_data_ptr<int64_t>();
const auto* ptr_src_offset_counts_per_thread
= src_offset_counts_per_thread.select(0, tid).const_data_ptr<int64_t>();
auto tid_counts = at::zeros({size}, src.options());
auto* ptr_tid_counts = tid_counts.data_ptr<int64_t>();
at::parallel_for(
0, n_threads_src, 1, [&](int64_t tid, [[maybe_unused]] int64_t _) {
const auto start = tid * chunk_size;
const auto end = std::min(start + chunk_size, src_len);
auto* ptr_src_tid = ptr_src + start;
const auto* ptr_src_counts_per_thread =
src_counts_per_thread.select(0, tid)
.const_data_ptr<int64_t>();
const auto* ptr_src_offset_counts_per_thread =
src_offset_counts_per_thread.select(0, tid)
.const_data_ptr<int64_t>();
auto tid_counts = at::zeros({size}, src.options());
auto* ptr_tid_counts = tid_counts.data_ptr<int64_t>();
for (const auto i : c10::irange(start, end)) {
const auto idx_val = *ptr_src_tid++;
// skip idx value if not in the intersection
if (!ptr_intersection_counts[idx_val]) continue;
const auto idx_val_offset
= ptr_src_intersection_offsets[idx_val]
- ptr_src_intersection_counts[idx_val];
const auto idx_val_tid_offset
= ptr_src_offset_counts_per_thread[idx_val]
- ptr_src_counts_per_thread[idx_val];
auto& idx_val_local_tid_count = ptr_tid_counts[idx_val];
ptr_src_idx[idx_val_offset + idx_val_tid_offset + idx_val_local_tid_count] = i;
++idx_val_local_tid_count;
}
});
for (const auto i : c10::irange(start, end)) {
const auto idx_val = *ptr_src_tid++;
// skip idx value if not in the intersection
if (!ptr_intersection_counts[idx_val])
continue;
const auto idx_val_offset =
ptr_src_intersection_offsets[idx_val] -
ptr_src_intersection_counts[idx_val];
const auto idx_val_tid_offset =
ptr_src_offset_counts_per_thread[idx_val] -
ptr_src_counts_per_thread[idx_val];
auto& idx_val_local_tid_count = ptr_tid_counts[idx_val];
ptr_src_idx
[idx_val_offset + idx_val_tid_offset +
idx_val_local_tid_count] = i;
++idx_val_local_tid_count;
}
});
const auto src_idx_offsets = src_intersection_offsets.sub_(src_intersection_counts);
@ -2369,26 +2392,28 @@ Tensor index_select_sparse_cpu(const Tensor& self, int64_t dim, const Tensor& in
1, std::min<int64_t>((idx_len + grain_size - 1) / grain_size, at::get_num_threads())
);
const auto chunk_size = (idx_len + n_threads_idx - 1) / n_threads_idx;
at::parallel_for(0, n_threads_idx, 1, [&](int64_t tid, C10_UNUSED int64_t _) {
const auto start = tid * chunk_size;
const auto end = std::min(start + chunk_size, idx_len);
const auto tid_offset = ptr_thread_offset[tid];
const auto* ptr_idx_tid = ptr_idx + start;
auto* ptr_idx_selected_tid = ptr_idx_selected + tid_offset;
auto* ptr_src_selected_tid = ptr_src_selected + tid_offset;
at::parallel_for(
0, n_threads_idx, 1, [&](int64_t tid, [[maybe_unused]] int64_t _) {
const auto start = tid * chunk_size;
const auto end = std::min(start + chunk_size, idx_len);
const auto tid_offset = ptr_thread_offset[tid];
const auto* ptr_idx_tid = ptr_idx + start;
auto* ptr_idx_selected_tid = ptr_idx_selected + tid_offset;
auto* ptr_src_selected_tid = ptr_src_selected + tid_offset;
for (const auto i : c10::irange(start, end)) {
const auto idx_val = *ptr_idx_tid++;
// skip if idx_val is not in the intersection
if (!ptr_intersection_counts[idx_val]) continue;
const auto count = ptr_src_counts[idx_val];
const auto j = ptr_src_idx_offsets[idx_val];
std::fill_n(ptr_idx_selected_tid, count, i);
std::copy_n(ptr_src_idx + j, count, ptr_src_selected_tid);
ptr_idx_selected_tid += count;
ptr_src_selected_tid += count;
}
});
for (const auto i : c10::irange(start, end)) {
const auto idx_val = *ptr_idx_tid++;
// skip if idx_val is not in the intersection
if (!ptr_intersection_counts[idx_val])
continue;
const auto count = ptr_src_counts[idx_val];
const auto j = ptr_src_idx_offsets[idx_val];
std::fill_n(ptr_idx_selected_tid, count, i);
std::copy_n(ptr_src_idx + j, count, ptr_src_selected_tid);
ptr_idx_selected_tid += count;
ptr_src_selected_tid += count;
}
});
return std::make_tuple(idx_selected, src_selected);
}();

View File

@ -29,13 +29,12 @@ namespace {
// grad_in does not mean that it is a gradient wrt to input,
// grad_in/grad_out is just an input/output of unfold_backward kernel.
C10_UNUSED static TensorIterator _make_unfold_backward_iter_over_grad_out(
Tensor& grad_out,
const Tensor& grad_in,
int64_t dim,
int64_t size,
int64_t step
) {
[[maybe_unused]] static TensorIterator _make_unfold_backward_iter_over_grad_out(
Tensor& grad_out,
const Tensor& grad_in,
int64_t dim,
int64_t size,
int64_t step) {
dim = maybe_wrap_dim(dim, grad_out.dim());
// last dim stores the folds
@ -106,7 +105,6 @@ C10_UNUSED static TensorIterator _make_unfold_backward_iter_over_grad_out(
return iter;
}
}
} // namespace at::native

View File

@ -103,7 +103,9 @@ DECLARE_DISPATCH(upsampling_bicubic2d, upsample_bicubic2d_kernel);
DECLARE_DISPATCH(_upsampling_bicubic2d_aa, _upsample_bicubic2d_aa_kernel);
DECLARE_DISPATCH(_upsampling_bicubic2d_aa, _upsample_bicubic2d_aa_backward_kernel);
C10_UNUSED inline std::array<int64_t, 3> upsample_1d_common_check(IntArrayRef input_size, IntArrayRef output_size) {
[[maybe_unused]] inline std::array<int64_t, 3> upsample_1d_common_check(
IntArrayRef input_size,
IntArrayRef output_size) {
TORCH_CHECK(
output_size.size() == 1,
"It is expected output_size equals to 1, but got size ",
@ -131,7 +133,9 @@ C10_UNUSED inline std::array<int64_t, 3> upsample_1d_common_check(IntArrayRef in
return {nbatch, channels, output_width};
}
C10_UNUSED inline std::array<int64_t, 4> upsample_2d_common_check(IntArrayRef input_size, IntArrayRef output_size) {
[[maybe_unused]] inline std::array<int64_t, 4> upsample_2d_common_check(
IntArrayRef input_size,
IntArrayRef output_size) {
TORCH_CHECK(
output_size.size() == 2,
"It is expected output_size equals to 2, but got size ",
@ -167,8 +171,9 @@ C10_UNUSED inline std::array<int64_t, 4> upsample_2d_common_check(IntArrayRef in
return {nbatch, channels, output_height, output_width};
}
C10_UNUSED inline
std::array<int64_t, 5> upsample_3d_common_check(IntArrayRef input_size, IntArrayRef output_size) {
[[maybe_unused]] inline std::array<int64_t, 5> upsample_3d_common_check(
IntArrayRef input_size,
IntArrayRef output_size) {
TORCH_CHECK(
output_size.size() == 3,
"It is expected output_size equals to 3, but got size ",

View File

@ -40,7 +40,6 @@ int register_linear_params() {
}
namespace {
C10_UNUSED static auto linear_params = register_linear_params();
} // namespace
[[maybe_unused]] static auto linear_params = register_linear_params();
} // namespace
}} // namespace ao::sparse

View File

@ -82,7 +82,7 @@ static void reduced_float_copy_kernel(TensorIteratorBase &iter, bool requires_ne
std::copy_n(base, 2, data.data());
const int64_t *outer_strides = &strides[2];
for (C10_UNUSED const auto it : c10::irange(size1)) {
for ([[maybe_unused]] const auto it : c10::irange(size1)) {
Vecd dst_s;
if (strides_in[0] == 0) {
dst_s = Vecd(dest_t(*((scalar_t*)data[1])));
@ -151,7 +151,7 @@ static void reduced_float_copy_kernel(TensorIteratorBase &iter, bool requires_ne
std::copy_n(base, 2, data.data());
const int64_t *outer_strides = &strides[2];
for (C10_UNUSED const auto it : c10::irange(size1)) {
for ([[maybe_unused]] const auto it : c10::irange(size1)) {
Vecd dst_s;
if (strides_in[0] == 0) {
dst_s = Vecd(dest_t(*((source_t*)data[1])));

View File

@ -395,7 +395,7 @@ struct Dist {
const scalar_t * t1_end = t1 + l1_size;
const scalar_t * t2_end = t2 + l2_size;
for (C10_UNUSED const auto l : c10::irange(d)) {
for ([[maybe_unused]] const auto l : c10::irange(d)) {
for (; t1 != t1_end; t1 += m, res += m) {
const Vec vec_t1 = Vec::loadu(t1, count);
Vec res_vec = Vec::loadu(res, count);

View File

@ -473,7 +473,7 @@ void cpu_flash_attention(
scalar_t* transpose_buffer_ptr = transpose_buffer.get();
std::unique_ptr<scalar_t[]> v_copy_buffer = std::make_unique<scalar_t[]>(ekvSplitSize * packb_size);
scalar_t* v_copy_buffer_ptr = v_copy_buffer.get();
for (C10_UNUSED auto z : c10::irange(begin, end)) {
for ([[maybe_unused]] auto z : c10::irange(begin, end)) {
n = l * kvSplitSize;
int64_t kvBlockSize = std::min(kvSplitSize, kvSize - n);
int64_t ekvBlockSize = kvBlockSize % 2 == 0 ? kvBlockSize : kvBlockSize + 1;
@ -566,7 +566,7 @@ void cpu_flash_attention(
? query_padding_ptr + ompIdx * qSplitSize * eheadSize
: nullptr;
for (C10_UNUSED auto z : c10::irange(begin, end)) {
for ([[maybe_unused]] auto z : c10::irange(begin, end)) {
int64_t m = k * qSplitSize;
int64_t qBlockSize = std::min(qSplitSize, qSize - m);
// Initialize max and sum
@ -931,7 +931,7 @@ void cpu_flash_attention_backward(
at::Tensor dsum = at::empty({qSplitSize}, query.options().dtype(accumulate_dtype));
accum_t* dsum_data = dsum.data_ptr<accum_t>();
for (C10_UNUSED auto z : c10::irange(begin, end)) {
for ([[maybe_unused]] auto z : c10::irange(begin, end)) {
// rowsum of grad_out * out
for (int64_t m = 0; m < qSize; m += qSplitSize) {
int64_t qBlockSize = std::min(qSplitSize, qSize - m);

View File

@ -30,7 +30,7 @@ void _compute_linear_combination_cpu_kernel(
auto* RESTRICT in_ptr = data[1];
auto* RESTRICT coeff_ptr = data[2];
for (C10_UNUSED const auto elem : c10::irange(n)) {
for ([[maybe_unused]] const auto elem : c10::irange(n)) {
auto* RESTRICT out_data = reinterpret_cast<scalar_t*>(out_ptr);
auto* RESTRICT in_data = reinterpret_cast<scalar_t*>(in_ptr);
using primitive_t = typename scalar_value_type<scalar_t>::type;

View File

@ -78,7 +78,7 @@ void cpu_take_put_kernel(
auto loop = [&](char** data, const int64_t* strides, int64_t n) {
auto* iterated_data_bytes = data[0];
auto* index_data_bytes = data[1];
for (C10_UNUSED const auto elem : c10::irange(n)) {
for ([[maybe_unused]] const auto elem : c10::irange(n)) {
auto idx = *reinterpret_cast<int64_t*>(index_data_bytes);
auto& iterated = *reinterpret_cast<scalar_t*>(iterated_data_bytes);
@ -203,7 +203,7 @@ void index_fill_kernel(
auto handle_nonzero_idx_stride = [&](char** data, const int64_t* strides, int64_t n) {
auto* self_data_bytes = data[0];
auto* index_data_bytes = data[1];
for (C10_UNUSED const auto elem : c10::irange(n)) {
for ([[maybe_unused]] const auto elem : c10::irange(n)) {
auto* self_data = reinterpret_cast<scalar_t*>(self_data_bytes);
auto idx = *reinterpret_cast<int64_t*>(index_data_bytes);
TORCH_CHECK_INDEX(idx >= -self_dim_size && idx < self_dim_size,
@ -229,7 +229,7 @@ void index_fill_kernel(
if (idx < 0) {
idx += self_dim_size;
}
for (C10_UNUSED const auto elem: c10::irange(n)) {
for ([[maybe_unused]] const auto elem : c10::irange(n)) {
auto* self_data = reinterpret_cast<scalar_t*>(self_data_bytes);
self_data[idx * self_dim_stride] = fill_val;
@ -262,7 +262,7 @@ void index_copy_kernel(
auto* self_data_bytes = data[0];
auto* index_data_bytes = data[1];
auto* source_data_bytes = data[2];
for (C10_UNUSED const auto elem : c10::irange(n)) {
for ([[maybe_unused]] const auto elem : c10::irange(n)) {
auto* self_data = reinterpret_cast<scalar_t*>(self_data_bytes);
auto idx = *reinterpret_cast<int64_t*>(index_data_bytes);
auto* source_data = reinterpret_cast<scalar_t*>(source_data_bytes);
@ -285,7 +285,7 @@ void index_copy_kernel(
TORCH_CHECK_INDEX(idx >= 0 && idx < self_dim_size,
"index_copy_(): index ", idx, " is out of bounds for dimension ",
dim, " with size ", self_dim_size);
for (C10_UNUSED const auto elem : c10::irange(n)) {
for ([[maybe_unused]] const auto elem : c10::irange(n)) {
auto* self_data = reinterpret_cast<scalar_t*>(self_data_bytes);
auto* source_data = reinterpret_cast<scalar_t*>(source_data_bytes);
@ -474,8 +474,7 @@ void cpu_hflip_vec(at::TensorIterator& iter) {
constexpr auto stride = sizeof(scalar_t);
TORCH_INTERNAL_ASSERT(stride == -strides[0] && stride == strides[1]);
for (C10_UNUSED const auto j : c10::irange(size1)) {
for ([[maybe_unused]] const auto j : c10::irange(size1)) {
// vectorized loop with negative stride for output
char** C10_RESTRICT data_ = data_arr.data();
int64_t n = size0;
@ -543,8 +542,7 @@ void cpu_vflip_memcpy(at::TensorIterator& iter) {
TORCH_INTERNAL_ASSERT(strides[0] == strides[1]);
const int64_t stride = strides[0];
for (C10_UNUSED const auto j : c10::irange(size1)) {
for ([[maybe_unused]] const auto j : c10::irange(size1)) {
char** C10_RESTRICT data_ = data_arr.data();
int64_t n = size0;

View File

@ -271,7 +271,7 @@ struct VectorizedLoop2d {
const int64_t *outer_strides = &strides[ntensors];
if (is_contiguous<traits>(strides)) {
for (C10_UNUSED const auto i : c10::irange(size1)) {
for ([[maybe_unused]] const auto i : c10::irange(size1)) {
vectorized_loop(data.data(), size0, 0, op, vop);
advance(data, outer_strides);
}
@ -279,12 +279,12 @@ struct VectorizedLoop2d {
using Indices = std::make_index_sequence<traits::arity>;
unroll_contiguous_scalar_checks<traits>(strides, Indices{}, [&](size_t idx) {
if (idx) {
for (C10_UNUSED const auto i : c10::irange(size1)) {
for ([[maybe_unused]] const auto i : c10::irange(size1)) {
vectorized_loop(data.data(), size0, idx, op, vop);
advance(data, outer_strides);
}
} else {
for (C10_UNUSED const auto i : c10::irange(size1)) {
for ([[maybe_unused]] const auto i : c10::irange(size1)) {
basic_loop(data.data(), strides, 0, size0, op);
advance(data, outer_strides);
}

View File

@ -70,7 +70,7 @@ inline void vectorized_reduction(char** data, int64_t n, int64_t stride,
template <typename F>
inline void UNARY_OUTER_LOOP(char* data[2], const int64_t strides[2], int64_t n, F f) {
for (C10_UNUSED const auto j : c10::irange(n)) {
for ([[maybe_unused]] const auto j : c10::irange(n)) {
f();
data[0] += strides[0];
data[1] += strides[1];

View File

@ -62,11 +62,12 @@ static inline void cpu_cum_base_kernel(const Tensor& result,
auto* result_data_bytes = data[0];
const auto* self_data_bytes = data[1];
for (C10_UNUSED const auto i : c10::irange(n)) {
f(
(scalar_t*)result_data_bytes, result_dim_stride,
(scalar_t*)self_data_bytes, self_dim_stride, init_val
);
for ([[maybe_unused]] const auto i : c10::irange(n)) {
f((scalar_t*)result_data_bytes,
result_dim_stride,
(scalar_t*)self_data_bytes,
self_dim_stride,
init_val);
result_data_bytes += strides[0];
self_data_bytes += strides[1];
}

View File

@ -215,7 +215,7 @@ struct cpu_scatter_gather_base_kernel {
// vs dim-TensorIterator loop order depending on
// whether dim is the last dimension
if (dim== buffer.dim() - 1) {
for (C10_UNUSED const auto nelem : c10::irange(n)) {
for ([[maybe_unused]] const auto nelem : c10::irange(n)) {
// dim loop is a separate code block
// for better performance
loop_func.template operator()<scalar_t, func_t>(
@ -232,7 +232,7 @@ struct cpu_scatter_gather_base_kernel {
for (const auto i : c10::irange(index_dim_size)) {
auto* self_data = self_data_bytes;
auto* index_data = (char*)((int64_t*)index_data_bytes + i * index_dim_stride);
for (C10_UNUSED const auto nelem : c10::irange(n)) {
for ([[maybe_unused]] const auto nelem : c10::irange(n)) {
int64_t idx_dim = *(int64_t*)index_data;
// we are not putting idx_dim in the error message because it disables
// loop optimization in clang-7
@ -306,7 +306,7 @@ struct cpu_scatter_gather_base_kernel {
// vs dim-TensorIterator loop order depending on
// whether dim is the last dimension
if (dim== buffer.dim() - 1) {
for (C10_UNUSED const auto nelem : c10::irange(n)) {
for ([[maybe_unused]] const auto nelem : c10::irange(n)) {
// dim loop is a separate code block
// for better performance
loop_func.template operator()<scalar_t, func_t>(
@ -327,7 +327,7 @@ struct cpu_scatter_gather_base_kernel {
auto* self_data = self_data_bytes;
auto* index_data = (char*)((int64_t*)index_data_bytes + i * index_dim_stride);
auto* src_data = src_data_bytes;
for (C10_UNUSED const auto nelem : c10::irange(n)) {
for ([[maybe_unused]] const auto nelem : c10::irange(n)) {
int64_t idx_dim = *(int64_t*)index_data;
// we are not putting idx_dim in the error message because it disables
// loop optimization in clang-7
@ -402,7 +402,7 @@ struct cpu_scatter_gather_base_kernel {
// vs dim-TensorIterator loop order depending on
// whether dim is the last dimension
if (dim== buffer.dim() - 1) {
for (C10_UNUSED const auto nelem : c10::irange(n)) {
for ([[maybe_unused]] const auto nelem : c10::irange(n)) {
// dim loop is a separate code block
// for better performance
loop_func.template operator()<scalar_t, ReduceMean>(
@ -423,7 +423,7 @@ struct cpu_scatter_gather_base_kernel {
auto* self_data = self_data_bytes;
auto* index_data = (char*)((int64_t*)index_data_bytes + i * index_dim_stride);
auto* src_data = src_data_bytes;
for (C10_UNUSED const auto nelem : c10::irange(n)) {
for ([[maybe_unused]] const auto nelem : c10::irange(n)) {
int64_t idx_dim = *(int64_t*)index_data;
// we are not putting idx_dim in the error message because it disables
// loop optimization in clang-7
@ -497,7 +497,7 @@ struct cpu_scatter_gather_base_kernel {
// vs dim-TensorIterator loop order depending on
// whether dim is the last dimension
if (dim== buffer.dim() - 1) {
for (C10_UNUSED const auto nelem : c10::irange(n)) {
for ([[maybe_unused]] const auto nelem : c10::irange(n)) {
// dim loop is a separate code block
// for better performance
loop_func.template operator()<scalar_t, ReduceMaximum>(
@ -518,7 +518,7 @@ struct cpu_scatter_gather_base_kernel {
auto* self_data = self_data_bytes;
auto* index_data = (char*)((int64_t*)index_data_bytes + i * index_dim_stride);
auto* src_data = src_data_bytes;
for (C10_UNUSED const auto nelem : c10::irange(n)) {
for ([[maybe_unused]] const auto nelem : c10::irange(n)) {
int64_t idx_dim = *(int64_t*)index_data;
// we are not putting idx_dim in the error message because it disables
// loop optimization in clang-7
@ -593,7 +593,7 @@ struct cpu_scatter_gather_base_kernel {
// vs dim-TensorIterator loop order depending on
// whether dim is the last dimension
if (dim== buffer.dim() - 1) {
for (C10_UNUSED const auto nelem : c10::irange(n)) {
for ([[maybe_unused]] const auto nelem : c10::irange(n)) {
// dim loop is a separate code block
// for better performance
loop_func.template operator()<scalar_t, ReduceMinimum>(
@ -614,7 +614,7 @@ struct cpu_scatter_gather_base_kernel {
auto* self_data = self_data_bytes;
auto* index_data = (char*)((int64_t*)index_data_bytes + i * index_dim_stride);
auto* src_data = src_data_bytes;
for (C10_UNUSED const auto nelem : c10::irange(n)) {
for ([[maybe_unused]] const auto nelem : c10::irange(n)) {
int64_t idx_dim = *(int64_t*)index_data;
// we are not putting idx_dim in the error message because it disables
// loop optimization in clang-7

View File

@ -53,14 +53,12 @@ void _dim_apply(
return;
}
for (C10_UNUSED const auto i : c10::irange(n)) {
f(
reinterpret_cast<scalar_t*>(values_data_bytes),
for ([[maybe_unused]] const auto i : c10::irange(n)) {
f(reinterpret_cast<scalar_t*>(values_data_bytes),
values_dim_stride,
reinterpret_cast<int64_t*>(indices_data_bytes),
indices_dim_stride,
dim_size
);
dim_size);
values_data_bytes += strides[0];
indices_data_bytes += strides[1];

View File

@ -83,7 +83,7 @@ static inline void compare_base_kernel(const Tensor& result1, const Tensor& resu
auto* result1_data_bytes = data[0];
auto* result2_data_bytes = data[1];
const auto* self_data_bytes = data[2];
for (C10_UNUSED const auto i : c10::irange(n)) {
for ([[maybe_unused]] const auto i : c10::irange(n)) {
f((scalar_t*)result1_data_bytes,
(scalar_t_2*)result2_data_bytes,
(scalar_t*)self_data_bytes,
@ -253,7 +253,7 @@ static void mode_kernel_impl(
std::vector<std::pair<scalar_t, int64_t>> elements(self_dim_size);
for (C10_UNUSED const auto k : c10::irange(n)) {
for ([[maybe_unused]] const auto k : c10::irange(n)) {
scalar_t* values_data = (scalar_t*)values_data_bytes;
int64_t* indices_data = (int64_t*)indices_data_bytes;
const scalar_t* self_data = (scalar_t*)self_data_bytes;

View File

@ -353,8 +353,9 @@ static void unfolded2d_copy_channels_last(
int64_t x = 0;
data_index_init(start, y, output_height, x, output_width);
for (const auto k C10_UNUSED: c10::irange(start, end)) {
scalar_t* dst = finput_data + y * output_width * kH * kW * n_input_plane + x * kH * kW * n_input_plane;
for (const auto k [[maybe_unused]] : c10::irange(start, end)) {
scalar_t* dst = finput_data + y * output_width * kH * kW * n_input_plane +
x * kH * kW * n_input_plane;
const scalar_t* src = input_data;
if (padW > 0 || padH > 0) {


@ -76,7 +76,7 @@ void _unfold_backward_internal_kernel(
auto* RESTRICT grad_in_ptr = data[1];
auto* RESTRICT idx_dim_ptr = data[2];
for (C10_UNUSED const auto elem : c10::irange(nelems)) {
for ([[maybe_unused]] const auto elem : c10::irange(nelems)) {
auto* RESTRICT grad_out_data = reinterpret_cast<scalar_t*>(grad_out_ptr);
auto* RESTRICT grad_in_data = reinterpret_cast<scalar_t*>(grad_in_ptr);


@ -733,8 +733,9 @@ struct HelperInterpBase {
auto new_shape = std::vector<int64_t>(ndims, 1);
new_shape[reshape_dim] = output_size;
for (C10_UNUSED const auto j : c10::irange(interp_size)) {
output.emplace_back(empty(new_shape, CPU(c10::CppTypeToScalarType<int64_t>())));
for ([[maybe_unused]] const auto j : c10::irange(interp_size)) {
output.emplace_back(
empty(new_shape, CPU(c10::CppTypeToScalarType<int64_t>())));
output.emplace_back(empty(new_shape, CPU(output_type)));
}
}
@ -1047,8 +1048,9 @@ struct HelperInterpNearest : public HelperInterpBase {
auto new_shape = std::vector<int64_t>(ndims, 1);
new_shape[reshape_dim] = output_size;
for (C10_UNUSED const auto j : c10::irange(interp_size)) {
output.emplace_back(empty(new_shape, CPU(c10::CppTypeToScalarType<int64_t>())));
for ([[maybe_unused]] const auto j : c10::irange(interp_size)) {
output.emplace_back(
empty(new_shape, CPU(c10::CppTypeToScalarType<int64_t>())));
// Defines weights for consistency, but not used
output.emplace_back(at::ones(new_shape, CPU(output_type)));
}


@ -102,7 +102,7 @@ void pack_rgb(
TORCH_INTERNAL_ASSERT(unpacked_increment == 3 || unpacked_increment == 4);
for (C10_UNUSED const auto i : c10::irange(num_pixels)) {
for ([[maybe_unused]] const auto i : c10::irange(num_pixels)) {
for (const auto j : c10::irange(num_channels)) {
packed[j * packed_stride] = unpacked[j];
}


@ -723,7 +723,7 @@ void int4pack_mm_kernel_(
int mb{0}, nb{0};
data_index_init(begin, mb, MB, nb, NB);
for (C10_UNUSED const auto i : c10::irange(begin, end)) {
for ([[maybe_unused]] const auto i : c10::irange(begin, end)) {
int mb_start = mb * BLOCK_M;
int mb_size = std::min(BLOCK_M, M - mb_start);
int nb_start = nb * BLOCK_N;


@ -177,7 +177,7 @@ struct KthValueLauncher {
cuda::detail::TensorInfo<scalar_t, index_t> values_info,
int collapse_values_dim,
cuda::detail::TensorInfo<int64_t, index_t> indices_info,
C10_UNUSED int collapse_indices_dim,
[[maybe_unused]] int collapse_indices_dim,
cuda::detail::TensorInfo<const scalar_t, index_t> self_info,
int collapse_self_dim,
int64_t num_slices,
@ -212,9 +212,9 @@ struct MedianLauncher {
template <typename scalar_t, typename index_t, int all_dims>
inline void launch(
cuda::detail::TensorInfo<scalar_t, index_t> values_info,
C10_UNUSED int collapse_values_dim,
[[maybe_unused]] int collapse_values_dim,
cuda::detail::TensorInfo<int64_t, index_t> indices_info,
C10_UNUSED int collapse_indices_dim,
[[maybe_unused]] int collapse_indices_dim,
cuda::detail::TensorInfo<const scalar_t, index_t> self_info,
int collapse_self_dim,
int64_t num_slices,


@ -1374,7 +1374,7 @@ std::tuple<Tensor, Tensor, Tensor> layer_norm_cuda(
for (const auto idx: c10::irange(axis)) {
stat_shape.push_back(input_shape[idx]);
}
for (C10_UNUSED const auto idx: c10::irange(axis, input.dim())) {
for ([[maybe_unused]] const auto idx : c10::irange(axis, input.dim())) {
stat_shape.push_back(1);
}


@ -74,7 +74,7 @@ cudnn_frontend::Tensor getTensorDescriptorWithTypeVirtual(
// Ubuntu-22+ if `libnvrtc.so` is not found on the system, which strictly
// speaking is not necessary for usecases below See
// https://github.com/pytorch/pytorch/issues/97041
C10_UNUSED static auto cudnn_cnn_infer_handler = [] {
[[maybe_unused]] static auto cudnn_cnn_infer_handler = [] {
void* handle = dlopen("libcudnn_cnn_infer.so.8", RTLD_LAZY);
char* err = dlerror();
if (!handle) {


@ -51,7 +51,7 @@ static void layer_norm_with_mean_rstd_out(
for (const auto idx : c10::irange(axis)) {
stat_shape.emplace_back(input_shape[idx]);
}
for (C10_UNUSED const auto idx : c10::irange(axis, input.dim())) {
for ([[maybe_unused]] const auto idx : c10::irange(axis, input.dim())) {
stat_shape.emplace_back(1);
}
@ -256,7 +256,7 @@ std::tuple<Tensor, Tensor, Tensor> math_native_layer_norm(
for (const auto idx : c10::irange(axis)) {
stat_shape.push_back(input_shape[idx]);
}
for (C10_UNUSED const auto idx : c10::irange(axis, input.dim())) {
for ([[maybe_unused]] const auto idx : c10::irange(axis, input.dim())) {
stat_shape.push_back(1);
}
mean = mean.view(stat_shape);


@ -163,7 +163,7 @@ static void linalg_lu_factor_out_mps_impl(const Tensor& A, bool pivot, Tensor& L
status_tensors.reserve(batchSize);
pivots_list.reserve(batchSize);
for (C10_UNUSED const auto i : c10::irange(batchSize)) {
for ([[maybe_unused]] const auto i : c10::irange(batchSize)) {
status_tensors.push_back(at::zeros(1, kInt, std::nullopt, kMPS, std::nullopt));
pivots_list.push_back(at::zeros(numPivots, kInt, std::nullopt, kMPS, std::nullopt));
}


@ -922,7 +922,7 @@ std::tuple<Tensor, Tensor, Tensor> layer_norm_mps(const Tensor& input,
for (const auto idx : c10::irange(axis)) {
stat_shape.push_back(input_shape[idx]);
}
for (C10_UNUSED auto idx : c10::irange(axis, input.dim())) {
for ([[maybe_unused]] auto idx : c10::irange(axis, input.dim())) {
stat_shape.push_back(1);
}
mean = mean.view(stat_shape);


@ -706,7 +706,7 @@ static ViewCachedGraph* createViewGraph(const Tensor& self,
// Self is the input tensor we are creating view of
newCachedGraph->inputTensor = mpsGraphRankedPlaceHolder(mpsGraph, inputType, getMPSShape(base_shape));
newCachedGraph->storageOffsetTensor = mpsGraphRankedPlaceHolder(mpsGraph, MPSDataTypeInt32, @[ @1 ]);
for (C10_UNUSED const auto i : c10::irange(size.size())) {
for ([[maybe_unused]] const auto i : c10::irange(size.size())) {
newCachedGraph->strideTensors.push_back(mpsGraphRankedPlaceHolder(mpsGraph, MPSDataTypeInt32, @[ @1 ]));
}
if (needsScatter) {


@ -444,7 +444,7 @@ Tensor qnnpack_avg_pool2d(
} // namespace at
namespace {
C10_UNUSED std::vector<float> generate_requantization_scales(
[[maybe_unused]] std::vector<float> generate_requantization_scales(
const at::Tensor& weight_scales,
const float input_scale,
const float output_scale,
@ -468,11 +468,11 @@ C10_UNUSED std::vector<float> generate_requantization_scales(
return requant_scales;
}
C10_UNUSED std::pair<std::vector<uint8_t>, at::Tensor> make_zero_points_and_scales_tensor(
[[maybe_unused]] std::pair<std::vector<uint8_t>, at::Tensor>
make_zero_points_and_scales_tensor(
const at::Tensor& weight_contig,
bool transpose = false,
uint32_t groups = 1
) {
uint32_t groups = 1) {
const int out_ch_idx = transpose ? 1 : 0;
const auto num_output_channels = weight_contig.size(out_ch_idx) * (transpose ? groups : 1);
// Add 8 to account for bufferring needed by QNNPACK.


@ -186,8 +186,9 @@ inline TensorQuantizationParams ChooseQuantizationParams(
// This function helps to convert the Conv1D dimensions usable by the Conv2d op.
constexpr int64_t kConv1dSqueezeDim = 0;
C10_UNUSED static torch::List<int64_t> MakeArgForConv1d(const torch::List<int64_t>& arg,
int64_t base_value) {
[[maybe_unused]] static torch::List<int64_t> MakeArgForConv1d(
const torch::List<int64_t>& arg,
int64_t base_value) {
TORCH_CHECK(!arg.empty(), "Argument must have elements.");
torch::List<int64_t> result({arg.get(0), base_value});
if (arg.size() == 1) {


@ -71,7 +71,7 @@ static void upsample_nearest3d_out_frame(
const auto* pos1 = &i_p[d1 * input_height * input_width + h1 * input_width + w1];
auto* pos2 = &o_p[d2 * output_height * output_width + h2 * output_width + w2];
for (C10_UNUSED const auto c : c10::irange(channels)) {
for ([[maybe_unused]] const auto c : c10::irange(channels)) {
pos2[0] = pos1[0];
pos1 += input_depth * input_height * input_width;
pos2 += output_depth * output_height * output_width;


@ -143,7 +143,7 @@ ConvParamsSerializationTypeV3 parse_conv_serialized_state(c10::IValue v) {
config_vals.push_back(dilation[0].item<int16_t>());
}
// output_padding does not exist in v1, so we fill in a default value
for (C10_UNUSED const auto i : c10::irange(kSpatialDim)) {
for ([[maybe_unused]] const auto i : c10::irange(kSpatialDim)) {
config_vals.push_back(0);
}
config_vals.push_back(groups[0].item<int16_t>());
@ -294,21 +294,24 @@ c10::intrusive_ptr<ConvPackedParamsBase<kSpatialDim>> deserialize_conv(
torch::List<int64_t> stride, padding, output_padding, dilation;
// skip kSpatialDim
int idx = 1;
for (C10_UNUSED const auto i : c10::irange(kSpatialDim)) {
for ([[maybe_unused]] const auto i : c10::irange(kSpatialDim)) {
stride.emplace_back(config_vals.at(idx));
idx++;
}
for (C10_UNUSED const auto i : c10::irange(kSpatialDim)) {
for ([[maybe_unused]] const auto i : c10::irange(kSpatialDim)) {
padding.emplace_back(config_vals.at(idx));
idx++;
}
for (C10_UNUSED const auto i : c10::irange(kSpatialDim)) {
for ([[maybe_unused]] const auto i : c10::irange(kSpatialDim)) {
dilation.emplace_back(config_vals.at(idx));
idx++;
}
for (C10_UNUSED const auto i : c10::irange(kSpatialDim)) {
TORCH_INTERNAL_ASSERT(idx < static_cast<int64_t>(config_vals.size()),
"Unexpected index = ", idx, " for config_vals of size ",
for ([[maybe_unused]] const auto i : c10::irange(kSpatialDim)) {
TORCH_INTERNAL_ASSERT(
idx < static_cast<int64_t>(config_vals.size()),
"Unexpected index = ",
idx,
" for config_vals of size ",
config_vals.size());
output_padding.emplace_back(config_vals.at(idx));
idx++;


@ -554,9 +554,9 @@ int register_embedding_params() {
namespace {
C10_UNUSED static auto conv2d_params = register_conv_params<2>();
C10_UNUSED static auto conv3d_params = register_conv_params<3>();
C10_UNUSED static auto linear_params = register_linear_params();
C10_UNUSED static auto embedding_params = register_embedding_params();
[[maybe_unused]] static auto conv2d_params = register_conv_params<2>();
[[maybe_unused]] static auto conv3d_params = register_conv_params<3>();
[[maybe_unused]] static auto linear_params = register_linear_params();
[[maybe_unused]] static auto embedding_params = register_embedding_params();
} // namespace


@ -2293,7 +2293,7 @@ void qupsample_bilinear2d_nhwc_kernel(
int64_t b{0}, h2{0}, w2{0};
data_index_init(begin, b, nbatch, h2, output_height, w2, output_width);
for (C10_UNUSED const auto i : c10::irange(begin, end)) {
for ([[maybe_unused]] const auto i : c10::irange(begin, end)) {
auto* i_p = reinterpret_cast<typename scalar_t::underlying*>(
idata + b * input_height * input_width * channels);
auto* o_p = reinterpret_cast<typename scalar_t::underlying*>(
@ -3818,8 +3818,8 @@ void quantize_tensor_per_channel_impl<c10::quint8>(
// channels_last contig.
// If axis = 0 and channels_last contig, implementation for channels
// first (NCHW) works.
for (C10_UNUSED const auto b : c10::irange(batches)) {
for (C10_UNUSED const auto e : c10::irange(elements_per_channel)) {
for ([[maybe_unused]] const auto b : c10::irange(batches)) {
for ([[maybe_unused]] const auto e : c10::irange(elements_per_channel)) {
uint32_t c = 0;
while (c + 8 < channels) {
const int32x4_t voffset0123 = vld1q_s32(&zero_points_int32t[c]);
@ -3853,7 +3853,7 @@ void quantize_tensor_per_channel_impl<c10::quint8>(
}
}
} else {
for (C10_UNUSED const auto b : c10::irange(batches)) {
for ([[maybe_unused]] const auto b : c10::irange(batches)) {
for (const auto c : c10::irange(channels)) {
uint32_t e = 0;
const int32x4_t voffset = vdupq_n_s32(zero_points_int32t[c]);
@ -3900,8 +3900,8 @@ void quantize_tensor_per_channel_impl<c10::quint8>(
// channels_last contig.
// If axis = 0 and channels_last contig, implementation for channels
// first (NCHW) works.
for (C10_UNUSED const auto b : c10::irange(batches)) {
for (C10_UNUSED const auto e : c10::irange(elements_per_channel)) {
for ([[maybe_unused]] const auto b : c10::irange(batches)) {
for ([[maybe_unused]] const auto e : c10::irange(elements_per_channel)) {
uint32_t c = 0;
while (c + 8 < channels) {
const int16x8_t vzero_point = vld1q_s16(&zero_points_int16t[c]);
@ -3931,8 +3931,8 @@ void quantize_tensor_per_channel_impl<c10::quint8>(
}
}
} else {
for (C10_UNUSED const auto b : c10::irange(batches)) {
for (C10_UNUSED const auto c : c10::irange(channels)) {
for ([[maybe_unused]] const auto b : c10::irange(batches)) {
for ([[maybe_unused]] const auto c : c10::irange(channels)) {
uint32_t e = 0;
const int16x8_t vzero_point = vdupq_n_s16(zero_points_int16t[c]);
const float32x4_t vinv_scale = vdupq_n_f32(inv_scales[c]);


@ -634,7 +634,7 @@ class QConvPackWeightInt8 final {
int64_t groups) {
torch::List<int64_t> output_padding;
output_padding.reserve(kSpatialDim);
for (C10_UNUSED const auto idx : c10::irange(kSpatialDim)) {
for ([[maybe_unused]] const auto idx : c10::irange(kSpatialDim)) {
output_padding.push_back((int64_t)0);
}
return _run(weight, bias, stride, padding, output_padding, dilation, groups,


@ -139,7 +139,7 @@ class QConvPackWeightInt8Cudnn final {
int64_t groups) {
torch::List<int64_t> output_padding;
output_padding.reserve(kSpatialDim);
for (C10_UNUSED const auto idx : c10::irange(kSpatialDim)) {
for ([[maybe_unused]] const auto idx : c10::irange(kSpatialDim)) {
output_padding.push_back((int64_t)0);
}
return _run(weight, bias, stride, padding, output_padding, dilation, groups,


@ -159,7 +159,7 @@ void _csr_matmult(
}
}
for (C10_UNUSED const auto jj : c10::irange(length)) {
for ([[maybe_unused]] const auto jj : c10::irange(length)) {
// NOTE: the linked list that encodes col indices
// is not guaranteed to be sorted.
Cj[nnz] = head;


@ -11,7 +11,7 @@
// Compiler Macros
// Suppress an unused variable. Copied from C10_UNUSED
// Suppress an unused variable. Copied from [[maybe_unused]]
#if defined(_MSC_VER) && !defined(__clang__)
#define VK_UNUSED __pragma(warning(suppress : 4100 4101))
#else


@ -31,7 +31,7 @@ bool initialize() {
return is_initialized_;
}
C10_UNUSED bool deinitialize() {
[[maybe_unused]] bool deinitialize() {
using namespace internal;
// This implementation allows for retries.


@ -89,7 +89,7 @@ void TestAdd(DeprecatedTypeProperties& type) {
void TestZeros(DeprecatedTypeProperties& type) {
auto begin = std::chrono::high_resolution_clock::now();
Tensor a = zeros({1024, 1024}, type);
for (C10_UNUSED const auto i : c10::irange(1, 1000)) {
for ([[maybe_unused]] const auto i : c10::irange(1, 1000)) {
a = zeros({128, 128}, type);
}
auto end = std::chrono::high_resolution_clock::now();
@ -107,7 +107,7 @@ void TestLoadsOfAdds(DeprecatedTypeProperties& type) {
auto begin = std::chrono::high_resolution_clock::now();
Tensor d = ones({3, 4}, type);
Tensor r = zeros({3, 4}, type);
for (C10_UNUSED const auto i : c10::irange(1000)) {
for ([[maybe_unused]] const auto i : c10::irange(1000)) {
add_out(r, r, d);
}
auto end = std::chrono::high_resolution_clock::now();
@ -124,7 +124,7 @@ void TestLoadOfAddsWithCopy(DeprecatedTypeProperties& type) {
auto begin = std::chrono::high_resolution_clock::now();
Tensor d = ones({3, 4}, type);
Tensor r = zeros({3, 4}, type);
for (C10_UNUSED const auto i : c10::irange(1000)) {
for ([[maybe_unused]] const auto i : c10::irange(1000)) {
r = add(r, d);
}
auto end = std::chrono::high_resolution_clock::now();


@ -161,7 +161,7 @@ TEST(CPUGeneratorImpl, TestPhiloxEngineOffset1) {
// So if you want to skip 8 values, offset would
// be 2, since 2*4=8.
at::Philox4_32 engine2(123, 1, 2);
for (C10_UNUSED const auto i : c10::irange(8)) {
for ([[maybe_unused]] const auto i : c10::irange(8)) {
// Note: instead of using the engine() call 8 times
// we could have achieved the same functionality by
// calling the incr() function twice.
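An aside on the offset arithmetic in the comment above: each Philox4_32 counter increment yields four 32-bit outputs, so an offset of 2 skips 2 * 4 = 8 values. A minimal sketch of that equivalence, assuming at::Philox4_32 comes from the usual ATen header:

    #include <ATen/core/PhiloxRNGEngine.h>  // assumed header for at::Philox4_32
    #include <cassert>

    int main() {
      at::Philox4_32 reference(123, 1, 0);  // seed, subsequence, offset 0
      for ([[maybe_unused]] const auto i : {0, 1, 2, 3, 4, 5, 6, 7}) {
        reference();  // discard eight outputs one call at a time
      }
      at::Philox4_32 skipped(123, 1, 2);  // offset 2 -> 2 * 4 = 8 outputs skipped
      assert(reference() == skipped());   // the two streams are now aligned
      return 0;
    }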
@ -222,14 +222,14 @@ TEST(CPUGeneratorImpl, TestMT19937EngineReproducibility) {
// test with zero seed
at::mt19937 engine1(0);
std::mt19937 engine2(0);
for (C10_UNUSED const auto i : c10::irange(10000)) {
for ([[maybe_unused]] const auto i : c10::irange(10000)) {
ASSERT_EQ(engine1(), engine2());
}
// test with large seed
engine1 = at::mt19937(2147483647);
engine2 = std::mt19937(2147483647);
for (C10_UNUSED const auto i : c10::irange(10000)) {
for ([[maybe_unused]] const auto i : c10::irange(10000)) {
ASSERT_EQ(engine1(), engine2());
}
@ -238,10 +238,9 @@ TEST(CPUGeneratorImpl, TestMT19937EngineReproducibility) {
auto seed = rd();
engine1 = at::mt19937(seed);
engine2 = std::mt19937(seed);
for (C10_UNUSED const auto i : c10::irange(10000)) {
for ([[maybe_unused]] const auto i : c10::irange(10000)) {
ASSERT_EQ(engine1(), engine2());
}
}
TEST(CPUGeneratorImpl, TestPhiloxEngineReproducibilityRandN) {


@ -170,7 +170,7 @@ TEST(VmapTest, TestBatchedTensorActualDim) {
{
// ActualDim on kVmapMaxTensorDims sized underlying tensor
auto tensor = ones({});
for (C10_UNUSED const auto i : c10::irange(kVmapMaxTensorDims)) {
for ([[maybe_unused]] const auto i : c10::irange(kVmapMaxTensorDims)) {
tensor = tensor.unsqueeze(0);
}
ASSERT_EQ(tensor.dim(), kVmapMaxTensorDims);


@ -14,7 +14,7 @@ void test(int given_num_threads) {
ASSERT_TRUE(given_num_threads >= 0);
ASSERT_EQ(at::get_num_threads(), given_num_threads);
auto t_sum = t.sum();
for (C10_UNUSED const auto i : c10::irange(1000)) {
for ([[maybe_unused]] const auto i : c10::irange(1000)) {
t_sum = t_sum + t.sum();
}
}


@ -1122,24 +1122,28 @@ namespace {
float minv = static_cast<float>(static_cast<double>(min_val) * 2.0);
float maxv = static_cast<float>(static_cast<double>(max_val) * 2.0);
ValueGen<float> gen(minv, maxv, seed.add(2));
for (C10_UNUSED const auto i : c10::irange(trials)) {
float scale = generator_sc.get();
float inv_scale = 1.0f / static_cast<float>(scale);
auto zero_point_val = generator_zp.get();
int index = 0;
for (int j = 0; j < vec::float_num_vecs(); j++) {
//generate vals
for (auto& v : unit_float_vec) {
v = gen.get();
expected_qint_vals[index] = quantize_val<underlying>(scale, zero_point_val, v);
index++;
}
float_ret[j] = vfloat::loadu(unit_float_vec);
for ([[maybe_unused]] const auto i : c10::irange(trials)) {
float scale = generator_sc.get();
float inv_scale = 1.0f / static_cast<float>(scale);
auto zero_point_val = generator_zp.get();
int index = 0;
for (int j = 0; j < vec::float_num_vecs(); j++) {
// generate vals
for (auto& v : unit_float_vec) {
v = gen.get();
expected_qint_vals[index] =
quantize_val<underlying>(scale, zero_point_val, v);
index++;
}
auto expected = vec::loadu(expected_qint_vals);
auto actual = vec::quantize(float_ret, scale, zero_point_val, inv_scale);
if (AssertVectorized<vec>(NAME_INFO(Quantize), expected, actual).check()) return;
} //trials;
float_ret[j] = vfloat::loadu(unit_float_vec);
}
auto expected = vec::loadu(expected_qint_vals);
auto actual =
vec::quantize(float_ret, scale, zero_point_val, inv_scale);
if (AssertVectorized<vec>(NAME_INFO(Quantize), expected, actual)
.check())
return;
} // trials;
}
#if (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && !defined(_MSC_VER)
// This test case aims to test at::vec::QuantizeAvx512 and
@ -1168,7 +1172,7 @@ namespace {
float minv = static_cast<float>(static_cast<double>(min_val) * 2.0);
float maxv = static_cast<float>(static_cast<double>(max_val) * 2.0);
ValueGen<float> gen(minv, maxv, seed.add(2));
for (C10_UNUSED const auto i : c10::irange(trials)) {
for ([[maybe_unused]] const auto i : c10::irange(trials)) {
float scale = generator_sc.get();
float inv_scale = 1.0f / static_cast<float>(scale);
auto zero_point_val = generator_zp.get();
@ -1227,35 +1231,36 @@ namespace {
ValueGen<int> generator(min_val, max_val, seed.add(1));
//scale
ValueGen<float> generator_sc(1.f, 15.f, seed.add(2));
for (C10_UNUSED const auto i : c10::irange(trials)) {
float scale = generator_sc.get();
int32_t zero_point_val = generator.get();
float scale_zp_premul = -(scale * zero_point_val);
vfloat vf_scale = vfloat{ scale };
vfloat vf_zp = vfloat{ static_cast<float>(zero_point_val) };
vfloat vf_scale_zp = vfloat{ scale_zp_premul };
//generate vals
for (auto& x : qint_vals) {
x = generator.get();
for ([[maybe_unused]] const auto i : c10::irange(trials)) {
float scale = generator_sc.get();
int32_t zero_point_val = generator.get();
float scale_zp_premul = -(scale * zero_point_val);
vfloat vf_scale = vfloat{scale};
vfloat vf_zp = vfloat{static_cast<float>(zero_point_val)};
vfloat vf_scale_zp = vfloat{scale_zp_premul};
// generate vals
for (auto& x : qint_vals) {
x = generator.get();
}
// get expected
int index = 0;
auto qint_vec = vec::loadu(qint_vals);
auto actual_float_ret =
qint_vec.dequantize(vf_scale, vf_zp, vf_scale_zp);
for (int j = 0; j < vec::float_num_vecs(); j++) {
for (auto& v : unit_exp_vals) {
v = dequantize_val(scale, zero_point_val, qint_vals[index]);
index++;
}
//get expected
int index = 0;
auto qint_vec = vec::loadu(qint_vals);
auto actual_float_ret = qint_vec.dequantize(vf_scale, vf_zp, vf_scale_zp);
for (int j = 0; j < vec::float_num_vecs(); j++) {
for (auto& v : unit_exp_vals) {
v = dequantize_val(scale, zero_point_val, qint_vals[index]);
index++;
}
vfloat expected = vfloat::loadu(unit_exp_vals);
const auto& actual = actual_float_ret[j];
vfloat expected = vfloat::loadu(unit_exp_vals);
const auto& actual = actual_float_ret[j];
#if defined(CHECK_DEQUANT_WITH_LOW_PRECISION)
if (AssertVectorized<vfloat>(NAME_INFO(DeQuantize), seed, expected, actual).check(false, true, 1.e-3f)) return;
#else
if (AssertVectorized<vfloat>(NAME_INFO(DeQuantize), seed, expected, actual).check()) return;
#endif
}
} //trials;
} // trials;
}
TYPED_TEST(QuantizationTests, ReQuantizeFromInt) {
using vec = TypeParam;
@ -1274,25 +1279,29 @@ namespace {
ValueGen<int32_t> generator(min_val, max_val, seed);
//scale
ValueGen<float> generator_sc(1.f, 15.f, seed.add(1));
for (C10_UNUSED const auto i : c10::irange(trials)) {
float multiplier = 1.f / (generator_sc.get());
auto zero_point_val = generator.get();
int index = 0;
for (int j = 0; j < vec::float_num_vecs(); j++) {
//generate vals
for (auto& v : unit_int_vec) {
v = c10::qint32(generator.get());
expected_qint_vals[index] = requantize_from_int<underlying>(multiplier, zero_point_val, v.val_);
index++;
}
int_ret[j] = vqint::loadu(unit_int_vec);
for ([[maybe_unused]] const auto i : c10::irange(trials)) {
float multiplier = 1.f / (generator_sc.get());
auto zero_point_val = generator.get();
int index = 0;
for (int j = 0; j < vec::float_num_vecs(); j++) {
// generate vals
for (auto& v : unit_int_vec) {
v = c10::qint32(generator.get());
expected_qint_vals[index] = requantize_from_int<underlying>(
multiplier, zero_point_val, v.val_);
index++;
}
auto expected = vec::loadu(expected_qint_vals);
auto actual = vec::requantize_from_int(int_ret, multiplier, zero_point_val);
if (AssertVectorized<vec>(NAME_INFO(ReQuantizeFromInt), seed, expected, actual).check()) {
return;
}
} //trials;
int_ret[j] = vqint::loadu(unit_int_vec);
}
auto expected = vec::loadu(expected_qint_vals);
auto actual =
vec::requantize_from_int(int_ret, multiplier, zero_point_val);
if (AssertVectorized<vec>(
NAME_INFO(ReQuantizeFromInt), seed, expected, actual)
.check()) {
return;
}
} // trials;
}
TYPED_TEST(QuantizationTests, WideningSubtract) {
using vec = TypeParam;
@ -1311,30 +1320,33 @@ namespace {
typename vec::int_vec_return_type expected_int_ret;
auto seed = TestSeed();
ValueGen<underlying> generator(min_val, max_val, seed);
for (C10_UNUSED const auto i : c10::irange(trials)) {
//generate vals
for (int j = 0; j < vec::size(); j++) {
qint_vals[j] = generator.get();
qint_b[j] = generator.get();
if constexpr (std::is_same_v<underlying, int>) {
//filter overflow cases
filter_sub_overflow(qint_vals[j], qint_b[j]);
}
for ([[maybe_unused]] const auto i : c10::irange(trials)) {
// generate vals
for (int j = 0; j < vec::size(); j++) {
qint_vals[j] = generator.get();
qint_b[j] = generator.get();
if constexpr (std::is_same_v<underlying, int>) {
// filter overflow cases
filter_sub_overflow(qint_vals[j], qint_b[j]);
}
int index = 0;
auto qint_vec = vec::loadu(qint_vals);
auto qint_vec_b = vec::loadu(qint_b);
auto actual_int_ret = qint_vec.widening_subtract(qint_vec_b);
for (int j = 0; j < vec::float_num_vecs(); j++) {
for (auto& v : unit_exp_vals) {
v = widening_subtract(qint_vals[index], qint_b[index]);
index++;
}
auto expected = vqint::loadu(unit_exp_vals);
const auto& actual = actual_int_ret[j];
if (AssertVectorized<vqint>(NAME_INFO(WideningSubtract), seed, expected, actual).check()) return;
}
int index = 0;
auto qint_vec = vec::loadu(qint_vals);
auto qint_vec_b = vec::loadu(qint_b);
auto actual_int_ret = qint_vec.widening_subtract(qint_vec_b);
for (int j = 0; j < vec::float_num_vecs(); j++) {
for (auto& v : unit_exp_vals) {
v = widening_subtract(qint_vals[index], qint_b[index]);
index++;
}
} //trials;
auto expected = vqint::loadu(unit_exp_vals);
const auto& actual = actual_int_ret[j];
if (AssertVectorized<vqint>(
NAME_INFO(WideningSubtract), seed, expected, actual)
.check())
return;
}
} // trials;
}
TYPED_TEST(QuantizationTests, Relu) {
using vec = TypeParam;


@ -943,22 +943,25 @@ void test_unary(
UVT start = dmn_argc > 0 ? dmn.ArgsDomain[0].start : default_start;
UVT end = dmn_argc > 0 ? dmn.ArgsDomain[0].end : default_end;
ValueGen<VT> generator(start, end, seed.add(changeSeedBy));
for (C10_UNUSED const auto trial : c10::irange(trialCount)) {
for (const auto k : c10::irange(el_count)) {
vals[k] = generator.get();
call_filter(filter, vals[k]);
//map operator
expected[k] = expectedFunction(vals[k]);
}
// test
auto input = vec_type::loadu(vals);
auto actual = actualFunction(input);
auto vec_expected = vec_type::loadu(expected);
AssertVectorized<vec_type> vecAssert(testNameInfo, seed, vec_expected, actual, input);
if (vecAssert.check(bitwise, dmn.CheckWithTolerance, dmn.ToleranceError)) return;
for ([[maybe_unused]] const auto trial : c10::irange(trialCount)) {
for (const auto k : c10::irange(el_count)) {
vals[k] = generator.get();
call_filter(filter, vals[k]);
// map operator
expected[k] = expectedFunction(vals[k]);
}
// test
auto input = vec_type::loadu(vals);
auto actual = actualFunction(input);
auto vec_expected = vec_type::loadu(expected);
AssertVectorized<vec_type> vecAssert(
testNameInfo, seed, vec_expected, actual, input);
if (vecAssert.check(
bitwise, dmn.CheckWithTolerance, dmn.ToleranceError))
return;
}// trial
//inrease Seed
} // trial
// inrease Seed
changeSeedBy += 1;
}
for (auto& custom : testCase.getCustomChecks()) {
@ -1002,22 +1005,25 @@ void test_binary(
UVT end1 = dmn_argc > 1 ? dmn.ArgsDomain[1].end : default_end;
ValueGen<VT> generator0(start0, end0, seed.add(changeSeedBy));
ValueGen<VT> generator1(start1, end1, seed.add(changeSeedBy + 1));
for (C10_UNUSED const auto trial : c10::irange(trialCount)) {
for (const auto k : c10::irange(el_count)) {
vals0[k] = generator0.get();
vals1[k] = generator1.get();
call_filter(filter, vals0[k], vals1[k]);
//map operator
expected[k] = expectedFunction(vals0[k], vals1[k]);
}
// test
auto input0 = vec_type::loadu(vals0);
auto input1 = vec_type::loadu(vals1);
auto actual = actualFunction(input0, input1);
auto vec_expected = vec_type::loadu(expected);
AssertVectorized<vec_type> vecAssert(testNameInfo, seed, vec_expected, actual, input0, input1);
if (vecAssert.check(bitwise, dmn.CheckWithTolerance, dmn.ToleranceError))return;
}// trial
for ([[maybe_unused]] const auto trial : c10::irange(trialCount)) {
for (const auto k : c10::irange(el_count)) {
vals0[k] = generator0.get();
vals1[k] = generator1.get();
call_filter(filter, vals0[k], vals1[k]);
// map operator
expected[k] = expectedFunction(vals0[k], vals1[k]);
}
// test
auto input0 = vec_type::loadu(vals0);
auto input1 = vec_type::loadu(vals1);
auto actual = actualFunction(input0, input1);
auto vec_expected = vec_type::loadu(expected);
AssertVectorized<vec_type> vecAssert(
testNameInfo, seed, vec_expected, actual, input0, input1);
if (vecAssert.check(
bitwise, dmn.CheckWithTolerance, dmn.ToleranceError))
return;
} // trial
changeSeedBy += 1;
}
for (auto& custom : testCase.getCustomChecks()) {
@ -1067,24 +1073,27 @@ void test_ternary(
ValueGen<VT> generator1(start1, end1, seed.add(changeSeedBy + 1));
ValueGen<VT> generator2(start2, end2, seed.add(changeSeedBy + 2));
for (C10_UNUSED const auto trial : c10::irange(trialCount)) {
for (const auto k : c10::irange(el_count)) {
vals0[k] = generator0.get();
vals1[k] = generator1.get();
vals2[k] = generator2.get();
call_filter(filter, vals0[k], vals1[k], vals2[k]);
//map operator
expected[k] = expectedFunction(vals0[k], vals1[k], vals2[k]);
}
// test
auto input0 = vec_type::loadu(vals0);
auto input1 = vec_type::loadu(vals1);
auto input2 = vec_type::loadu(vals2);
auto actual = actualFunction(input0, input1, input2);
auto vec_expected = vec_type::loadu(expected);
AssertVectorized<vec_type> vecAssert(testNameInfo, seed, vec_expected, actual, input0, input1, input2);
if (vecAssert.check(bitwise, dmn.CheckWithTolerance, dmn.ToleranceError)) return;
}// trial
for ([[maybe_unused]] const auto trial : c10::irange(trialCount)) {
for (const auto k : c10::irange(el_count)) {
vals0[k] = generator0.get();
vals1[k] = generator1.get();
vals2[k] = generator2.get();
call_filter(filter, vals0[k], vals1[k], vals2[k]);
// map operator
expected[k] = expectedFunction(vals0[k], vals1[k], vals2[k]);
}
// test
auto input0 = vec_type::loadu(vals0);
auto input1 = vec_type::loadu(vals1);
auto input2 = vec_type::loadu(vals2);
auto actual = actualFunction(input0, input1, input2);
auto vec_expected = vec_type::loadu(expected);
AssertVectorized<vec_type> vecAssert(
testNameInfo, seed, vec_expected, actual, input0, input1, input2);
if (vecAssert.check(
bitwise, dmn.CheckWithTolerance, dmn.ToleranceError))
return;
} // trial
changeSeedBy += 1;
}
}


@ -72,11 +72,11 @@ inline bool is_thp_alloc(size_t nbytes) {
return (is_thp_alloc_enabled() && (nbytes >= gAlloc_threshold_thp));
}
#elif !defined(__ANDROID__) && !defined(_MSC_VER)
constexpr size_t c10_compute_alignment(C10_UNUSED size_t nbytes) {
constexpr size_t c10_compute_alignment([[maybe_unused]] size_t nbytes) {
return gAlignment;
}
constexpr bool is_thp_alloc(C10_UNUSED size_t nbytes) {
constexpr bool is_thp_alloc([[maybe_unused]] size_t nbytes) {
return false;
}
#endif


@ -196,7 +196,7 @@ CUDAKernelLaunchRegistry::CUDAKernelLaunchRegistry()
dsa_check_if_all_devices_support_managed_memory()),
gather_launch_stacktrace(check_env_for_enable_launch_stacktracing()),
enabled_at_runtime(check_env_for_dsa_enabled()) {
for (C10_UNUSED const auto _ : c10::irange(dsa_get_device_count())) {
for ([[maybe_unused]] const auto _ : c10::irange(dsa_get_device_count())) {
uvm_assertions.emplace_back(nullptr, uvm_deleter);
}


@ -23,7 +23,7 @@ void c10_cuda_check_implementation(
return;
}
C10_UNUSED auto error_unused = cudaGetLastError();
[[maybe_unused]] auto error_unused = cudaGetLastError();
(void)error_unused;
std::string check_message;


@ -40,7 +40,7 @@ class C10_CUDA_API CUDAError : public c10::Error {
do { \
const cudaError_t __err = EXPR; \
if (C10_UNLIKELY(__err != cudaSuccess)) { \
C10_UNUSED auto error_unused = cudaGetLastError(); \
[[maybe_unused]] auto error_unused = cudaGetLastError(); \
TORCH_WARN("CUDA warning: ", cudaGetErrorString(__err)); \
} \
} while (0)
@ -49,18 +49,18 @@ class C10_CUDA_API CUDAError : public c10::Error {
#define C10_CUDA_ERROR_HANDLED(EXPR) EXPR
// Intentionally ignore a CUDA error
#define C10_CUDA_IGNORE_ERROR(EXPR) \
do { \
const cudaError_t __err = EXPR; \
if (C10_UNLIKELY(__err != cudaSuccess)) { \
C10_UNUSED cudaError_t error_unused = cudaGetLastError(); \
} \
#define C10_CUDA_IGNORE_ERROR(EXPR) \
do { \
const cudaError_t __err = EXPR; \
if (C10_UNLIKELY(__err != cudaSuccess)) { \
[[maybe_unused]] cudaError_t error_unused = cudaGetLastError(); \
} \
} while (0)
// Clear the last CUDA error
#define C10_CUDA_CLEAR_ERROR() \
do { \
C10_UNUSED cudaError_t error_unused = cudaGetLastError(); \
#define C10_CUDA_CLEAR_ERROR() \
do { \
[[maybe_unused]] cudaError_t error_unused = cudaGetLastError(); \
} while (0)
// This should be used directly after every kernel launch to ensure
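The macros in this hunk implement the "swallow the error but drain cudaGetLastError()" pattern, so the sticky error state cannot leak into a later C10_CUDA_CHECK. A small usage sketch, assuming the macros are reachable through c10/cuda/CUDAException.h:

    #include <c10/cuda/CUDAException.h>  // assumed location of these macros
    #include <cuda_runtime.h>

    void release_buffer(void* ptr) {
      // A failure here (e.g. during process teardown) is deliberately ignored,
      // but the macro still drains cudaGetLastError().
      C10_CUDA_IGNORE_ERROR(cudaFree(ptr));
    }

    int main() {
      void* p = nullptr;
      C10_CUDA_IGNORE_ERROR(cudaMalloc(&p, 64));
      release_buffer(p);
      C10_CUDA_CLEAR_ERROR();  // explicitly reset whatever error is pending
      return 0;
    }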


@ -22,7 +22,7 @@ int device_count_impl(bool fail_if_no_driver) {
// Clear out the error state, so we don't spuriously trigger someone else.
// (This shouldn't really matter, since we won't be running very much CUDA
// code in this regime.)
C10_UNUSED cudaError_t last_err = cudaGetLastError();
[[maybe_unused]] cudaError_t last_err = cudaGetLastError();
switch (err) {
case cudaErrorNoDevice:
// Zero devices is ok here
@ -170,7 +170,7 @@ std::optional<DeviceIndex> getDeviceIndexWithPrimaryContext() {
}
namespace _internal {
bool dummyHasPrimaryContext(C10_UNUSED DeviceIndex device_index) {
bool dummyHasPrimaryContext([[maybe_unused]] DeviceIndex device_index) {
TORCH_CHECK(false, "Should never been called");
}
bool (*hasPrimaryContext)(DeviceIndex) = dummyHasPrimaryContext;


@ -8,7 +8,7 @@
CUresult __err = EXPR; \
if (__err != CUDA_SUCCESS) { \
const char* err_str; \
CUresult get_error_str_err C10_UNUSED = \
CUresult get_error_str_err [[maybe_unused]] = \
c10::cuda::DriverAPI::get()->cuGetErrorString_(__err, &err_str); \
if (get_error_str_err != CUDA_SUCCESS) { \
AT_ERROR("CUDA driver error: unknown error"); \


@ -118,9 +118,6 @@
#define C10_HAS_CPP_ATTRIBUTE(x) (0)
#endif
// suppress an unused variable.
#define C10_UNUSED [[maybe_unused]]
#if !defined(__has_attribute)
#define __has_attribute(x) 0
#endif
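With the C10_UNUSED alias removed here, call sites spell out the standard C++17 attribute directly. A standalone illustration of the attribute on the kinds of declarations touched throughout this diff:

    #include <cstdio>

    int main() {
      [[maybe_unused]] const int answer = 42;  // intentionally unread
      // Same idiom as the converted loops: the loop variable only counts
      // iterations and is never read in the body.
      for ([[maybe_unused]] const auto i : {0, 1, 2}) {
        std::puts("tick");
      }
      return 0;
    }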


@ -35,12 +35,12 @@ dict_int_int test_dict(dict_int_int& dict) {
// erase via iterators
auto begin = dict.begin();
for (C10_UNUSED const auto i : c10::irange(20)) {
for ([[maybe_unused]] const auto i : c10::irange(20)) {
begin++;
}
auto end = begin;
for (C10_UNUSED const auto i : c10::irange(20)) {
for ([[maybe_unused]] const auto i : c10::irange(20)) {
erase_set.insert(end->first);
end++;
}
@ -134,11 +134,11 @@ TEST(OrderedPreservingDictTest, DictCollisions) {
// erase a few entries via iterator
auto begin = dict.begin();
for (C10_UNUSED const auto j : c10::irange(10)) {
for ([[maybe_unused]] const auto j : c10::irange(10)) {
begin++;
}
auto end = begin;
for (C10_UNUSED const auto j : c10::irange(7)) {
for ([[maybe_unused]] const auto j : c10::irange(7)) {
erase_set.insert(end->first);
end++;
}


@ -26,7 +26,7 @@ ApproximateClockToUnixTimeConverter::measurePair() {
ApproximateClockToUnixTimeConverter::time_pairs
ApproximateClockToUnixTimeConverter::measurePairs() {
static constexpr auto n_warmup = 5;
for (C10_UNUSED const auto _ : c10::irange(n_warmup)) {
for ([[maybe_unused]] const auto _ : c10::irange(n_warmup)) {
getApproximateTime();
static_cast<void>(steady_clock_t::now());
}


@ -658,12 +658,12 @@ namespace c10::detail {
// Report a warning to the user only once. Accepts an arbitrary number of extra
// arguments which are concatenated into the warning message using operator<<
//
#define _TORCH_WARN_ONCE(...) \
C10_UNUSED static const auto C10_ANONYMOUS_VARIABLE(torch_warn_once_) = \
[&] { \
TORCH_WARN(__VA_ARGS__); \
return true; \
}()
#define _TORCH_WARN_ONCE(...) \
[[maybe_unused]] static const auto C10_ANONYMOUS_VARIABLE( \
torch_warn_once_) = [&] { \
TORCH_WARN(__VA_ARGS__); \
return true; \
}()
#ifdef DISABLE_WARN
#define TORCH_WARN_ONCE(...) ((void)0);
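The reformatted _TORCH_WARN_ONCE above relies on the one-time initialization of a [[maybe_unused]] static to fire the warning exactly once. The same idiom outside the macro, sketched with a hypothetical warn_once helper rather than the PyTorch API:

    #include <cstdio>

    // The lambda runs only while the function-local static is initialized,
    // i.e. on the first call; later calls skip it entirely.
    void warn_once(const char* msg) {
      [[maybe_unused]] static const bool warned = [&] {
        std::fprintf(stderr, "Warning: %s\n", msg);
        return true;
      }();
    }

    int main() {
      for ([[maybe_unused]] const auto i : {0, 1, 2}) {
        warn_once("emitted a single time");
      }
      return 0;
    }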


@ -322,8 +322,8 @@ C10_API const std::unique_ptr<EventSampledHandler>& GetEventSampledHandler(
* // Logs caller info with an arbitrary text event, if there is a usage.
* C10_LOG_API_USAGE_ONCE("my_api");
*/
#define C10_LOG_API_USAGE_ONCE(...) \
C10_UNUSED static bool C10_ANONYMOUS_VARIABLE(logFlag) = \
#define C10_LOG_API_USAGE_ONCE(...) \
[[maybe_unused]] static bool C10_ANONYMOUS_VARIABLE(logFlag) = \
::c10::detail::LogAPIUsageFakeReturn(__VA_ARGS__);
// API usage logging capabilities


@ -115,7 +115,7 @@ TEST(XPUStreamTest, StreamPoolRoundRobinTest) {
}
std::vector<c10::xpu::XPUStream> streams{};
for (C10_UNUSED const auto _ : c10::irange(200)) {
for ([[maybe_unused]] const auto _ : c10::irange(200)) {
streams.emplace_back(c10::xpu::getStreamFromPool());
}


@ -2220,7 +2220,7 @@ TEST(DataLoaderTest, ChunkDatasetCrossChunkShuffle) {
for (const auto i : c10::irange(
(chunk_count + cross_chunk_shuffle_count - 1) /
cross_chunk_shuffle_count)) {
for (C10_UNUSED const auto j : c10::irange(chunk_size)) {
for ([[maybe_unused]] const auto j : c10::irange(chunk_size)) {
for (const auto k : c10::irange(cross_chunk_shuffle_count)) {
if (i * cross_chunk_shuffle_count + k < chunk_count) {
expected_result.push_back(i * cross_chunk_shuffle_count + k);


@ -1343,7 +1343,7 @@ TEST_F(FunctionalTest, GumbelSoftmax) {
auto counts = torch::zeros_like(logits);
torch::Tensor y_draw;
for (C10_UNUSED const auto i : c10::irange(num_draws)) {
for ([[maybe_unused]] const auto i : c10::irange(num_draws)) {
y_draw =
F::gumbel_softmax(logits, F::GumbelSoftmaxFuncOptions().hard(true));
counts += y_draw;


@ -123,7 +123,7 @@ bool test_mnist(
torch::Device device(with_cuda ? torch::kCUDA : torch::kCPU);
model->to(device);
for (C10_UNUSED const auto epoch : c10::irange(number_of_epochs)) {
for ([[maybe_unused]] const auto epoch : c10::irange(number_of_epochs)) {
// NOLINTNEXTLINE(performance-for-range-copy)
for (torch::data::Example<> batch : *data_loader) {
auto data = batch.data.to(device);


@ -3511,7 +3511,7 @@ void _multihead_attn_test_helper(
std::uniform_int_distribution<int> d_2_10(2, 10);
std::uniform_int_distribution<int> d_3_10(3, 10);
bool registration_checked = false;
for (C10_UNUSED const auto i : c10::irange(100)) {
for ([[maybe_unused]] const auto i : c10::irange(100)) {
const auto batch_sz = d_2_10(generator);
const auto seq_len = d_2_10(generator);
const auto d_head = d_3_10(generator);


@ -398,7 +398,8 @@ std::vector<torch::Tensor> PackedSequenceTest_ordered_sequence(
torch::ScalarType tensor_type) {
std::vector<torch::Tensor> seqs;
seqs.reserve(PackedSequenceTest_batch_size);
for (C10_UNUSED const auto i : c10::irange(PackedSequenceTest_batch_size)) {
for ([[maybe_unused]] const auto i :
c10::irange(PackedSequenceTest_batch_size)) {
seqs.emplace_back(torch::empty(
{torch::randint(1, PackedSequenceTest_max_length, {1}).item<int64_t>()},
tensor_type));


@ -12,7 +12,7 @@ struct OperationTest : torch::test::SeedingFixture {
};
TEST_F(OperationTest, Lerp) {
for (C10_UNUSED const auto i : c10::irange(TEST_AMOUNT)) {
for ([[maybe_unused]] const auto i : c10::irange(TEST_AMOUNT)) {
// test lerp_kernel_scalar
auto start = torch::rand({3, 5});
auto end = torch::rand({3, 5});
@ -36,7 +36,7 @@ TEST_F(OperationTest, Lerp) {
}
TEST_F(OperationTest, Cross) {
for (C10_UNUSED const auto i : c10::irange(TEST_AMOUNT)) {
for ([[maybe_unused]] const auto i : c10::irange(TEST_AMOUNT)) {
// input
auto a = torch::rand({10, 3});
auto b = torch::rand({10, 3});


@ -157,7 +157,7 @@ void check_exact_values(
TEST(OptimTest, OptimizerAccessors) {
auto options = AdagradOptions(1.0);
std::vector<torch::Tensor> params;
for (C10_UNUSED const auto i : c10::irange(3)) {
for ([[maybe_unused]] const auto i : c10::irange(3)) {
params.push_back(torch::randn(10));
}
auto optimizer = Adagrad(params, options);


@ -99,14 +99,14 @@ void stressTestStore(std::string path, std::string prefix = "") {
std::vector<std::thread> threads;
c10d::test::Semaphore sem1, sem2;
for (C10_UNUSED const auto i : c10::irange(numThreads)) {
for ([[maybe_unused]] const auto i : c10::irange(numThreads)) {
threads.emplace_back([&] {
auto fileStore =
c10::make_intrusive<c10d::FileStore>(path, numThreads + 1);
c10d::PrefixStore store(prefix, fileStore);
sem1.post();
sem2.wait();
for (C10_UNUSED const auto j : c10::irange(numIterations)) {
for ([[maybe_unused]] const auto j : c10::irange(numIterations)) {
store.add("counter", 1);
}
});


@ -62,11 +62,11 @@ void stressTestStore(std::string prefix = "") {
auto hashStore = c10::make_intrusive<c10d::HashStore>();
c10d::PrefixStore store(std::move(prefix), hashStore);
for (C10_UNUSED const auto i : c10::irange(numThreads)) {
for ([[maybe_unused]] const auto i : c10::irange(numThreads)) {
threads.emplace_back([&] {
sem1.post();
sem2.wait();
for (C10_UNUSED const auto j : c10::irange(numIterations)) {
for ([[maybe_unused]] const auto j : c10::irange(numIterations)) {
store.add("counter", 1);
}
});


@ -15,12 +15,12 @@ using at::cuda::CUDAStream;
template <typename T, typename... Args>
std::vector<T> initialize(const std::string& path, size_t N, Args&&... args) {
std::vector<T> tests;
for (C10_UNUSED const auto i : c10::irange(N)) {
for ([[maybe_unused]] const auto i : c10::irange(N)) {
tests.push_back(std::move(T(path, std::forward<Args>(args)...)));
}
std::vector<std::thread> threads;
for (C10_UNUSED const auto i : c10::irange(N)) {
for ([[maybe_unused]] const auto i : c10::irange(N)) {
threads.push_back(std::thread([i, N, &tests] { tests[i].start(i, N); }));
}


@ -123,7 +123,7 @@ class CollectiveTest {
int num,
bool delayed = false) {
std::vector<CollectiveTest> tests;
for (C10_UNUSED const auto i : c10::irange(num)) {
for ([[maybe_unused]] const auto i : c10::irange(num)) {
tests.emplace_back(path);
}


@ -102,7 +102,7 @@ void testHelper(bool useLibUV, const std::string& prefix = "") {
for (const auto i : c10::irange(numThreads)) {
threads.emplace_back([=, &sem1, &sem2, &clientStores, &expectedCounterRes] {
for (C10_UNUSED const auto j : c10::irange(numIterations)) {
for ([[maybe_unused]] const auto j : c10::irange(numIterations)) {
clientStores[i]->add("counter", 1);
}
// Let each thread set and get key on its client store


@ -1043,7 +1043,7 @@ TEST(Reductions, ReduceSplitRfactor) {
SimpleIREvaluator cg(s, {b, c});
cg.call({in, out});
for (C10_UNUSED const auto i : c10::irange(M)) {
for ([[maybe_unused]] const auto i : c10::irange(M)) {
ASSERT_EQ(out[0], 4950);
}
}


@ -3884,7 +3884,7 @@ TEST(Simplify, SimplifyEliminateEmptyFor) {
{
// Flatten many layers around an empty block to an empty block.
StmtPtr last = alloc<Block>(std::vector<StmtPtr>({}));
for (C10_UNUSED const auto i : c10::irange(11)) {
for ([[maybe_unused]] const auto i : c10::irange(11)) {
VarHandle loopVar("loopVar", kInt);
last = For::make(loopVar, 0, 10, last);
}
@ -3968,7 +3968,7 @@ TEST(Simplify, SimplifyFlattenBlock) {
{
// Flatten many layers around an empty block to an empty block.
StmtPtr last = alloc<Block>(std::vector<StmtPtr>({}));
for (C10_UNUSED const auto i : c10::irange(11)) {
for ([[maybe_unused]] const auto i : c10::irange(11)) {
last = alloc<Block>(std::vector<StmtPtr>({last}));
}


@ -12,7 +12,7 @@ torch::List<torch::Tensor> custom_op(
int64_t repeat) {
torch::List<torch::Tensor> output;
output.reserve(repeat);
for (C10_UNUSED const auto i : c10::irange(repeat)) {
for ([[maybe_unused]] const auto i : c10::irange(repeat)) {
output.push_back(tensor * scalar);
}
return output;


@ -41,13 +41,13 @@ namespace torch::autograd {
namespace VariableType {
namespace{
C10_UNUSED void reset_grad_accumulator(Variable & self) {
AutogradMeta* meta = torch::autograd::impl::get_autograd_meta(self);
if (meta != nullptr) {
meta->grad_accumulator_.reset();
}
[[maybe_unused]] void reset_grad_accumulator(Variable& self) {
AutogradMeta* meta = torch::autograd::impl::get_autograd_meta(self);
if (meta != nullptr) {
meta->grad_accumulator_.reset();
}
}
}
namespace {

Some files were not shown because too many files have changed in this diff.