Revert D30652629: use irange for loops

Test Plan: revert-hammer

Differential Revision: D30652629 (687c2267d4)

Original commit changeset: 0ae6c4bbbb55

fbshipit-source-id: 5c4f067b584a021c8c9656454d1ee60999600fb3
Authored by Xue Li on 2021-10-15 15:19:28 -07:00; committed by Facebook GitHub Bot
parent 1e2b2ee5ff
commit 2f099c7555
487 changed files with 21930 additions and 22184 deletions
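
For orientation: the commit being reverted (D30652629) replaced explicit index loops with range-for loops over c10::irange, and this revert restores the index form throughout the files below. A minimal sketch of the two equivalent styles, assuming the c10 headers from the PyTorch source tree are on the include path; the function names are illustrative and not part of the change:

    #include <cstdint>
    #include <vector>
    #include <c10/util/irange.h>

    // Style removed by this revert: range-for over c10::irange, which yields
    // 0, 1, ..., sizes.size() - 1 with a deduced index type.
    int64_t sum_with_irange(const std::vector<int64_t>& sizes) {
      int64_t total = 0;
      for (const auto i : c10::irange(sizes.size())) {
        total += sizes[i];
      }
      return total;
    }

    // Style restored by this revert: an explicit index loop with a manually
    // chosen counter type.
    int64_t sum_with_index(const std::vector<int64_t>& sizes) {
      int64_t total = 0;
      for (size_t i = 0; i < sizes.size(); ++i) {
        total += sizes[i];
      }
      return total;
    }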


@ -4,7 +4,6 @@
#include <string>
#include <c10/core/MemoryFormat.h>
#include <c10/util/irange.h>
#include <fbjni/ByteBuffer.h>
#include <fbjni/fbjni.h>
@ -98,7 +97,7 @@ static at::Tensor newAtTensor(
std::vector<int64_t> shapeVec{};
shapeVec.reserve(rank);
auto numel = 1;
for (const auto i : c10::irange(rank)) {
for (auto i = 0; i < rank; ++i) {
shapeVec.push_back(shapeArr[i]);
numel *= shapeArr[i];
}
@ -522,7 +521,7 @@ at::IValue JIValue::JIValueToAtIValue(
std::vector<at::IValue> elements;
elements.reserve(n);
for (const auto i : c10::irange(n)) {
for (auto i = 0; i < n; ++i) {
auto jivalue_element = jarray->getElement(i);
auto element = JIValue::JIValueToAtIValue(jivalue_element);
elements.push_back(std::move(element));
@ -536,7 +535,7 @@ at::IValue JIValue::JIValueToAtIValue(
size_t n = jArrayPinned.size();
c10::List<bool> list{};
list.reserve(n);
for (const auto i : c10::irange(n)) {
for (size_t i = 0; i < n; ++i) {
list.push_back(jArrayPinned[i]);
}
return at::IValue{std::move(list)};
@ -548,7 +547,7 @@ at::IValue JIValue::JIValueToAtIValue(
size_t n = jArrayPinned.size();
c10::List<int64_t> list{};
list.reserve(n);
for (const auto i : c10::irange(n)) {
for (size_t i = 0; i < n; ++i) {
list.push_back(jArrayPinned[i]);
}
return at::IValue{std::move(list)};
@ -560,7 +559,7 @@ at::IValue JIValue::JIValueToAtIValue(
size_t n = jArrayPinned.size();
c10::List<double> list{};
list.reserve(n);
for (const auto i : c10::irange(n)) {
for (size_t i = 0; i < n; ++i) {
list.push_back(jArrayPinned[i]);
}
return at::IValue{std::move(list)};
@ -573,7 +572,7 @@ at::IValue JIValue::JIValueToAtIValue(
size_t n = jArray->size();
c10::List<at::Tensor> list{};
list.reserve(n);
for (const auto i : c10::irange(n)) {
for (size_t i = 0; i < n; ++i) {
list.push_back(
TensorHybrid::newAtTensorFromJTensor(jArray->getElement(i)));
}
@ -595,7 +594,7 @@ at::IValue JIValue::JIValueToAtIValue(
c10::impl::GenericList list{c10::unshapedType(first_element.type())};
list.reserve(n);
list.push_back(first_element);
for (const auto i : c10::irange(1, n)) {
for (auto i = 1; i < n; ++i) {
auto jivalue_element = jarray->getElement(i);
auto element = JIValue::JIValueToAtIValue(jivalue_element);
list.push_back(element);


@ -6,7 +6,6 @@
#include <fbjni/ByteBuffer.h>
#include <fbjni/fbjni.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/mobile/import.h>
#include <torch/csrc/jit/mobile/module.h>
#include <torch/script.h>
@ -158,7 +157,7 @@ class PytorchJni : public facebook::jni::HybridClass<PytorchJni> {
std::vector<at::IValue> inputs{};
size_t n = jinputs->size();
inputs.reserve(n);
for (const auto i : c10::irange(n)) {
for (size_t i = 0; i < n; i++) {
at::IValue atIValue = JIValue::JIValueToAtIValue(jinputs->getElement(i));
if (at::kVulkan == deviceType_) {
inputs.push_back(
@ -187,7 +186,7 @@ class PytorchJni : public facebook::jni::HybridClass<PytorchJni> {
std::vector<at::IValue> inputs{};
size_t n = jinputs->size();
inputs.reserve(n);
for (const auto i : c10::irange(n)) {
for (size_t i = 0; i < n; i++) {
at::IValue atIValue = JIValue::JIValueToAtIValue(jinputs->getElement(i));
if (at::kVulkan == deviceType_) {
inputs.push_back(


@ -3,7 +3,6 @@
#include <ATen/BatchedFallback.h>
#include <ATen/native/ResizeCommon.h>
#include <ATen/ATen.h>
#include <c10/util/irange.h>
namespace at {
@ -330,7 +329,7 @@ Tensor permute_batching_rule(const Tensor& self, IntArrayRef dims) {
VmapDimVector all_dims_physical;
all_dims_physical.reserve(self_physical.tensor().dim());
for (const auto bdim : c10::irange(self_physical.numBatchDims())) {
for (int64_t bdim = 0; bdim < self_physical.numBatchDims(); bdim++) {
all_dims_physical.push_back(bdim);
}
all_dims_physical.insert(


@ -2,7 +2,6 @@
#include <ATen/Parallel.h>
#include <ATen/TensorUtils.h>
#include <c10/util/irange.h>
#include <limits>
#include <utility>
#include <cstring>
@ -131,7 +130,7 @@ inline Tensor sort_strides(Tensor& tensor_) {
IntArrayRef strides = tensor_.strides();
std::vector<int64_t> indices;
indices.reserve(tensor_.ndimension());
for (const auto i : c10::irange(tensor_.ndimension())) {
for (int64_t i = 0; i < tensor_.ndimension(); i++) {
indices.push_back(i);
}
std::sort(indices.begin(), indices.end(), [&strides](int64_t i1, int64_t i2) {
@ -197,7 +196,7 @@ inline bool _all_equal_numel(at::ArrayRef<Tensor> tensors) {
if (tensors.size() == 0)
return true;
int64_t all_numel = tensors[0].numel();
for (const auto i : c10::irange(1, tensors.size())) {
for (size_t i = 1; i < tensors.size(); i++) {
if (tensors[i].numel() != all_numel)
return false;
}


@ -11,7 +11,6 @@
#include <c10/util/Exception.h>
#include <c10/core/impl/DeviceGuardImplInterface.h>
#include <c10/core/QEngine.h>
#include <c10/util/irange.h>
#include <memory>
#include <mutex>
@ -352,7 +351,7 @@ static inline void manual_seed(uint64_t seed) {
// available. In that case, we must not seed CUDA; it will fail!
const auto num_gpus = detail::getCUDAHooks().getNumGPUs();
if (hasCUDA() && num_gpus > 0) {
for (const auto i : c10::irange(num_gpus)) {
for (int i = 0; i < num_gpus; i++) {
auto cuda_gen = globalContext().defaultGenerator(
Device(at::kCUDA, static_cast<c10::DeviceIndex>(i))
);


@ -197,7 +197,7 @@ std::vector<int64_t> infer_dense_strides(IntArrayRef tensor_sizes, IntArrayRef t
// compute output strides which preserves the input tensor's memory layout
std::vector<int64_t> out_strides(ndim);
int64_t curr_stride = 1;
for (const auto i : c10::irange(ndim)) {
for (size_t i = 0; i < ndim; ++i) {
int64_t idx = perm[i];
out_strides[idx] = curr_stride;
// Note: for size 0, we simply treated it as 1, it really doesn't matter here


@ -4,7 +4,6 @@
#include <ATen/Tensor.h>
#include <c10/util/Exception.h>
#include <c10/util/MaybeOwned.h>
#include <c10/util/irange.h>
#include <functional>
#include <sstream>
@ -267,7 +266,7 @@ inline std::vector<Tensor> expand_outplace(TensorList to_expand) {
// expands a list of Tensors; ignores undefined (null) tensors
bool first = true;
DimVector sizes;
for (const auto i : c10::irange(to_expand.size())) {
for (size_t i = 0; i < to_expand.size(); ++i) {
if (!to_expand[i].defined()) {
continue;
} else if (first) {
@ -279,7 +278,7 @@ inline std::vector<Tensor> expand_outplace(TensorList to_expand) {
}
std::vector<Tensor> result(to_expand.size());
for (const auto i : c10::irange(to_expand.size())) {
for (size_t i = 0; i < to_expand.size(); ++i) {
if (!to_expand[i].defined()) {
continue;
} else if (to_expand[i].sizes().equals(sizes)) {
@ -300,7 +299,7 @@ static inline Tensor sum_to(Tensor tensor, const IntArrayRef shape) {
c10::SmallVector<int64_t, 8> reduce_dims;
const at::IntArrayRef sizes = tensor.sizes();
const int64_t leading_dims = sizes.size() - shape.size();
for (const auto i : c10::irange(leading_dims)) {
for (int64_t i = 0; i < leading_dims; ++i) {
reduce_dims.push_back(i);
}
for (int64_t i = leading_dims; i < static_cast<int64_t>(sizes.size()); ++i) {
@ -321,7 +320,7 @@ static inline bool is_expandable_to(IntArrayRef shape, IntArrayRef desired) {
if (ndim > target_dim) {
return false;
}
for (const auto i : c10::irange(ndim)) {
for (size_t i = 0; i < ndim; i++) {
int64_t size = shape[ndim - i - 1];
int64_t target = desired[target_dim - i - 1];
if (size != target && size != 1) {


@ -1,7 +1,6 @@
#include <ATen/MemoryOverlap.h>
#include <ATen/core/TensorBase.h>
#include <c10/core/Layout.h>
#include <c10/util/irange.h>
namespace at {
@ -18,7 +17,7 @@ MemOverlap has_internal_overlap(TensorImpl* t) {
auto strides = t->strides();
auto sizes = t->sizes();
for (const auto i : c10::irange(strides.size())) {
for (size_t i = 0; i < strides.size(); ++i) {
if (strides[i] == 0 && sizes[i] > 1) {
return MemOverlap::YES;
}


@ -225,7 +225,7 @@ std::vector<Dimname> compute_squeeze_outnames(const Tensor& tensor) {
}
std::vector<Dimname> outnames;
auto tensor_names = tensor.names();
for (const auto d : c10::irange(tensor.dim())) {
for (int64_t d = 0; d < tensor.dim(); d++) {
if (tensor.sizes()[d] != 1) {
outnames.push_back(tensor_names[d]);
}
@ -242,7 +242,7 @@ std::vector<Dimname> compute_diagonal_outnames(
}
std::vector<Dimname> outnames;
auto tensor_names = tensor.names();
for (const auto d : c10::irange(tensor.dim())) {
for (int64_t d = 0; d < tensor.dim(); d++) {
if (d == dim1 || d == dim2) {
continue;
}


@ -6,7 +6,6 @@
#ifndef C10_MOBILE
#include <c10/core/thread_pool.h>
#include <c10/util/irange.h>
#else
#include <caffe2/utils/threadpool/pthreadpool-cpp.h>
#endif // C10_MOBILE
@ -88,7 +87,7 @@ TaskThreadPoolBase& _get_intraop_pool() {
// `fn` will be called with params: (thread_pool_task_id, task_id).
void _run_with_pool(const std::function<void(int, size_t)>& fn, size_t range) {
#ifndef C10_MOBILE
for (const auto i : c10::irange(1, range)) {
for (size_t i = 1; i < range; ++i) {
_get_intraop_pool().run([fn, i]() { fn((int)i, i); });
}
// Run the first task on the current thread directly.


@ -3,7 +3,6 @@
#include <ATen/Tensor.h>
#include <c10/core/TensorImpl.h>
#include <c10/util/Exception.h>
#include <c10/util/irange.h>
namespace at {
struct TORCH_API SparseTensorImpl : public TensorImpl {
@ -110,7 +109,7 @@ public:
bool shrinking_dense_dim = false;
auto sparse_size_original = sizes().slice(0, sparse_dim);
auto sparse_size_new = size.slice(0, sparse_dim);
for (const auto i : c10::irange(sparse_dim)) {
for (int64_t i = 0; i < sparse_dim; i++) {
if (sparse_size_new[i] < sparse_size_original[i]) {
shrinking_sparse_dims = true;
break;
@ -118,7 +117,7 @@ public:
}
auto dense_size_original = sizes().slice(sparse_dim);
auto dense_size_new = size.slice(sparse_dim);
for (const auto i : c10::irange(dense_dim)) {
for (int64_t i = 0; i < dense_dim; i++) {
if (dense_size_new[i] < dense_size_original[i]) {
shrinking_dense_dim = true;
break;


@ -3,7 +3,6 @@
#include <ATen/ATen.h>
#include <ATen/SparseTensorImpl.h>
#include <ATen/Parallel.h>
#include <c10/util/irange.h>
namespace at { namespace sparse {
@ -99,7 +98,7 @@ Tensor coo_to_csr(const int64_t* indices, int64_t dim, int64_t nnz) {
at::parallel_for(0, nnz, 10000, [&](int64_t start, int64_t end) {
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
int64_t h, hp0, hp1;
for (const auto i : c10::irange(start, end)) {
for (auto i = start; i < end; i++) {
hp0 = indices[i];
hp1 = (i+1 == nnz) ? dim : indices[i+1];
if (hp0 != hp1) {


@ -1,7 +1,6 @@
#include <ATen/TensorIndexing.h>
#include <c10/util/Exception.h>
#include <c10/util/irange.h>
namespace at {
namespace indexing {
@ -32,7 +31,7 @@ std::ostream& operator<<(std::ostream& stream, const TensorIndex& tensor_index)
std::ostream& operator<<(std::ostream& stream, const std::vector<TensorIndex>& tensor_indices) {
stream << "(";
for (const auto i : c10::irange(tensor_indices.size())) {
for (size_t i = 0; i < tensor_indices.size(); i++) {
stream << tensor_indices[i];
if (i < tensor_indices.size() - 1) stream << ", ";
}


@ -1,7 +1,6 @@
#pragma once
#include <c10/util/Optional.h>
#include <c10/util/irange.h>
#include <ATen/core/TensorBody.h>
#include <ATen/ExpandUtils.h>
#include <ATen/Functions.h>
@ -336,7 +335,7 @@ static inline Tensor scalarToTensor(const Scalar& v, const TensorOptions& option
// strip away unit dimensions from the left of 'src'
static inline IntArrayRef slicePrefix1sSize(const IntArrayRef& sizes) {
size_t first_non1_src = sizes.size();
for (const auto i : c10::irange(sizes.size())) {
for (size_t i = 0; i < sizes.size(); ++i) {
if (sizes[i] != 1) {
first_non1_src = i;
break;
@ -440,7 +439,7 @@ static inline Tensor applySlicing(
"too many indices for tensor of dimension ", (int)self_sizes.size());
Tensor result = self;
for (const auto i : c10::irange(indices.size())) {
for (size_t i = 0; i < indices.size(); i++) {
auto& obj = indices[i];
result = handleDimInMultiDimIndexing(
/*prev_dim_result=*/result,


@ -36,8 +36,8 @@ inline void get_base_ptrs(char** ptrs, ArrayRef<OperandInfo> operands) {
}
inline void get_strides(int64_t* strides, ArrayRef<OperandInfo> operands, int64_t ndim) {
for (const auto dim : c10::irange(ndim)) {
for (const auto arg : c10::irange(operands.size())) {
for (int64_t dim = 0; dim < ndim; ++dim) {
for (size_t arg = 0; arg < operands.size(); ++arg) {
*strides++ = operands[arg].stride_bytes[dim];
}
}
@ -214,7 +214,7 @@ void TensorIteratorBase::reorder_dimensions() {
// returns 1 if the dim0 should come after dim1, -1 if dim0 should come
// before dim1, and 0 if the comparison is ambiguous.
auto should_swap = [&](size_t dim0, size_t dim1) {
for (const auto arg : c10::irange(ntensors())) {
for (int arg = 0; arg < ntensors(); arg++) {
// ignore undefined or incorrectly sized tensors
if (operands_[arg].stride_bytes.empty() || operands_[arg].will_resize) {
continue;
@ -251,7 +251,7 @@ void TensorIteratorBase::reorder_dimensions() {
};
// insertion sort with support for ambiguous comparisons
for (const auto i : c10::irange(1, ndim())) {
for (int i = 1; i < ndim(); i++) {
int dim1 = i;
for (int dim0 = i - 1; dim0 >= 0; dim0--) {
int comparison = should_swap(perm_[dim0], perm_[dim1]);
@ -497,7 +497,7 @@ void TensorIteratorBase::compute_types(const TensorIteratorConfig& config) {
StrideVector TensorIteratorBase::compatible_stride(int element_size) const {
auto stride = StrideVector();
int64_t next_stride = element_size;
for (const auto dim : c10::irange(ndim())) {
for (int dim = 0; dim < ndim(); dim++) {
stride.push_back(next_stride);
next_stride *= shape_[dim];
}
@ -510,14 +510,14 @@ DimVector TensorIteratorBase::invert_perm(IntArrayRef input) const {
TORCH_INTERNAL_ASSERT(!has_coalesced_dimensions_);
TORCH_INTERNAL_ASSERT(input.size()==perm_.size());
auto res = DimVector(input.size()); //no initialization needed, every value in res should be written to.
for (const auto dim : c10::irange(ndim())) {
for (int dim = 0; dim < ndim(); dim++) {
res[perm_[dim]] = input[dim];
}
return res;
}
void TensorIteratorBase::allocate_or_resize_outputs() {
for (const auto i : c10::irange(num_outputs_)) {
for (int i = 0; i < num_outputs_; i++) {
auto& op = operands_[i];
if (!op.tensor_base().defined() || op.will_resize) {
TORCH_INTERNAL_ASSERT(op.is_type_defined(), "no type for operand", i);
@ -525,7 +525,7 @@ void TensorIteratorBase::allocate_or_resize_outputs() {
op.stride_bytes = compatible_stride(element_size);
// check if permutation is just an inverted order
bool inverted = true;
for (const auto i : c10::irange(ndim())) {
for (int i = 0; i < ndim(); i++) {
if (perm_[i] != ndim() - i - 1) {
inverted = false;
break;
@ -539,7 +539,7 @@ void TensorIteratorBase::allocate_or_resize_outputs() {
set_output(i, tensor_shape, {}, original_options(op), names_);
} else {
auto tensor_stride = invert_perm(op.stride_bytes);
for (const auto dim : c10::irange(ndim())) {
for (int dim = 0; dim < ndim(); dim++) {
tensor_stride[dim] /= element_size;
}
set_output(i, tensor_shape, tensor_stride, original_options(op), names_);
@ -593,7 +593,7 @@ void TensorIteratorBase::coalesce_dimensions() {
if (shape0 == 1 || shape1 == 1) {
return true;
}
for (const auto i : c10::irange(ntensors())) {
for (int i = 0; i < ntensors(); i++) {
auto& stride = operands_[i].stride_bytes;
if (shape0 * stride[dim0] != stride[dim1]) {
return false;
@ -604,14 +604,14 @@ void TensorIteratorBase::coalesce_dimensions() {
// replace each operands stride at dim0 with its stride at dim1
auto replace_stride = [&](int dim0, int dim1) {
for (const auto i : c10::irange(ntensors())) {
for (int i = 0; i < ntensors(); i++) {
auto& stride = operands_[i].stride_bytes;
stride[dim0] = stride[dim1];
}
};
int prev_dim = 0;
for (const auto dim : c10::irange(1, ndim())) {
for (int dim = 1; dim < ndim(); dim++) {
if (can_coalesce(prev_dim, dim)) {
if (shape_[prev_dim] == 1) {
replace_stride(prev_dim, dim);
@ -627,7 +627,7 @@ void TensorIteratorBase::coalesce_dimensions() {
}
shape_.resize(prev_dim + 1);
for (const auto i : c10::irange(ntensors())) {
for (int i = 0; i < ntensors(); i++) {
operands_[i].stride_bytes.resize(ndim());
}
has_coalesced_dimensions_ = true;
@ -670,7 +670,7 @@ void TensorIteratorBase::permute_dimensions(IntArrayRef perm) {
auto reorder = [perm](IntArrayRef data) {
auto res = DimVector(data.size(), 0);
for (const auto i : c10::irange(perm.size())) {
for (size_t i = 0; i < perm.size(); i++) {
res[i] = data[perm[i]];
}
return res;
@ -687,7 +687,7 @@ void TensorIteratorBase::permute_dimensions(IntArrayRef perm) {
int64_t TensorIteratorBase::num_output_elements() const {
int64_t elem = 1;
for (const auto dim : c10::irange(ndim())) {
for (int dim = 0; dim < ndim(); dim++) {
if (operands_[0].stride_bytes[dim] != 0 || shape_[dim] == 0) {
elem *= shape_[dim];
}
@ -697,7 +697,7 @@ int64_t TensorIteratorBase::num_output_elements() const {
int TensorIteratorBase::num_reduce_dims() const {
int count = 0;
for (const auto dim : c10::irange(ndim())) {
for (int dim = 0; dim < ndim(); dim++) {
if (operands_[0].stride_bytes[dim] == 0) {
count++;
}
@ -760,7 +760,7 @@ bool TensorIteratorBase::is_contiguous() const {
bool TensorIteratorBase::is_scalar(int arg) const {
const auto& stride = operands_[arg].stride_bytes;
for (const auto i : c10::irange(ndim())) {
for (int i = 0; i < ndim(); i++) {
if (stride[i] != 0 && shape_[i] != 1) {
return false;
}
@ -815,7 +815,7 @@ void TensorIteratorBase::narrow(int dim, int64_t start, int64_t size) {
void TensorIteratorBase::select_all_keeping_dim(int start_dim, IntArrayRef indices) {
TORCH_INTERNAL_ASSERT(start_dim <= ndim());
for (const auto i : c10::irange(start_dim, ndim())) {
for (int i = start_dim; i < ndim(); ++i) {
for (auto& op : operands_) {
op.data = ((char*)op.data) + op.stride_bytes[i] * indices[i - start_dim];
}
@ -1063,13 +1063,13 @@ void TensorIteratorBase::populate_operands(TensorIteratorConfig& config) {
void TensorIteratorBase::mark_outputs() {
// TODO: merge this into populate_operands
for (const auto i : c10::irange(num_outputs_)) {
for (int i = 0; i < num_outputs_; i++) {
operands_[i].is_output = true;
const auto& output = tensor(i);
if (!output.defined()) continue;
// check if output is also an input
for (const auto arg : c10::irange(num_outputs_, ntensors())) {
for (int arg = num_outputs_; arg < ntensors(); arg++) {
const auto& input = tensor(arg);
if (output.is_same(input)) {
operands_[i].is_read_write = true;
@ -1086,7 +1086,7 @@ void TensorIteratorBase::mark_resize_outputs(const TensorIteratorConfig& config)
if (config.static_shape_.has_value()) {
return;
}
for (const auto i : c10::irange(num_outputs_)) {
for (int i = 0; i < num_outputs_; i++) {
const auto& output = tensor(i);
if (output.defined() && !output.sizes().equals(shape_)) {
if (config.resize_outputs_ && !operands_[i].is_read_write) {
@ -1104,11 +1104,11 @@ void TensorIteratorBase::compute_mem_overlaps(const TensorIteratorConfig& config
if (!config.check_mem_overlap_) {
return;
}
for (const auto i : c10::irange(num_outputs_)) {
for (int i = 0; i < num_outputs_; i++) {
const auto& output = tensor_base(i);
if (!output.defined()) continue;
assert_no_internal_overlap(output);
for (const auto j : c10::irange(num_outputs_, ntensors())) {
for (int j = num_outputs_; j < ntensors(); j++) {
const auto& input = tensor_base(j);
if (!input.is_same(output)) {
assert_no_partial_overlap(output, input);
@ -1164,7 +1164,7 @@ void TensorIteratorBase::compute_strides(const TensorIteratorConfig& config) {
op.stride_bytes.resize(ndim(), 0);
else
op.stride_bytes.resize(ndim());
for (const auto i : c10::irange(original_shape.size())) {
for (size_t i = 0; i < original_shape.size(); i++) {
// see NOTE: [Computing output strides]
if (original_shape[i] == 1 && shape_[offset + i] !=1) {
op.stride_bytes[offset + i] = 0;
@ -1183,7 +1183,7 @@ bool TensorIteratorBase::can_use_32bit_indexing() const {
}
for (auto& op : operands_) {
int64_t max_offset = 1;
for (const auto dim : c10::irange(ndim())) {
for (int dim = 0; dim < ndim(); dim++) {
max_offset += (shape_[dim] - 1) * op.stride_bytes[dim];
}
if (max_offset > max_value) {
@ -1245,7 +1245,7 @@ bool TensorIteratorBase::fast_set_up(const TensorIteratorConfig& config) {
switch (setup_type) {
case FastSetupType::CONTIGUOUS:
{
for (const auto i : c10::irange(num_outputs_)) {
for (int i = 0; i < num_outputs_; i++){
auto& op = operands_[i];
if (!op.tensor_base().defined()) {
TORCH_INTERNAL_ASSERT(op.is_type_defined(), "no type for operand", i);
@ -1256,7 +1256,7 @@ bool TensorIteratorBase::fast_set_up(const TensorIteratorConfig& config) {
}
case FastSetupType::CHANNELS_LAST:
{
for (const auto i : c10::irange(num_outputs_)) {
for (int i = 0; i < num_outputs_; i++){
auto& op = operands_[i];
if (!op.tensor_base().defined()) {
TORCH_INTERNAL_ASSERT(op.is_type_defined(), "no type for operand", i);
@ -1273,7 +1273,7 @@ bool TensorIteratorBase::fast_set_up(const TensorIteratorConfig& config) {
if (tensor(i_defined).defined()) break;
}
TORCH_CHECK(i_defined >= 0, "Can not find a defined tensor when fast allocating memory to outputs");
for (const auto i : c10::irange(num_outputs_)) {
for (int i = 0; i < num_outputs_; i++){
auto& op = operands_[i];
if (!op.tensor_base().defined()) {
TORCH_INTERNAL_ASSERT(op.is_type_defined(), "no type for operand", i);


@ -4,7 +4,6 @@
#include <c10/util/MaybeOwned.h>
#include <c10/util/SmallVector.h>
#include <c10/util/TypeCast.h>
#include <c10/util/irange.h>
#include <ATen/core/Dimname.h>
#include <ATen/core/Range.h>
#include <ATen/core/TensorBase.h>
@ -323,9 +322,9 @@ private:
char** base, const int64_t* strides, int64_t size0, int64_t size1) {
PtrVector data(base, base + ntensor);
const int64_t* outer_strides = &strides[ntensor];
for (const auto i : c10::irange(size1)) {
for (int64_t i = 0; i < size1; i++) {
if (i > 0) {
for (const auto arg : c10::irange(ntensor)) {
for (int64_t arg = 0; arg < ntensor; arg++) {
data[arg] += outer_strides[arg];
}
}
@ -398,7 +397,7 @@ public:
bool has_contiguous_first_dim() const {
int num_tensors = ntensors();
for (const auto i : c10::irange(num_tensors)) {
for (int i = 0; i < num_tensors; i++) {
if (strides(i)[0] != element_size(i)) {
return false;
}


@ -1,7 +1,6 @@
#pragma once
#include <ATen/native/TensorIterator.h>
#include <c10/util/SmallBuffer.h>
#include <c10/util/irange.h>
namespace at {
@ -25,9 +24,9 @@ inline void get_data_ptrs(
const int64_t ntensors = base.size();
const int64_t ndim = counter.size();
std::copy(base.begin(), base.end(), ptrs);
for (const auto dim : c10::irange(ndim)) {
for (int64_t dim = 0; dim < ndim; ++dim) {
int64_t value = counter[dim];
for (const auto arg : c10::irange(ntensors)) {
for (int64_t arg = 0; arg < ntensors; ++arg) {
ptrs[arg] += value * strides[dim * ntensors + arg];
}
}


@ -56,7 +56,7 @@ TensorNames::TensorNames(ArrayRef<Dimname> names, int64_t start, int64_t end) {
start = maybe_wrap_dim(start, names.size());
end = maybe_wrap_dim(end, names.size());
names_.reserve(end - start);
for (const auto idx : c10::irange(start, end)) {
for (int64_t idx = start; idx < end; ++idx) {
names_.emplace_back(names, idx);
}
}


@ -2,7 +2,6 @@
#include <ATen/Config.h>
#include <ATen/TensorUtils.h>
#include <c10/util/accumulate.h>
#include <c10/util/irange.h>
#include <ostream>
#include <sstream>
@ -324,7 +323,7 @@ size_t computeStorageNbytes(
// size of the underlying storage is 1 bigger than the offset
// of the last element according to stride
size_t size = 1;
for (const auto i : c10::irange(sizes.size())) {
for(size_t i = 0; i < sizes.size(); i++) {
if(sizes[i] == 0) {
return 0;
}


@ -83,7 +83,7 @@ VmapDimVector VmapPhysicalView::getPhysicalShape(IntArrayRef logical_shape) cons
static BatchDims computeFrontBatchDimsFromLevels(std::bitset<kVmapNumLevels> levels_bitset) {
BatchDims bdims;
int64_t dim = 0;
for (const auto level : c10::irange(kVmapNumLevels)) {
for (int64_t level = 0; level < kVmapNumLevels; level++) {
if (!levels_bitset[level]) {
continue;
}
@ -208,7 +208,7 @@ MultiBatchVmapTransform::logicalToPhysical(TensorList logical_tensors) {
VmapDimVector batch_sizes(num_batch_dims, 1);
for (const auto& physical_tensor : physical_tensors) {
auto physical_sizes = physical_tensor.sizes();
for (const auto dim : c10::irange(num_batch_dims)) {
for (int64_t dim = 0; dim < num_batch_dims; dim++) {
if (physical_sizes[dim] != 1) {
batch_sizes[dim] = physical_sizes[dim];
}


@ -2,7 +2,6 @@
#include <c10/core/WrapDimMinimal.h>
#include <c10/core/TensorImpl.h>
#include <c10/util/irange.h>
#include <ATen/core/Tensor.h>
namespace at {
@ -41,7 +40,7 @@ static inline void maybe_wrap_dims_n(int64_t* dims, int64_t ndims, int64_t dim_p
}
int64_t min = -dim_post_expr;
int64_t max = dim_post_expr - 1;
for (const auto i : c10::irange(ndims)) {
for (int64_t i = 0; i < ndims; ++i) {
auto &dim = dims[i];
if (dim < min || dim > max) {
TORCH_CHECK_INDEX(false,
@ -86,7 +85,7 @@ static inline int64_t legacy_cat_wrap_dim(int64_t dim, TensorList tensors) {
// wrap negative dims in a vector
static inline void wrap_all_dims(std::vector<int64_t>& dims_to_wrap, int64_t tensor_total_dims) {
for (const auto i : c10::irange(dims_to_wrap.size())) {
for (size_t i = 0; i < dims_to_wrap.size(); i++) {
dims_to_wrap[i] = maybe_wrap_dim(dims_to_wrap[i], tensor_total_dims);
}
}


@ -1,7 +1,6 @@
#pragma once
#include <c10/core/TensorImpl.h>
#include <c10/util/irange.h>
#include <ATen/WrapDimUtils.h>
#include <sstream>
#include <bitset>
@ -16,7 +15,7 @@ constexpr size_t dim_bitset_size = 64;
static inline std::bitset<dim_bitset_size> dim_list_to_bitset(IntArrayRef dims, int64_t ndims) {
TORCH_CHECK(ndims <= (int64_t) dim_bitset_size, "only tensors with up to ", dim_bitset_size, " dims are supported");
std::bitset<dim_bitset_size> seen;
for (const auto i : c10::irange(dims.size())) {
for (size_t i = 0; i < dims.size(); i++) {
size_t dim = maybe_wrap_dim(dims[i], ndims);
TORCH_CHECK(!seen[dim], "dim ", dim, " appears multiple times in the list of dims");
seen[dim] = true;


@ -1,5 +1,4 @@
#include <benchmark/benchmark.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/passes/xnnpack_rewrite.h>
#include <torch/csrc/autograd/generated/variable_factories.h>
#include <torch/csrc/jit/api/module.h>
@ -34,7 +33,7 @@ static void stateful_conv1d(benchmark::State& state) {
)");
std::vector<std::vector<torch::jit::IValue>> inputs;
for (const auto i : c10::irange(10)) {
for (int i = 0; i < 10; ++i) {
std::vector<torch::jit::IValue> input;
// NOLINTNEXTLINE(modernize-use-emplace)
input.push_back(torch::rand({batch_size, input_channels, width}));
@ -71,8 +70,8 @@ static void GenerateSizes(benchmark::internal::Benchmark* b) {
for (size_t input_channels = 32; input_channels < 256; input_channels *= 2) {
for (size_t output_channels = 32; output_channels < 256; output_channels *= 2) {
for (const auto kernel : c10::irange(3, 8)) {
for (const auto batch_size : c10::irange(1, 5)) {
for (size_t kernel = 3; kernel < 8; ++kernel) {
for (size_t batch_size = 1; batch_size < 5; ++batch_size) {
for (size_t width = 32; width < 256; width *= 2) {
b->Args({input_channels, output_channels, kernel, batch_size, width, true});
b->Args({input_channels, output_channels, kernel, batch_size, width, false});


@ -4,7 +4,6 @@
// device code.
#include <c10/macros/Macros.h>
#include <c10/util/irange.h>
namespace at { namespace detail {


@ -1,5 +1,4 @@
#include <ATen/core/Formatting.h>
#include <c10/util/irange.h>
#include <cmath>
#include <cstdint>
@ -45,7 +44,7 @@ static std::tuple<double, int64_t> __printFormat(std::ostream& stream, const Ten
}
bool intMode = true;
auto self_p = self.data_ptr<double>();
for (const auto i : c10::irange(size)) {
for(int64_t i = 0; i < size; i++) {
auto z = self_p[i];
if(std::isfinite(z)) {
if(z != std::ceil(z)) {
@ -71,7 +70,7 @@ static std::tuple<double, int64_t> __printFormat(std::ostream& stream, const Ten
} else {
expMin = fabs(self_p[offset]);
expMax = fabs(self_p[offset]);
for (const auto i : c10::irange(offset, size)) {
for(int64_t i = offset; i < size; i++) {
double z = fabs(self_p[i]);
if(std::isfinite(z)) {
if(z < expMin) {
@ -131,8 +130,7 @@ static std::tuple<double, int64_t> __printFormat(std::ostream& stream, const Ten
static void __printIndent(std::ostream &stream, int64_t indent)
{
for (const auto i : c10::irange(indent)) {
(void)i; //Suppress unused variable warning
for(int64_t i = 0; i < indent; i++) {
stream << " ";
}
}
@ -170,7 +168,7 @@ static void __printMatrix(std::ostream& stream, const Tensor& self, int64_t line
printScale(stream,scale);
__printIndent(stream, indent);
}
for (const auto l : c10::irange(self.size(0))) {
for(int64_t l = 0; l < self.size(0); l++) {
Tensor row = self.select(0,l);
double *row_ptr = row.data_ptr<double>();
for(int64_t c = firstColumn; c < lastColumn+1; c++) {
@ -200,7 +198,8 @@ void __printTensor(std::ostream& stream, Tensor& self, int64_t linesize)
bool start = true;
bool finished = false;
counter[0] = -1;
for (const auto i : c10::irange(1, counter.size()))counter[i] = 0;
for(size_t i = 1; i < counter.size(); i++)
counter[i] = 0;
while(true) {
for(int64_t i = 0; self.ndimension()-2; i++) {
counter[i] = counter[i] + 1;
@ -270,7 +269,7 @@ std::ostream& print(std::ostream& stream, const Tensor & tensor_, int64_t linesi
printScale(stream, scale);
}
double* tensor_p = tensor.data_ptr<double>();
for (const auto i : c10::irange(tensor.size(0))) {
for (int64_t i = 0; i < tensor.size(0); i++) {
stream << std::setw(sz) << tensor_p[i]/scale << std::endl;
}
}
@ -285,7 +284,7 @@ std::ostream& print(std::ostream& stream, const Tensor & tensor_, int64_t linesi
__printTensor(stream, tensor, linesize);
}
stream << "[ " << tensor_.toString() << "{" << tensor.size(0);
for (const auto i : c10::irange(1, tensor.ndimension())) {
for(int64_t i = 1; i < tensor.ndimension(); i++) {
stream << "," << tensor.size(i);
}
stream << "}";
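
A note on the __printIndent hunk above: with the irange form, the loop variable is otherwise unused, so the original code cast it to void to silence unused-variable warnings; the restored index form uses i in its loop condition, so no suppression is needed. A minimal sketch of the two variants (helper names are illustrative, not the actual functions):

    #include <cstdint>
    #include <ostream>
    #include <c10/util/irange.h>

    // irange form: i is only consumed by the cast, hence the (void)i.
    static void print_indent_irange(std::ostream& stream, int64_t indent) {
      for (const auto i : c10::irange(indent)) {
        (void)i; // suppress unused-variable warning
        stream << " ";
      }
    }

    // index form restored by this revert: i is read in the loop condition,
    // so no suppression is needed.
    static void print_indent_indexed(std::ostream& stream, int64_t indent) {
      for (int64_t i = 0; i < indent; i++) {
        stream << " ";
      }
    }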


@ -155,7 +155,7 @@ private:
data_.seed_ = seed;
data_.seeded_ = true;
data_.state_[0] = seed & 0xffffffff;
for (const auto j : c10::irange(1, MERSENNE_STATE_N)) {
for(int j = 1; j < MERSENNE_STATE_N; j++) {
data_.state_[j] = (1812433253 * (data_.state_[j-1] ^ (data_.state_[j-1] >> 30)) + j);
}
data_.left_ = 1;


@ -3,7 +3,6 @@
#include <c10/macros/Macros.h>
#include <c10/util/Deprecated.h>
#include <c10/util/Exception.h>
#include <c10/util/irange.h>
#include <stdint.h>
#include <cstddef>
@ -135,7 +134,7 @@ public:
const source_index_t* sizes_,
const source_index_t* strides_)
: data_(data_) {
for (const auto i : c10::irange(N)) {
for (int i = 0; i < N; i++) {
this->sizes_[i] = sizes_[i];
this->strides_[i] = strides_[i];
}


@ -7,7 +7,6 @@
#include <ATen/core/dispatch/Dispatcher.h>
#include <ATen/core/ivalue.h>
#include <c10/core/CPUAllocator.h>
#include <c10/util/irange.h>
template<class... Inputs>
inline std::vector<c10::IValue> makeStack(Inputs&&... inputs) {
@ -88,7 +87,7 @@ inline void expectThrows(Functor&& functor, const char* expectMessageContains) {
template<class T, size_t N>
void expectListEquals(c10::ArrayRef<T> expected, std::array<T, N> actual) {
EXPECT_EQ(expected.size(), actual.size());
for (const auto i : c10::irange(expected.size())) {
for (size_t i = 0; i < expected.size(); ++i) {
EXPECT_EQ(expected[i], actual[i]);
}
}
@ -96,7 +95,7 @@ void expectListEquals(c10::ArrayRef<T> expected, std::array<T, N> actual) {
template<class T>
void expectListEquals(c10::ArrayRef<T> expected, c10::ArrayRef<T> actual) {
EXPECT_EQ(expected.size(), actual.size());
for (const auto i : c10::irange(expected.size())) {
for (size_t i = 0; i < expected.size(); ++i) {
EXPECT_EQ(expected[i], actual[i]);
}
}
@ -104,7 +103,7 @@ void expectListEquals(c10::ArrayRef<T> expected, c10::ArrayRef<T> actual) {
template<class T>
void expectListEquals(c10::ArrayRef<T> expected, c10::List<T> actual) {
EXPECT_EQ(expected.size(), actual.size());
for (const auto i : c10::irange(expected.size())) {
for (size_t i = 0; i < expected.size(); ++i) {
EXPECT_EQ(expected[i], actual.get(i));
}
}
@ -112,7 +111,7 @@ void expectListEquals(c10::ArrayRef<T> expected, c10::List<T> actual) {
template<class T>
void expectListEquals(c10::ArrayRef<T> expected, std::vector<T> actual) {
EXPECT_EQ(expected.size(), actual.size());
for (const auto i : c10::irange(expected.size())) {
for (size_t i = 0; i < expected.size(); ++i) {
EXPECT_EQ(expected[i], actual[i]);
}
}


@ -5,7 +5,6 @@
#include <ATen/core/jit_type.h>
#include <c10/util/Bitset.h>
#include <c10/core/DispatchKeySet.h>
#include <c10/util/irange.h>
#include <ATen/core/Variadic.h>
#include <ATen/core/stack.h>
@ -172,7 +171,7 @@ private:
"The function schema has ", schema.arguments().size(),
" arguments but this PyTorch build only supports ", c10::utils::bitset::NUM_BITS());
c10::utils::bitset dispatch_arg_indices_reverse;
for (const auto index : c10::irange(schema.arguments().size())) {
for (size_t index = 0; index < schema.arguments().size(); ++index) {
if (schema.arguments()[index].type()->isSubtypeOf(*TensorType::get()) ||
schema.arguments()[index].type()->isSubtypeOf(
*ListType::ofTensors()) ||


@ -5,7 +5,6 @@
#include <ATen/Functions.h>
#include <ATen/core/dispatch/Dispatcher.h>
#include <ATen/core/op_registration/op_registration.h>
#include <c10/util/irange.h>
#include <torch/library.h>
using namespace at;
@ -52,7 +51,7 @@ void generic_wrapper_fallback(const c10::OperatorHandle& op, torch::jit::Stack*
// Unwrap all arguments
auto args = torch::jit::pop(*stack, num_arguments);
for (const auto i : c10::irange(num_arguments)) {
for (size_t i = 0; i < num_arguments; i++) {
// TODO: Handle tensor list
if (args[i].isTensor()) {
auto* impl = args[i].unsafeToTensorImpl();
@ -71,7 +70,7 @@ void generic_wrapper_fallback(const c10::OperatorHandle& op, torch::jit::Stack*
// Rewrap outputs
auto rets = torch::jit::pop(*stack, num_returns);
for (const auto i : c10::irange(num_returns)) {
for (size_t i = 0; i < num_returns; i++) {
// TODO: Handle tensor list
if (rets[i].isTensor()) {
torch::jit::push(*stack, at::detail::make_tensor<GenericWrapperTensorImpl>(std::move(rets[i]).toTensor())); // yes move!


@ -2,7 +2,6 @@
#include <c10/util/StringUtil.h>
#include <c10/util/string_view.h>
#include <c10/util/irange.h>
#include <ATen/core/jit_type.h>
#include <ATen/core/interned_strings.h>
#include <ATen/core/ivalue.h>


@ -16,7 +16,7 @@ inline std::ostream& operator<<(std::ostream& out, const FunctionSchema& schema)
out << "(";
bool seen_kwarg_only = false;
for (const auto i : c10::irange(schema.arguments().size())) {
for(size_t i = 0; i < schema.arguments().size(); ++i) {
if (i > 0) out << ", ";
if (schema.arguments()[i].kwarg_only() && !seen_kwarg_only) {
out << "*, ";
@ -35,7 +35,7 @@ inline std::ostream& operator<<(std::ostream& out, const FunctionSchema& schema)
const auto& returns = schema.returns();
out << "(";
for (const auto i : c10::irange(returns.size())) {
for(size_t i = 0; i < returns.size(); ++i) {
if (i > 0) {
out << ", ";
}
@ -53,7 +53,7 @@ inline std::ostream& operator<<(std::ostream& out, const FunctionSchema& schema)
inline size_t findFirstOutArg(const std::vector<Argument>& args) {
// find the start of out args in the schema
for (const auto out_start_idx : c10::irange(args.size())) {
for (size_t out_start_idx = 0; out_start_idx < args.size(); out_start_idx++) {
if (args.at(out_start_idx).is_out()) {
return out_start_idx;
}
@ -122,7 +122,7 @@ inline bool FunctionSchema::isBackwardCompatibleWith(
&& arguments().size() >= old.arguments().size())) {
return false;
}
for (const auto i : c10::irange(returns().size())) {
for (size_t i = 0; i < returns().size(); ++i) {
// Backwards compatibility requires covariance on argument types
// (i.e. more generic), and contravariance on return types (i.e.
// more specific).
@ -138,7 +138,7 @@ inline bool FunctionSchema::isBackwardCompatibleWith(
size_t new_out_start_idx = findFirstOutArg(arguments());
// make sure among the default args, they are backward compatible
for (const auto i : c10::irange(old_out_start_idx)) {
for (size_t i = 0; i < old_out_start_idx; i++) {
if (!arguments().at(i).isBackwardCompatibleWith(
old.arguments().at(i), why_not)) {
return false;
@ -146,7 +146,7 @@ inline bool FunctionSchema::isBackwardCompatibleWith(
}
// // Validate that all new arguments provided has a default value
for (const auto i : c10::irange(old_out_start_idx, new_out_start_idx)) {
for (size_t i = old_out_start_idx; i < new_out_start_idx; ++i) {
if (!arguments().at(i).default_value()) {
if (why_not) {
*why_not
@ -160,7 +160,7 @@ inline bool FunctionSchema::isBackwardCompatibleWith(
}
// now compare the out args
for (const auto i : c10::irange(old_out_start_idx, old.arguments().size())) {
for (size_t i = old_out_start_idx; i < old.arguments().size(); i++) {
if (!arguments()
.at(i - old_out_start_idx + new_out_start_idx)
.isBackwardCompatibleWith(old.arguments().at(i), why_not)) {
@ -238,7 +238,7 @@ inline void FunctionSchema::checkAndNormalizeInputs(
*this);
size_t consumed_kwargs = 0;
for (const auto pos : c10::irange(arguments().size())) {
for (size_t pos = 0; pos < arguments().size(); ++pos) {
const auto& argument = arguments()[pos];
if (pos < inputs.size()) {
checkArg(inputs[pos], argument, pos);
@ -298,7 +298,7 @@ inline bool isSubtypeOfList(
if (child.size() != parent.size()) {
return false;
}
for (const auto i : c10::irange(child.size())) {
for (size_t i = 0; i < child.size(); ++i) {
const Argument& c = child[i];
const Argument& p = parent[i];
if (c.name() != p.name()) {


@ -22,7 +22,6 @@
#include <c10/util/intrusive_ptr.h>
#include <c10/util/irange.h>
#include <c10/util/hash.h>
#include <c10/util/irange.h>
namespace torch {
namespace jit {
@ -1115,7 +1114,7 @@ struct C10_EXPORT ivalue::Future final : c10::intrusive_ptr_target {
}
std::ostringstream oss;
oss << devices[0];
for (const auto idx : c10::irange(1, devices.size())) {
for (size_t idx = 1; idx < devices.size(); idx++) {
if (idx == devices.size() - 1) {
oss << " and ";
} else {
@ -1132,7 +1131,7 @@ struct C10_EXPORT ivalue::Future final : c10::intrusive_ptr_target {
return c10::kCPU;
}
c10::DeviceType deviceType = devices[0].type();
for (const auto idx : c10::irange(1, devices.size())) {
for (size_t idx = 1; idx < devices.size(); idx++) {
TORCH_CHECK_VALUE(
devices[idx].type() == deviceType,
"Expected all devices to be of the same type, but got a mismatch between ",
@ -1152,7 +1151,7 @@ struct C10_EXPORT ivalue::Future final : c10::intrusive_ptr_target {
[](const c10::Device& a, const c10::Device& b) { return a.index() < b.index(); });
// Deduplicate by compacting.
size_t targetIdx = 0;
for (const auto sourceIdx : c10::irange(devices.size())) {
for (size_t sourceIdx = 0; sourceIdx < devices.size(); sourceIdx++) {
TORCH_CHECK_VALUE(
devices[sourceIdx].has_index(),
"Expected devices to have indices, got ", devices[sourceIdx]);


@ -1,5 +1,4 @@
#include <ATen/core/op_registration/infer_schema.h>
#include <c10/util/irange.h>
#include <sstream>
namespace c10 {
@ -21,7 +20,7 @@ std::string fastToString(size_t x) {
std::vector<Argument> createArgumentVector(c10::ArrayRef<ArgumentDef> args) {
std::vector<Argument> result;
result.reserve(args.size());
for (const auto i : c10::irange(args.size())) {
for (size_t i = 0; i < args.size(); ++i) {
// Arguments are named "_<index>"
result.emplace_back(fastToString(i), (*args[i].getTypeFn)());
}
@ -50,7 +49,7 @@ C10_EXPORT c10::optional<std::string> findSchemaDifferences(const FunctionSchema
" vs " + guts::to_string(rhs.returns().size());
}
for (const auto i : c10::irange(lhs.arguments().size())) {
for (size_t i = 0; i < lhs.arguments().size(); ++i) {
const TypePtr& leftType = lhs.arguments()[i].type();
const TypePtr& rightType = rhs.arguments()[i].type();
// Type::operator== is virtual. Comparing pointers first is
@ -62,7 +61,7 @@ C10_EXPORT c10::optional<std::string> findSchemaDifferences(const FunctionSchema
}
}
for (const auto i : c10::irange(lhs.returns().size())) {
for (size_t i = 0; i < lhs.returns().size(); ++i) {
const TypePtr& leftType = lhs.returns()[i].type();
const TypePtr& rightType = rhs.returns()[i].type();
// See above about comparing pointers first.


@ -3,7 +3,6 @@
#include <c10/util/ArrayRef.h>
#include <c10/util/Exception.h>
#include <c10/util/StringUtil.h>
#include <c10/util/irange.h>
#include <string>
namespace c10 {
@ -70,7 +69,7 @@ struct QualifiedName {
// Can't be a prefix if it's bigger
return false;
}
for (const auto i : c10::irange(thisAtoms.size())) {
for (size_t i = 0; i < thisAtoms.size(); i++) {
if (thisAtoms[i] != otherAtoms[i]) {
return false;
}
@ -117,7 +116,7 @@ struct QualifiedName {
reserve += e.size() + 1;
}
out.reserve(reserve);
for (const auto i : c10::irange(v.size())) {
for (size_t i = 0; i < v.size(); ++i) {
if (i != 0) {
out.push_back(delimiter);
}


@ -4,7 +4,6 @@
#include <ATen/core/ivalue.h>
#include <c10/util/Deprecated.h>
#include <c10/util/irange.h>
// TODO move this to c10 namespace
@ -109,7 +108,7 @@ static inline IValue pop(Stack* stack) {
static inline std::vector<IValue> pop(Stack& stack, size_t n) {
std::vector<IValue> result;
result.reserve(n);
for (const auto i : c10::irange(n)) {
for (size_t i = 0; i < n; ++i) {
result.push_back(std::move(peek(stack, i, n)));
}
drop(stack, n);


@ -4,7 +4,6 @@
// See Note [Do not compile initializers with AVX]
#include <ATen/cpu/vec/vec.h>
#include <c10/util/irange.h>
namespace at { namespace vec {
@ -17,7 +16,7 @@ inline scalar_t vec_reduce_all(
using Vec = vec::Vectorized<scalar_t>;
scalar_t acc_arr[Vec::size()];
acc_vec.store(acc_arr);
for (const auto i : c10::irange(1, size)) {
for (int64_t i = 1; i < size; i++) {
std::array<scalar_t, Vec::size()> acc_arr_next = {0};
acc_arr_next[0] = acc_arr[i];
Vec acc_vec_next = Vec::loadu(acc_arr_next.data());


@ -4,7 +4,6 @@
// See Note [Do not compile initializers with AVX]
#include <c10/util/complex.h>
#include <c10/util/irange.h>
#include <ATen/cpu/vec/intrinsics.h>
#include <ATen/cpu/vec/vec_base.h>
@ -110,7 +109,7 @@ public:
Vectorized<c10::complex<double>> map(c10::complex<double> (*const f)(const c10::complex<double> &)) const {
__at_align__ c10::complex<double> tmp[size()];
store(tmp);
for (const auto i : c10::irange(size())) {
for (int i = 0; i < size(); i++) {
tmp[i] = f(tmp[i]);
}
return loadu(tmp);
@ -294,7 +293,7 @@ public:
__at_align__ c10::complex<double> y_tmp[size()];
store(x_tmp);
exp.store(y_tmp);
for (const auto i : c10::irange(size())) {
for (int i = 0; i < size(); i++) {
x_tmp[i] = std::pow(x_tmp[i], y_tmp[i]);
}
return loadu(x_tmp);


@ -4,7 +4,6 @@
// See Note [Do not compile initializers with AVX]
#include <c10/util/complex.h>
#include <c10/util/irange.h>
#include <ATen/cpu/vec/intrinsics.h>
#include <ATen/cpu/vec/vec_base.h>
#if defined(CPU_CAPABILITY_AVX2) && !defined(_MSC_VER)
@ -145,7 +144,7 @@ public:
Vectorized<c10::complex<float>> map(c10::complex<float> (*const f)(const c10::complex<float> &)) const {
__at_align__ c10::complex<float> tmp[size()];
store(tmp);
for (const auto i : c10::irange(size())) {
for (int i = 0; i < size(); i++) {
tmp[i] = f(tmp[i]);
}
return loadu(tmp);
@ -328,7 +327,7 @@ public:
__at_align__ c10::complex<float> y_tmp[size()];
store(x_tmp);
exp.store(y_tmp);
for (const auto i : c10::irange(size())) {
for (int i = 0; i < size(); i++) {
x_tmp[i] = std::pow(x_tmp[i], y_tmp[i]);
}
return loadu(x_tmp);


@ -5,7 +5,6 @@
#include <ATen/cpu/vec/intrinsics.h>
#include <ATen/cpu/vec/vec_base.h>
#include <c10/util/irange.h>
#if defined(CPU_CAPABILITY_AVX2) && !defined(_MSC_VER)
#include <sleef.h>
#endif
@ -73,7 +72,7 @@ public:
// Ensure uninitialized memory does not change the output value See https://github.com/pytorch/pytorch/issues/32502
// for more details. We do not initialize arrays to zero using "={0}" because gcc would compile it to two
// instructions while a loop would be compiled to one instruction.
for (const auto i : c10::irange(size())) {
for (auto i = 0; i < size(); ++i) {
tmp_values[i] = 0.0;
}
std::memcpy(
@ -104,7 +103,7 @@ public:
Vectorized<double> map(double (*const f)(double)) const {
__at_align__ double tmp[size()];
store(tmp);
for (const auto i : c10::irange(size())) {
for (int64_t i = 0; i < size(); i++) {
tmp[i] = f(tmp[i]);
}
return loadu(tmp);
@ -181,7 +180,7 @@ public:
__at_align__ double tmp_x[size()];
store(tmp);
x.store(tmp_x);
for (const auto i : c10::irange(size())) {
for (int64_t i = 0; i < size(); i++) {
tmp[i] = calc_igamma(tmp[i], tmp_x[i]);
}
return loadu(tmp);
@ -191,7 +190,7 @@ public:
__at_align__ double tmp_x[size()];
store(tmp);
x.store(tmp_x);
for (const auto i : c10::irange(size())) {
for (int64_t i = 0; i < size(); i++) {
tmp[i] = calc_igammac(tmp[i], tmp_x[i]);
}
return loadu(tmp);


@ -5,7 +5,6 @@
#include <ATen/cpu/vec/intrinsics.h>
#include <ATen/cpu/vec/vec_base.h>
#include <c10/util/irange.h>
#if defined(CPU_CAPABILITY_AVX2) && !defined(_MSC_VER)
#include <sleef.h>
#endif
@ -81,7 +80,7 @@ public:
// Ensure uninitialized memory does not change the output value See https://github.com/pytorch/pytorch/issues/32502
// for more details. We do not initialize arrays to zero using "={0}" because gcc would compile it to two
// instructions while a loop would be compiled to one instruction.
for (const auto i : c10::irange(size())) {
for (auto i = 0; i < size(); ++i) {
tmp_values[i] = 0.0;
}
std::memcpy(
@ -110,7 +109,7 @@ public:
Vectorized<float> map(float (*const f)(float)) const {
__at_align__ float tmp[size()];
store(tmp);
for (const auto i : c10::irange(size())) {
for (int64_t i = 0; i < size(); i++) {
tmp[i] = f(tmp[i]);
}
return loadu(tmp);
@ -218,7 +217,7 @@ public:
__at_align__ float tmp_x[size()];
store(tmp);
x.store(tmp_x);
for (const auto i : c10::irange(size())) {
for (int64_t i = 0; i < size(); i++) {
tmp[i] = calc_igamma(tmp[i], tmp_x[i]);
}
return loadu(tmp);
@ -228,7 +227,7 @@ public:
__at_align__ float tmp_x[size()];
store(tmp);
x.store(tmp_x);
for (const auto i : c10::irange(size())) {
for (int64_t i = 0; i < size(); i++) {
tmp[i] = calc_igammac(tmp[i], tmp_x[i]);
}
return loadu(tmp);


@ -5,7 +5,6 @@
#include <ATen/cpu/vec/intrinsics.h>
#include <ATen/cpu/vec/vec_base.h>
#include <c10/util/irange.h>
// Sleef offers vectorized versions of some transcedentals
// such as sin, cos, tan etc..
// However for now opting for STL, since we are not building
@ -222,7 +221,7 @@ public:
}
else {
__at_align__ float tmp_values[size()];
for (const auto i : c10::irange(size())) {
for (auto i = 0; i < size(); ++i) {
tmp_values[i] = 0.0;
}
std::memcpy(
@ -288,7 +287,7 @@ public:
__at_align__ float tmp[size()];
__at_align__ float res[size()];
store(tmp);
for (const auto i : c10::irange(size())) {
for (int i = 0; i < size(); i++) {
if (_isnan(tmp[i])) {
std::memset(static_cast<void*>(&res[i]), 0xFF, sizeof(float));
} else {
@ -300,7 +299,7 @@ public:
Vectorized<float> map(float (*const f)(float)) const {
__at_align__ float tmp[size()];
store(tmp);
for (const auto i : c10::irange(size())) {
for (int64_t i = 0; i < size(); i++) {
tmp[i] = f(tmp[i]);
}
return loadu(tmp);
@ -337,7 +336,7 @@ public:
__at_align__ float tmp_exp[size()];
store(tmp);
exp.store(tmp_exp);
for (const auto i : c10::irange(size())) {
for (int64_t i = 0; i < size(); i++) {
tmp[i] = std::atan2(tmp[i], tmp_exp[i]);
}
return loadu(tmp);
@ -372,7 +371,7 @@ public:
__at_align__ float tmp_q[size()];
store(tmp);
q.store(tmp_q);
for (const auto i : c10::irange(size())) {
for (int64_t i = 0; i < size(); i++) {
tmp[i] = std::fmod(tmp[i], tmp_q[i]);
}
return loadu(tmp);
@ -382,7 +381,7 @@ public:
__at_align__ float tmp_b[size()];
store(tmp);
b.store(tmp_b);
for (const auto i : c10::irange(size())) {
for (int64_t i = 0; i < size(); i++) {
tmp[i] = std::hypot(tmp[i], tmp_b[i]);
}
return loadu(tmp);
@ -398,7 +397,7 @@ public:
__at_align__ float tmp_x[size()];
store(tmp);
x.store(tmp_x);
for (const auto i : c10::irange(size())) {
for (int64_t i = 0; i < size(); i++) {
tmp[i] = calc_igamma(tmp[i], tmp_x[i]);
}
return loadu(tmp);
@ -408,7 +407,7 @@ public:
__at_align__ float tmp_x[size()];
store(tmp);
x.store(tmp_x);
for (const auto i : c10::irange(size())) {
for (int64_t i = 0; i < size(); i++) {
tmp[i] = calc_igammac(tmp[i], tmp_x[i]);
}
return loadu(tmp);
@ -430,7 +429,7 @@ public:
__at_align__ float tmp_b[size()];
store(tmp);
b.store(tmp_b);
for (const auto i : c10::irange(size())) {
for (int64_t i = 0; i < size(); i++) {
tmp[i] = std::nextafter(tmp[i], tmp_b[i]);
}
return loadu(tmp);
@ -495,7 +494,7 @@ public:
__at_align__ float tmp_exp[size()];
store(tmp);
exp.store(tmp_exp);
for (const auto i : c10::irange(size())) {
for (int64_t i = 0; i < size(); i++) {
tmp[i] = std::pow(tmp[i], tmp_exp[i]);
}
return loadu(tmp);


@ -6,7 +6,6 @@
#include <ATen/cpu/vec/intrinsics.h>
#include <ATen/cpu/vec/vec_base.h>
#include <c10/macros/Macros.h>
#include <c10/util/irange.h>
#include <iostream>
namespace at {
@ -99,7 +98,7 @@ public:
// Ensure uninitialized memory does not change the output value See https://github.com/pytorch/pytorch/issues/32502
// for more details. We do not initialize arrays to zero using "={0}" because gcc would compile it to two
// instructions while a loop would be compiled to one instruction.
for (const auto i : c10::irange(size())) {
for (auto i = 0; i < size(); ++i) {
tmp_values[i] = 0;
}
std::memcpy(tmp_values, ptr, count * sizeof(int64_t));
@ -222,7 +221,7 @@ public:
// Ensure uninitialized memory does not change the output value See https://github.com/pytorch/pytorch/issues/32502
// for more details. We do not initialize arrays to zero using "={0}" because gcc would compile it to two
// instructions while a loop would be compiled to one instruction.
for (const auto i : c10::irange(size())) {
for (auto i = 0; i < size(); ++i) {
tmp_values[i] = 0;
}
std::memcpy(tmp_values, ptr, count * sizeof(int32_t));
@ -436,7 +435,7 @@ public:
// Ensure uninitialized memory does not change the output value See https://github.com/pytorch/pytorch/issues/32502
// for more details. We do not initialize arrays to zero using "={0}" because gcc would compile it to two
// instructions while a loop would be compiled to one instruction.
for (const auto i : c10::irange(size())) {
for (auto i = 0; i < size(); ++i) {
tmp_values[i] = 0;
}
std::memcpy(tmp_values, ptr, count * sizeof(int16_t));
@ -685,7 +684,7 @@ public:
// Ensure uninitialized memory does not change the output value See https://github.com/pytorch/pytorch/issues/32502
// for more details. We do not initialize arrays to zero using "={0}" because gcc would compile it to two
// instructions while a loop would be compiled to one instruction.
for (const auto i : c10::irange(size())) {
for (size_t i = 0; i < size(); ++i) {
tmp_values[i] = 0;
}
std::memcpy(tmp_values, ptr, count * sizeof(int8_t));


@ -6,8 +6,6 @@
#include <ATen/cpu/vec/intrinsics.h>
#include <ATen/cpu/vec/vec_base.h>
#include <ATen/native/quantized/affine_quantizer_base.h>
#include <c10/util/irange.h>
#include <c10/util/qint32.h>
#include <c10/util/qint8.h>
#include <c10/util/quint8.h>
@ -741,7 +739,7 @@ struct VectorizedQuantizedConverter {
std::array<value_type, size_> vals;
VectorizedQuantizedConverter(T val) {
for (const auto i : c10::irange(size())) {
for (size_t i = 0; i < size(); ++i) {
vals[i] = val.val_;
}
}
@ -759,9 +757,9 @@ struct VectorizedQuantizedConverter {
Vectorized<float> zero_point,
Vectorized<float> scale_zp_premul) const {
float_vec_return_type rv;
for (const auto i : c10::irange(float_num_vecs())) {
for (int i = 0; i < float_num_vecs(); ++i) {
float tmp_vals[8];
for (const auto j : c10::irange(8)) {
for (int j = 0; j < 8; ++j) {
tmp_vals[j] = at::native::dequantize_val<T>(
scale[j], zero_point[j], T(vals[8 * i + j]));
}
@ -818,7 +816,7 @@ struct Vectorized<c10::qint32> : public VectorizedQuantizedConverter<
std::array<value_type, size()> qvals;
std::array<float, float_num_vecs() * 8> float_vals;
for (const auto i : c10::irange(float_num_vecs())) {
for (int i = 0; i < float_num_vecs(); ++i) {
rhs[i].store(&float_vals[i * 8], 8);
}
@ -834,7 +832,7 @@ struct Vectorized<c10::qint32> : public VectorizedQuantizedConverter<
Vectorized<c10::qint32> maximum(Vectorized<c10::qint32> b) const {
Vectorized<c10::qint32> retval;
for (const auto i : c10::irange(size())) {
for (size_t i = 0; i < size(); ++i) {
retval.vals[i] = std::max<value_type>(vals[i], b.vals[i]);
}
return retval;
@ -842,7 +840,7 @@ struct Vectorized<c10::qint32> : public VectorizedQuantizedConverter<
Vectorized<c10::qint32> minimum(Vectorized<c10::qint32> b) const {
Vectorized<c10::qint32> retval;
for (const auto i : c10::irange(size())) {
for (size_t i = 0; i < size(); ++i) {
retval.vals[i] = std::min<value_type>(vals[i], b.vals[i]);
}
return retval;
@ -857,7 +855,7 @@ struct Vectorized<c10::qint32> : public VectorizedQuantizedConverter<
Vectorized<c10::qint32> zero_point,
Vectorized<c10::qint32> q_six) {
Vectorized<c10::qint32> retval;
for (const auto i : c10::irange(size())) {
for (size_t i = 0; i < size(); ++i) {
retval.vals[i] = std::min<value_type>(
std::max<value_type>(vals[i], zero_point.vals[i]), q_six.vals[i]);
}
@ -866,7 +864,7 @@ struct Vectorized<c10::qint32> : public VectorizedQuantizedConverter<
int_vec_return_type widening_subtract(Vectorized<c10::qint32> b) const {
int_vec_return_type retval;
for (const auto i : c10::irange(size())) {
for (size_t i = 0; i < size(); ++i) {
retval[0].vals[i] = vals[i] - b.vals[i];
}
return retval;
@ -877,7 +875,7 @@ struct Vectorized<c10::qint32> : public VectorizedQuantizedConverter<
float multiplier,
int32_t zero_point) {
Vectorized<c10::qint32> retval;
for (const auto i : c10::irange(size())) {
for (size_t i = 0; i < size(); ++i) {
retval.vals[i] =
nearbyint(static_cast<float>(inp[0].vals[i]) * multiplier) +
zero_point;
@ -950,7 +948,7 @@ struct Vectorized<c10::qint8> : public VectorizedQuantizedConverter<
std::array<value_type, size()> qvals;
std::array<float, float_num_vecs() * 8> float_vals;
for (const auto i : c10::irange(float_num_vecs())) {
for (int i = 0; i < float_num_vecs(); ++i) {
rhs[i].store(&float_vals[i * 8], 8);
}
@ -966,7 +964,7 @@ struct Vectorized<c10::qint8> : public VectorizedQuantizedConverter<
Vectorized<c10::qint8> maximum(Vectorized<c10::qint8> b) const {
Vectorized<c10::qint8> retval;
for (const auto i : c10::irange(size())) {
for (size_t i = 0; i < size(); ++i) {
retval.vals[i] = std::max<value_type>(vals[i], b.vals[i]);
}
return retval;
@ -974,7 +972,7 @@ struct Vectorized<c10::qint8> : public VectorizedQuantizedConverter<
Vectorized<c10::qint8> minimum(Vectorized<c10::qint8> b) const {
Vectorized<c10::qint8> retval;
for (const auto i : c10::irange(size())) {
for (size_t i = 0; i < size(); ++i) {
retval.vals[i] = std::min<value_type>(vals[i], b.vals[i]);
}
return retval;
@ -988,7 +986,7 @@ struct Vectorized<c10::qint8> : public VectorizedQuantizedConverter<
Vectorized<c10::qint8> zero_point,
Vectorized<c10::qint8> q_six) {
Vectorized<c10::qint8> retval;
for (const auto i : c10::irange(size())) {
for (size_t i = 0; i < size(); ++i) {
retval.vals[i] = std::min<value_type>(
std::max<value_type>(vals[i], zero_point.vals[i]), q_six.vals[i]);
}
@ -998,8 +996,8 @@ struct Vectorized<c10::qint8> : public VectorizedQuantizedConverter<
int_vec_return_type widening_subtract(Vectorized<c10::qint8> b) const {
int_vec_return_type retval;
constexpr int elem_per_int_vec = size() / int_num_vecs();
for (const auto i : c10::irange(int_num_vecs())) {
for (const auto j : c10::irange(elem_per_int_vec)) {
for (size_t i = 0; i < int_num_vecs(); ++i) {
for (size_t j = 0; j < elem_per_int_vec; ++j) {
retval[i].vals[j] =
static_cast<int32_t>(vals[i * elem_per_int_vec + j]) -
static_cast<int32_t>(b.vals[i * elem_per_int_vec + j]);
@ -1015,8 +1013,8 @@ struct Vectorized<c10::qint8> : public VectorizedQuantizedConverter<
constexpr auto min_val = std::numeric_limits<value_type>::min();
constexpr auto max_val = std::numeric_limits<value_type>::max();
Vectorized<c10::qint8> retval;
for (const auto i : c10::irange(int_num_vecs())) {
for (const auto j : c10::irange(elem_per_int_vec)) {
for (size_t i = 0; i < int_num_vecs(); ++i) {
for (size_t j = 0; j < elem_per_int_vec; ++j) {
int32_t rounded =
nearbyint(static_cast<float>(inp[i].vals[j]) * multiplier) +
zero_point;
@ -1070,7 +1068,7 @@ struct Vectorized<c10::quint8> : public VectorizedQuantizedConverter<
std::array<value_type, size()> qvals;
std::array<float, float_num_vecs() * 8> float_vals;
for (const auto i : c10::irange(float_num_vecs())) {
for (int i = 0; i < float_num_vecs(); ++i) {
rhs[i].store(&float_vals[i * 8], 8);
}
@ -1086,7 +1084,7 @@ struct Vectorized<c10::quint8> : public VectorizedQuantizedConverter<
Vectorized<c10::quint8> maximum(Vectorized<c10::quint8> b) const {
Vectorized<c10::quint8> retval;
for (const auto i : c10::irange(size())) {
for (size_t i = 0; i < size(); ++i) {
retval.vals[i] = std::max<value_type>(vals[i], b.vals[i]);
}
return retval;
@ -1094,7 +1092,7 @@ struct Vectorized<c10::quint8> : public VectorizedQuantizedConverter<
Vectorized<c10::quint8> minimum(Vectorized<c10::quint8> b) const {
Vectorized<c10::quint8> retval;
for (const auto i : c10::irange(size())) {
for (size_t i = 0; i < size(); ++i) {
retval.vals[i] = std::min<value_type>(vals[i], b.vals[i]);
}
return retval;
@ -1109,7 +1107,7 @@ struct Vectorized<c10::quint8> : public VectorizedQuantizedConverter<
Vectorized<c10::quint8> zero_point,
Vectorized<c10::quint8> q_six) {
Vectorized<c10::quint8> retval;
for (const auto i : c10::irange(size())) {
for (size_t i = 0; i < size(); ++i) {
retval.vals[i] = std::min<value_type>(
std::max<value_type>(vals[i], zero_point.vals[i]), q_six.vals[i]);
}
@ -1119,8 +1117,8 @@ struct Vectorized<c10::quint8> : public VectorizedQuantizedConverter<
int_vec_return_type widening_subtract(Vectorized<c10::quint8> b) const {
int_vec_return_type retval;
constexpr int elem_per_int_vec = size() / int_num_vecs();
for (const auto i : c10::irange(int_num_vecs())) {
for (const auto j : c10::irange(elem_per_int_vec)) {
for (size_t i = 0; i < int_num_vecs(); ++i) {
for (size_t j = 0; j < elem_per_int_vec; ++j) {
retval[i].vals[j] =
static_cast<int32_t>(vals[i * elem_per_int_vec + j]) -
static_cast<int32_t>(b.vals[i * elem_per_int_vec + j]);
@ -1136,8 +1134,8 @@ struct Vectorized<c10::quint8> : public VectorizedQuantizedConverter<
constexpr auto min_val = std::numeric_limits<value_type>::min();
constexpr auto max_val = std::numeric_limits<value_type>::max();
Vectorized<c10::quint8> retval;
for (const auto i : c10::irange(int_num_vecs())) {
for (const auto j : c10::irange(elem_per_int_vec)) {
for (size_t i = 0; i < int_num_vecs(); ++i) {
for (size_t j = 0; j < elem_per_int_vec; ++j) {
int32_t rounded =
nearbyint(static_cast<float>(inp[i].vals[j]) * multiplier) +
zero_point;
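
The hunks above all swap the same two loop forms: a c10::irange(n) range loop and a plain indexed loop over [0, n). A minimal standalone sketch of the equivalence; irange_like here is an illustrative stand-in, not the real c10::irange helper:

#include <cstddef>
#include <numeric>
#include <vector>

// Illustrative stand-in for c10::irange(n): the integers [0, n).
std::vector<std::size_t> irange_like(std::size_t n) {
  std::vector<std::size_t> r(n);
  std::iota(r.begin(), r.end(), std::size_t{0});
  return r;
}

double sum_indexed(const std::vector<double>& v) {
  double s = 0.0;
  for (std::size_t i = 0; i < v.size(); ++i) {  // index-based form used after the revert
    s += v[i];
  }
  return s;
}

double sum_ranged(const std::vector<double>& v) {
  double s = 0.0;
  for (const auto i : irange_like(v.size())) {  // range-based form being reverted
    s += v[i];
  }
  return s;
}

Both functions visit the same indices, which is why the substitution is mechanical throughout these files.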

View File

@ -3,7 +3,6 @@
#include <ATen/cpu/vec/vec_base.h>
#include <ATen/cpu/vec/vec256/vsx/vsx_helpers.h>
#include <c10/util/complex.h>
#include <c10/util/irange.h>
namespace at {
namespace vec {
@ -168,7 +167,7 @@ class Vectorized<ComplexDbl> {
Vectorized<ComplexDbl> map(ComplexDbl (*const f)(ComplexDbl)) const {
__at_align__ ComplexDbl tmp[size()];
store(tmp);
for (const auto i : c10::irange(size())) {
for (int i = 0; i < size(); i++) {
tmp[i] = f(tmp[i]);
}
return loadu(tmp);
@ -177,7 +176,7 @@ class Vectorized<ComplexDbl> {
Vectorized<ComplexDbl> map(ComplexDbl (*const f)(const ComplexDbl&)) const {
__at_align__ ComplexDbl tmp[size()];
store(tmp);
for (const auto i : c10::irange(size())) {
for (int i = 0; i < size(); i++) {
tmp[i] = f(tmp[i]);
}
return loadu(tmp);
@ -455,7 +454,7 @@ class Vectorized<ComplexDbl> {
__at_align__ ComplexDbl y_tmp[size()];
store(x_tmp);
exp.store(y_tmp);
for (const auto i : c10::irange(size())) {
for (int i = 0; i < size(); i++) {
x_tmp[i] = std::pow(x_tmp[i], y_tmp[i]);
}
return loadu(x_tmp);
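
The map and pow members above all use the same scalar fallback: store the lanes to a temporary array, apply the scalar function per element, and reload. A minimal standalone sketch of that pattern; ToyVec is illustrative, not the real Vectorized class:

#include <array>
#include <complex>
#include <cstddef>

template <typename T, std::size_t N>
struct ToyVec {
  std::array<T, N> values{};

  template <typename F>
  ToyVec map(F f) const {
    std::array<T, N> tmp = values;      // "store" to scratch memory
    for (std::size_t i = 0; i < N; ++i) {
      tmp[i] = f(tmp[i]);               // scalar call per lane
    }
    ToyVec out;
    out.values = tmp;                   // "loadu" the results back
    return out;
  }
};

// Usage: ToyVec<std::complex<double>, 2> v; auto w = v.map([](auto z) { return std::exp(z); });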

View File

@ -4,7 +4,6 @@
#include <ATen/cpu/vec/vec_base.h>
#include <ATen/cpu/vec/vec256/vsx/vsx_helpers.h>
#include <c10/util/complex.h>
#include <c10/util/irange.h>
namespace at {
namespace vec {
@ -223,7 +222,7 @@ class Vectorized<ComplexFlt> {
Vectorized<ComplexFlt> map(ComplexFlt (*const f)(ComplexFlt)) const {
__at_align__ ComplexFlt tmp[size()];
store(tmp);
for (const auto i : c10::irange(size())) {
for (int i = 0; i < size(); i++) {
tmp[i] = f(tmp[i]);
}
return loadu(tmp);
@ -232,7 +231,7 @@ class Vectorized<ComplexFlt> {
Vectorized<ComplexFlt> map(ComplexFlt (*const f)(const ComplexFlt&)) const {
__at_align__ ComplexFlt tmp[size()];
store(tmp);
for (const auto i : c10::irange(size())) {
for (int i = 0; i < size(); i++) {
tmp[i] = f(tmp[i]);
}
return loadu(tmp);
@ -431,7 +430,7 @@ class Vectorized<ComplexFlt> {
__at_align__ ComplexFlt y_tmp[size()];
store(x_tmp);
exp.store(y_tmp);
for (const auto i : c10::irange(size())) {
for (int i = 0; i < size(); i++) {
x_tmp[i] = std::pow(x_tmp[i], y_tmp[i]);
}
return loadu(x_tmp);

View File

@ -3,8 +3,6 @@
#include <ATen/cpu/vec/intrinsics.h>
#include <ATen/cpu/vec/vec_base.h>
#include <ATen/cpu/vec/vec256/vsx/vsx_helpers.h>
#include <c10/util/irange.h>
#include <c10/util/quint8.h>
#include <array>

View File

@ -4,7 +4,6 @@
// See Note [Do not compile initializers with AVX]
#include <c10/util/complex.h>
#include <c10/util/irange.h>
#include <ATen/cpu/vec/intrinsics.h>
#include <ATen/cpu/vec/vec_base.h>
#if defined(CPU_CAPABILITY_AVX512) && !defined(_MSC_VER)
@ -150,7 +149,7 @@ public:
Vectorized<c10::complex<double>> map(c10::complex<double> (*const f)(const c10::complex<double> &)) const {
__at_align__ c10::complex<double> tmp[size()];
store(tmp);
for (const auto i : c10::irange(size())) {
for (int i = 0; i < size(); i++) {
tmp[i] = f(tmp[i]);
}
return loadu(tmp);
@ -358,7 +357,7 @@ public:
__at_align__ c10::complex<double> y_tmp[size()];
store(x_tmp);
exp.store(y_tmp);
for (const auto i : c10::irange(size())) {
for (int i = 0; i < size(); i++) {
x_tmp[i] = std::pow(x_tmp[i], y_tmp[i]);
}
return loadu(x_tmp);

View File

@ -4,7 +4,6 @@
// See Note [Do not compile initializers with AVX]
#include <c10/util/complex.h>
#include <c10/util/irange.h>
#include <ATen/cpu/vec/intrinsics.h>
#include <ATen/cpu/vec/vec_base.h>
#if defined(CPU_CAPABILITY_AVX512) && !defined(_MSC_VER)
@ -668,7 +667,7 @@ public:
Vectorized<c10::complex<float>> map(c10::complex<float> (*const f)(const c10::complex<float> &)) const {
__at_align__ c10::complex<float> tmp[size()];
store(tmp);
for (const auto i : c10::irange(size())) {
for (int i = 0; i < size(); i++) {
tmp[i] = f(tmp[i]);
}
return loadu(tmp);
@ -859,7 +858,7 @@ public:
__at_align__ c10::complex<float> y_tmp[size()];
store(x_tmp);
exp.store(y_tmp);
for (const auto i : c10::irange(size())) {
for (int i = 0; i < size(); i++) {
x_tmp[i] = std::pow(x_tmp[i], y_tmp[i]);
}
return loadu(x_tmp);

View File

@ -5,7 +5,6 @@
#include <ATen/cpu/vec/intrinsics.h>
#include <ATen/cpu/vec/vec_base.h>
#include <c10/util/irange.h>
#if (defined(CPU_CAPABILITY_AVX512)) && !defined(_MSC_VER)
#include <sleef.h>
#endif
@ -88,7 +87,7 @@ public:
// Ensure uninitialized memory does not change the output value See https://github.com/pytorch/pytorch/issues/32502
// for more details. We do not initialize arrays to zero using "={0}" because gcc would compile it to two
// instructions while a loop would be compiled to one instruction.
for (const auto i : c10::irange(size())) {
for (auto i = 0; i < size(); ++i) {
tmp_values[i] = 0.0;
}
std::memcpy(
@ -121,7 +120,7 @@ public:
Vectorized<double> map(double (*const f)(double)) const {
__at_align__ double tmp[size()];
store(tmp);
for (const auto i : c10::irange(size())) {
for (int64_t i = 0; i < size(); i++) {
tmp[i] = f(tmp[i]);
}
return loadu(tmp);
@ -201,7 +200,7 @@ public:
__at_align__ double tmp_x[size()];
store(tmp);
x.store(tmp_x);
for (const auto i : c10::irange(size())) {
for (int64_t i = 0; i < size(); i++) {
tmp[i] = calc_igamma(tmp[i], tmp_x[i]);
}
return loadu(tmp);
@ -211,7 +210,7 @@ public:
__at_align__ double tmp_x[size()];
store(tmp);
x.store(tmp_x);
for (const auto i : c10::irange(size())) {
for (int64_t i = 0; i < size(); i++) {
tmp[i] = calc_igammac(tmp[i], tmp_x[i]);
}
return loadu(tmp);
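
The loadu hunks above zero the destination lanes before copying only count elements, so the tail lanes never hold uninitialized data (the in-code comment cites pytorch/pytorch issue 32502). A standalone sketch of that pattern, with a plain 8-lane double array standing in for the aligned vector storage:

#include <cstddef>
#include <cstring>

void load_partial(double (&lanes)[8], const double* src, std::size_t count) {
  for (std::size_t i = 0; i < 8; ++i) {
    lanes[i] = 0.0;                     // defined value for the unloaded tail lanes
  }
  std::memcpy(lanes, src, count * sizeof(double));  // copy only the valid prefix
}

The explicit zeroing loop, rather than "={0}", mirrors the code-generation note in the comment above.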

View File

@ -5,7 +5,6 @@
#include <ATen/cpu/vec/intrinsics.h>
#include <ATen/cpu/vec/vec_base.h>
#include <c10/util/irange.h>
#if defined(CPU_CAPABILITY_AVX512) && !defined(_MSC_VER)
#include <sleef.h>
#endif
@ -105,7 +104,7 @@ public:
// Ensure uninitialized memory does not change the output value See https://github.com/pytorch/pytorch/issues/32502
// for more details. We do not initialize arrays to zero using "={0}" because gcc would compile it to two
// instructions while a loop would be compiled to one instruction.
for (const auto i : c10::irange(size())) {
for (auto i = 0; i < size(); ++i) {
tmp_values[i] = 0.0;
}
std::memcpy(
@ -136,7 +135,7 @@ public:
Vectorized<float> map(float (*const f)(float)) const {
__at_align__ float tmp[size()];
store(tmp);
for (const auto i : c10::irange(size())) {
for (int64_t i = 0; i < size(); i++) {
tmp[i] = f(tmp[i]);
}
return loadu(tmp);
@ -247,7 +246,7 @@ public:
__at_align__ float tmp_x[size()];
store(tmp);
x.store(tmp_x);
for (const auto i : c10::irange(size())) {
for (int64_t i = 0; i < size(); i++) {
tmp[i] = calc_igamma(tmp[i], tmp_x[i]);
}
return loadu(tmp);
@ -257,7 +256,7 @@ public:
__at_align__ float tmp_x[size()];
store(tmp);
x.store(tmp_x);
for (const auto i : c10::irange(size())) {
for (int64_t i = 0; i < size(); i++) {
tmp[i] = calc_igammac(tmp[i], tmp_x[i]);
}
return loadu(tmp);

View File

@ -6,7 +6,6 @@
#include <ATen/cpu/vec/intrinsics.h>
#include <ATen/cpu/vec/vec_base.h>
#include <c10/macros/Macros.h>
#include <c10/util/irange.h>
namespace at {
namespace vec {
@ -101,7 +100,7 @@ public:
// Ensure uninitialized memory does not change the output value See https://github.com/pytorch/pytorch/issues/32502
// for more details. We do not initialize arrays to zero using "={0}" because gcc would compile it to two
// instructions while a loop would be compiled to one instruction.
for (const auto i : c10::irange(size())) {
for (auto i = 0; i < size(); ++i) {
tmp_values[i] = 0;
}
std::memcpy(tmp_values, ptr, count * sizeof(int64_t));
@ -254,7 +253,7 @@ public:
// Ensure uninitialized memory does not change the output value See https://github.com/pytorch/pytorch/issues/32502
// for more details. We do not initialize arrays to zero using "={0}" because gcc would compile it to two
// instructions while a loop would be compiled to one instruction.
for (const auto i : c10::irange(size())) {
for (auto i = 0; i < size(); ++i) {
tmp_values[i] = 0;
}
std::memcpy(tmp_values, ptr, count * sizeof(int32_t));
@ -486,7 +485,7 @@ public:
// Ensure uninitialized memory does not change the output value See https://github.com/pytorch/pytorch/issues/32502
// for more details. We do not initialize arrays to zero using "={0}" because gcc would compile it to two
// instructions while a loop would be compiled to one instruction.
for (const auto i : c10::irange(size())) {
for (auto i = 0; i < size(); ++i) {
tmp_values[i] = 0;
}
std::memcpy(tmp_values, ptr, count * sizeof(int16_t));
@ -762,7 +761,7 @@ public:
// Ensure uninitialized memory does not change the output value See https://github.com/pytorch/pytorch/issues/32502
// for more details. We do not initialize arrays to zero using "={0}" because gcc would compile it to two
// instructions while a loop would be compiled to one instruction.
for (const auto i : c10::irange(size())) {
for (size_t i = 0; i < size(); ++i) {
tmp_values[i] = 0;
}
std::memcpy(tmp_values, ptr, count * sizeof(int8_t));

View File

@ -6,8 +6,6 @@
#include <ATen/cpu/vec/intrinsics.h>
#include <ATen/cpu/vec/vec_base.h>
#include <ATen/native/quantized/affine_quantizer_base.h>
#include <c10/util/irange.h>
#include <c10/util/qint32.h>
#include <c10/util/qint8.h>
#include <c10/util/quint8.h>
@ -746,7 +744,7 @@ struct VectorizedQuantizedConverter {
std::array<value_type, size_> vals;
VectorizedQuantizedConverter(T val) {
for (const auto i : c10::irange(size())) {
for (size_t i = 0; i < size(); ++i) {
vals[i] = val.val_;
}
}
@ -764,9 +762,9 @@ struct VectorizedQuantizedConverter {
Vectorized<float> zero_point,
Vectorized<float> scale_zp_premul) const {
float_vec_return_type rv;
for (const auto i : c10::irange(float_num_vecs())) {
for (int i = 0; i < float_num_vecs(); ++i) {
float tmp_vals[16];
for (const auto j : c10::irange(16)) {
for (int j = 0; j < 16; ++j) {
tmp_vals[j] = at::native::dequantize_val<T>(
scale[j], zero_point[j], T(vals[16 * i + j]));
}
@ -831,7 +829,7 @@ struct Vectorized<c10::qint32> : public VectorizedQuantizedConverter<
std::array<value_type, size()> qvals;
std::array<float, float_num_vecs() * 16> float_vals;
for (const auto i : c10::irange(float_num_vecs())) {
for (int i = 0; i < float_num_vecs(); ++i) {
rhs[i].store(&float_vals[i * 16], 16);
}
@ -847,7 +845,7 @@ struct Vectorized<c10::qint32> : public VectorizedQuantizedConverter<
Vectorized<c10::qint32> maximum(Vectorized<c10::qint32> b) const {
Vectorized<c10::qint32> retval;
for (const auto i : c10::irange(size())) {
for (size_t i = 0; i < size(); ++i) {
retval.vals[i] = std::max<value_type>(vals[i], b.vals[i]);
}
return retval;
@ -855,7 +853,7 @@ struct Vectorized<c10::qint32> : public VectorizedQuantizedConverter<
Vectorized<c10::qint32> minimum(Vectorized<c10::qint32> b) const {
Vectorized<c10::qint32> retval;
for (const auto i : c10::irange(size())) {
for (size_t i = 0; i < size(); ++i) {
retval.vals[i] = std::min<value_type>(vals[i], b.vals[i]);
}
return retval;
@ -870,7 +868,7 @@ struct Vectorized<c10::qint32> : public VectorizedQuantizedConverter<
Vectorized<c10::qint32> zero_point,
Vectorized<c10::qint32> q_six) {
Vectorized<c10::qint32> retval;
for (const auto i : c10::irange(size())) {
for (size_t i = 0; i < size(); ++i) {
retval.vals[i] = std::min<value_type>(
std::max<value_type>(vals[i], zero_point.vals[i]), q_six.vals[i]);
}
@ -879,7 +877,7 @@ struct Vectorized<c10::qint32> : public VectorizedQuantizedConverter<
int_vec_return_type widening_subtract(Vectorized<c10::qint32> b) const {
int_vec_return_type retval;
for (const auto i : c10::irange(size())) {
for (size_t i = 0; i < size(); ++i) {
retval[0].vals[i] = vals[i] - b.vals[i];
}
return retval;
@ -890,7 +888,7 @@ struct Vectorized<c10::qint32> : public VectorizedQuantizedConverter<
float multiplier,
int32_t zero_point) {
Vectorized<c10::qint32> retval;
for (const auto i : c10::irange(size())) {
for (size_t i = 0; i < size(); ++i) {
retval.vals[i] =
nearbyint(static_cast<float>(inp[0].vals[i]) * multiplier) +
zero_point;
@ -963,7 +961,7 @@ struct Vectorized<c10::qint8> : public VectorizedQuantizedConverter<
std::array<value_type, size()> qvals;
std::array<float, float_num_vecs() * 16> float_vals;
for (const auto i : c10::irange(float_num_vecs())) {
for (int i = 0; i < float_num_vecs(); ++i) {
rhs[i].store(&float_vals[i * 16], 16);
}
@ -979,7 +977,7 @@ struct Vectorized<c10::qint8> : public VectorizedQuantizedConverter<
Vectorized<c10::qint8> maximum(Vectorized<c10::qint8> b) const {
Vectorized<c10::qint8> retval;
for (const auto i : c10::irange(size())) {
for (size_t i = 0; i < size(); ++i) {
retval.vals[i] = std::max<value_type>(vals[i], b.vals[i]);
}
return retval;
@ -987,7 +985,7 @@ struct Vectorized<c10::qint8> : public VectorizedQuantizedConverter<
Vectorized<c10::qint8> minimum(Vectorized<c10::qint8> b) const {
Vectorized<c10::qint8> retval;
for (const auto i : c10::irange(size())) {
for (size_t i = 0; i < size(); ++i) {
retval.vals[i] = std::min<value_type>(vals[i], b.vals[i]);
}
return retval;
@ -1001,7 +999,7 @@ struct Vectorized<c10::qint8> : public VectorizedQuantizedConverter<
Vectorized<c10::qint8> zero_point,
Vectorized<c10::qint8> q_six) {
Vectorized<c10::qint8> retval;
for (const auto i : c10::irange(size())) {
for (size_t i = 0; i < size(); ++i) {
retval.vals[i] = std::min<value_type>(
std::max<value_type>(vals[i], zero_point.vals[i]), q_six.vals[i]);
}
@ -1011,8 +1009,8 @@ struct Vectorized<c10::qint8> : public VectorizedQuantizedConverter<
int_vec_return_type widening_subtract(Vectorized<c10::qint8> b) const {
int_vec_return_type retval;
constexpr int elem_per_int_vec = size() / int_num_vecs();
for (const auto i : c10::irange(int_num_vecs())) {
for (const auto j : c10::irange(elem_per_int_vec)) {
for (size_t i = 0; i < int_num_vecs(); ++i) {
for (size_t j = 0; j < elem_per_int_vec; ++j) {
retval[i].vals[j] =
static_cast<int32_t>(vals[i * elem_per_int_vec + j]) -
static_cast<int32_t>(b.vals[i * elem_per_int_vec + j]);
@ -1028,8 +1026,8 @@ struct Vectorized<c10::qint8> : public VectorizedQuantizedConverter<
constexpr auto min_val = std::numeric_limits<value_type>::min();
constexpr auto max_val = std::numeric_limits<value_type>::max();
Vectorized<c10::qint8> retval;
for (const auto i : c10::irange(int_num_vecs())) {
for (const auto j : c10::irange(elem_per_int_vec)) {
for (size_t i = 0; i < int_num_vecs(); ++i) {
for (size_t j = 0; j < elem_per_int_vec; ++j) {
int32_t rounded =
nearbyint(static_cast<float>(inp[i].vals[j]) * multiplier) +
zero_point;
@ -1083,7 +1081,7 @@ struct Vectorized<c10::quint8> : public VectorizedQuantizedConverter<
std::array<value_type, size()> qvals;
std::array<float, float_num_vecs() * 16> float_vals;
for (const auto i : c10::irange(float_num_vecs())) {
for (int i = 0; i < float_num_vecs(); ++i) {
rhs[i].store(&float_vals[i * 16], 16);
}
@ -1099,7 +1097,7 @@ struct Vectorized<c10::quint8> : public VectorizedQuantizedConverter<
Vectorized<c10::quint8> maximum(Vectorized<c10::quint8> b) const {
Vectorized<c10::quint8> retval;
for (const auto i : c10::irange(size())) {
for (size_t i = 0; i < size(); ++i) {
retval.vals[i] = std::max<value_type>(vals[i], b.vals[i]);
}
return retval;
@ -1107,7 +1105,7 @@ struct Vectorized<c10::quint8> : public VectorizedQuantizedConverter<
Vectorized<c10::quint8> minimum(Vectorized<c10::quint8> b) const {
Vectorized<c10::quint8> retval;
for (const auto i : c10::irange(size())) {
for (size_t i = 0; i < size(); ++i) {
retval.vals[i] = std::min<value_type>(vals[i], b.vals[i]);
}
return retval;
@ -1122,7 +1120,7 @@ struct Vectorized<c10::quint8> : public VectorizedQuantizedConverter<
Vectorized<c10::quint8> zero_point,
Vectorized<c10::quint8> q_six) {
Vectorized<c10::quint8> retval;
for (const auto i : c10::irange(size())) {
for (size_t i = 0; i < size(); ++i) {
retval.vals[i] = std::min<value_type>(
std::max<value_type>(vals[i], zero_point.vals[i]), q_six.vals[i]);
}
@ -1132,8 +1130,8 @@ struct Vectorized<c10::quint8> : public VectorizedQuantizedConverter<
int_vec_return_type widening_subtract(Vectorized<c10::quint8> b) const {
int_vec_return_type retval;
constexpr int elem_per_int_vec = size() / int_num_vecs();
for (const auto i : c10::irange(int_num_vecs())) {
for (const auto j : c10::irange(elem_per_int_vec)) {
for (size_t i = 0; i < int_num_vecs(); ++i) {
for (size_t j = 0; j < elem_per_int_vec; ++j) {
retval[i].vals[j] =
static_cast<int32_t>(vals[i * elem_per_int_vec + j]) -
static_cast<int32_t>(b.vals[i * elem_per_int_vec + j]);
@ -1149,8 +1147,8 @@ struct Vectorized<c10::quint8> : public VectorizedQuantizedConverter<
constexpr auto min_val = std::numeric_limits<value_type>::min();
constexpr auto max_val = std::numeric_limits<value_type>::max();
Vectorized<c10::quint8> retval;
for (const auto i : c10::irange(int_num_vecs())) {
for (const auto j : c10::irange(elem_per_int_vec)) {
for (size_t i = 0; i < int_num_vecs(); ++i) {
for (size_t j = 0; j < elem_per_int_vec; ++j) {
int32_t rounded =
nearbyint(static_cast<float>(inp[i].vals[j]) * multiplier) +
zero_point;
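
The requantize_from_int hunks above scale a 32-bit accumulator, round to nearest, add the zero point, and clamp to the quantized type's range. A standalone single-element sketch, with int8_t standing in for the qint8 value_type:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>

int8_t requantize_one(int32_t acc, float multiplier, int32_t zero_point) {
  const int32_t rounded =
      static_cast<int32_t>(std::nearbyint(static_cast<float>(acc) * multiplier)) + zero_point;
  constexpr int32_t lo = std::numeric_limits<int8_t>::min();
  constexpr int32_t hi = std::numeric_limits<int8_t>::max();
  return static_cast<int8_t>(std::min(hi, std::max(lo, rounded)));  // clamp into [min, max]
}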

View File

@ -31,7 +31,6 @@
#include <ATen/native/cpu/zmath.h>
#include <c10/util/TypeCast.h>
#include <c10/macros/Macros.h>
#include <c10/util/irange.h>
// These macros helped us unify vec_base.h
#ifdef CPU_CAPABILITY_AVX512
@ -151,7 +150,7 @@ public:
static Vectorized<T> blend(const Vectorized<T>& a, const Vectorized<T>& b) {
int64_t mask = mask_;
Vectorized vector;
for (const auto i : c10::irange(size())) {
for (int64_t i = 0; i < size(); i++) {
if (mask & 0x01) {
vector[i] = b[i];
} else {
@ -166,7 +165,7 @@ public:
Vectorized vector;
int_same_size_t<T> buffer[size()];
mask.store(buffer);
for (const auto i : c10::irange(size())) {
for (int64_t i = 0; i < size(); i++) {
if (buffer[i] & 0x01)
{
vector[i] = b[i];
@ -179,14 +178,14 @@ public:
template<typename step_t> // step sometimes requires a higher precision type (e.g., T=int, step_t=double)
static Vectorized<T> arange(T base = static_cast<T>(0), step_t step = static_cast<step_t>(1)) {
Vectorized vector;
for (const auto i : c10::irange(size())) {
for (int64_t i = 0; i < size(); i++) {
vector.values[i] = base + i * step;
}
return vector;
}
static Vectorized<T> set(const Vectorized<T>& a, const Vectorized<T>& b, int64_t count = size()) {
Vectorized vector;
for (const auto i : c10::irange(size())) {
for (int64_t i = 0; i < size(); i++) {
if (i < count) {
vector[i] = b[i];
} else {
@ -341,7 +340,7 @@ public:
}
Vectorized<T> atan2(const Vectorized<T> &exp) const {
Vectorized<T> ret;
for (const auto i : c10::irange(size())) {
for (int64_t i = 0; i < size(); i++) {
ret[i] = std::atan2(values[i], exp[i]);
}
return ret;
@ -381,7 +380,7 @@ public:
// U is for SFINAE purposes only. Make sure it is not changed.
static_assert(std::is_same<U, T>::value, "U must be T");
Vectorized<T> ret;
for (const auto i : c10::irange(size())) {
for (int64_t i = 0; i < size(); i++) {
ret[i] = std::fmod(values[i], q[i]);
}
return ret;
@ -424,7 +423,7 @@ public:
}
Vectorized<T> hypot(const Vectorized<T> &b) const {
Vectorized<T> ret;
for (const auto i : c10::irange(size())) {
for (int64_t i = 0; i < size(); i++) {
ret[i] = std::hypot(values[i], b[i]);
}
return ret;
@ -437,14 +436,14 @@ public:
}
Vectorized<T> igamma(const Vectorized<T> &x) const {
Vectorized<T> ret;
for (const auto i : c10::irange(size())) {
for (int64_t i = 0; i < size(); i++) {
ret[i] = calc_igamma(values[i], x[i]);
}
return ret;
}
Vectorized<T> igammac(const Vectorized<T> &x) const {
Vectorized<T> ret;
for (const auto i : c10::irange(size())) {
for (int64_t i = 0; i < size(); i++) {
ret[i] = calc_igammac(values[i], x[i]);
}
return ret;
@ -457,7 +456,7 @@ public:
}
Vectorized<T> nextafter(const Vectorized<T> &b) const {
Vectorized<T> ret;
for (const auto i : c10::irange(size())) {
for (int64_t i = 0; i < size(); i++) {
ret[i] = std::nextafter(values[i], b[i]);
}
return ret;
@ -495,7 +494,7 @@ public:
}
Vectorized<T> pow(const Vectorized<T> &exp) const {
Vectorized<T> ret;
for (const auto i : c10::irange(size())) {
for (int64_t i = 0; i < size(); i++) {
ret[i] = std::pow(values[i], exp[i]);
}
return ret;
@ -809,7 +808,7 @@ inline gather(T const* base_addr, const Vectorized<int_same_size_t<T>>& vindex)
int_same_size_t<T> index_arr[size];
vindex.store(static_cast<void*>(index_arr));
T buffer[size];
for (const auto i : c10::irange(size)) {
for (int64_t i = 0; i < size; i++) {
buffer[i] = base_addr[index_arr[i] * scale / sizeof(T)];
}
return Vectorized<T>::loadu(static_cast<void*>(buffer));
@ -827,7 +826,7 @@ inline mask_gather(const Vectorized<T>& src, T const* base_addr,
mask.store(static_cast<void*>(mask_arr));
vindex.store(static_cast<void*>(index_arr));
T buffer[size];
for (const auto i : c10::irange(size)) {
for (int64_t i = 0; i < size; i++) {
if (mask_arr[i] & 0x01) { // check highest bit
buffer[i] = base_addr[index_arr[i] * scale / sizeof(T)];
} else {
@ -873,7 +872,7 @@ inline Vectorized<int_same_size_t<T>> convert_to_int_of_same_size(const Vectoriz
T src_arr[size];
src.store(static_cast<void*>(src_arr));
int_same_size_t<T> buffer[size];
for (const auto i : c10::irange(size)) {
for (int64_t i = 0; i < size; i++) {
buffer[i] = static_cast<int_same_size_t<T>>(src_arr[i]);
}
return Vectorized<int_same_size_t<T>>::loadu(static_cast<void*>(buffer));
@ -900,7 +899,7 @@ deinterleave2(const Vectorized<T>& a, const Vectorized<T>& b) {
T buffer2[size];
a.store(static_cast<void*>(a_arr));
b.store(static_cast<void*>(b_arr));
for (const auto i : c10::irange(half_size)) {
for (int64_t i = 0; i < half_size; i++) {
buffer1[i] = a_arr[i * 2];
buffer1[half_size + i] = b_arr[i * 2];
buffer2[i] = a_arr[i * 2 + 1];
@ -932,7 +931,7 @@ interleave2(const Vectorized<T>& a, const Vectorized<T>& b) {
T buffer2[size];
a.store(static_cast<void*>(a_arr));
b.store(static_cast<void*>(b_arr));
for (const auto i : c10::irange(half_size)) {
for (int64_t i = 0; i < half_size; i++) {
buffer1[i * 2] = a_arr[i];
buffer1[i * 2 + 1] = b_arr[i];
buffer2[i * 2] = a_arr[half_size + i];
@ -947,8 +946,7 @@ inline void convert(const src_T *src, dst_T *dst, int64_t n) {
#ifndef _MSC_VER
# pragma unroll
#endif
for (const auto i : c10::irange(n)) {
(void)i; //Suppress unused variable warning
for (int64_t i = 0; i < n; i++) {
*dst = c10::static_cast_with_inter_type<dst_T, src_T>::apply(*src);
src++;
dst++;
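
The gather fallback above performs one scalar load per lane at base_addr[index * scale / sizeof(T)]. A standalone sketch with scale fixed to sizeof(T), so indices address whole elements:

#include <array>
#include <cstddef>
#include <cstdint>

template <typename T, std::size_t N>
std::array<T, N> gather_lanes(const T* base_addr, const std::array<std::int64_t, N>& index) {
  std::array<T, N> out{};
  for (std::size_t i = 0; i < N; ++i) {
    out[i] = base_addr[index[i]];       // one scalar load per lane
  }
  return out;
}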

View File

@ -4,7 +4,6 @@
#include <ATen/cuda/CUDABlas.h>
#include <ATen/cuda/Exceptions.h>
#include <c10/util/irange.h>
#define CUDABLAS_POSINT_CHECK(FD, X) \
TORCH_CHECK( \
@ -296,7 +295,7 @@ void bgemm<at::Half>(CUDABLAS_BGEMM_ARGTYPES(at::Half)) {
c, CUDA_R_16F, ldc, stridec,
num_batches, CUDA_R_32F, CUBLAS_GEMM_DEFAULT_TENSOR_OP));
} else {
for (const auto i : c10::irange(num_batches)) {
for (int64_t i = 0; i < num_batches; ++i) {
at::cuda::blas::gemm<at::Half>(
transa, transb,
m, n, k,

View File

@ -1,7 +1,6 @@
#include <ATen/cudnn/Descriptors.h>
#include <ATen/ATen.h>
#include <c10/util/irange.h>
#include <iostream>
#include <sstream>
@ -48,11 +47,11 @@ void TensorDescriptor::set(cudnnDataType_t datatype, IntArrayRef t_sizes, IntArr
#undef STR
int size[CUDNN_DIM_MAX];
int stride[CUDNN_DIM_MAX];
for (const auto i : c10::irange(dim)) {
for (size_t i = 0; i < dim; ++i) {
size[i] = static_cast<int>(t_sizes[i]);
stride[i] = static_cast<int>(t_strides[i]);
}
for (const auto i : c10::irange(dim, pad)) {
for (size_t i = dim; i < pad; ++i) {
size[i] = 1;
stride[i] = 1;
}
@ -127,10 +126,10 @@ void FilterDescriptor::set(const at::Tensor &t, const at::MemoryFormat memory_fo
"cuDNN filters (a.k.a. weights) must be contiguous in desired memory_format");
int size[CUDNN_DIM_MAX];
for (const auto i : c10::irange(dim)) {
for (int i = 0; i < dim; ++i) {
size[i] = (int) t.size(i);
}
for (const auto i : c10::irange(dim, pad)) {
for (int i = dim; i < pad; ++i) {
size[i] = (int) 1;
}
dim = std::max(dim, pad);
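
Both descriptor hunks above copy the tensor's real sizes and strides and then pad with size 1 / stride 1 up to the requested minimum rank. A standalone sketch of that fill step, using vectors in place of the fixed CUDNN_DIM_MAX arrays:

#include <algorithm>
#include <cstdint>
#include <vector>

void fill_padded(std::vector<int>& size, std::vector<int>& stride,
                 const std::vector<std::int64_t>& t_sizes,
                 const std::vector<std::int64_t>& t_strides, int pad) {
  const int dim = static_cast<int>(t_sizes.size());
  const int total = std::max(dim, pad);
  size.assign(total, 1);                // padded dimensions behave as singleton axes
  stride.assign(total, 1);
  for (int i = 0; i < dim; ++i) {
    size[i] = static_cast<int>(t_sizes[i]);
    stride[i] = static_cast<int>(t_strides[i]);
  }
}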

View File

@ -1,6 +1,5 @@
#include <ATen/miopen/Descriptors.h>
#include <ATen/ATen.h>
#include <c10/util/irange.h>
#include <iostream>
@ -40,11 +39,11 @@ void TensorDescriptor::set(miopenDataType_t datatype, IntArrayRef t_sizes, IntAr
#undef STR
int size[MIOPEN_DIM_MAX];
int stride[MIOPEN_DIM_MAX];
for (const auto i : c10::irange(dim)) {
for (size_t i = 0; i < dim; ++i) {
size[i] = static_cast<int>(t_sizes[i]);
stride[i] = static_cast<int>(t_strides[i]);
}
for (const auto i : c10::irange(dim, pad)) {
for (size_t i = dim; i < pad; ++i) {
size[i] = 1;
stride[i] = 1;
}
@ -104,10 +103,10 @@ void FilterDescriptor::set(const at::Tensor &t, const at::MemoryFormat memory_fo
int size[MIOPEN_DIM_MAX];
int stride[MIOPEN_DIM_MAX];
for (const auto i : c10::irange(dim)) {
for (int i = 0; i < dim; ++i) {
size[i] = (int) t.size(i);
}
for (const auto i : c10::irange(dim, pad)) {
for (int i = dim; i < pad; ++i) {
size[i] = (int) 1;
}

View File

@ -500,7 +500,7 @@ inline void _rrelu_with_noise_train(
scalar_t* noise_data = noise.data_ptr<scalar_t>();
auto gen = at::get_generator_or_default<CPUGeneratorImpl>(generator, detail::getDefaultCPUGenerator());
std::lock_guard<std::mutex> lock(gen->mutex_);
for (const auto i : c10::irange(input.numel())) {
for (int64_t i = 0; i < input.numel(); i++) {
if (input_data[i] <= 0) {
at::uniform_real_distribution<double> uniform(lower, upper);
const scalar_t r = (scalar_t)uniform(gen);
@ -610,7 +610,7 @@ void inline prelu_cpu_kernel_share_weights(
auto weight_val = weight.data_ptr<scalar_t>()[0];
at::parallel_for(0, input_numel, 1000, [&](int64_t start, int64_t end) {
for (const auto i : c10::irange(start, end)) {
for (auto i = start; i < end; i++) {
scalar_t input_data_val = input_data[i];
// to allow for compiler optimization, here splitting into two lines:
scalar_t r = (input_data_val > 0) ? scalar_t(1) : weight_val;
@ -725,7 +725,7 @@ void inline prelu_cpu_backward_kernel_share_weights(
scalar_t sum = at::parallel_reduce(0, input_numel, 1000, scalar_t(0),
[&](int64_t start, int64_t end, scalar_t ident) -> scalar_t {
scalar_t partial_sum = ident;
for (const auto i : c10::irange(start, end)) {
for (auto i = start; i < end; i++) {
scalar_t input_data_val = input_data[i];
scalar_t grad_out_data_val = grad_out_data[i];
// to allow for compiler optimization, here splitting into two lines:
@ -839,7 +839,7 @@ std::tuple<Tensor, Tensor> prelu_backward_cpu(const Tensor& grad_out_, const Ten
std::vector<int64_t> reduce_dims;
reduce_dims.push_back(0);
if (dims > 2) {
for (const auto i : c10::irange(2, dims))reduce_dims.push_back(i);
for(int64_t i = 2; i < dims; i++) reduce_dims.push_back(i);
}
weight_grad = weight_grad_collector.sum(reduce_dims);
}
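
The shared-weight PReLU kernels above split the element-wise factor into two lines so the compiler can optimize the branch. A standalone sketch assuming the standard PReLU definition (forward y = x for x > 0 and y = w * x otherwise, so the weight gradient accumulates grad_out * x only on the non-positive side):

float prelu_forward_one(float x, float w) {
  const float r = (x > 0.f) ? 1.f : w;     // same two-line split as the kernel above
  return r * x;
}

float prelu_weight_grad_one(float x, float grad_out) {
  const float r = (x > 0.f) ? 0.f : x;     // dy/dw is x only where x <= 0
  return r * grad_out;
}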

View File

@ -2,7 +2,6 @@
#include <ATen/NativeFunctions.h>
#include <ATen/native/AdaptivePooling.h>
#include <ATen/native/xnnpack/Engine.h>
#include <c10/util/irange.h>
namespace at {
@ -17,7 +16,7 @@ namespace {
{
TORCH_CHECK(output_size.size() == 2, "adaptive_avg_pool2d: output_size must be 2");
int64_t ndim = input.ndimension();
for (const auto i : c10::irange(1, ndim)) {
for (int64_t i = 1; i < ndim; i++) {
TORCH_CHECK(input.size(i) > 0,
"adaptive_avg_pool2d(): Expected input to have non-zero size for non-batch dimensions, "
"but input has sizes ", input.sizes(), " with dimension ", i, " being "
@ -53,7 +52,7 @@ namespace {
const Tensor& input)
{
int64_t ndim = grad_output.ndimension();
for (const auto i : c10::irange(1, ndim)) {
for (int64_t i = 1; i < ndim; i++) {
TORCH_CHECK(grad_output.size(i) > 0,
"adaptive_avg_pool2d_backward(): Expected grad_output to have non-zero size for non-batch dimensions, "
"but grad_output has sizes ", grad_output.sizes(), " with dimension ", i, " being "

View File

@ -1,7 +1,6 @@
#include <ATen/ATen.h>
#include <ATen/NativeFunctions.h>
#include <ATen/Parallel.h>
#include <c10/util/irange.h>
namespace at {
namespace native {
@ -34,19 +33,19 @@ static void adaptive_avg_pool3d_out_frame(
int64_t istrideH,
int64_t istrideW) {
at::parallel_for(0, sizeD, 1, [&](int64_t start, int64_t end) {
for (const auto d : c10::irange(start, end)) {
for (int64_t d = start; d < end; d++) {
/* loop over output */
for (const auto ot : c10::irange(osizeT)) {
for (int64_t ot = 0; ot < osizeT; ot++) {
int istartT = start_index(ot, osizeT, isizeT);
int iendT = end_index(ot, osizeT, isizeT);
int kT = iendT - istartT;
for (const auto oh : c10::irange(osizeH)) {
for (int64_t oh = 0; oh < osizeH; oh++) {
int istartH = start_index(oh, osizeH, isizeH);
int iendH = end_index(oh, osizeH, isizeH);
int kH = iendH - istartH;
for (const auto ow : c10::irange(osizeW)) {
for (int64_t ow = 0; ow < osizeW; ow++) {
int istartW = start_index(ow, osizeW, isizeW);
int iendW = end_index(ow, osizeW, isizeW);
int kW = iendW - istartW;
@ -59,9 +58,9 @@ static void adaptive_avg_pool3d_out_frame(
/* compute local average: */
scalar_t sum = 0;
for (const auto it : c10::irange(kT)) {
for (const auto ih : c10::irange(kH)) {
for (const auto iw : c10::irange(kW)) {
for (int it = 0; it < kT; it++) {
for (int ih = 0; ih < kH; ih++) {
for (int iw = 0; iw < kW; iw++) {
scalar_t val =
*(ip + it * istrideT + ih * istrideH + iw * istrideW);
sum += val;
@ -84,7 +83,7 @@ void adaptive_avg_pool3d_out_cpu_template(
IntArrayRef output_size) {
TORCH_CHECK(output_size.size() == 3, "adaptive_avg_pool3d: output_size must be 3");
for (const auto i : c10::irange(1, input.ndimension())) {
for (int64_t i = 1; i < input.ndimension(); i++) {
TORCH_CHECK(
input.size(i) > 0,
"adaptive_avg_pool3d(): Expected input to have non-zero size for non-batch dimensions, "
@ -149,7 +148,7 @@ void adaptive_avg_pool3d_out_cpu_template(
auto input_data = input.data_ptr<scalar_t>();
auto output_data = output.data_ptr<scalar_t>();
at::parallel_for(0, n, 1, [&](int64_t start, int64_t end) {
for (const auto b : c10::irange(start, end)) {
for (int64_t b = start; b < end; ++b) {
adaptive_avg_pool3d_out_frame<scalar_t>(
input_data + b * input.stride(0),
output_data + b * sizeD * osizeT * osizeH * osizeW,
@ -182,22 +181,22 @@ static void adaptive_avg_pool3d_backward_out_frame(
int64_t osizeH,
int64_t osizeW) {
at::parallel_for(0, sizeD, 1, [&](int64_t start, int64_t end) {
for (const auto d : c10::irange(start, end)) {
for (int64_t d = start; d < end; d++) {
scalar_t* gradInput_p_d = gradInput_p + d * isizeT * isizeW * isizeH;
scalar_t* gradOutput_p_d = gradOutput_p + d * osizeT * osizeW * osizeH;
/* calculate average */
for (const auto ot : c10::irange(osizeT)) {
for (int64_t ot = 0; ot < osizeT; ot++) {
int istartT = start_index(ot, osizeT, isizeT);
int iendT = end_index(ot, osizeT, isizeT);
int kT = iendT - istartT;
for (const auto oh : c10::irange(osizeH)) {
for (int64_t oh = 0; oh < osizeH; oh++) {
int istartH = start_index(oh, osizeH, isizeH);
int iendH = end_index(oh, osizeH, isizeH);
int kH = iendH - istartH;
for (const auto ow : c10::irange(osizeW)) {
for (int64_t ow = 0; ow < osizeW; ow++) {
int istartW = start_index(ow, osizeW, isizeW);
int iendW = end_index(ow, osizeW, isizeW);
int kW = iendW - istartW;
@ -206,9 +205,9 @@ static void adaptive_avg_pool3d_backward_out_frame(
gradOutput_p_d[ot * osizeH * osizeW + oh * osizeW + ow] / kT /
kH / kW;
for (const auto it : c10::irange(istartT, iendT)) {
for (const auto ih : c10::irange(istartH, iendH)) {
for (const auto iw : c10::irange(istartW, iendW)) {
for (int it = istartT; it < iendT; it++) {
for (int ih = istartH; ih < iendH; ih++) {
for (int iw = istartW; iw < iendW; iw++) {
/* update gradient */
gradInput_p_d[it * isizeH * isizeW + ih * isizeW + iw] +=
grad_delta;
@ -266,7 +265,7 @@ Tensor& adaptive_avg_pool3d_backward_out_cpu_template(
scalar_t* gradInput_data = gradInput.data_ptr<scalar_t>();
scalar_t* gradOutput_data = gradOutput.data_ptr<scalar_t>();
at::parallel_for(0, n, 1, [&](int64_t start, int64_t end) {
for (const auto b : c10::irange(start, end)) {
for (int64_t b = start; b < end; b++) {
adaptive_avg_pool3d_backward_out_frame<scalar_t>(
gradInput_data + b * sizeD * isizeT * isizeH * isizeW,
gradOutput_data + b * sizeD * osizeT * osizeH * osizeW,
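
The adaptive pooling loops above average each output cell over an input window [start_index, end_index). A standalone sketch assuming the usual floor/ceil definitions of those helpers:

#include <cstdint>

std::int64_t start_idx(std::int64_t o, std::int64_t osize, std::int64_t isize) {
  return (o * isize) / osize;                     // floor(o * isize / osize)
}

std::int64_t end_idx(std::int64_t o, std::int64_t osize, std::int64_t isize) {
  return ((o + 1) * isize + osize - 1) / osize;   // ceil((o + 1) * isize / osize)
}

// Example: isize = 10, osize = 4 gives windows [0,3), [2,5), [5,8), [7,10);
// kT, kH and kW in the code above are simply end - start per axis.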

View File

@ -1,7 +1,6 @@
#include <ATen/ATen.h>
#include <ATen/NativeFunctions.h>
#include <ATen/native/AdaptivePooling.h>
#include <c10/util/irange.h>
namespace at {
@ -11,7 +10,7 @@ TORCH_META_FUNC(adaptive_max_pool2d) (const Tensor& input, IntArrayRef output_si
TORCH_CHECK(ndim == 3 || ndim == 4,
"adaptive_max_pool2d(): Expected 3D or 4D tensor, but got: ",
input.sizes());
for (const auto i : c10::irange(1, ndim)) {
for (int64_t i = 1; i < ndim; i++) {
TORCH_CHECK(input.size(i) > 0,
"adaptive_max_pool2d(): Expected input to have non-zero size for non-batch dimensions, "
"but input has sizes ", input.sizes(), " with dimension ", i,
@ -52,7 +51,7 @@ TORCH_META_FUNC(adaptive_max_pool2d_backward)
int64_t ndim = grad_output.ndimension();
TORCH_CHECK(ndim == 3 || ndim == 4,
"adaptive_max_pooling2d_backward(): Expected 3D or 4D grad_output, but got: ", grad_output.sizes());
for (const auto i : c10::irange(1, ndim)) {
for (int64_t i = 1; i < ndim; i++) {
TORCH_CHECK(grad_output.size(i) > 0,
"adaptive_max_pooling2d_backward(): Expected grad_output to have non-zero size for non-batch dimensions, "
"but grad_output has sizes ", grad_output.sizes(), " with dimension ", i,

View File

@ -1,7 +1,6 @@
#include <ATen/ATen.h>
#include <ATen/NativeFunctions.h>
#include <ATen/Parallel.h>
#include <c10/util/irange.h>
#include <tuple>
@ -12,7 +11,7 @@ TORCH_META_FUNC(adaptive_max_pool3d) (const Tensor& input, IntArrayRef output_si
TORCH_CHECK(
ndim == 4 || ndim == 5,
"adaptive_max_pool3d(): Expected 4D or 5D tensor, but got: ", input.sizes());
for (const auto i : c10::irange(1, ndim)) {
for (int64_t i = 1; i < ndim; i++) {
TORCH_CHECK(
input.size(i) > 0,
"adaptive_max_pool3d(): Expected input to have non-zero size for non-batch dimensions, "
@ -97,7 +96,8 @@ static void adaptive_max_pool3d_single_out_frame(
int64_t istrideW)
{
at::parallel_for(0, sizeD, 0, [&](int64_t start, int64_t end) {
for (const auto d : c10::irange(start, end)) {
for (auto d = start; d < end; d++)
{
/* loop over output */
int64_t ot, oh, ow;
for(ot = 0; ot < osizeT; ot++)
@ -176,7 +176,8 @@ static void adaptive_max_pool3d_out_frame(
int64_t istrideW)
{
at::parallel_for(0, sizeB, 0, [&](int64_t start, int64_t end) {
for (const auto b : c10::irange(start, end)) {
for (auto b = start; b < end; b++)
{
adaptive_max_pool3d_single_out_frame<scalar_t>(input_data+b*istrideB, output_data+b*sizeD*osizeT*osizeH*osizeW,
indices_data+b*sizeD*osizeT*osizeH*osizeW,
sizeD,
@ -202,7 +203,8 @@ static void adaptive_max_pool3d_backward_single_out_frame(
int64_t osizeW)
{
at::parallel_for(0, sizeD, 0, [&](int64_t start, int64_t end) {
for (const auto d : c10::irange(start, end)) {
for (auto d = start; d < end; d++)
{
scalar_t *gradInput_p_d = gradInput_p + d*isizeT*isizeH*isizeW;
scalar_t *gradOutput_p_d = gradOutput_p + d*osizeT*osizeH*osizeW;
int64_t *ind_p_d = ind_p + d*osizeT*osizeH*osizeW;
@ -242,7 +244,8 @@ static void adaptive_max_pool3d_backward_out_frame(
int64_t osizeW)
{
at::parallel_for(0, sizeB, 0, [&](int64_t start, int64_t end) {
for (const auto b : c10::irange(start, end)) {
for (auto b = start; b < end; b++)
{
adaptive_max_pool3d_backward_single_out_frame<scalar_t>(gradInput_data+b*sizeD*isizeT*isizeH*isizeW, gradOutput_data+b*sizeD*osizeT*osizeH*osizeW,
indices_data+b*sizeD*osizeT*osizeH*osizeW,
sizeD,

View File

@ -2,7 +2,6 @@
#include <ATen/Parallel.h>
#include <ATen/NativeFunctions.h>
#include <ATen/native/Pool.h>
#include <c10/util/irange.h>
#include <tuple>
@ -170,7 +169,8 @@ static void avg_pool3d_out_frame(
c10::optional<int64_t> divisor_override)
{
at::parallel_for(0, nslices, 0, [&](int64_t start, int64_t end) {
for (const auto k : c10::irange(start, end)) {
for (auto k = start; k < end; k++)
{
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
int64_t i, j, ti;
@ -315,7 +315,7 @@ TORCH_IMPL_FUNC(avg_pool3d_out_cpu) (
scalar_t *output_data = output.data_ptr<scalar_t>();
at::parallel_for(0, nbatch, 0, [&](int64_t start, int64_t end) {
for (const auto p : c10::irange(start, end)) {
for (auto p = start; p < end; p++) {
avg_pool3d_out_frame(
input_data + p * istride, output_data + p * ostride, nslices,
itime, iwidth, iheight,
@ -358,7 +358,8 @@ static void avg_pool3d_backward_out_frame(
c10::optional<int64_t> divisor_override)
{
at::parallel_for(0, nslices, 0, [&](int64_t start, int64_t end) {
for (const auto k : c10::irange(start, end)) {
for (auto k = start; k < end; k++)
{
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
int64_t i, j, ti;
@ -499,7 +500,8 @@ TORCH_IMPL_FUNC(avg_pool3d_backward_out_cpu) (
scalar_t *gradOutput_data = gradOutput.data_ptr<scalar_t>();
at::parallel_for(0, nbatch, 0, [&](int64_t start, int64_t end) {
for (const auto p : c10::irange(start, end)) {
for (auto p = start; p < end; p++)
{
avg_pool3d_backward_out_frame(
gradInput_data + p * istride, gradOutput_data + p * ostride, nslices,
itime, iwidth, iheight,

View File

@ -63,7 +63,7 @@ void apply_reflect_conj_tri_single(scalar_t* self, int64_t n, int64_t stride, bo
std::function<void(int64_t, int64_t)> loop = [](int64_t, int64_t){};
if (upper) {
loop = [&](int64_t start, int64_t end) {
for (const auto i : c10::irange(start, end)) {
for (int64_t i = start; i < end; i++) {
for (int64_t j = i + 1; j < n; j++) {
self[i * stride + j] = conj_impl(self[j * stride + i]);
}
@ -71,8 +71,8 @@ void apply_reflect_conj_tri_single(scalar_t* self, int64_t n, int64_t stride, bo
};
} else {
loop = [&](int64_t start, int64_t end) {
for (const auto i : c10::irange(start, end)) {
for (const auto j : c10::irange(i)) {
for (int64_t i = start; i < end; i++) {
for (int64_t j = 0; j < i; j++) {
self[i * stride + j] = conj_impl(self[j * stride + i]);
}
}
@ -106,7 +106,7 @@ void apply_cholesky_inverse(Tensor& input, Tensor& infos, bool upper) {
auto n = input.size(-2);
auto lda = std::max<int64_t>(1, n);
for (const auto i : c10::irange(batch_size)) {
for (int64_t i = 0; i < batch_size; i++) {
scalar_t* input_working_ptr = &input_data[i * input_matrix_stride];
int* info_working_ptr = &infos_data[i];
lapackCholeskyInverse<scalar_t>(uplo, n, input_working_ptr, lda, info_working_ptr);
@ -501,7 +501,7 @@ inline void apply_orgqr(Tensor& self, const Tensor& tau) {
lwork = std::max<int>(1, real_impl<scalar_t, value_t>(wkopt));
Tensor work = at::empty({lwork}, self.options());
for (const auto i : c10::irange(batch_size)) {
for (int64_t i = 0; i < batch_size; i++) {
scalar_t* self_working_ptr = &self_data[i * self_matrix_stride];
scalar_t* tau_working_ptr = &tau_data[i * tau_stride];

View File

@ -2,7 +2,6 @@
#include <algorithm>
#include <ATen/ATen.h>
#include <ATen/Config.h>
#include <c10/util/irange.h>
#if AT_BUILD_WITH_BLAS()
extern "C" double ddot_(int *n, double *x, int *incx, double *y, int *incy);
@ -152,7 +151,7 @@ inline void scal(int64_t n, scalar_t a, scalar_t *x, int64_t incx)
blas_impl::scal_fast_path<scalar_t>(&i_n, &a, x, &i_incx);
return;
}
for (const auto i : c10::irange(n)) {
for (int64_t i = 0; i < n; i++) {
if (a == scalar_t(0)) {
x[i * incx] = 0;
} else {
@ -177,10 +176,11 @@ void gemv(char trans, int64_t m, int64_t n, scalar_t alpha, scalar_t *a, int64_t
}
if ((trans == 'T') || (trans == 't')) {
for (const auto i : c10::irange(n)) {
for (int64_t i = 0; i < n; i++)
{
scalar_t sum = 0;
scalar_t *row_ = a + lda * i;
for (const auto j : c10::irange(m)) {
for (int64_t j = 0; j < m; j++) {
sum += x[j * incx] * row_[j];
}
if (beta == scalar_t(0)) {
@ -192,10 +192,10 @@ void gemv(char trans, int64_t m, int64_t n, scalar_t alpha, scalar_t *a, int64_t
} else {
if (beta != scalar_t(1) && beta != scalar_t(0)) scal<scalar_t>(m, beta, y, incy);
for (const auto j : c10::irange(n)) {
for (int64_t j = 0; j < n; j++) {
scalar_t *column_ = a + lda * j;
scalar_t z = alpha * x[j * incx];
for (const auto i : c10::irange(m)) {
for (int64_t i = 0; i < m; i++) {
//output values are ignored if beta is 0, and set to 0, nans and infs are not propagated
if (j==0 && beta==scalar_t(0)) {
y[i * incy] = scalar_t(0);

View File

@ -2,7 +2,6 @@
#include <ATen/Parallel.h>
#include <ATen/native/BucketizationUtils.h>
#include <ATen/native/Resize.h>
#include <c10/util/irange.h>
/* Implement a TF like searchsorted and a bucketize function running on cpu
*
@ -59,7 +58,7 @@ void searchsorted_cpu_contiguous(Tensor& result, const Tensor& input, const Tens
bool is_1d_boundaries = boundaries.dim() == 1;
at::parallel_for(0, numel_in, SEARCHSORTED_GRAIN_SIZE, [&](int64_t start, int64_t end) {
for (const auto i : c10::irange(start, end)) {
for (int64_t i = start; i < end; ++i) {
// If boundaries tensor is 1d, we always search the entire boundary tensor
int64_t start_bd = is_1d_boundaries ? 0 : i / idim_in * idim_bd;
const input_t *data_bd_start = &data_bd[start_bd];

View File

@ -5,7 +5,6 @@
#include <ATen/native/im2col.h>
#include <ATen/native/im2col_shape_check.h>
#include <c10/util/irange.h>
// Note [im2col/col2im output padding]
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -151,7 +150,7 @@ static void col2im_out_cpu_template(
stride_width +
1;
for (const auto elt : c10::irange(batch_size)) {
for (int64_t elt = 0; elt < batch_size; elt++) {
input_n = input.select(0, elt);
output_n = output.select(0, elt);

View File

@ -24,7 +24,7 @@ inline Tensor view_tensor(
inline DimVector computeStrideForViewAsReal(IntArrayRef oldstride) {
DimVector res(oldstride.size() + 1);
for (const auto i : c10::irange(oldstride.size())) {
for(size_t i = 0; i < oldstride.size(); i++) {
res[i] = oldstride[i] * 2;
}
res.back() = 1;
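
computeStrideForViewAsReal above doubles every stride (two real values per complex element) and appends a final stride of 1 for the new size-2 axis. A standalone sketch with a worked example:

#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<std::int64_t> stride_for_view_as_real(const std::vector<std::int64_t>& oldstride) {
  std::vector<std::int64_t> res(oldstride.size() + 1);
  for (std::size_t i = 0; i < oldstride.size(); ++i) {
    res[i] = oldstride[i] * 2;          // each complex element spans two reals
  }
  res.back() = 1;                       // the trailing (real, imag) axis
  return res;
}

// e.g. a contiguous 3x4 complex tensor with strides {4, 1} becomes a
// 3x4x2 real view with strides {8, 2, 1}.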

View File

@ -47,7 +47,7 @@ Tensor constant_pad_nd(const Tensor& self, IntArrayRef pad, const Scalar& value)
new_shape.emplace_back(input_sizes[i]);
}
for (const auto i : c10::irange((size_t)l_pad)) {
for (size_t i = 0; i < (size_t)l_pad; i++) {
auto pad_idx = pad.size() - ((i + 1) * 2);
auto new_dim = input_sizes[l_diff + i] + pad[pad_idx] + pad[pad_idx + 1];
TORCH_CHECK(new_dim > 0, "The input size ", input_sizes[l_diff + i], ", plus negative padding ",

View File

@ -1,7 +1,6 @@
#pragma once
#include <ATen/detail/CUDAHooksInterface.h>
#include <c10/util/env.h>
#include <c10/util/irange.h>
namespace at { namespace native {
@ -36,7 +35,7 @@ static inline std::vector<int64_t> conv_output_size(
std::vector<int64_t> output_size(dim);
output_size[0] = input_size[input_batch_size_dim];
output_size[1] = weight_size[weight_output_channels_dim];
for (const auto d : c10::irange(2, dim)) {
for (size_t d = 2; d < dim; ++d) {
auto dilation_ = has_dilation ? dilation[d - 2] : 1;
auto kernel = dilation_ * (weight_size[d] - 1) + 1;
output_size[d] = (input_size[d] + (2 * padding[d - 2]) - kernel) / stride[d - 2] + 1;
@ -54,7 +53,7 @@ static inline std::vector<int64_t> conv_input_size(
std::vector<int64_t> input_size(dim);
input_size[0] = output_size[output_batch_size_dim];
input_size[1] = weight_size[weight_input_channels_dim] * groups;
for (const auto d : c10::irange(2, dim)) {
for (size_t d = 2; d < dim; ++d) {
int kernel = dilation[d - 2] * (weight_size[d] - 1) + 1;
input_size[d] = (output_size[d] - 1) * stride[d - 2] - (2 * padding[d - 2]) +
kernel + output_padding[d - 2];
@ -70,7 +69,7 @@ static inline std::vector<int64_t> conv_weight_size(
std::vector<int64_t> weight_size(dim);
weight_size[0] = output_size[1];
weight_size[1] = input_size[1] / groups;
for (const auto d : c10::irange(2, dim)) {
for (size_t d = 2; d < dim; ++d) {
int kernel = input_size[d] - (output_size[d] - 1) * stride[d - 2]
+ 2 * padding[d - 2] - output_padding[d - 2];
weight_size[d] = (kernel - 1) / dilation[d - 2] + 1;
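
conv_output_size above applies the standard convolution shape formula per spatial dimension: the effective kernel is dilation * (k - 1) + 1, and the output counts the strided positions that fit in the padded input. A standalone sketch with a worked number:

#include <cstdint>

std::int64_t conv_out_dim(std::int64_t in, std::int64_t k, std::int64_t pad,
                          std::int64_t stride, std::int64_t dilation) {
  const std::int64_t kernel = dilation * (k - 1) + 1;   // effective receptive field
  return (in + 2 * pad - kernel) / stride + 1;
}

// Example: in = 224, k = 3, pad = 1, stride = 2, dilation = 1 gives 112.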

View File

@ -975,7 +975,7 @@ at::Tensor _convolution(
} else {
std::vector<Tensor> outputs(params.groups);
input = input.contiguous();
for (const auto g : c10::irange(params.groups)) {
for (int g = 0; g < params.groups; ++g) {
auto input_g = subtensor(input, 1, params.groups, g);
auto weight_g = subtensor(weight, 0, params.groups, g);
auto bias_g = subtensor(bias, 0, params.groups, g);
@ -1212,7 +1212,7 @@ std::tuple<Tensor,Tensor,Tensor> _convolution_double_backward( const c10::option
}
} else {
std::vector<Tensor> gWt_list(groups);
for (const auto g : c10::irange(groups)) {
for (int g = 0; g < groups; ++g) {
auto ggIt_g = subvariable(ggIt, 0, groups, g);
auto gOt_g = subvariable(gOt, 0, groups, g);
if (gOt_g.is_cuda()) {
@ -1239,7 +1239,7 @@ std::tuple<Tensor,Tensor,Tensor> _convolution_double_backward( const c10::option
// the ConvForward kernels don't support asymmetric padding.
auto gW_size = gW.sizes();
auto w_size = weight.sizes();
for (const auto i : c10::irange(2, gW_size.size())) {
for (size_t i = 2; i < gW_size.size(); ++i) {
if (gW_size[i] > w_size[i]) {
gW = gW.narrow(i, 0, w_size[i]);
gW_size = gW.sizes();
@ -1268,7 +1268,7 @@ std::tuple<Tensor,Tensor,Tensor> _convolution_double_backward( const c10::option
// rather than narrowing the computed gI
auto gI_size = gI.sizes();
auto i_size = input.sizes();
for (const auto i : c10::irange(2, gI_size.size())) {
for (size_t i = 2; i < gI_size.size(); ++i) {
if (gI_size[i] > i_size[i]) {
gI = gI.narrow(i, 0, i_size[i]);
gI_size = gI.sizes();
@ -1289,7 +1289,7 @@ std::tuple<Tensor,Tensor,Tensor> _convolution_double_backward( const c10::option
gi_conv_params.output_padding[1] = input_shape[0] - expected_input_shape;
}
} else {
for (const auto i : c10::irange(kernel_size.size())) {
for(size_t i = 0; i < kernel_size.size(); ++i) {
// Check if whole input has been used or not
auto expected_input_shape = (kernel_size[i] - 1) * gi_conv_params.dilation[i]
- 2 * gi_conv_params.padding[i]

View File

@ -7,7 +7,6 @@
#include <ATen/div_rtn.h>
#include <ATen/native/CPUBlas.h>
#include <ATen/native/Unfold2d.h>
#include <c10/util/irange.h>
namespace at {
namespace native {
@ -300,7 +299,7 @@ void slow_conv2d_backward_out_cpu_template(
at::parallel_for(0, batch_size, 0, [&](int64_t start, int64_t end) {
auto fgrad_input = std::make_unique<scalar_t[]>(
c10::multiply_integers(finput.sizes().slice(1)));
for (const auto t : c10::irange(start, end)) {
for (int64_t t = start; t < end; t++) {
auto grad_input_t = grad_input_a[t];
auto grad_output_t = grad_output_a[t];
slow_conv2d_backward_update_grad_input_frame(
@ -479,7 +478,7 @@ std::tuple<Tensor&, Tensor&> slow_conv2d_forward_out_cpu(
auto weight_2d_a = weight_2d.accessor<scalar_t, 2>();
at::parallel_for(0, batch_size, 0, [&](int64_t start, int64_t end) {
for (const auto t : c10::irange(start, end)) {
for (int64_t t = start; t < end; t++) {
auto input_t = input_a[t];
auto output_t = output_a[t];
auto finput_t = finput_a[t];

View File

@ -6,7 +6,6 @@
#include <ATen/div_rtn.h>
#include <ATen/native/CPUBlas.h>
#include <ATen/native/Unfold3d.h>
#include <c10/util/irange.h>
constexpr int64_t CONV3D_GRAIN_SALT = 20;
@ -359,7 +358,7 @@ void slow_conv3d_backward_out_cpu_template(
auto fgrad_input_a = fgrad_input.accessor<scalar_t, 3>();
auto weight_2d_a = weight2d.accessor<scalar_t, 2>();
for (const auto t : c10::irange(start, end)) {
for (int64_t t = start; t < end; t++) {
auto grad_input_t = grad_input_a[t];
auto grad_output_t = grad_output_a[t];
auto fgrad_input_t = fgrad_input_a[t];
@ -463,7 +462,7 @@ static void slow_conv3d_backward_parameters_out_cpu_template(
auto grad_weight_2d_a = grad_weight_2d.accessor<scalar_t, 2>();
auto grad_output_a = grad_output_contiguous.accessor<scalar_t, 5>();
auto finput_a = finput.accessor<scalar_t, 3>();
for (const auto t : c10::irange(batch_size)) {
for (int64_t t = 0; t < batch_size; t++) {
auto grad_output_t = grad_output_a[t];
auto finput_t = finput_a[t];
slow_conv3d_backward_weight_frame(
@ -565,7 +564,7 @@ std::tuple<Tensor&, Tensor&, Tensor&> slow_conv3d_forward_out_cpu(const Tensor&
at::parallel_for(
0, batch_size, CONV3D_GRAIN_SALT, [&](int64_t start, int64_t end) {
for (const auto t : c10::irange(start, end)) {
for (int64_t t = start; t < end; t++) {
auto input_t = input_a[t];
auto output_t = output_a[t];
auto finput_t = finput_a[t];

View File

@ -1,6 +1,5 @@
#include <ATen/ATen.h>
#include <ATen/NativeFunctions.h>
#include <c10/util/irange.h>
#include <tuple>
namespace at {
@ -40,7 +39,7 @@ Tensor conv_tbc(const Tensor& self, const Tensor& weight, const Tensor& bias, in
weight_size[2],
}, self.options());
output.copy_(bias.expand(output.sizes()));
for (const auto k : c10::irange(kw)) {
for (int k = 0; k < kw; k++) {
int iShift = std::max(0, static_cast<int>(k - real_pad));
int oShift = std::max(0, static_cast<int>(real_pad - k));
// NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions)

View File

@ -12,7 +12,6 @@
#include <ATen/MemoryOverlap.h>
#include <ATen/NamedTensorUtils.h>
#include <ATen/Parallel.h>
#include <c10/util/irange.h>
#include <torch/library.h>
#ifdef USE_FBGEMM
@ -66,16 +65,16 @@ void copy_same_type_transpose_(Tensor& self, const Tensor& src) {
int nc = std::min(NC - C, BLOCK_SZ);
// 1. copy columns from src to buf
for (const auto c : c10::irange(nc)) {
for (int c = 0; c < nc; c++) {
memcpy(bp + c * BLOCK_SZ, spo + c * NR, nr * sizeof(scalar_t));
}
// 2. transpose buf in place
int rc_max = std::max(nr, nc);
int rc_min = std::min(nr, nc);
for (const auto r : c10::irange(rc_max)) {
for (int r = 0; r < rc_max; r++) {
int end = std::min(r, rc_min);
for (const auto c : c10::irange(end)) {
for (int c = 0; c < end; c++) {
scalar_t tmp = bp[r + BLOCK_SZ * c];
bp[r + BLOCK_SZ * c] = bp[r * BLOCK_SZ + c];
bp[r * BLOCK_SZ + c] = tmp;
@ -83,7 +82,7 @@ void copy_same_type_transpose_(Tensor& self, const Tensor& src) {
}
// 3. copy rows from buf to dst
for (const auto r : c10::irange(nr)) {
for (int r = 0; r < nr; r++) {
memcpy(rpo + r * NC, bp + r * BLOCK_SZ, nc * sizeof(scalar_t));
}
}

View File

@ -3,7 +3,6 @@
#include <ATen/NativeFunctions.h>
#include <ATen/native/Cross.h>
#include <c10/util/irange.h>
namespace at { namespace native {
@ -31,7 +30,7 @@ Tensor & cross_out(const Tensor & input, const Tensor & other, const c10::option
int64_t dim = -1;
if(!dimension.has_value()) {
for (const auto i : c10::irange(input.dim())) {
for(int64_t i = 0; i < input.dim(); i++) {
if(input.size(i) == 3) {
dim = i;
break;

View File

@ -5,7 +5,6 @@
#include <ATen/div_rtn.h>
#include <ATen/ATen.h>
#include <c10/util/irange.h>
#define TORCH_CHECK_DIM_SIZE(T, DIM, DIM_SIZE, SIZE) \
TORCH_CHECK( \
@ -44,7 +43,7 @@ std::vector<int64_t> get_output_size(
IntArrayRef pad_size,
IntArrayRef dilation_size) {
std::vector<int64_t> sizes;
for (const auto index : c10::irange(dim)) {
for (int index = 0; index < dim; index++) {
sizes.push_back(
div_rtn<int64_t>(
input.size(index + input.dim() - dim) + 2 * pad_size[index] -

View File

@ -3,7 +3,6 @@
#include <ATen/NamedTensorUtils.h>
#include <ATen/NativeFunctions.h>
#include <ATen/native/Pool.h>
#include <c10/util/irange.h>
#include <tuple>
@ -38,7 +37,8 @@ static void max_pool3d_with_indices_single_out_frame(
int dilationH)
{
at::parallel_for(0, nslices, 0, [&](int64_t start, int64_t end) {
for (const auto k : c10::irange(start, end)) {
for (auto k = start; k < end; k++)
{
/* loop over output */
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
int64_t i, j, ti;
@ -120,7 +120,8 @@ static void max_pool3d_with_indices_out_frame(
int dilationT, int dilationW, int dilationH)
{
at::parallel_for(0, nbatch, 0, [&](int64_t start, int64_t end) {
for (const auto p : c10::irange(start, end)) {
for (auto p = start; p < end; p++)
{
max_pool3d_with_indices_single_out_frame(
input_data + p * istride,
output_data + p * ostride,
@ -284,7 +285,8 @@ static void max_pool3d_with_indices_backward_single_out_frame(
int dilationH)
{
at::parallel_for(0, nslices, 0, [&](int64_t start, int64_t end) {
for (const auto k : c10::irange(start, end)) {
for (auto k = start; k < end; k++)
{
scalar_t *gradInput_p_k = gradInput_p + k * itime * iwidth * iheight;
scalar_t *gradOutput_p_k = gradOutput_p + k * otime * owidth * oheight;
int64_t *indz_p_k = indz_p + k * otime * owidth * oheight;
@ -328,7 +330,8 @@ static void max_pool3d_with_indices_backward_out_frame(
int dilationT, int dilationW, int dilationH)
{
at::parallel_for(0, nbatch, 0, [&](int64_t start, int64_t end) {
for (const auto p : c10::irange(start, end)) {
for (auto p = start; p < end; p++)
{
max_pool3d_with_indices_backward_single_out_frame<scalar_t>(
gradInput_data + p * istride,
gradOutput_data + p * ostride,

View File

@ -1,7 +1,6 @@
#include <ATen/ATen.h>
#include <ATen/Dispatch.h>
#include <ATen/NamedTensorUtils.h>
#include <c10/util/irange.h>
namespace at { namespace native {
@ -17,10 +16,8 @@ Tensor make_feature_noise(const Tensor& input) {
sizes.reserve(input.dim());
sizes.push_back(input_sizes[0]);
sizes.push_back(input_sizes[1]);
for (const auto i : c10::irange(2, input.dim())) {
(void)i; //Suppress unused variable warning
for (int64_t i = 2; i < input.dim(); ++i)
sizes.push_back(1);
}
return at::empty(sizes, input.options());
}

View File

@ -123,7 +123,7 @@ Tensor embedding_dense_backward_cpu(
auto parallel_section = [&](index_t start, index_t end) {
TensorIterator iter(add_iter);
for (const auto i : c10::irange(numel)) {
for (int64_t i = 0; i < numel; i++) {
if (indices_data[i] != padding_idx) {
index_t k = indices_data[i];
if (k >= start && k < end) {
@ -167,7 +167,7 @@ Tensor & embedding_renorm_cpu_(
// Note that we cannot use at::parallel_for here because we perform operations on
// Tensor inside the loop. See github.com/pytorch/pytorch/issues/28370 for more details.
for (const auto i : c10::irange(num_indices)) {
for (auto i = 0; i < num_indices; i++) {
if (i > 0 && sorted_indices[i] == sorted_indices[i - 1]) {
continue;
}

View File

@ -107,7 +107,7 @@ index_select_add(const Tensor &select_indices,
auto output_stride0 = output.strides()[0];
auto output_stride1 = output.strides()[1];
for (const auto i : c10::irange(numel)) {
for (int64_t i = 0; i < numel; i++) {
// We can skip indices equal to padding_idx so they are not included in
// the reduction
if (select_indices_data[i] != padding_idx) {
@ -247,7 +247,7 @@ index_select_add(const Tensor &select_indices,
auto output_stride0 = output.strides()[0];
auto output_stride1 = output.strides()[1];
auto numel = add_indices.numel();
for (const auto i : c10::irange(numel)) {
for (int64_t i = 0; i < numel; i++) {
// We can skip indices equal to padding_idx so they are not included in
// the reduction
if (select_indices_data[i] != padding_idx) {
@ -302,14 +302,14 @@ index_select_scale_add(const Tensor &select_indices,
auto* scale_data = scale.data_ptr<data_t>();
auto scale_stride = scale.strides()[0];
for (const auto i : c10::irange(numel)) {
for (int64_t i = 0; i < numel; i++) {
// We can skip indices equal to padding_idx so they are not included in
// the reduction
if (select_indices_data[i] != padding_idx) {
auto* src_base = src_data + src_stride0 * select_indices_data[i];
auto* output_base = output_data + output_stride0 * add_indices_data[i];
auto scale = scale_data[i * scale_stride];
for (const auto j : c10::irange(ddim)) {
for (int64_t j = 0; j < ddim; j++) {
output_base[j * output_stride1] += src_base[j * src_stride1] * scale;
}
} else if (bag_size.defined()) {
@ -419,14 +419,14 @@ index_select_scale_add(const Tensor &select_indices,
auto numel = add_indices.numel();
for (const auto i : c10::irange(numel)) {
for (int64_t i = 0; i < numel; i++) {
// We can skip indices equal to padding_idx so they are not included in
// the reduction
if (select_indices_data[i] != padding_idx) {
auto* src_base = src_data + src_stride0 * select_indices_data[i];
auto* output_base = output_data + output_stride0 * add_indices_data[i];
auto scale = scale_data[i * scale_stride];
for (const auto j : c10::irange(ddim)) {
for (int64_t j = 0; j < ddim; j++) {
output_base[j * output_stride1] += src_base[j * src_stride1] * scale;
}
} else if (bag_size.defined()) {
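
The embedding-bag hunks share one inner kernel: walk an index array, skip entries equal to padding_idx, and accumulate a scaled source row into an output row through explicit strides. A standalone sketch of that accumulation; the dimensions, strides, and padding_idx value are invented for illustration.

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  const int64_t ddim = 3, padding_idx = 1;
  // Three source rows of width ddim, stored contiguously (stride0 = ddim, stride1 = 1).
  std::vector<float> src = {1, 1, 1,  2, 2, 2,  3, 3, 3};
  std::vector<float> output(2 * ddim, 0.f);        // two output bags
  std::vector<int64_t> select_indices = {0, 1, 2}; // row 1 is the padding row
  std::vector<int64_t> add_indices    = {0, 0, 1}; // which bag each row accumulates into
  std::vector<float>   scale          = {0.5f, 0.5f, 2.0f};

  const int64_t numel = static_cast<int64_t>(select_indices.size());
  for (int64_t i = 0; i < numel; i++) {
    // Indices equal to padding_idx are skipped so they never enter the reduction.
    if (select_indices[i] == padding_idx) continue;
    const float* src_base = src.data() + ddim * select_indices[i];
    float* output_base = output.data() + ddim * add_indices[i];
    for (int64_t j = 0; j < ddim; j++) {
      output_base[j] += src_base[j] * scale[i];
    }
  }
  std::cout << output[0] << " " << output[3] << "\n";  // 0.5 and 6
}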

View File

@ -6,7 +6,6 @@
#include <ATen/native/TensorIterator.h>
#include <ATen/Utils.h>
#include <c10/util/accumulate.h>
#include <c10/util/irange.h>
namespace at {
namespace native {
@ -64,7 +63,7 @@ Tensor& fill_diagonal_(Tensor& self, const Scalar& fill_value, bool wrap) {
if (nDims > 2) {
int64_t dim1 = height;
for (const auto i : c10::irange(1, nDims)) {
for (int64_t i = 1; i < nDims; i++) {
if (self.size(i) != dim1) {
AT_ERROR("all dimensions of input must be of equal length");
}
@ -77,7 +76,7 @@ Tensor& fill_diagonal_(Tensor& self, const Scalar& fill_value, bool wrap) {
int64_t size = std::min(height, width);
int64_t stride = 0;
for (const auto i : c10::irange(nDims)) {
for (int64_t i = 0; i < nDims; i++) {
stride += self.stride(i);
}
strides.push_back(stride);
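
fill_diagonal_ relies on the fact that in a strided layout the diagonal is reached by advancing the sum of all per-dimension strides at each step; for a contiguous rows x cols matrix that step is cols + 1. A small sketch of the idea with an arbitrary shape:

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  const int64_t rows = 3, cols = 4;
  std::vector<double> m(rows * cols, 0.0);   // row-major, strides {cols, 1}

  const int64_t size = std::min(rows, cols); // number of diagonal elements
  const int64_t step = cols + 1;             // sum of the strides of all dimensions
  for (int64_t i = 0; i < size; i++) {
    m[i * step] = 7.0;                       // m[i][i]
  }

  for (int64_t r = 0; r < rows; r++) {
    for (int64_t c = 0; c < cols; c++) std::cout << m[r * cols + c] << " ";
    std::cout << "\n";
  }
}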

View File

@ -1,7 +1,6 @@
#include <ATen/ATen.h>
#include <ATen/NativeFunctions.h>
#include <ATen/Parallel.h>
#include <c10/util/irange.h>
#include <tuple>
#include <vector>
@ -33,7 +32,7 @@ TORCH_META_FUNC(fractional_max_pool2d) (
int64_t ndims = input.ndimension();
TORCH_CHECK(ndims == 3 || ndims == 4,
"fractional_max_pool2d(): Expected 3D or 4D tensor, but got: ", input.sizes());
for (const auto i : c10::irange(1, ndims)) {
for (int64_t i = 1; i < ndims; ++i) {
TORCH_CHECK(input.size(i) > 0,
"fractional_max_pool2d(): Expected input to have non-zero size for non-batch dimensions, but got",
input.sizes(), " with dimension ", i, " being empty.");
@ -107,7 +106,7 @@ static void fractional_max_pool2d_out_single_batch_frame(
int outputW, int outputH,
int poolSizeW, int poolSizeH) {
at::parallel_for(0, numPlanes, 0, [&](int64_t start, int64_t end) {
for (const auto plane : c10::irange(start, end)) {
for (auto plane = start; plane < end; ++plane) {
/* each plane contains 2 random samples, one for W and one for H */
scalar_t* randomSamplesForPlane = randomSamples + plane * 2;
@ -178,7 +177,7 @@ static void fractional_max_pool2d_out_frame(
return;
}
at::parallel_for(0, numBatch, 0, [&](int64_t start, int64_t end) {
for (const auto batch : c10::irange(start, end)) {
for (auto batch = start; batch < end; ++batch) {
fractional_max_pool2d_out_single_batch_frame<scalar_t>(
input + batch * numPlanes * inputH * inputW,
output + batch * numPlanes * outputH * outputW,
@ -255,7 +254,7 @@ static void fractional_max_pool2d_backward_out_single_batch_frame(
int inputW, int inputH,
int outputW, int outputH) {
at::parallel_for(0, numPlanes, 0, [&](int64_t start, int64_t end) {
for (const auto plane : c10::irange(start, end)) {
for (auto plane = start; plane < end; plane++) {
scalar_t* gradInputForPlane = gradInput + plane * inputW * inputH;
scalar_t* gradOutputForPlane = gradOutput + plane * outputW * outputH;
int64_t* indicesForPlane = indices + plane * outputW * outputH;
@ -292,7 +291,7 @@ static void fractional_max_pool2d_backward_out_frame(
return;
}
at::parallel_for(0, numBatch, 0, [&](int64_t start, int64_t end) {
for (const auto batch : c10::irange(start, end)) {
for (auto batch = start; batch < end; ++batch) {
fractional_max_pool2d_backward_out_single_batch_frame<scalar_t>(
gradInput + batch * numPlanes * inputH * inputW,
gradOutput + batch * numPlanes * outputH * outputW,

View File

@ -44,7 +44,7 @@ static void fractional_max_pool3d_out_single_batch_frame(
int64_t poolSizeT, int64_t poolSizeH, int64_t poolSizeW) {
at::parallel_for(0, numPlanes, 0, [&](int64_t start, int64_t end) {
for (const auto plane : c10::irange(start, end)) {
for (auto plane = start; plane < end; ++plane) {
/* each plane contains 3 random samples,
one for T, one for W, and one for H */
scalar_t* randomSamplesForPlane = randomSamples + plane * 3;
@ -126,7 +126,7 @@ static void fractional_max_pool3d_out_frame(
}
at::parallel_for(0, numBatch, 0, [&](int64_t start, int64_t end) {
for (const auto batch : c10::irange(start, end)) {
for (auto batch = start; batch < end; ++batch) {
fractional_max_pool3d_out_single_batch_frame<scalar_t>(
input + batch * numPlanes * inputW * inputH * inputT,
output + batch * numPlanes * outputW * outputH * outputT,
@ -171,7 +171,7 @@ void fractional_max_pool3d_out_cpu_template(
TORCH_CHECK(ndims == 4 || ndims == 5,
"fractional_max_pool3d_out(): Expected 4D or 5D tensor, but got: ",
input_.sizes());
for (const auto i : c10::irange(1, ndims)) {
for (int64_t i = 1; i < ndims; ++i) {
TORCH_CHECK(input_.size(i) > 0,
"fractional_max_pool3d_out(): Expected input to have non-zero size for non-batch dimensions, but got",
input_.sizes(), " with dimension ", i, " being empty.");
@ -243,7 +243,7 @@ static void fractional_max_pool3d_backward_out_single_batch_frame(
int64_t outputT, int64_t outputH, int64_t outputW) {
at::parallel_for(0, numPlanes, 0, [&](int64_t start, int64_t end) {
for (const auto plane : c10::irange(start, end)) {
for (auto plane = start; plane < end; plane++) {
scalar_t* gradInputForPlane = gradInput + plane * inputT * inputH * inputW;
scalar_t* gradOutputForPlane = gradOutput +
plane * outputT * outputH * outputW;
@ -284,7 +284,7 @@ static void fractional_max_pool3d_backward_out_frame(
}
at::parallel_for(0, numBatch, 0, [&](int64_t start, int64_t end) {
for (const auto batch : c10::irange(start, end)) {
for (auto batch = start; batch < end; ++batch) {
fractional_max_pool3d_backward_out_single_batch_frame<scalar_t>(
gradInput + batch * numPlanes * inputW * inputH * inputT,
gradOutput + batch * numPlanes * outputW * outputH * outputT,

View File

@ -9,7 +9,6 @@
#include <ATen/native/UpSample.h>
#include <ATen/native/cpu/GridSamplerKernel.h>
#include <c10/util/Exception.h>
#include <c10/util/irange.h>
namespace at { namespace native {
@ -52,12 +51,12 @@ namespace {
scalar_t *grid_ptr = grid.data_ptr<scalar_t>();
// loop over each output pixel
at::parallel_for(0, N, 0, [&](int64_t start, int64_t end) {
for (const auto n : c10::irange(start, end)) {
for (int64_t n = start; n < end; ++n) {
scalar_t *grid_ptr_N = grid_ptr + n * grid_sN;
scalar_t *inp_ptr_N = inp_ptr + n * inp_sN;
for (const auto d : c10::irange(out_D)) {
for (const auto h : c10::irange(out_H)) {
for (const auto w : c10::irange(out_W)) {
for (int64_t d = 0; d < out_D; ++d) {
for (int64_t h = 0; h < out_H; ++h) {
for (int64_t w = 0; w < out_W; ++w) {
// get the corresponding input x, y, z co-ordinates from grid
scalar_t *grid_ptr_NDHW = grid_ptr_N + d * grid_sD + h * grid_sH + w * grid_sW;
scalar_t ix = *grid_ptr_NDHW;
@ -223,12 +222,12 @@ namespace {
scalar_t *gGrid_ptr = grad_grid.data_ptr<scalar_t>();
// loop over each output pixel
at::parallel_for(0, N, 0, [&](int64_t start, int64_t end) {
for (const auto n : c10::irange(start, end)) {
for (int64_t n = start; n < end; ++n) {
scalar_t *grid_ptr_N = grid_ptr + n * grid_sN;
scalar_t *inp_ptr_N = inp_ptr + n * inp_sN;
scalar_t *gGrid_ptr_NDHW = gGrid_ptr + n * gGrid_sN;
for (const auto d : c10::irange(out_D)) {
for (const auto h : c10::irange(out_H)) {
for (int64_t d = 0; d < out_D; ++d) {
for (int64_t h = 0; h < out_H; ++h) {
for (int64_t w = 0; w < out_W; ++w, gGrid_ptr_NDHW += gGrid_sW /* grad_grid is contiguous */ ) {
// get the corresponding input x, y, z co-ordinates from grid
scalar_t *grid_ptr_NDHW = grid_ptr_N + d * grid_sD + h * grid_sH + w * grid_sW;
@ -417,11 +416,11 @@ Tensor _grid_sampler_2d_cpu_fallback(const Tensor& input, const Tensor& grid,
scalar_t *grid_ptr = grid.data_ptr<scalar_t>();
// loop over each output pixel
at::parallel_for(0, N, 0, [&](int64_t start, int64_t end) {
for (const auto n : c10::irange(start, end)) {
for (int64_t n = start; n < end; ++n) {
scalar_t *grid_ptr_N = grid_ptr + n * grid_sN;
scalar_t *inp_ptr_N = inp_ptr + n * inp_sN;
for (const auto h : c10::irange(out_H)) {
for (const auto w : c10::irange(out_W)) {
for (int64_t h = 0; h < out_H; ++h) {
for (int64_t w = 0; w < out_W; ++w) {
// get the corresponding input x, y, z co-ordinates from grid
scalar_t *grid_ptr_NHW = grid_ptr_N + h * grid_sH + w * grid_sW;
scalar_t x = *grid_ptr_NHW;
@ -506,7 +505,7 @@ Tensor _grid_sampler_2d_cpu_fallback(const Tensor& input, const Tensor& grid,
scalar_t coefficients[4];
// Interpolate 4 values in the x direction
for (const auto i : c10::irange(4)) {
for (int64_t i = 0; i < 4; ++i) {
coefficients[i] = cubic_interp1d<scalar_t>(
get_value_bounded<scalar_t>(inp_ptr_NC, ix_nw - 1, iy_nw - 1 + i, inp_W, inp_H, inp_sW, inp_sH, padding_mode, align_corners),
get_value_bounded<scalar_t>(inp_ptr_NC, ix_nw + 0, iy_nw - 1 + i, inp_W, inp_H, inp_sW, inp_sH, padding_mode, align_corners),
@ -579,11 +578,11 @@ _grid_sampler_2d_cpu_fallback_backward(const Tensor& grad_output,
scalar_t *gGrid_ptr = grad_grid.data_ptr<scalar_t>();
// loop over each output pixel
at::parallel_for(0, N, 0, [&](int64_t start, int64_t end) {
for (const auto n : c10::irange(start, end)) {
for (int64_t n = start; n < end; ++n) {
scalar_t *grid_ptr_N = grid_ptr + n * grid_sN;
scalar_t *inp_ptr_N = inp_ptr + n * inp_sN;
scalar_t *gGrid_ptr_NHW = gGrid_ptr + n * gGrid_sN;
for (const auto h : c10::irange(out_H)) {
for (int64_t h = 0; h < out_H; ++h) {
for (int64_t w = 0; w < out_W; ++w, gGrid_ptr_NHW += gGrid_sW /* grad_grid is contiguous */ ) {
// get the corresponding input x, y co-ordinates from grid
scalar_t *grid_ptr_NHW = grid_ptr_N + h * grid_sH + w * grid_sW;
@ -704,8 +703,8 @@ _grid_sampler_2d_cpu_fallback_backward(const Tensor& grad_output,
for (int64_t c = 0; c < C; ++c, gOut_ptr_NCHW += gOut_sC, gInp_ptr_NC += gInp_sC, inp_ptr_NC+= inp_sC) {
scalar_t gOut = *gOut_ptr_NCHW;
for (const auto i : c10::irange(4)) {
for (const auto j : c10::irange(4)) {
for (int64_t i = 0; i < 4; ++i) {
for (int64_t j = 0; j < 4; ++j) {
// set input gradient
add_value_bounded<scalar_t>(gInp_ptr_NC, ix_nw - 1 + i, iy_nw - 1 + j,
@ -858,7 +857,7 @@ Tensor grid_sampler(const Tensor& input, const Tensor& grid,
!(input.dim() == 5 && static_cast<GridSamplerInterpolation>(interpolation_mode) == GridSamplerInterpolation::Bicubic),
"grid_sampler(): bicubic interpolation only supports 4D input"
);
for (const auto i : c10::irange(2, input.dim())) {
for (int64_t i = 2; i < input.dim(); i++) {
TORCH_CHECK(input.size(i) > 0,
"grid_sampler(): expected input to have non-empty spatial dimensions, "
"but input has sizes ", input.sizes(), " with dimension ", i, " being "

View File

@ -5,7 +5,6 @@
#include <ATen/native/im2col.h>
#include <ATen/native/im2col_shape_check.h>
#include <c10/util/irange.h>
namespace at {
namespace native {
@ -92,7 +91,7 @@ static void im2col_out_cpu_template(
Tensor input_n;
Tensor output_n;
for (const auto elt : c10::irange(batch_size)) {
for (int64_t elt = 0; elt < batch_size; elt++) {
input_n = input.select(0, elt);
output_n = output.select(0, elt);

View File

@ -2,7 +2,6 @@
#include <ATen/ExpandUtils.h>
#include <ATen/native/TensorIterator.h>
#include <ATen/core/List.h>
#include <c10/util/irange.h>
#include <limits>
@ -32,7 +31,7 @@ static C10_UNUSED std::vector<Tensor> expandTensors(const Tensor & self, const t
}
// The sizes of the ByteTensor mask or bool tensor must match the sizes of the
// corresponding dimensions in self
for (const auto j : c10::irange(index.dim())) {
for (int64_t j = 0; j < index.dim(); j++) {
int64_t srcIdx = result.size() + j;
if (index.size(j) != self.size(srcIdx)) {
invalid_mask(self, srcIdx, index, j);
@ -40,7 +39,7 @@ static C10_UNUSED std::vector<Tensor> expandTensors(const Tensor & self, const t
}
// Replace with nonzeros
auto nonzero = index.nonzero();
for (const auto j : c10::irange(index.dim())) {
for (int64_t j = 0; j < index.dim(); j++) {
result.emplace_back(nonzero.select(1, j));
}
} else {

View File

@ -1158,7 +1158,7 @@ static void addbmm_impl_(
}
auto adjusted_beta(beta);
for (const auto batch : c10::irange(num_batches)) {
for (int64_t batch = 0; batch < num_batches; ++batch) {
result.addmm_(batch1[batch], batch2[batch], adjusted_beta, alpha);
adjusted_beta = 1; // accumulate output once
}
@ -1215,23 +1215,23 @@ inline void baddbmm_cpu_kernel(const Tensor& result, const Tensor& self, const T
int64_t grain_size = std::min(internal::GRAIN_SIZE / (is * js * ks), (int64_t)1);
parallel_for(0, bs, grain_size, [&](int64_t b_begin, int64_t b_end) {
for (const auto b : c10::irange(b_begin, b_end)) {
for (int64_t b = b_begin; b < b_end; b++) {
auto r1 = r0[b];
auto s1 = s0[b];
auto m1 = m0[b];
for (const auto i : c10::irange(is)) {
for (int64_t i = 0; i < is; i++) {
auto r2 = r1[i];
auto s2 = s1[i];
for (const auto j : c10::irange(js)) {
for (int64_t j = 0; j < js; j++) {
scalar_t &r = r2[j];
if (is_bmm) {
r = 0;
for (const auto k : c10::irange(ks)) {
for (int64_t k = 0; k < ks; k++) {
r += s2[k] * m1[k][j];
}
} else {
r *= beta;
for (const auto k : c10::irange(ks)) {
for (int64_t k = 0; k < ks; k++) {
r += alpha * s2[k] * m1[k][j];
}
}
@ -1994,11 +1994,10 @@ void compute_T18_scale_square(
auto mexp_scaled = at::native::compute_T18<scalar_t>(a_scaled);
auto s_cpu = (s.device().type() == at::kCPU)
? s : s.to(at::kCPU);
for (const auto i : c10::irange(mexp_scaled.size(0))) {
for (int64_t i = 0; i < mexp_scaled.size(0); ++i) {
auto s_val = s_cpu.select(0, i).template item<int64_t>();
auto mexp = mexp_scaled.select(0, i);
for (const auto p : c10::irange(s_val)) {
(void)p; //Suppress unused variable warning
for (int64_t p = 0; p < s_val; ++p) {
mexp = at::matmul(mexp, mexp);
}
mexp_out.select(0, i).copy_(mexp);
@ -2266,7 +2265,7 @@ Tensor& nuclear_norm_out(const Tensor& self, IntArrayRef dim, bool keepdim, Tens
// (e.g. [0, 1, 2, ..., ndim-1])
static std::vector<int64_t> make_dim_list(int64_t ndim) {
std::vector<int64_t> dim_list(ndim);
for (const auto ind : c10::irange(ndim)) {
for (int64_t ind = 0; ind < ndim; ind++) {
dim_list[ind] = ind;
}
return dim_list;
@ -2819,7 +2818,7 @@ struct KronImpl final {
a_reshape = c10::SmallVector<int64_t, 10>(2 * maxdim);
b_reshape = c10::SmallVector<int64_t, 10>(2 * maxdim);
result_reshape = c10::SmallVector<int64_t, 10>(maxdim);
for (const auto i : c10::irange(maxdim)) {
for (int64_t i = 0; i < maxdim; i++) {
a_reshape[2 * i] = (i >= pad_self ? self.sizes()[i - pad_self] : 1);
a_reshape[2 * i + 1] = 1;
b_reshape[2 * i] = 1;
@ -2834,7 +2833,7 @@ struct KronImpl final {
TORCH_INTERNAL_ASSERT(result.defined(), "Cannot call kron_out with an undefined result tensor as the out argument. Please allocate a Tensor before calling kron_out with it.");
c10::SmallVector<int64_t, 10> mul_shape(2 * maxdim);
for (const auto i : c10::irange(maxdim)) {
for (int64_t i = 0; i < maxdim; i++) {
mul_shape[2 * i] = a_reshape[2 * i];
mul_shape[2 * i + 1] = b_reshape[2 * i + 1];
}
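
The baddbmm hunk is the reference (non-BLAS) kernel: for every batch b it computes result[b] = beta * result[b] + alpha * self[b] @ mat2[b], with the pure bmm case zero-initializing the accumulator instead of scaling by beta. A flat-array sketch of the same arithmetic with illustrative sizes:

#include <cstdint>
#include <iostream>
#include <vector>

// result[b] = beta * result[b] + alpha * (self[b] @ mat2[b])
// self: bs x is x ks, mat2: bs x ks x js, result: bs x is x js, all row-major.
void baddbmm_naive(std::vector<float>& result, const std::vector<float>& self,
                   const std::vector<float>& mat2, int64_t bs, int64_t is,
                   int64_t js, int64_t ks, float beta, float alpha) {
  for (int64_t b = 0; b < bs; b++) {
    const float* s1 = self.data() + b * is * ks;
    const float* m1 = mat2.data() + b * ks * js;
    float* r1 = result.data() + b * is * js;
    for (int64_t i = 0; i < is; i++) {
      for (int64_t j = 0; j < js; j++) {
        float& r = r1[i * js + j];
        r *= beta;
        for (int64_t k = 0; k < ks; k++) {
          r += alpha * s1[i * ks + k] * m1[k * js + j];
        }
      }
    }
  }
}

int main() {
  // One batch: 2x2 identity times a 2x2 matrix, beta = 0, alpha = 1 -> plain bmm.
  std::vector<float> self = {1, 0, 0, 1};
  std::vector<float> mat2 = {5, 6, 7, 8};
  std::vector<float> result(4, 0.f);
  baddbmm_naive(result, self, mat2, 1, 2, 2, 2, 0.f, 1.f);
  std::cout << result[0] << " " << result[3] << "\n";  // 5 8
}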

View File

@ -1,7 +1,6 @@
#pragma once
#include <c10/core/ScalarType.h>
#include <c10/util/irange.h>
#include <ATen/ATen.h>
#include <ATen/ExpandUtils.h>
#include <ATen/TensorUtils.h>
@ -170,8 +169,7 @@ void batch_iterator_with_broadcasting(const Tensor& a, const Tensor& b, const fu
auto* b_batch_idx_ptr = data[0];
auto* a_batch_idx_ptr = data[1];
for (const auto elem : c10::irange(nelems)) {
(void)elem; //Suppress unused variable warning
for (int64_t elem = 0; elem < nelems; ++elem) {
auto b_curr_linear_batch_idx = *reinterpret_cast<int64_t*>(b_batch_idx_ptr);
auto a_curr_linear_batch_idx = *reinterpret_cast<int64_t*>(a_batch_idx_ptr);
@ -334,7 +332,7 @@ static inline Tensor _move_to_end(const Tensor& self, IntArrayRef axes) {
const int64_t ndim = self.ndimension();
std::vector<int64_t> perm;
for (const auto i : c10::irange(ndim)) {
for (int64_t i = 0; i < ndim; i++) {
auto it = std::find(a.begin(), a.end(), i);
if (it == a.end()) {
perm.push_back(i);
@ -478,7 +476,7 @@ static inline std::vector<int64_t> create_dim_backshift_permutation(int64_t dim0
"duplicate or invalid dimensions");
std::vector<int64_t> permutation(ndim);
int64_t cur_permuted_dim = 0;
for (const auto dim_ind : c10::irange(ndim)) {
for (int64_t dim_ind = 0; dim_ind < ndim; dim_ind++) {
if ((dim_ind != dim0) && (dim_ind != dim1)) {
permutation[cur_permuted_dim++] = dim_ind;
}
@ -495,7 +493,7 @@ static inline std::vector<int64_t> create_dim_backshift_permutation(int64_t dim0
static inline std::vector<int64_t> create_reverse_permutation(std::vector<int64_t> permutation) {
int64_t ndim = permutation.size();
std::vector<int64_t> reverse_permutation(ndim);
for (const auto dim_ind : c10::irange(ndim)) {
for (int64_t dim_ind = 0; dim_ind < ndim; dim_ind++) {
reverse_permutation[permutation[dim_ind]] = dim_ind;
}
return reverse_permutation;

View File

@ -11,7 +11,6 @@
#include <ATen/Parallel.h>
#include <ATen/TensorUtils.h>
#include <ATen/native/Fill.h>
#include <c10/util/irange.h>
#include <numeric>
#include <type_traits>
@ -61,7 +60,7 @@ std::tuple<Tensor, Tensor> ctc_loss_cpu_template(const Tensor& log_probs, const
std::vector<int64_t> tg_batch_offsets(batch_size);
if (targets.dim() == 1) { // concatenated targets
int64_t pos = 0;
for (const auto i : c10::irange(batch_size)) {
for (int64_t i = 0; i < batch_size; i++) {
tg_batch_offsets[i] = pos;
pos += target_lengths[i];
if (max_target_length < target_lengths[i])
@ -73,7 +72,7 @@ std::tuple<Tensor, Tensor> ctc_loss_cpu_template(const Tensor& log_probs, const
else { // batch x max_target_length
// dim is 2
int64_t tg_batch_stride = targets.stride(0);
for (const auto i : c10::irange(batch_size)) {
for (int64_t i = 0; i < batch_size; i++) {
tg_batch_offsets[i] = i * tg_batch_stride;
if (max_target_length < target_lengths[i])
max_target_length = target_lengths[i];
@ -85,7 +84,7 @@ std::tuple<Tensor, Tensor> ctc_loss_cpu_template(const Tensor& log_probs, const
" (while checking arguments for ", c, ")");
}
int64_t max_input_length = log_probs.size(0);
for (const auto b : c10::irange(batch_size)) {
for (int64_t b = 0; b < batch_size; b++) {
TORCH_CHECK(input_lengths[b] <= max_input_length,
"Expected input_lengths to have value at most ", max_input_length, ", but got value ", input_lengths[b],
" (while checking arguments for ", c, ")");
@ -104,7 +103,7 @@ std::tuple<Tensor, Tensor> ctc_loss_cpu_template(const Tensor& log_probs, const
// first the default
log_alpha.narrow(1, 0, 1).fill_(neginf);
at::parallel_for(0, batch_size, 0, [&](int64_t start, int64_t end) {
for (const auto b : c10::irange(start, end)) {
for (int64_t b = start; b < end; b++) {
int64_t input_length = input_lengths[b];
int64_t target_length = target_lengths[b];
auto log_probs_a = log_probs_a_global[b];
@ -117,7 +116,7 @@ std::tuple<Tensor, Tensor> ctc_loss_cpu_template(const Tensor& log_probs, const
log_alpha_a[0][1] = log_probs_a[0][get_target_prime(targets_data, tg_batch_offset, tg_target_stride, 1, BLANK)];
// now the loop over the inputs
for (const auto t : c10::irange(1, input_length)) {
for (int64_t t=1; t<input_length; t++) {
for (int64_t s=0; s<2*target_length+1; s++) {
auto current_target_prime = get_target_prime(targets_data, tg_batch_offset, tg_target_stride, s, BLANK);
// this loop over s could be parallel/vectorized, too, but the required items are one index apart
@ -190,7 +189,7 @@ Tensor ctc_loss_backward_cpu_template(const Tensor& grad_out, const Tensor& log_
if (targets.dim() == 1) { // concatenated targets
int64_t pos = 0;
max_target_length = 0;
for (const auto i : c10::irange(batch_size)) {
for (int64_t i = 0; i < batch_size; i++) {
tg_batch_offsets[i] = pos;
pos += target_lengths[i];
if (max_target_length < target_lengths[i])
@ -201,7 +200,7 @@ Tensor ctc_loss_backward_cpu_template(const Tensor& grad_out, const Tensor& log_
else { // batch x max_target_length
// dim is 2
int64_t tg_batch_stride = targets.stride(0);
for (const auto i : c10::irange(batch_size)) {
for (int64_t i = 0; i < batch_size; i++) {
tg_batch_offsets[i] = i * tg_batch_stride;
}
tg_target_stride = targets.stride(1);
@ -235,7 +234,7 @@ Tensor ctc_loss_backward_cpu_template(const Tensor& grad_out, const Tensor& log_
TensorIterator fill_1d_iter_local(fill_1d_iter);
TensorIterator fill_log_beta_1d_iter_local(fill_log_beta_1d_iter);
for (const auto b : c10::irange(start, end)) {
for (int64_t b = start; b < end; b++) {
scalar_t nll = neg_log_likelihood.accessor<scalar_t, 1>()[b];
auto grad_a = grad_a_global[b];
if (zero_infinity && nll == std::numeric_limits<scalar_t>::infinity()) {
@ -323,8 +322,8 @@ Tensor ctc_loss_backward_cpu_template(const Tensor& grad_out, const Tensor& log_
// this could be a great target for further vectorization.
// grad is the output gradient, nll is the loss. Note that the likelihood -nll is the Z of eq (16)
scalar_t gr = grad_out.accessor<scalar_t, 1>()[b];
for (const auto t : c10::irange(input_length)) { // or go for the full thing?
for (const auto c : c10::irange(num_labels)) {
for (int64_t t = 0; t < input_length; t++) { // or go for the full thing?
for (int64_t c = 0; c < num_labels; c++) {
scalar_t& res = grad_a[t][c];
scalar_t lp = log_probs_a[t][c];
res = (std::exp(lp)-std::exp(res + nll - lp)) * gr;
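
In the CTC hunks, the per-batch target offsets for concatenated 1-D targets are just an exclusive prefix sum of the target lengths (the 2-D case uses i * stride instead), with the maximum target length tracked in the same pass. A small sketch of the concatenated case with made-up lengths:

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  std::vector<int64_t> target_lengths = {3, 5, 2};
  const int64_t batch_size = static_cast<int64_t>(target_lengths.size());

  std::vector<int64_t> tg_batch_offsets(batch_size);
  int64_t pos = 0;
  int64_t max_target_length = 0;
  for (int64_t i = 0; i < batch_size; i++) {
    tg_batch_offsets[i] = pos;        // where batch i's targets start
    pos += target_lengths[i];
    max_target_length = std::max(max_target_length, target_lengths[i]);
  }

  for (auto o : tg_batch_offsets) std::cout << o << " ";  // 0 3 8
  std::cout << "| max = " << max_target_length << "\n";   // 5
}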

View File

@ -3,7 +3,6 @@
#include <ATen/Dispatch.h>
#include <ATen/TensorUtils.h>
#include <ATen/native/LossMulti.h>
#include <c10/util/irange.h>
namespace at {
namespace native {
@ -18,21 +17,21 @@ inline scalar_t multilabel_margin_loss_forward_inner_sum_cpu(
int64_t dim) {
using accscalar_t = at::acc_type<scalar_t, false>;
accscalar_t sum = 0;
for (const auto ddt : c10::irange(dim)) {
for (int64_t ddt = 0; ddt < dim; ddt++) {
int64_t target_idx = target_data[ddt];
if (target_idx < 0) {
break;
}
is_target_data[target_idx] = 1;
}
for (const auto dt : c10::irange(dim)) {
for (int64_t dt = 0; dt < dim; dt++) {
int64_t target_idx = target_data[dt];
if (target_idx < 0) {
break;
}
scalar_t input_target = input_data[target_idx];
for (const auto d : c10::irange(dim)) {
for (int64_t d = 0; d < dim; d++) {
if (!is_target_data[d]) {
scalar_t z = 1 - input_target + input_data[d];
if (z > 0) {
@ -64,8 +63,7 @@ static void multilabel_margin_loss_forward_out_frame(
accscalar_t sum = 0;
for (const auto t : c10::irange(nframe)) {
(void)t; //Suppress unused variable warning
for (int64_t t = 0; t < nframe; t++) {
sum += multilabel_margin_loss_forward_inner_sum_cpu(
input_data, target_data, is_target_data, dim);
@ -83,7 +81,7 @@ static void multilabel_margin_loss_forward_out_frame(
} else {
auto output_acc = output.accessor<scalar_t, 1>();
for (const auto t : c10::irange(nframe)) {
for (int64_t t = 0; t < nframe; t++) {
scalar_t sum = multilabel_margin_loss_forward_inner_sum_cpu(
input_data, target_data, is_target_data, dim);
@ -173,16 +171,15 @@ static void multilabel_margin_loss_backward_out_frame(
reduction == Reduction::Mean ? 1. / (nframe * dim) : 1. / dim);
scalar_t* grad_input_row_data = grad_input.data_ptr<scalar_t>();
for (const auto t : c10::irange(nframe)) {
(void)t; //Suppress unused variable warning
for (const auto dt : c10::irange(dim)) {
for (int64_t t = 0; t < nframe; t++) {
for (int64_t dt = 0; dt < dim; dt++) {
int64_t target_idx = target_data[dt];
if (target_idx < 0) {
break;
}
scalar_t input_target = input_data[target_idx];
for (const auto d : c10::irange(dim)) {
for (int64_t d = 0; d < dim; d++) {
if (!is_target_data[d]) {
scalar_t z = 1 - input_target + input_data[d];
if (z > 0) {
@ -209,8 +206,8 @@ static void multilabel_margin_loss_backward_out_frame(
} else {
check_dim_size(grad_output, 1, 0, nframe);
auto grad_output_acc = grad_output.accessor<scalar_t, 1>();
for (const auto t : c10::irange(nframe)) {
for (const auto d : c10::irange(dim)) {
for (int64_t t = 0; t < nframe; t++) {
for (int64_t d = 0; d < dim; d++) {
grad_input_data[t * dim + d] *= grad_output_acc[t];
}
}

View File

@ -2,7 +2,6 @@
#include <ATen/Dispatch.h>
#include <ATen/AccumulateType.h>
#include <ATen/native/LossMulti.h>
#include <c10/util/irange.h>
namespace at {
namespace native {
@ -19,7 +18,7 @@ inline scalar_t multi_margin_inner_sum_cpu(
const int64_t target_idx) {
const scalar_t input_target = input_data[target_idx];
scalar_t sum = 0;
for (const auto d : c10::irange(dim)) {
for (int64_t d = 0; d < dim; d++) {
if (d == target_idx) {
continue;
}
@ -64,7 +63,7 @@ static inline void multi_margin_loss_cpu_kernel(
// cannot be handled by TensorAccessor)
if (reduction == Reduction::None && output.dim() > 0) {
auto output_acc = output.accessor<scalar_t, 1>();
for (const auto t : c10::irange(nframe)) {
for (int64_t t = 0; t < nframe; t++) {
const auto idx = target_index_checked(target_data, t, dim);
auto sum = multi_margin_inner_sum_cpu(
input_data, weight_data, p, margin, dim, idx);
@ -74,7 +73,7 @@ static inline void multi_margin_loss_cpu_kernel(
} else {
accscalar_t sum = 0;
auto output_acc = output.data_ptr<scalar_t>();
for (const auto t : c10::irange(nframe)) {
for (int64_t t = 0; t < nframe; t++) {
const auto idx = target_index_checked(target_data, t, dim);
sum += multi_margin_inner_sum_cpu(
input_data, weight_data, p, margin, dim, idx);
@ -150,11 +149,11 @@ static void multi_margin_loss_backward_cpu_kernel(
int64_t dim,
int64_t reduction) {
scalar_t* grad_input_row_data = grad_input_data;
for (const auto t : c10::irange(nframe)) {
for (int64_t t = 0; t < nframe; t++) {
int64_t target_idx = target_index_checked(target_data, t, dim);
scalar_t input_target = input_data[target_idx];
scalar_t grad_input_target = 0;
for (const auto d : c10::irange(dim)) {
for (int64_t d = 0; d < dim; d++) {
scalar_t z = margin - input_target + input_data[d];
if (d == target_idx) {
continue;
@ -187,8 +186,8 @@ static void multi_margin_loss_backward_cpu_kernel(
}
} else {
auto grad_output_acc = grad_output.accessor<scalar_t, 1>();
for (const auto t : c10::irange(nframe)) {
for (const auto d : c10::irange(dim)) {
for (int64_t t = 0; t < nframe; t++) {
for (int64_t d = 0; d < dim; d++) {
grad_input_data[t * dim + d] *= grad_output_acc[t];
}
}

View File

@ -9,7 +9,6 @@
#include <c10/util/SmallBuffer.h>
#include <c10/core/TensorOptions.h>
#include <c10/util/irange.h>
namespace at {
namespace meta {
@ -156,7 +155,7 @@ static void nll_loss_out_frame(
auto output_acc = output.accessor<scalar_t, 1>();
at::parallel_for(0, batch_size, 0, [&](int64_t start, int64_t end) {
for (const auto i : c10::irange(start, end)) {
for (auto i = start; i < end; i++) {
const auto cur_target = target_acc[i];
if (cur_target == ignore_index) {
@ -216,7 +215,7 @@ static void nll_loss_out_frame(
scalar_t weight_partial_sums[cascade_sum_num_levels] = {0};
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
scalar_t loss_partial_sums[cascade_sum_num_levels] = {0};
for (const auto b : c10::irange(batch_size)) {
for (int64_t b = 0; b < batch_size; b++) {
const int64_t cur_target = target_data[b];
if (cur_target == ignore_index) {
++num_ignored;
@ -331,7 +330,7 @@ static void nll_loss_backward_out_frame(
auto grad_input_acc = grad_input.accessor<scalar_t, 2>();
auto grad_output_acc = grad_output.accessor<scalar_t, 1>();
at::parallel_for(0, batch_size, 0, [&](int64_t start, int64_t end) {
for (const auto i : c10::irange(start, end)) {
for (auto i = start; i < end; i++) {
auto cur_target = target_acc[i];
if (cur_target == ignore_index) {
continue;
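
The nll_loss hunks iterate the batch and, for each sample, gather the log-probability at the target class while skipping samples whose target equals ignore_index. A self-contained sketch of that per-sample gather; class weights and the reduction are omitted, and the numbers are invented.

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  const int64_t batch_size = 3, n_classes = 2, ignore_index = -100;
  // Log-probabilities, row-major batch_size x n_classes.
  std::vector<double> log_probs = {-0.1, -2.3,  -1.6, -0.2,  -0.7, -0.7};
  std::vector<int64_t> target = {0, 1, ignore_index};

  std::vector<double> output(batch_size, 0.0);
  for (int64_t i = 0; i < batch_size; i++) {
    const auto cur_target = target[i];
    if (cur_target == ignore_index) {
      continue;                        // ignored samples contribute zero loss
    }
    output[i] = -log_probs[i * n_classes + cur_target];
  }

  for (auto v : output) std::cout << v << " ";  // 0.1 0.2 0
  std::cout << "\n";
}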

View File

@ -5,7 +5,6 @@
#include <ATen/TensorUtils.h>
#include <ATen/native/cpu/utils.h>
#include <ATen/native/Resize.h>
#include <c10/util/irange.h>
namespace at {
namespace native {
@ -110,9 +109,9 @@ static void nll_loss2d_forward_out_frame(
auto target_acc = target.accessor<int64_t, 3>();
at::parallel_for(0, batch_size, 0, [&](int64_t start, int64_t end) {
for (const auto b : c10::irange(start, end)) {
for (const auto h : c10::irange(H)) {
for (const auto w : c10::irange(W)) {
for (int64_t b = start; b < end; b++) {
for (int64_t h = 0; h < H; h++) {
for (int64_t w = 0; w < W; w++) {
const int64_t cur_target = (int64_t)target_acc[b][h][w];
if (cur_target == ignore_index) {
@ -177,8 +176,8 @@ static void nll_loss2d_forward_out_frame(
const int64_t level_mask = level_step - 1;
int64_t num_ignored = 0;
for (const auto b : c10::irange(batch_size)) {
for (const auto elem : c10::irange(map_size)) {
for (int64_t b = 0; b < batch_size; b++) {
for (int64_t elem = 0; elem < map_size; elem++) {
const int64_t cur_target = target_data[b * map_size + elem];
if (cur_target == ignore_index) {
++num_ignored;
@ -287,9 +286,9 @@ static void nll_loss2d_backward_out_frame(
auto target_acc = target.accessor<int64_t, 3>();
at::parallel_for(0, batch_size, 0, [&](int64_t start, int64_t end) {
for (const auto b : c10::irange(start, end)) {
for (const auto h : c10::irange(H)) {
for (const auto w : c10::irange(W)) {
for (int64_t b = start; b < end; b++) {
for (int64_t h = 0; h < H; h++) {
for (int64_t w = 0; w < W; w++) {
const int64_t cur_target = target_acc[b][h][w];
if (cur_target == ignore_index) {
continue;
@ -330,8 +329,8 @@ static void nll_loss2d_backward_out_frame(
: grad_output_value);
at::parallel_for(0, batch_size, 0, [&](int64_t start, int64_t end) {
for (const auto b : c10::irange(start, end)) {
for (const auto elem : c10::irange(map_size)) {
for (int64_t b = start; b < end; b++) {
for (int64_t elem = 0; elem < map_size; elem++) {
const int64_t t = target_data[b * map_size + elem];
if (t != ignore_index) {

View File

@ -60,7 +60,6 @@ bool _nnpack_available() {
#include <caffe2/utils/threadpool/pthreadpool-cpp.h>
#include <ATen/native/ConvUtils.h>
#include <ATen/Parallel.h>
#include <c10/util/irange.h>
namespace at {
namespace native {
@ -239,7 +238,7 @@ Tensor _nnpack_spatial_convolution(
const size_t input_size_per_batch = input_channels * input_size.width * input_size.height;
const size_t output_size_per_batch = output_channels * output_size.width * output_size.height;
for (const auto batch : c10::irange(0u, batch_size)) {
for (size_t batch = 0u; batch < batch_size; ++batch) {
const nnp_status status = nnp_convolution_inference(
algorithm,
nnp_convolution_transform_strategy_compute,

View File

@ -100,7 +100,7 @@ Tensor refine_names(const Tensor& self, DimnameList names) {
self_names.size(), " and ", names.size(), " respectively).");
check_names_valid_for(self, names);
for (const auto idx : c10::irange(self_names.size())) {
for (size_t idx = 0; idx < self_names.size(); idx++) {
const auto& self_name = self_names[idx];
const auto& out_name = names[idx];
if (self_name == out_name || self_name.isWildcard()) {
@ -221,7 +221,7 @@ Tensor align_to(const Tensor& tensor, DimnameList order, int64_t ellipsis_idx) {
};
// Fill in the non-ellipsis dimensions
for (const auto order_idx : c10::irange(0U, order.size())) {
for (auto order_idx = 0U; order_idx < order.size(); ++order_idx) {
auto out_idx = order_idx;
if (order_idx >= ellipsis_idx) {
out_idx = order_idx + num_ellipsis_names;

View File

@ -10,7 +10,6 @@
#include <ATen/native/cpu/Loops.h>
#include <ATen/native/batch_norm.h>
#include <ATen/native/Normalization.h>
#include <c10/util/irange.h>
#include <vector>
@ -157,7 +156,7 @@ std::tuple<Tensor,Tensor> batch_norm_cpu_update_stats_template(
// Reduce all dimensions except dim=1
DimVector reduce_dims(ndim - 1);
reduce_dims[0] = 0;
for (const auto i : c10::irange(2, ndim)) {
for (int64_t i = 2; i < ndim; ++i) {
reduce_dims[i - 1] = i;
}
@ -179,7 +178,7 @@ std::tuple<Tensor,Tensor> batch_norm_cpu_update_stats_template(
batch_norm_cpu_collect_stats_stub(kCPU, _mean, _var_sum, input);
parallel_for(0, n_input, 1, [&](int64_t b_begin, int64_t b_end) {
for (const auto f : c10::irange(b_begin, b_end)) {
for (int64_t f = b_begin; f < b_end; ++f) {
save_mean_a[f] = _mean_a[f];
save_var_transform_a[f] = VarTransform<accscalar_t>{}(_var_sum_a[f] / n, eps);
@ -207,7 +206,7 @@ std::tuple<Tensor,Tensor> batch_norm_cpu_update_stats_template(
parallel_for(0, n_input, 1, [&](int64_t b_begin, int64_t b_end) {
TensorIterator iter(reduce_iter);
for (const auto f : c10::irange(b_begin, b_end)) {
for (int64_t f = b_begin; f < b_end; ++f) {
// compute variance per input
iter.unsafe_replace_operand(0, in_data + channel_stride * f);
accscalar_t var_sum = 0;
@ -284,7 +283,7 @@ std::tuple<Tensor, Tensor, Tensor> batch_norm_backward_cpu_template(
// Reduce all dimensions except dim=1
DimVector reduce_dims(ndim - 1);
reduce_dims[0] = 0;
for (const auto i : c10::irange(2, ndim)) {
for (int64_t i = 2; i < ndim; ++i) {
reduce_dims[i - 1] = i;
}
@ -331,7 +330,7 @@ std::tuple<Tensor, Tensor, Tensor> batch_norm_backward_cpu_template(
TensorIterator unary_iter_local(unary_iter);
TensorIterator binary_iter_local(binary_iter);
for (const auto f : c10::irange(b_begin, b_end)) {
for (int64_t f = b_begin; f < b_end; ++f) {
scalar_t w = weight.defined() ? weight_a[f] : 1;
scalar_t mean, invstd;

View File

@ -77,7 +77,7 @@ std::tuple<Tensor, Tensor> _pack_padded_sequence(const Tensor& _input, const Ten
// more elements below in our column, we lower the counter (prev_l), and append the new
// block to the output.
int64_t prev_l = 0;
for (const auto i : c10::irange(batch_size)) {
for (int64_t i = 0; i < batch_size; ++i) {
int64_t l = lengths[batch_size - 1 - i];
if (l > prev_l) {
auto current_batch_size = batch_size - i;
@ -109,7 +109,7 @@ Tensor _pack_padded_sequence_backward(const Tensor& grad, at::IntArrayRef input_
int64_t offset = 0;
int64_t max_seq_len = batch_sizes_t.size(0);
int64_t * batch_sizes = batch_sizes_t.data_ptr<int64_t>();
for (const auto i : c10::irange(max_seq_len)) {
for (int64_t i = 0; i < max_seq_len; ++i) {
grad_input[i].slice(0, 0, batch_sizes[i]).copy_(grad.slice(0, offset, offset + batch_sizes[i]));
offset += batch_sizes[i];
}
@ -170,8 +170,7 @@ std::tuple<Tensor, Tensor> _pad_packed_sequence(const Tensor& data, const Tensor
}
int64_t dec = prev_batch_size - batch_size;
if (dec > 0) {
for (const auto j : c10::irange(dec)) {
(void)j; //Suppress unused variable warning
for (int64_t j = 0; j < dec; ++j) {
(*lengths--) = i;
}
}
@ -207,7 +206,7 @@ Tensor pad_sequence(TensorList sequences, bool batch_first, double padding_value
out_dims.insert(out_dims.end(), trailing_dims.begin(), trailing_dims.end());
Tensor out = at::full(out_dims, padding_value, sequences[0].options());
for (const auto i : c10::irange(sequences_size)) {
for (int64_t i = 0; i < sequences_size; i++) {
const Tensor currseq = sequences[i];
const int64_t length_i = currseq.size(0);
// use index notation to prevent duplicate references to the tensor
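
pad_sequence in the hunk above allocates an output of shape [batch, max_len, ...] pre-filled with padding_value and then copies each sequence into a prefix slice. A minimal sketch with plain 1-D integer sequences and no trailing dimensions:

#include <algorithm>
#include <iostream>
#include <vector>

int main() {
  std::vector<std::vector<int>> sequences = {{1, 2, 3}, {4}, {5, 6}};
  const int padding_value = 0;

  size_t max_len = 0;
  for (const auto& s : sequences) max_len = std::max(max_len, s.size());

  // out[i] starts as all padding, then the i-th sequence overwrites its prefix.
  std::vector<std::vector<int>> out(sequences.size(),
                                    std::vector<int>(max_len, padding_value));
  for (size_t i = 0; i < sequences.size(); i++) {
    std::copy(sequences[i].begin(), sequences[i].end(), out[i].begin());
  }

  for (const auto& row : out) {
    for (int v : row) std::cout << v << " ";
    std::cout << "\n";
  }
}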

View File

@ -2,7 +2,6 @@
#include <ATen/NativeFunctions.h>
#include <ATen/div_rtn.h>
#include <ATen/native/DispatchStub.h>
#include <c10/util/irange.h>
#pragma once
@ -213,7 +212,7 @@ pool3d_shape_check(
TORCH_CHECK(ndim == 4 || ndim == 5,
fn_name, ": Expected 4D or 5D tensor for input, but got: ", input.sizes());
for (const auto i : c10::irange(1, ndim)) {
for (int64_t i = 1; i < ndim; ++i) {
TORCH_CHECK(input.size(i) > 0,
fn_name, "Expected input to have non-zero size for non-batch dimensions, but got",
input.sizes(), " with dimension ", i, " being empty.");

View File

@ -206,9 +206,9 @@ void CalcColOffsetsTranspose(
const int8_t* Bint8,
int32_t B_zero_point,
int32_t* col_offsets) {
for (const auto i : c10::irange(N)) {
for (int i = 0; i < N; ++i) {
int32_t sum = 0;
for (const auto j : c10::irange(K)) {
for (int j = 0; j < K; ++j) {
sum += Bint8[i * K + j];
}
col_offsets[i] = sum - B_zero_point * K;
@ -353,7 +353,7 @@ bool CheckAndSaturate(T max_val, T* element) {
void HandleWeightsSaturation(int64_t N, float* weight) {
const float kFp16Max = RawUint16ToFp16(0x7BFF);
bool found_out_of_range = false;
for (const auto i : c10::irange(N)) {
for (int64_t i = 0; i < N; ++i) {
if (CheckAndSaturate<float>(kFp16Max, weight + i)) {
found_out_of_range = true;
}
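
The last hunk computes per-column offsets for a quantized matmul: with B stored transposed as N rows of K int8 weights, each offset is the row sum minus B_zero_point * K, which later corrects for the weight zero-point. A standalone sketch with made-up numbers:

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  const int N = 2, K = 3;
  const int32_t B_zero_point = 1;
  // B stored transposed: N rows of K quantized weights.
  std::vector<int8_t> Bint8 = {1, 2, 3,  4, 5, 6};

  std::vector<int32_t> col_offsets(N);
  for (int i = 0; i < N; ++i) {
    int32_t sum = 0;
    for (int j = 0; j < K; ++j) {
      sum += Bint8[i * K + j];
    }
    col_offsets[i] = sum - B_zero_point * K;  // remove the zero-point contribution
  }

  std::cout << col_offsets[0] << " " << col_offsets[1] << "\n";  // 3 12
}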

Some files were not shown because too many files have changed in this diff.