use irange for loops 2 (#66746)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/66746

Modified loops in files under fbsource/fbcode/caffe2/ from the format

`for(TYPE var=x0;var<x_max;var++)`

to the format

`for(const auto var: irange(x_max))`

This was achieved by running r-barnes's loop upgrader script (D28874212), modified to exclude all files under /torch/jit, plus a number of hand-applied reversions and unused-variable-warning suppressions.
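
For reference, here is a minimal, self-contained sketch of the pattern the script applies. It is illustrative only: the loop bodies and variable names are made up, and it assumes `<c10/util/irange.h>` is on the include path, as it is throughout this tree.

```cpp
#include <c10/util/irange.h>

#include <cstdint>
#include <cstdio>

int main() {
  const int64_t n = 4;

  // Before: hand-written index loop; the index type is chosen by the author.
  for (int64_t i = 0; i < n; ++i) {
    std::printf("old-style index %lld\n", static_cast<long long>(i));
  }

  // After: c10::irange(n) yields 0, 1, ..., n-1 and deduces the index type from n.
  for (const auto i : c10::irange(n)) {
    std::printf("irange index %lld\n", static_cast<long long>(i));
  }

  // The two-argument form covers the half-open range [start, end), which is how
  // the at::parallel_for chunks in this diff are converted.
  for (const auto i : c10::irange(int64_t{1}, n)) {
    std::printf("irange(1, n) index %lld\n", static_cast<long long>(i));
  }
  return 0;
}
```

Where a converted loop never reads its index, a `(void)var;` statement is added to suppress the resulting unused-variable warning, as in several of the hunks below.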

Test Plan: Sandcastle

Reviewed By: malfet

Differential Revision: D31705361

fbshipit-source-id: 33fd22eb03086d114e2c98e56703e8ec84460268
Authored by Richard Barnes on 2021-12-10 04:24:48 -08:00; committed by Facebook GitHub Bot
parent 91d16cb633
commit 29d759948e
96 changed files with 19710 additions and 19683 deletions

View File

@ -303,7 +303,7 @@ at::Tensor PackedLinearWeightsQnnp::apply_impl(
w_zero_points[0]);
auto* qnnp_w_data = qnnp_weight.data_ptr<c10::quint8>();
auto wt_numel = weight_contig.numel();
for (int i = 0; i < wt_numel; ++i) {
for (const auto i : c10::irange(wt_numel)) {
qnnp_w_data[i] = static_cast<c10::quint8>(w_data[i] + 128);
}
// Original bias was float, so we requantize it here.

View File

@ -301,7 +301,7 @@ at::Tensor PackedLinearWeightsQnnp::apply_dynamic_impl(
auto* qnnp_w_data = qnnp_weight.data_ptr<c10::quint8>();
int8_t* w_data = (int8_t*)weight_contig.data_ptr<c10::qint8>();
auto wt_numel = weight_contig.numel();
for (int i = 0; i < wt_numel; ++i) {
for (const auto i : c10::irange(wt_numel)) {
qnnp_w_data[i] = static_cast<c10::quint8>(w_data[i] + 128);
}

View File

@ -9,6 +9,7 @@
#include <ATen/native/quantized/cpu/quantized_ops.h>
#include <ATen/native/quantized/cpu/init_qnnpack.h>
#include <ATen/native/quantized/cpu/qnnpack_utils.h>
#include <c10/util/irange.h>
#include <caffe2/utils/threadpool/pthreadpool-cpp.h>
#include <algorithm>
@ -43,7 +44,7 @@ void spatial_dilated_max_pooling(
int64_t dW, // dilation
T* oData) { // output arrays (data and max-index)
at::parallel_for(0, iC, 0, [&](int64_t start, int64_t end) {
for (auto p = start; p < end; ++p) {
for (const auto p : c10::irange(start, end)) {
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
int64_t row, col;
const T* i_p = iData + p * iW * iH;
@ -195,7 +196,7 @@ Tensor q_maxpool_2d(
oData);
} else {
at::parallel_for(0, nbatch, 0, [&](int64_t start, int64_t end) {
for (auto p = start; p < end; ++p) {
for (const auto p : c10::irange(start, end)) {
auto* iData = qxd + p * iC * iW * iH;
auto* oData = qyd + p * oC * oW * oH;
spatial_dilated_max_pooling<Q>(

View File

@ -6,6 +6,7 @@
#include <ATen/native/quantized/cpu/init_qnnpack.h>
#include <ATen/native/quantized/cpu/qnnpack_utils.h>
#include <ATen/native/quantized/cpu/quantized_ops.h>
#include <c10/util/irange.h>
#include <caffe2/utils/threadpool/pthreadpool-cpp.h>
#include <torch/library.h>
@ -30,7 +31,7 @@ Tensor qnnpack_relu(Tensor input) {
initQNNPACK();
size_t num_elems = 1;
for (int i = 1; i < input_contig.ndimension(); ++i) {
for (const auto i : c10::irange(1, input_contig.ndimension())) {
num_elems *= input_contig.size(i);
}

View File

@ -7,6 +7,7 @@
#include <ATen/native/quantized/cpu/quantized_ops.h>
#include <ATen/native/quantized/cpu/init_qnnpack.h>
#include <ATen/native/quantized/cpu/qnnpack_utils.h>
#include <c10/util/irange.h>
#include <caffe2/utils/threadpool/pthreadpool-cpp.h>
#include <algorithm>
@ -26,7 +27,7 @@ Tensor qnnpack_sigmoid(
Tensor input_contig = input.contiguous(input.suggest_memory_format());
size_t num_elems = 1;
for (int i = 1; i < input_contig.ndimension(); ++i) {
for (const auto i : c10::irange(1, input_contig.ndimension())) {
num_elems *= input_contig.size(i);
}

View File

@ -7,6 +7,7 @@
#include <ATen/native/quantized/cpu/quantized_ops.h>
#include <ATen/native/quantized/cpu/init_qnnpack.h>
#include <ATen/native/quantized/cpu/qnnpack_utils.h>
#include <c10/util/irange.h>
#include <caffe2/utils/threadpool/pthreadpool-cpp.h>
#include <algorithm>
@ -29,7 +30,7 @@ Tensor qnnpack_tanh(Tensor input) {
Tensor input_contig = input.contiguous(input.suggest_memory_format());
size_t num_elems = 1;
for (int i = 1; i < input_contig.ndimension(); ++i) {
for (const auto i : c10::irange(1, input_contig.ndimension())) {
num_elems *= input_contig.size(i);
}
const auto zero_point = input_contig.q_zero_point();

View File

@ -1,6 +1,7 @@
#pragma once
#include <ATen/ATen.h>
#include <c10/util/irange.h>
#include <algorithm>
#include <cmath>
@ -193,7 +194,7 @@ static C10_UNUSED torch::List<int64_t> MakeArgForConv1d(const torch::List<int64_
inline void HandleWeightsSaturation(int64_t N, float* weight) {
const float kFp16Max = RawUint16ToFp16(0x7BFF);
bool found_out_of_range = false;
for (int64_t i = 0; i < N; ++i) {
for (const auto i : c10::irange(N)) {
bool saturate = CheckAndSaturate<float>(kFp16Max, weight + i);
if (saturate) {
found_out_of_range = true;

View File

@ -2,6 +2,7 @@
#include <ATen/native/UpSample.h>
#include <ATen/native/quantized/affine_quantizer.h>
#include <ATen/native/quantized/cpu/quantized_ops.h>
#include <c10/util/irange.h>
#include <algorithm>
#include <cmath>
@ -57,7 +58,7 @@ static void upsample_bilinear2d_out_frame(
const int64_t input_q_zero_point = input.q_zero_point();
const int64_t output_q_zero_point = output.q_zero_point();
for (int64_t h2 = 0; h2 < output_height; ++h2) {
for (const auto h2 : c10::irange(output_height)) {
const auto h1r = area_pixel_compute_source_index<float>(
rheight, h2, align_corners, /*cubic=*/false);
@ -67,7 +68,7 @@ static void upsample_bilinear2d_out_frame(
const float h1lambda = h1r - h1;
const float h0lambda = static_cast<float>(1.) - h1lambda;
for (int64_t w2 = 0; w2 < output_width; ++w2) {
for (const auto w2 : c10::irange(output_width)) {
const auto w1r = area_pixel_compute_source_index<float>(
rwidth, w2, align_corners, /*cubic=*/false);
@ -79,7 +80,8 @@ static void upsample_bilinear2d_out_frame(
const typename scalar_t::underlying* pos1 = i_p + h1 * input_width + w1;
typename scalar_t::underlying* pos2 = o_p + h2 * output_width + w2;
for (int64_t c = 0; c < channels; ++c) {
for (const auto c : c10::irange(channels)) {
(void)c; //Suppress unused variable warning
float result = h0lambda * (w0lambda * pos1[0] + w1lambda * pos1[w1p]) +
h1lambda *
(w0lambda * pos1[h1p * input_width] +

View File

@ -44,18 +44,19 @@ static void upsample_nearest2d_out_frame(
return;
}
for (int64_t h2 = 0; h2 < output_height; ++h2) {
for (const auto h2 : c10::irange(output_height)) {
const int64_t h1 =
nn_compute_source_index_fn(height_scale, h2, input_height);
for (int64_t w2 = 0; w2 < output_width; ++w2) {
for (const auto w2 : c10::irange(output_width)) {
const int64_t w1 =
nn_compute_source_index_fn(width_scale, w2, input_width);
const auto* pos1 = &i_p[h1 * input_width + w1];
auto* pos2 = &o_p[h2 * output_width + w2];
for (int64_t c = 0; c < channels; ++c) {
for (const auto c : c10::irange(channels)) {
(void)c; //Suppress unused variable warning
pos2[0] = pos1[0];
pos1 += input_height * input_width;
pos2 += output_height * output_width;
@ -88,11 +89,11 @@ static void upsample_nearest2d_out_frame_nhwc(
return;
}
for (int64_t h2 = 0; h2 < output_height; ++h2) {
for (const auto h2 : c10::irange(output_height)) {
const int64_t h1 =
nn_compute_source_index_fn(height_scale, h2, input_height);
for (int64_t w2 = 0; w2 < output_width; ++w2) {
for (const auto w2 : c10::irange(output_width)) {
const int64_t w1 =
nn_compute_source_index_fn(width_scale, w2, input_width);

View File

@ -48,22 +48,23 @@ static void upsample_nearest3d_out_frame(
return;
}
for (int64_t d2 = 0; d2 < output_depth; ++d2) {
for (const auto d2 : c10::irange(output_depth)) {
const int64_t d1 =
nn_compute_source_index_fn(depth_scale, d2, input_depth);
for (int64_t h2 = 0; h2 < output_height; ++h2) {
for (const auto h2 : c10::irange(output_height)) {
const int64_t h1 =
nn_compute_source_index_fn(height_scale, h2, input_height);
for (int64_t w2 = 0; w2 < output_width; ++w2) {
for (const auto w2 : c10::irange(output_width)) {
const int64_t w1 =
nn_compute_source_index_fn(width_scale, w2, input_width);
const auto* pos1 = &i_p[d1 * input_height * input_width + h1 * input_width + w1];
auto* pos2 = &o_p[d2 * output_height * output_width + h2 * output_width + w2];
for (int64_t c = 0; c < channels; ++c) {
for (const auto c : c10::irange(channels)) {
(void)c; //Suppress unused variable warning
pos2[0] = pos1[0];
pos1 += input_depth * input_height * input_width;
pos2 += output_depth * output_height * output_width;
@ -101,14 +102,14 @@ static void upsample_nearest3d_out_frame_nhwc(
return;
}
for (int64_t d2 = 0; d2 < output_depth; ++d2) {
for (const auto d2 : c10::irange(output_depth)) {
const int64_t d1 =
nn_compute_source_index_fn(depth_scale, d2, input_depth);
for (int64_t h2 = 0; h2 < output_height; ++h2) {
for (const auto h2 : c10::irange(output_height)) {
const int64_t h1 =
nn_compute_source_index_fn(height_scale, h2, input_height);
for (int64_t w2 = 0; w2 < output_width; ++w2) {
for (const auto w2 : c10::irange(output_width)) {
const int64_t w1 =
nn_compute_source_index_fn(width_scale, w2, input_width);

View File

@ -218,7 +218,7 @@ std::tuple<Tensor, Tensor, Tensor> _fake_quantize_learnable_per_channel_affine_b
// into the same shapes as X along the channel axis.
// NOLINTNEXTLINE(cppcoreguidelines-no-malloc)
int64_t* axis_mask = (int64_t *) calloc(numDimensions, sizeof(int64_t));
for (int i = 0; i < numDimensions; ++i) {
for (const auto i : c10::irange(numDimensions)) {
axis_mask[i] = (i == axis) ? X.size(axis) : 1;
}
auto X_shape = X.sizes();

View File

@ -7,6 +7,7 @@
#include <ATen/Parallel.h>
#include <ATen/SparseTensorUtils.h>
#include <c10/util/accumulate.h>
#include <c10/util/irange.h>
#include <map>
@ -71,9 +72,9 @@ std::vector<int64_t> get_offsets(const Tensor& indices, const IntArrayRef& sizes
}
}
for (int64_t i=0; i < nnz; i++) {
for (const auto i : c10::irange(nnz)) {
int64_t acc = 0;
for (int64_t j=0; j < ndim; j++) {
for (const auto j : c10::irange(ndim)) {
auto indices_row = indices_accessor[j];
auto stride = strides[j];
if (j != dim) {
@ -119,9 +120,9 @@ std::vector<std::vector<int64_t>> get_pools(const Tensor& indices, const IntArra
}
}
for (int64_t i=0; i < nnz; i++) {
for (const auto i : c10::irange(nnz)) {
int64_t pool_index = 0;
for (int64_t j=0; j < ndim; j++) {
for (const auto j : c10::irange(ndim)) {
if (j != dim) {
const auto indices_row = indices_accessor[j];
const auto stride = strides[j];
@ -315,7 +316,7 @@ void cpu_sparse_coo_softmax(Tensor output, const Tensor& input, const int64_t di
int64_t grain_size = 1;
parallel_for(0, pools.size(), grain_size, [&](int64_t begin, int64_t end) {
for (auto p = begin; p < end; p++) {
for (const auto p : c10::irange(begin, end)) {
auto pool_indices = pools[p];
// Skip empty pools
@ -329,7 +330,7 @@ void cpu_sparse_coo_softmax(Tensor output, const Tensor& input, const int64_t di
/* Compute mx */
for (int64_t i : pool_indices) {
auto values_row = values_accessor[i];
for (int64_t j=0; j < nvalues; j++) {
for (const auto j : c10::irange(nvalues)) {
mx_row[j] = std::max(mx_row[j], values_row[j]);
}
}
@ -338,7 +339,7 @@ void cpu_sparse_coo_softmax(Tensor output, const Tensor& input, const int64_t di
for (int64_t i : pool_indices) {
auto values_row = values_accessor[i];
auto out_values_row = out_values_accessor[i];
for (int64_t j=0; j < nvalues; j++) {
for (const auto j : c10::irange(nvalues)) {
auto v = std::exp(values_row[j] - mx_row[j]);
if (!LogSoftMax) {
out_values_row[j] = v;
@ -347,7 +348,7 @@ void cpu_sparse_coo_softmax(Tensor output, const Tensor& input, const int64_t di
}
}
for (int64_t j=0; j < nvalues; j++) {
for (const auto j : c10::irange(nvalues)) {
if (LogSoftMax) {
mx_row[j] += std::log(exp_sums_row[j]);
} else {
@ -359,7 +360,7 @@ void cpu_sparse_coo_softmax(Tensor output, const Tensor& input, const int64_t di
for (int64_t i : pool_indices) {
auto values_row = values_accessor[i];
auto out_values_row = out_values_accessor[i];
for (int64_t j=0; j < nvalues; j++) {
for (const auto j : c10::irange(nvalues)) {
if (LogSoftMax) {
out_values_row[j] = values_row[j] - mx_row[j];
} else {
@ -421,7 +422,7 @@ void cpu_sparse_coo_softmax_backward(const Tensor& grad_input, const Tensor& gra
values.set_(r);
}
} else {
for(int64_t i=0; i<out_nnz; i++) {
for (const auto i : c10::irange(out_nnz)) {
auto low = std::lower_bound(grad_offsets.begin(), grad_offsets.end(), out_offsets[i]);
auto j = low - grad_offsets.begin();
if (j < grad_nnz && out_offsets[i] == grad_offsets[j]) {
@ -456,7 +457,7 @@ void cpu_sparse_coo_softmax_backward(const Tensor& grad_input, const Tensor& gra
int64_t grain_size = 1;
parallel_for(0, pools.size(), grain_size, [&](int64_t begin, int64_t end) {
for (auto p = begin; p < end; p++) {
for (const auto p : c10::irange(begin, end)) {
auto pool_indices = pools[p];
// Skip empty pools
@ -473,7 +474,7 @@ void cpu_sparse_coo_softmax_backward(const Tensor& grad_input, const Tensor& gra
if (j < grad_nnz && (out_offsets[i] == grad_offsets[j])) {
auto grad_values_row = grad_values_accessor[j];
for (int64_t k=0; k<nvalues; k++) {
for (const auto k : c10::irange(nvalues)) {
if (LogSoftMax) {
tmp_row[k] -= grad_values_row[k];
} else {
@ -492,7 +493,7 @@ void cpu_sparse_coo_softmax_backward(const Tensor& grad_input, const Tensor& gra
if (j < grad_nnz && (out_offsets[i] == grad_offsets[j])) {
auto grad_values_row = grad_values_accessor[j];
for (int64_t k=0; k<nvalues; k++) {
for (const auto k : c10::irange(nvalues)) {
if (LogSoftMax) {
values_row[k] = grad_values_row[k] + std::exp(out_values_row[k]) * tmp_row[k];
} else {
@ -500,7 +501,7 @@ void cpu_sparse_coo_softmax_backward(const Tensor& grad_input, const Tensor& gra
}
}
} else {
for (int64_t k=0; k<nvalues; k++) {
for (const auto k : c10::irange(nvalues)) {
if (LogSoftMax) {
values_row[k] = std::exp(out_values_row[k]) * tmp_row[k];
} else {

View File

@ -13,6 +13,7 @@
#include <ATen/native/Resize.h>
#include <ATen/native/mkl/SparseBlasImpl.h>
#include <ATen/native/sparse/SparseBlasImpl.h>
#include <c10/util/irange.h>
#include <algorithm>
@ -60,7 +61,7 @@ void convert_indices_from_coo_to_csr_cpu(const Tensor& result, const Tensor& inp
at::parallel_for(0, numel - 1, GRAIN_SIZE, [&](int64_t start, int64_t end) {
input_t curr_value = data_in[start], next_value;
for (int64_t i = start; i < end; i++) {
for (const auto i : c10::irange(start, end)) {
next_value = data_in[i + 1];
for (; curr_value < next_value; curr_value++)
data_out[curr_value + 1] = static_cast<output_t>(i + 1);

View File

@ -5,6 +5,7 @@
#include <ATen/SparseTensorImpl.h>
#include <ATen/SparseTensorUtils.h>
#include <ATen/native/Resize.h>
#include <c10/util/irange.h>
#include <unordered_map>
namespace at { namespace native {
@ -30,7 +31,7 @@ void csr_to_coo(const int64_t n_row, const int64_t Ap[], int64_t Bi[]) {
Output:
`Bi` is the row indices
*/
for (int64_t i = 0; i < n_row; i++) {
for (const auto i : c10::irange(n_row)) {
for (int64_t jj = Ap[i]; jj < Ap[i + 1]; jj++) {
Bi[jj] = i;
}
@ -56,7 +57,7 @@ int64_t _csr_matmult_maxnnz(
*/
std::vector<int64_t> mask(n_col, -1);
int64_t nnz = 0;
for (int64_t i = 0; i < n_row; i++) {
for (const auto i : c10::irange(n_row)) {
int64_t row_nnz = 0;
for (int64_t jj = Ap[i]; jj < Ap[i + 1]; jj++) {
@ -127,19 +128,19 @@ void _csr_matmult(
Cp[0] = 0;
for (int64_t i = 0; i < n_row; i++) {
for (const auto i : c10::irange(n_row)) {
int64_t head = -2;
int64_t length = 0;
int64_t jj_start = Ap[i];
int64_t jj_end = Ap[i + 1];
for (int64_t jj = jj_start; jj < jj_end; jj++) {
for (const auto jj : c10::irange(jj_start, jj_end)) {
int64_t j = Aj[jj];
scalar_t v = Ax[jj];
int64_t kk_start = Bp[j];
int64_t kk_end = Bp[j + 1];
for (int64_t kk = kk_start; kk < kk_end; kk++) {
for (const auto kk : c10::irange(kk_start, kk_end)) {
int64_t k = Bj[kk];
sums[k] += v * Bx[kk];
@ -152,7 +153,8 @@ void _csr_matmult(
}
}
for (int64_t jj = 0; jj < length; jj++) {
for (const auto jj : c10::irange(length)) {
(void)jj; //Suppress unused variable warning
Cj[nnz] = head;
Cx[nnz] = sums[head];
nnz++;

View File

@ -12,6 +12,7 @@
#include <ATen/native/Copy.h>
#include <ATen/native/CPUBlas.h>
#include <c10/util/irange.h>
namespace at {
namespace native {
@ -229,7 +230,7 @@ Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values_,
auto cpu_min_indices_accessor = cpu_min_indices.accessor<int64_t, 1>();
auto cpu_computed_indices_sizes_accessor =
cpu_computed_indices_sizes.accessor<int64_t, 1>();
for (int64_t d = 0; d < sparse_dim; d++) {
for (const auto d : c10::irange(sparse_dim)) {
int64_t min_index_in_dim = cpu_min_indices_accessor[d];
TORCH_CHECK(
min_index_in_dim >= 0,
@ -244,11 +245,11 @@ Tensor sparse_coo_tensor(const Tensor& indices, const Tensor& values_,
// If the indices doesn't have elements in it, there is not enough
// information to know what the minimum sparse dimension sizes should be,
// and in this case we set them to 0
for (int64_t d = 0; d < sparse_dim; d++) {
for (const auto d : c10::irange(sparse_dim)) {
computed_sizes[static_cast<size_t>(d)] = 0;
}
}
for (int64_t d = 0; d < dense_dim; d++) {
for (const auto d : c10::irange(dense_dim)) {
computed_sizes[static_cast<size_t>(sparse_dim + d)] = values.size(d + 1);
}
@ -305,7 +306,7 @@ void _validate_sparse_coo_tensor_args(
}
auto cpu_min_indices_accessor = cpu_min_indices.accessor<int64_t, 1>();
auto cpu_max_indices_accessor = cpu_max_indices.accessor<int64_t, 1>();
for (int64_t d = 0; d < sparse_dim; d++) {
for (const auto d : c10::irange(sparse_dim)) {
// NB: This used to sync ndim times to access each entry; now we copy
// everything to CPU first and then access it.
int64_t min_index_in_dim = cpu_min_indices_accessor[d];
@ -597,7 +598,7 @@ SparseTensor _coalesce_sparse_cpu(const SparseTensor& self) {
int64_t blockSize = values.stride(0);
scalar_t* values_ptr = values.data_ptr<scalar_t>();
scalar_t* newValues_ptr = newValues.data_ptr<scalar_t>();
for (int64_t j = 0; j < nnz; j++) {
for (const auto j : c10::irange(nnz)) {
int64_t pos = indicesPermutationAccessor[j];
int64_t curr = indicesBufferAccessor[j];
if (curr == prev) {
@ -613,7 +614,7 @@ SparseTensor _coalesce_sparse_cpu(const SparseTensor& self) {
}
} else {
++i;
for (int64_t d = 0; d < sparse_dim; d++) {
for (const auto d : c10::irange(sparse_dim)) {
newIndicesAccessor[d][i] = indicesAccessor[d][pos];
}
if (values.numel() >
@ -656,9 +657,9 @@ void inline sparse_mask_out_cpu_kernel(
auto t_strides = t.strides();
at::parallel_for(0, r_nnz, 1000, [&](int64_t start, int64_t end) {
for (auto i = start; i < end; i++) {
for (const auto i : c10::irange(start, end)) {
int64_t idx = 0;
for (int64_t d = 0; d < sparse_dim; d++) {
for (const auto d : c10::irange(sparse_dim)) {
idx += mask_indices_accessor[d][i] * t_strides[d];
}
r_values_accessor[i] = t_ptr[idx];
@ -706,14 +707,14 @@ SparseTensor& sparse_mask_out_cpu(
// ]. Keeping this implementation because it is faster than
// flatten_indices()
Tensor indices = at::zeros({mask._nnz()}, mask_indices.options());
for (int64_t d = 0; d < mask.sparse_dim(); d++) {
for (const auto d : c10::irange(mask.sparse_dim())) {
indices.mul_(mask.size(d));
indices.add_(mask_indices.select(0, d));
}
std::vector<int64_t> view_size(1 + mask.dense_dim());
view_size[0] = -1;
for (int64_t d = 0; d < mask.dense_dim(); d++) {
for (const auto d : c10::irange(mask.dense_dim())) {
view_size[d + 1] = mask.size(mask.sparse_dim() + d);
}
@ -777,7 +778,7 @@ Tensor sparse_mask_helper_cpu(
// Step 1: flatten the sparse indices `t._indices()` tensor and then map this
// flatten value `index` to the original position `i`
for (int64_t i = 0; i < t_nnz; i++) {
for (const auto i : c10::irange(t_nnz)) {
int64_t index = ti_flattened_indices.data_ptr<int64_t>()[i];
t_flatten_indices[index] = i;
}
@ -802,7 +803,7 @@ Tensor sparse_mask_helper_cpu(
const auto r_values_stride = r_values.strides()[0] * r_values.element_size();
const auto t_values_stride = t_v.strides()[0] * t_v.element_size();
for (auto i = start; i < end; i++) {
for (const auto i : c10::irange(start, end)) {
int64_t index = flattened_mask_indices.data_ptr<int64_t>()[i];
auto iter = t_flatten_indices.find(index);
if (iter != t_flatten_indices.end()) {

View File

@ -601,9 +601,9 @@ Tensor& add_out_dense_sparse_cpu(Tensor& r, const Tensor& dense, const SparseTen
// accessors rely on nnz test
if (nDim > nDimI) {
auto indices_accessor = indices.accessor<int64_t, 2>();
for (int64_t k = 0; k < sparse._nnz(); k++) {
for (const auto k : c10::irange(sparse._nnz())) {
Tensor dstBuffer = resultBuffer;
for (int64_t d = 0; d < sparse.sparse_dim(); d++) {
for (const auto d : c10::irange(sparse.sparse_dim())) {
dstBuffer = dstBuffer.select(0, indices_accessor[d][k]);
}
Tensor srcBuffer = valuesBuffer.select(0, k);
@ -970,7 +970,7 @@ SparseTensor& hspmm_out_sparse_cpu(const SparseTensor& sparse_, const Tensor& de
auto indices_accessor = indices.accessor<int64_t, 2>();
int64_t i = -1, prevIdx = -1;
for (int64_t j = 0; j < nnz; j++) {
for (const auto j : c10::irange(nnz)) {
int64_t currIdx = valueIndices_accessor[j];
if (currIdx != prevIdx) {
indices_accessor[0][++i] = currIdx;
@ -1086,10 +1086,10 @@ SparseTensor& _sspaddmm_out_cpu(
scalar_t* newv_ptr = newv.data_ptr<scalar_t>();
scalar_t cast_alpha = alpha.to<scalar_t>();
for (int64_t h = 0; h < dim_i; h++) {
for (const auto h : c10::irange(dim_i)) {
int64_t i_start = csr_accessor[h];
int64_t i_end = csr_accessor[h+1];
for (int64_t i = i_start; i < i_end; i++) {
for (const auto i : c10::irange(i_start, i_end)) {
scalar_t val = values_accessor[i];
int64_t col = indices_accessor[1][i];
if (col >= 0 && col < dim_j) {
@ -1103,7 +1103,7 @@ SparseTensor& _sspaddmm_out_cpu(
}
// Fill up the indices with the right values
if (i_start != i_end) {
for (int64_t i = 0; i < dim_k; i++) {
for (const auto i : c10::irange(dim_k)) {
newi_accessor[0][p+i] = h;
newi_accessor[1][p+i] = i;
}
@ -1178,7 +1178,7 @@ Tensor _sparse_sum(const SparseTensor& input, IntArrayRef dims_to_sum) {
auto dims_to_keep_v = std::vector<int64_t>();
auto dense_dims_to_sum_v = std::vector<int64_t>();
for (int64_t d = 0; d < input_dim; d++) {
for (const auto d : c10::irange(input_dim)) {
if (dims_to_sum_b[d]) {
if (d >= sparse_dim) dense_dims_to_sum_v.emplace_back(d + 1 - sparse_dim);
}

View File

@ -3,6 +3,7 @@
#include <ATen/SparseTensorUtils.h>
#include <ATen/cuda/CUDAUtils.h>
#include <c10/util/irange.h>
namespace at { namespace native {
@ -34,7 +35,7 @@ SparseTensor& sparse_mask_out_cuda(SparseTensor& r, const Tensor& t, const Spars
// Get a flattened sparse indices, similar to NOTE [ Flatten Sparse Indices ].
// Keeping this implementation because it is faster than flatten_indices()
Tensor indices = at::zeros({mask._nnz()}, mask_indices.options());
for (int64_t d = 0; d < mask.sparse_dim(); d++) {
for (const auto d : c10::irange(mask.sparse_dim())) {
indices.mul_(mask.size(d));
// This used to use a buffer but I deoptimized it
indices.add_(mask_indices.select(0, d));
@ -42,7 +43,7 @@ SparseTensor& sparse_mask_out_cuda(SparseTensor& r, const Tensor& t, const Spars
std::vector<int64_t> view_size(1 + mask.dense_dim());
view_size[0] = -1;
for (int64_t d = 0; d < mask.dense_dim(); d++) {
for (const auto d : c10::irange(mask.dense_dim())) {
view_size[d + 1] = mask.size(mask.sparse_dim() + d);
}

View File

@ -17,7 +17,7 @@ struct ParamsHash {
size_t operator()(const Params& params) const {
auto ptr = reinterpret_cast<const uint8_t*>(&params);
uint32_t value = 0x811C9DC5;
for (int i = 0; i < (int)sizeof(Params); ++i) {
for (const auto i : c10::irange((int)sizeof(Params))) {
value ^= ptr[i];
value *= 0x01000193;
}

View File

@ -2,6 +2,7 @@
#include <c10/util/accumulate.h>
#include <c10/util/ArrayRef.h>
#include <c10/util/Exception.h>
#include <c10/util/irange.h>
#ifdef USE_VULKAN_WRAPPER
#include <vulkan_wrapper.h>
@ -192,7 +193,7 @@ uint32_t VContext::getComputeQueueFamilyIndex() {
vkGetPhysicalDeviceQueueFamilyProperties(
physicalDevice_, &queueFamilyCount, queueFamilies.data());
for (uint32_t i = 0; i < queueFamilies.size(); ++i) {
for (const auto i : c10::irange(queueFamilies.size())) {
VkQueueFamilyProperties props = queueFamilies[i];
if (props.queueCount > 0 && (props.queueFlags & VK_QUEUE_COMPUTE_BIT)) {
return i;
@ -274,7 +275,7 @@ uint32_t findMemoryType(
const VkMemoryPropertyFlags properties) {
VkPhysicalDeviceMemoryProperties memoryProperties{};
vkGetPhysicalDeviceMemoryProperties(physicalDevice, &memoryProperties);
for (uint32_t i = 0; i < memoryProperties.memoryTypeCount; ++i) {
for (const auto i : c10::irange(memoryProperties.memoryTypeCount)) {
if ((memoryTypeBits & (1 << i)) &&
((memoryProperties.memoryTypes[i].propertyFlags & properties) ==
properties)) {

View File

@ -9,6 +9,7 @@
#include <ATen/native/vulkan/VulkanOpaqueTensorImpl.h>
#include <ATen/native/vulkan/VulkanOps.h>
#include <ATen/vulkan/Context.h>
#include <c10/util/irange.h>
namespace at {
namespace native {
@ -265,13 +266,13 @@ Tensor cat(const TensorList tensors, int64_t dim) {
int64_t cat_dim_size = 0;
std::vector<VulkanTensor> vTensors{};
for (int i = 0; i < tensors.size(); ++i) {
for (const auto i : c10::irange(tensors.size())) {
const auto& t = tensors[i];
TORCH_INTERNAL_ASSERT(
t.dim() == 4, "Vulkan cat expects 4 dimensional inputs");
TORCH_INTERNAL_ASSERT(t.is_vulkan(), "Vulkan cat expects Vulkan inputs");
for (int d = 0; d < 4; ++d) {
for (const auto d : c10::irange(4)) {
if (d == dim) {
continue;
}

View File

@ -3,6 +3,7 @@
#include <c10/util/accumulate.h>
#include <c10/util/Exception.h>
#include <c10/util/Optional.h>
#include <c10/util/irange.h>
#include <ATen/native/vulkan/Vulkan.h>
#include <ATen/native/vulkan/VulkanCommon.h>
@ -629,17 +630,17 @@ VBuffer kernelNCHW_OCHW_repack_O4C4HWi4o4(
memset(basePtr, 0, size);
const float* src = weights;
int ridx = 0;
for (int oc = 0; oc < OC; ++oc) {
for (const auto oc : c10::irange(OC)) {
int oc_4 = oc / 4;
int oc_4_i = oc % 4;
float* dst_oc = basePtr + oc_4 * oc_4SizeNumel;
for (int ic = 0; ic < C; ++ic) {
for (const auto ic : c10::irange(C)) {
int ic_4 = ic / 4;
int ic_4_i = ic % 4;
float* dst_ic = dst_oc + ic_4 * KW * KH * 16;
for (int ky = 0; ky < KH; ++ky) {
for (const auto ky : c10::irange(KH)) {
float* dst_ky = dst_ic + ky * KW * 16;
for (int kx = 0; kx < KW; ++kx) {
for (const auto kx : c10::irange(KW)) {
float* dst_kx = dst_ky + kx * 16;
dst_kx[4 * ic_4_i + oc_4_i] = src[ridx++];
}

View File

@ -1,5 +1,6 @@
#include <ATen/native/vulkan/api/Runtime.h>
#include <ATen/native/vulkan/api/Adapter.h>
#include <c10/util/irange.h>
#include <sstream>
@ -244,7 +245,7 @@ uint32_t query_compute_queue_family_index(const VkPhysicalDevice physical_device
&queue_family_count,
queue_families_properties.data());
for (uint32_t i = 0; i < queue_families_properties.size(); ++i) {
for (const auto i : c10::irange(queue_families_properties.size())) {
const VkQueueFamilyProperties& properties = queue_families_properties[i];
if (properties.queueCount > 0 && (properties.queueFlags & VK_QUEUE_COMPUTE_BIT)) {
return i;

View File

@ -1005,8 +1005,7 @@ VmaDefragmentationContext defragCtx;
vmaDefragmentationBegin(allocator, &defragInfo, nullptr, &defragCtx);
vmaDefragmentationEnd(allocator, defragCtx);
for(uint32_t i = 0; i < allocCount; ++i)
{
for (const auto i : c10::irange(allocCount)) {
if(allocationsChanged[i])
{
// Destroy buffer that is immutably bound to memory region which is no longer valid.
@ -1083,8 +1082,7 @@ vkEndCommandBuffer(commandBuffer);
vmaDefragmentationEnd(allocator, defragCtx);
for(uint32_t i = 0; i < allocCount; ++i)
{
for (const auto i : c10::irange(allocCount)) {
if(allocationsChanged[i])
{
// Destroy buffer that is immutably bound to memory region which is no longer valid.
@ -4818,8 +4816,7 @@ T must be pointer type, e.g. VmaAllocation, VmaPool.
template<typename T>
static bool VmaValidatePointerArray(uint32_t count, const T* arr)
{
for(uint32_t i = 0; i < count; ++i)
{
for (const auto i : c10::irange(count)) {
const T iPtr = arr[i];
if(iPtr == VMA_NULL)
{
@ -7459,8 +7456,7 @@ private:
{
FreeSpace s = {};
s.blockInfoIndex = SIZE_MAX;
for(size_t i = 0; i < MAX_COUNT; ++i)
{
for (const auto i : c10::irange(MAX_COUNT)) {
m_FreeSpaces[i] = s;
}
}
@ -7474,8 +7470,7 @@ private:
// Find first invalid or the smallest structure.
size_t bestIndex = SIZE_MAX;
for(size_t i = 0; i < MAX_COUNT; ++i)
{
for (const auto i : c10::irange(MAX_COUNT)) {
// Empty structure.
if(m_FreeSpaces[i].blockInfoIndex == SIZE_MAX)
{
@ -7502,8 +7497,7 @@ private:
{
size_t bestIndex = SIZE_MAX;
VkDeviceSize bestFreeSpaceAfter = 0;
for(size_t i = 0; i < MAX_COUNT; ++i)
{
for (const auto i : c10::irange(MAX_COUNT)) {
// Structure is valid.
if(m_FreeSpaces[i].blockInfoIndex != SIZE_MAX)
{
@ -7846,8 +7840,7 @@ struct VmaCurrentBudgetData
VmaCurrentBudgetData()
{
for(uint32_t heapIndex = 0; heapIndex < VK_MAX_MEMORY_HEAPS; ++heapIndex)
{
for (const auto heapIndex : c10::irange(VK_MAX_MEMORY_HEAPS)) {
m_BlockBytes[heapIndex] = 0;
m_AllocationBytes[heapIndex] = 0;
#if VMA_MEMORY_BUDGET
@ -8447,8 +8440,7 @@ void VmaJsonWriter::ContinueString(const char* pStr)
VMA_ASSERT(m_InsideString);
const size_t strLen = strlen(pStr);
for(size_t i = 0; i < strLen; ++i)
{
for (const auto i : c10::irange(strLen)) {
char ch = pStr[i];
if(ch == '\\')
{
@ -8583,8 +8575,7 @@ void VmaJsonWriter::WriteIndent(bool oneLess)
{
--count;
}
for(size_t i = 0; i < count; ++i)
{
for (const auto i : c10::irange(count)) {
m_SB.Add(INDENT);
}
}
@ -9123,8 +9114,7 @@ bool VmaBlockMetadata_Generic::Validate() const
VMA_VALIDATE(m_FreeSuballocationsBySize.size() == freeSuballocationsToRegister);
VkDeviceSize lastSize = 0;
for(size_t i = 0; i < m_FreeSuballocationsBySize.size(); ++i)
{
for (const auto i : c10::irange(m_FreeSuballocationsBySize.size())) {
VmaSuballocationList::iterator suballocItem = m_FreeSuballocationsBySize[i];
// Only free suballocations can be registered in m_FreeSuballocationsBySize.
@ -10075,8 +10065,7 @@ bool VmaBlockMetadata_Linear::Validate() const
{
const size_t suballoc2ndCount = suballocations2nd.size();
size_t nullItem2ndCount = 0;
for(size_t i = 0; i < suballoc2ndCount; ++i)
{
for (const auto i : c10::irange(suballoc2ndCount)) {
const VmaSuballocation& suballoc = suballocations2nd[i];
const bool currFree = (suballoc.type == VMA_SUBALLOCATION_TYPE_FREE);
@ -10100,8 +10089,7 @@ bool VmaBlockMetadata_Linear::Validate() const
VMA_VALIDATE(nullItem2ndCount == m_2ndNullItemsCount);
}
for(size_t i = 0; i < m_1stNullItemsBeginCount; ++i)
{
for (const auto i : c10::irange(m_1stNullItemsBeginCount)) {
const VmaSuballocation& suballoc = suballocations1st[i];
VMA_VALIDATE(suballoc.type == VMA_SUBALLOCATION_TYPE_FREE &&
suballoc.hAllocation == VK_NULL_HANDLE);
@ -10109,8 +10097,7 @@ bool VmaBlockMetadata_Linear::Validate() const
size_t nullItem1stCount = m_1stNullItemsBeginCount;
for(size_t i = m_1stNullItemsBeginCount; i < suballoc1stCount; ++i)
{
for (const auto i : c10::irange(m_1stNullItemsBeginCount, suballoc1stCount)) {
const VmaSuballocation& suballoc = suballocations1st[i];
const bool currFree = (suballoc.type == VMA_SUBALLOCATION_TYPE_FREE);
@ -11301,10 +11288,7 @@ bool VmaBlockMetadata_Linear::CreateAllocationRequest_LowerAddress(
// If conflict exists, allocation cannot be made here.
if(allocSize % bufferImageGranularity || resultOffset % bufferImageGranularity)
{
for(size_t nextSuballocIndex = index1st;
nextSuballocIndex < suballocations1st.size();
nextSuballocIndex++)
{
for (const auto nextSuballocIndex : c10::irange(index1st, suballocations1st.size())) {
const VmaSuballocation& nextSuballoc = suballocations1st[nextSuballocIndex];
if(VmaBlocksOnSamePage(resultOffset, allocSize, nextSuballoc.offset, bufferImageGranularity))
{
@ -11712,8 +11696,7 @@ void VmaBlockMetadata_Linear::CleanupAfterFree()
{
const size_t nonNullItemCount = suballoc1stCount - nullItem1stCount;
size_t srcIndex = m_1stNullItemsBeginCount;
for(size_t dstIndex = 0; dstIndex < nonNullItemCount; ++dstIndex)
{
for (const auto dstIndex : c10::irange(nonNullItemCount)) {
while(suballocations1st[srcIndex].hAllocation == VK_NULL_HANDLE)
{
++srcIndex;
@ -11817,8 +11800,7 @@ bool VmaBlockMetadata_Buddy::Validate() const
VMA_VALIDATE(m_SumFreeSize == ctx.calculatedSumFreeSize);
// Validate free node lists.
for(uint32_t level = 0; level < m_LevelCount; ++level)
{
for (const auto level : c10::irange(m_LevelCount)) {
VMA_VALIDATE(m_FreeList[level].front == VMA_NULL ||
m_FreeList[level].front->free.prev == VMA_NULL);
@ -11840,8 +11822,7 @@ bool VmaBlockMetadata_Buddy::Validate() const
}
// Validate that free lists ar higher levels are empty.
for(uint32_t level = m_LevelCount; level < MAX_LEVELS; ++level)
{
for (const auto level : c10::irange(m_LevelCount, MAX_LEVELS)) {
VMA_VALIDATE(m_FreeList[level].front == VMA_NULL && m_FreeList[level].back == VMA_NULL);
}
@ -11850,8 +11831,7 @@ bool VmaBlockMetadata_Buddy::Validate() const
VkDeviceSize VmaBlockMetadata_Buddy::GetUnusedRangeSizeMax() const
{
for(uint32_t level = 0; level < m_LevelCount; ++level)
{
for (const auto level : c10::irange(m_LevelCount)) {
if(m_FreeList[level].front != VMA_NULL)
{
return LevelToNodeSize(level);
@ -12668,8 +12648,7 @@ VmaBlockVector::~VmaBlockVector()
VkResult VmaBlockVector::CreateMinBlocks()
{
for(size_t i = 0; i < m_MinBlockCount; ++i)
{
for (const auto i : c10::irange(m_MinBlockCount)) {
VkResult res = CreateBlock(m_PreferredBlockSize, VMA_NULL);
if(res != VK_SUCCESS)
{
@ -12692,8 +12671,7 @@ void VmaBlockVector::GetPoolStats(VmaPoolStats* pStats)
pStats->unusedRangeSizeMax = 0;
pStats->blockCount = blockCount;
for(uint32_t blockIndex = 0; blockIndex < blockCount; ++blockIndex)
{
for (const auto blockIndex : c10::irange(blockCount)) {
const VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex];
VMA_ASSERT(pBlock);
VMA_HEAVY_ASSERT(pBlock->Validate());
@ -12873,8 +12851,7 @@ VkResult VmaBlockVector::AllocatePage(
if(strategy == VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT)
{
// Forward order in m_Blocks - prefer blocks with smallest amount of free space.
for(size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex )
{
for (const auto blockIndex : c10::irange(m_Blocks.size())) {
VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex];
VMA_ASSERT(pCurrBlock);
VkResult res = AllocateFromBlock(
@ -12932,8 +12909,7 @@ VkResult VmaBlockVector::AllocatePage(
{
// Allocate 1/8, 1/4, 1/2 as first blocks.
const VkDeviceSize maxExistingBlockSize = CalcMaxBlockSize();
for(uint32_t i = 0; i < NEW_BLOCK_SIZE_SHIFT_MAX; ++i)
{
for (const auto i : c10::irange(NEW_BLOCK_SIZE_SHIFT_MAX)) {
const VkDeviceSize smallerNewBlockSize = newBlockSize / 2;
if(smallerNewBlockSize > maxExistingBlockSize && smallerNewBlockSize >= size * 2)
{
@ -13013,8 +12989,7 @@ VkResult VmaBlockVector::AllocatePage(
if(strategy == VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT)
{
// Forward order in m_Blocks - prefer blocks with smallest amount of free space.
for(size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex )
{
for (const auto blockIndex : c10::irange(m_Blocks.size())) {
VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex];
VMA_ASSERT(pCurrBlock);
VmaAllocationRequest currRequest = {};
@ -13238,8 +13213,7 @@ VkDeviceSize VmaBlockVector::CalcMaxBlockSize() const
void VmaBlockVector::Remove(VmaDeviceMemoryBlock* pBlock)
{
for(uint32_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex)
{
for (const auto blockIndex : c10::irange(m_Blocks.size())) {
if(m_Blocks[blockIndex] == pBlock)
{
VmaVectorRemove(m_Blocks, blockIndex);
@ -13254,8 +13228,7 @@ void VmaBlockVector::IncrementallySortBlocks()
if(m_Algorithm != VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT)
{
// Bubble sort only until first swap.
for(size_t i = 1; i < m_Blocks.size(); ++i)
{
for (const auto i : c10::irange(1, m_Blocks.size())) {
if(m_Blocks[i - 1]->m_pMetadata->GetSumFreeSize() > m_Blocks[i]->m_pMetadata->GetSumFreeSize())
{
VMA_SWAP(m_Blocks[i - 1], m_Blocks[i]);
@ -13413,8 +13386,7 @@ void VmaBlockVector::ApplyDefragmentationMovesCpu(
// Go over all moves. Mark blocks that are used with BLOCK_FLAG_USED.
const size_t moveCount = moves.size();
for(size_t moveIndex = 0; moveIndex < moveCount; ++moveIndex)
{
for (const auto moveIndex : c10::irange(moveCount)) {
const VmaDefragmentationMove& move = moves[moveIndex];
blockInfo[move.srcBlockIndex].flags |= BLOCK_FLAG_USED;
blockInfo[move.dstBlockIndex].flags |= BLOCK_FLAG_USED;
@ -13448,8 +13420,7 @@ void VmaBlockVector::ApplyDefragmentationMovesCpu(
const VkDeviceSize nonCoherentAtomSize = m_hAllocator->m_PhysicalDeviceProperties.limits.nonCoherentAtomSize;
VkMappedMemoryRange memRange = { VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE };
for(size_t moveIndex = 0; moveIndex < moveCount; ++moveIndex)
{
for (const auto moveIndex : c10::irange(moveCount)) {
const VmaDefragmentationMove& move = moves[moveIndex];
const BlockInfo& srcBlockInfo = blockInfo[move.srcBlockIndex];
@ -13520,8 +13491,7 @@ void VmaBlockVector::ApplyDefragmentationMovesGpu(
// Go over all moves. Mark blocks that are used with BLOCK_FLAG_USED.
const size_t moveCount = moves.size();
for(size_t moveIndex = 0; moveIndex < moveCount; ++moveIndex)
{
for (const auto moveIndex : c10::irange(moveCount)) {
const VmaDefragmentationMove& move = moves[moveIndex];
//if(move.type == VMA_ALLOCATION_TYPE_UNKNOWN)
@ -13560,8 +13530,7 @@ void VmaBlockVector::ApplyDefragmentationMovesGpu(
// Go over all moves. Post data transfer commands to command buffer.
if(pDefragCtx->res == VK_SUCCESS)
{
for(size_t moveIndex = 0; moveIndex < moveCount; ++moveIndex)
{
for (const auto moveIndex : c10::irange(moveCount)) {
const VmaDefragmentationMove& move = moves[moveIndex];
const VmaBlockDefragmentationContext& srcBlockCtx = pDefragCtx->blockContexts[move.srcBlockIndex];
@ -13686,8 +13655,7 @@ void VmaBlockVector::PrintDetailedMap(class VmaJsonWriter& json)
json.WriteString("Blocks");
json.BeginObject();
for(size_t i = 0; i < m_Blocks.size(); ++i)
{
for (const auto i : c10::irange(m_Blocks.size())) {
json.BeginString();
json.ContinueString(m_Blocks[i]->GetId());
json.EndString();
@ -13895,8 +13863,7 @@ void VmaBlockVector::CommitDefragmentations(
size_t VmaBlockVector::CalcAllocationCount() const
{
size_t result = 0;
for(size_t i = 0; i < m_Blocks.size(); ++i)
{
for (const auto i : c10::irange(m_Blocks.size())) {
result += m_Blocks[i]->m_pMetadata->GetAllocationCount();
}
return result;
@ -13928,8 +13895,7 @@ void VmaBlockVector::MakePoolAllocationsLost(
{
VmaMutexLockWrite lock(m_Mutex, m_hAllocator->m_UseMutex);
size_t lostAllocationCount = 0;
for(uint32_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex)
{
for (const auto blockIndex : c10::irange(m_Blocks.size())) {
VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex];
VMA_ASSERT(pBlock);
lostAllocationCount += pBlock->m_pMetadata->MakeAllocationsLost(currentFrameIndex, m_FrameInUseCount);
@ -13948,8 +13914,7 @@ VkResult VmaBlockVector::CheckCorruption()
}
VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex);
for(uint32_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex)
{
for (const auto blockIndex : c10::irange(m_Blocks.size())) {
VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex];
VMA_ASSERT(pBlock);
VkResult res = pBlock->CheckCorruption(m_hAllocator);
@ -13968,8 +13933,7 @@ void VmaBlockVector::AddStats(VmaStats* pStats)
VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex);
for(uint32_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex)
{
for (const auto blockIndex : c10::irange(m_Blocks.size())) {
const VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex];
VMA_ASSERT(pBlock);
VMA_HEAVY_ASSERT(pBlock->Validate());
@ -13998,8 +13962,7 @@ VmaDefragmentationAlgorithm_Generic::VmaDefragmentationAlgorithm_Generic(
{
// Create block info for each block.
const size_t blockCount = m_pBlockVector->m_Blocks.size();
for(size_t blockIndex = 0; blockIndex < blockCount; ++blockIndex)
{
for (const auto blockIndex : c10::irange(blockCount)) {
BlockInfo* pBlockInfo = vma_new(m_hAllocator, BlockInfo)(m_hAllocator->GetAllocationCallbacks());
pBlockInfo->m_OriginalBlockIndex = blockIndex;
pBlockInfo->m_pBlock = m_pBlockVector->m_Blocks[blockIndex];
@ -14197,8 +14160,7 @@ VkResult VmaDefragmentationAlgorithm_Generic::DefragmentRound(
size_t VmaDefragmentationAlgorithm_Generic::CalcBlocksWithNonMovableCount() const
{
size_t result = 0;
for(size_t i = 0; i < m_Blocks.size(); ++i)
{
for (const auto i : c10::irange(m_Blocks.size())) {
if(m_Blocks[i]->m_HasNonMovableAllocations)
{
++result;
@ -14219,8 +14181,7 @@ VkResult VmaDefragmentationAlgorithm_Generic::Defragment(
}
const size_t blockCount = m_Blocks.size();
for(size_t blockIndex = 0; blockIndex < blockCount; ++blockIndex)
{
for (const auto blockIndex : c10::irange(blockCount)) {
BlockInfo* pBlockInfo = m_Blocks[blockIndex];
if(m_AllAllocations)
@ -14325,8 +14286,7 @@ VkResult VmaDefragmentationAlgorithm_Fast::Defragment(
// Sort blocks in order from most destination.
m_BlockInfos.resize(blockCount);
for(size_t i = 0; i < blockCount; ++i)
{
for (const auto i : c10::irange(blockCount)) {
m_BlockInfos[i].origBlockIndex = i;
}
@ -14539,8 +14499,7 @@ VkResult VmaDefragmentationAlgorithm_Fast::Defragment(
void VmaDefragmentationAlgorithm_Fast::PreprocessMetadata()
{
const size_t blockCount = m_pBlockVector->GetBlockCount();
for(size_t blockIndex = 0; blockIndex < blockCount; ++blockIndex)
{
for (const auto blockIndex : c10::irange(blockCount)) {
VmaBlockMetadata_Generic* const pMetadata =
(VmaBlockMetadata_Generic*)m_pBlockVector->GetBlock(blockIndex)->m_pMetadata;
pMetadata->m_FreeCount = 0;
@ -14567,8 +14526,7 @@ void VmaDefragmentationAlgorithm_Fast::PreprocessMetadata()
void VmaDefragmentationAlgorithm_Fast::PostprocessMetadata()
{
const size_t blockCount = m_pBlockVector->GetBlockCount();
for(size_t blockIndex = 0; blockIndex < blockCount; ++blockIndex)
{
for (const auto blockIndex : c10::irange(blockCount)) {
VmaBlockMetadata_Generic* const pMetadata =
(VmaBlockMetadata_Generic*)m_pBlockVector->GetBlock(blockIndex)->m_pMetadata;
const VkDeviceSize blockSize = pMetadata->GetSize();
@ -14778,8 +14736,7 @@ VmaDefragmentationContext_T::~VmaDefragmentationContext_T()
void VmaDefragmentationContext_T::AddPools(uint32_t poolCount, const VmaPool* pPools)
{
for(uint32_t poolIndex = 0; poolIndex < poolCount; ++poolIndex)
{
for (const auto poolIndex : c10::irange(poolCount)) {
VmaPool pool = pPools[poolIndex];
VMA_ASSERT(pool);
// Pools with algorithm other than default are not defragmented.
@ -14817,8 +14774,7 @@ void VmaDefragmentationContext_T::AddAllocations(
VkBool32* pAllocationsChanged)
{
// Dispatch pAllocations among defragmentators. Create them when necessary.
for(uint32_t allocIndex = 0; allocIndex < allocationCount; ++allocIndex)
{
for (const auto allocIndex : c10::irange(allocationCount)) {
const VmaAllocation hAlloc = pAllocations[allocIndex];
VMA_ASSERT(hAlloc);
// DedicatedAlloc cannot be defragmented.
@ -15615,14 +15571,12 @@ void VmaRecorder::WriteConfiguration(
fprintf(m_File, "PhysicalDeviceLimits,nonCoherentAtomSize,%llu\n", devProps.limits.nonCoherentAtomSize);
fprintf(m_File, "PhysicalDeviceMemory,HeapCount,%u\n", memProps.memoryHeapCount);
for(uint32_t i = 0; i < memProps.memoryHeapCount; ++i)
{
for (const auto i : c10::irange(memProps.memoryHeapCount)) {
fprintf(m_File, "PhysicalDeviceMemory,Heap,%u,size,%llu\n", i, memProps.memoryHeaps[i].size);
fprintf(m_File, "PhysicalDeviceMemory,Heap,%u,flags,%u\n", i, memProps.memoryHeaps[i].flags);
}
fprintf(m_File, "PhysicalDeviceMemory,TypeCount,%u\n", memProps.memoryTypeCount);
for(uint32_t i = 0; i < memProps.memoryTypeCount; ++i)
{
for (const auto i : c10::irange(memProps.memoryTypeCount)) {
fprintf(m_File, "PhysicalDeviceMemory,Type,%u,heapIndex,%u\n", i, memProps.memoryTypes[i].heapIndex);
fprintf(m_File, "PhysicalDeviceMemory,Type,%u,propertyFlags,%u\n", i, memProps.memoryTypes[i].propertyFlags);
}
@ -15830,8 +15784,7 @@ VmaAllocator_T::VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo) :
if(pCreateInfo->pHeapSizeLimit != VMA_NULL)
{
for(uint32_t heapIndex = 0; heapIndex < GetMemoryHeapCount(); ++heapIndex)
{
for (const auto heapIndex : c10::irange(GetMemoryHeapCount())) {
const VkDeviceSize limit = pCreateInfo->pHeapSizeLimit[heapIndex];
if(limit != VK_WHOLE_SIZE)
{
@ -15844,8 +15797,7 @@ VmaAllocator_T::VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo) :
}
}
for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex)
{
for (const auto memTypeIndex : c10::irange(GetMemoryTypeCount())) {
const VkDeviceSize preferredBlockSize = CalcPreferredBlockSize(memTypeIndex);
m_pBlockVectors[memTypeIndex] = vma_new(this, VmaBlockVector)(
@ -16747,14 +16699,11 @@ void VmaAllocator_T::CalculateStats(VmaStats* pStats)
{
// Initialize.
InitStatInfo(pStats->total);
for(size_t i = 0; i < VK_MAX_MEMORY_TYPES; ++i)
InitStatInfo(pStats->memoryType[i]);
for(size_t i = 0; i < VK_MAX_MEMORY_HEAPS; ++i)
InitStatInfo(pStats->memoryHeap[i]);
for (const auto i : c10::irange(VK_MAX_MEMORY_TYPES))InitStatInfo(pStats->memoryType[i]);
for (const auto i : c10::irange(VK_MAX_MEMORY_HEAPS))InitStatInfo(pStats->memoryHeap[i]);
// Process default pools.
for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex)
{
for (const auto memTypeIndex : c10::irange(GetMemoryTypeCount())) {
VmaBlockVector* const pBlockVector = m_pBlockVectors[memTypeIndex];
VMA_ASSERT(pBlockVector);
pBlockVector->AddStats(pStats);
@ -16770,8 +16719,7 @@ void VmaAllocator_T::CalculateStats(VmaStats* pStats)
}
// Process dedicated allocations.
for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex)
{
for (const auto memTypeIndex : c10::irange(GetMemoryTypeCount())) {
const uint32_t memHeapIndex = MemoryTypeIndexToHeapIndex(memTypeIndex);
VmaMutexLockRead dedicatedAllocationsLock(m_DedicatedAllocationsMutex[memTypeIndex], m_UseMutex);
AllocationVectorType* const pDedicatedAllocVector = m_pDedicatedAllocations[memTypeIndex];
@ -16788,10 +16736,8 @@ void VmaAllocator_T::CalculateStats(VmaStats* pStats)
// Postprocess.
VmaPostprocessCalcStatInfo(pStats->total);
for(size_t i = 0; i < GetMemoryTypeCount(); ++i)
VmaPostprocessCalcStatInfo(pStats->memoryType[i]);
for(size_t i = 0; i < GetMemoryHeapCount(); ++i)
VmaPostprocessCalcStatInfo(pStats->memoryHeap[i]);
for (const auto i : c10::irange(GetMemoryTypeCount()))VmaPostprocessCalcStatInfo(pStats->memoryType[i]);
for (const auto i : c10::irange(GetMemoryHeapCount()))VmaPostprocessCalcStatInfo(pStats->memoryHeap[i]);
}
void VmaAllocator_T::GetBudget(VmaBudget* outBudget, uint32_t firstHeap, uint32_t heapCount)
@ -17114,8 +17060,7 @@ VkResult VmaAllocator_T::CheckCorruption(uint32_t memoryTypeBits)
VkResult finalRes = VK_ERROR_FEATURE_NOT_PRESENT;
// Process default pools.
for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex)
{
for (const auto memTypeIndex : c10::irange(GetMemoryTypeCount())) {
if(((1u << memTypeIndex) & memoryTypeBits) != 0)
{
VmaBlockVector* const pBlockVector = m_pBlockVectors[memTypeIndex];
@ -17463,8 +17408,7 @@ VkResult VmaAllocator_T::FlushOrInvalidateAllocations(
typedef VmaSmallVector<VkMappedMemoryRange, RangeAllocator, 16> RangeVector;
RangeVector ranges = RangeVector(RangeAllocator(GetAllocationCallbacks()));
for(uint32_t allocIndex = 0; allocIndex < allocationCount; ++allocIndex)
{
for (const auto allocIndex : c10::irange(allocationCount)) {
const VmaAllocation alloc = allocations[allocIndex];
const VkDeviceSize offset = offsets != VMA_NULL ? offsets[allocIndex] : 0;
const VkDeviceSize size = sizes != VMA_NULL ? sizes[allocIndex] : VK_WHOLE_SIZE;
@ -17559,8 +17503,7 @@ uint32_t VmaAllocator_T::CalculateGlobalMemoryTypeBits() const
if(!m_UseAmdDeviceCoherentMemory)
{
// Exclude memory types that have VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD.
for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex)
{
for (const auto memTypeIndex : c10::irange(GetMemoryTypeCount())) {
if((m_MemProps.memoryTypes[memTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY) != 0)
{
memoryTypeBits &= ~(1u << memTypeIndex);
@ -17650,8 +17593,7 @@ void VmaAllocator_T::UpdateVulkanBudget()
{
VmaMutexLockWrite lockWrite(m_Budget.m_BudgetMutex, m_UseMutex);
for(uint32_t heapIndex = 0; heapIndex < GetMemoryHeapCount(); ++heapIndex)
{
for (const auto heapIndex : c10::irange(GetMemoryHeapCount())) {
m_Budget.m_VulkanUsage[heapIndex] = budgetProps.heapUsage[heapIndex];
m_Budget.m_VulkanBudget[heapIndex] = budgetProps.heapBudget[heapIndex];
m_Budget.m_BlockBytesAtBudgetFetch[heapIndex] = m_Budget.m_BlockBytes[heapIndex].load();
@ -17713,8 +17655,7 @@ uint32_t VmaAllocator_T::GetGpuDefragmentationMemoryTypeBits()
void VmaAllocator_T::PrintDetailedMap(VmaJsonWriter& json)
{
bool dedicatedAllocationsStarted = false;
for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex)
{
for (const auto memTypeIndex : c10::irange(GetMemoryTypeCount())) {
VmaMutexLockRead dedicatedAllocationsLock(m_DedicatedAllocationsMutex[memTypeIndex], m_UseMutex);
AllocationVectorType* const pDedicatedAllocVector = m_pDedicatedAllocations[memTypeIndex];
VMA_ASSERT(pDedicatedAllocVector);
@ -17751,8 +17692,7 @@ void VmaAllocator_T::PrintDetailedMap(VmaJsonWriter& json)
{
bool allocationsStarted = false;
for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex)
{
for (const auto memTypeIndex : c10::irange(GetMemoryTypeCount())) {
if(m_pBlockVectors[memTypeIndex]->IsEmpty() == false)
{
if(allocationsStarted == false)
@ -17783,8 +17723,7 @@ void VmaAllocator_T::PrintDetailedMap(VmaJsonWriter& json)
{
json.WriteString("Pools");
json.BeginObject();
for(size_t poolIndex = 0; poolIndex < poolCount; ++poolIndex)
{
for (const auto poolIndex : c10::irange(poolCount)) {
json.BeginString();
json.ContinueString(m_Pools[poolIndex]->GetId());
json.EndString();
@ -18425,8 +18364,7 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryPages(
if(pAllocationInfo != VMA_NULL && result == VK_SUCCESS)
{
for(size_t i = 0; i < allocationCount; ++i)
{
for (const auto i : c10::irange(allocationCount)) {
allocator->GetAllocationInfo(pAllocations[i], pAllocationInfo + i);
}
}

View File

@ -3,6 +3,7 @@
#include <ATen/native/utils/ParamUtils.h>
#include <ATen/native/vulkan/ops/Common.h>
#include <ATen/native/vulkan/api/Utils.h>
#include <c10/util/irange.h>
namespace at {
namespace native {
@ -32,7 +33,7 @@ inline bool is_pointwise(const IntArrayRef filter) {
bool all_lessthan(const IntArrayRef arr, const int t) {
bool retval = true;
for (size_t i = 0; i < arr.size(); i++) {
for (const auto i : c10::irange(arr.size())) {
retval = retval && (arr[i] < t);
}
return retval;
@ -173,8 +174,8 @@ vTensor pack_weights_2d(
for (int64_t src_ic = 0; src_ic < src_filter[Layout::Filter::input]; ++src_ic) {
const int64_t dst_ic4 = src_ic / 4;
for (int64_t src_ih = 0; src_ih < src_kh_sz; ++src_ih) {
for (int64_t src_iw = 0; src_iw < src_kw_sz; ++src_iw) {
for (const auto src_ih : c10::irange(src_kh_sz)) {
for (const auto src_iw : c10::irange(src_kw_sz)) {
memcpy(
dst_weight_c_ptr + (dst_oh * src_kh_sz + src_ih) * dst_kw_sz +
dst_ic4 * src_kw_sz * 4 + src_iw * 4 + src_ic % 4,
@ -225,11 +226,11 @@ vTensor pack_weights_2d_winograd_2_3(
float* const dst_weight_ptr = v_weight_payload.get();
memset(dst_weight_ptr, 0, v_weight.nbytes());
for (int64_t src_oc = 0; src_oc < src_oc_sz; ++src_oc) {
for (const auto src_oc : c10::irange(src_oc_sz)) {
const int64_t dst_oh = src_oc / 4;
const int64_t dst_iw = src_oc % 4;
for (int64_t src_ic = 0; src_ic < src_ic_sz; ++src_ic) {
for (const auto src_ic : c10::irange(src_ic_sz)) {
const int64_t dst_ow = src_ic / 4;
const int64_t dst_c = src_ic % 4;
@ -344,7 +345,7 @@ vTensor pack_biases(
float* const dst_bias_ptr = v_bias_payload.get();
memset(dst_bias_ptr, 0, v_bias.nbytes());
for (int64_t i = 0; i < src_w; ++i) {
for (const auto i : c10::irange(src_w)) {
const int64_t c = i % 4;
const int64_t x = i / 4;
dst_bias_ptr[c * packed_w + x] = src_bias_ptr[i];

View File

@ -1,4 +1,5 @@
#include <ATen/native/vulkan/ops/Mm.h>
#include <c10/util/irange.h>
namespace at {
namespace native {
@ -47,8 +48,8 @@ vTensor pack_weights(
float* const dst_weight_ptr = v_weight_payload.get();
memset(dst_weight_ptr, 0, v_weight.nbytes());
for (int64_t src_h = 0; src_h < src_kh_sz; ++src_h) {
for (int64_t src_w = 0; src_w < src_kw_sz; ++src_w) {
for (const auto src_h : c10::irange(src_kh_sz)) {
for (const auto src_w : c10::irange(src_kw_sz)) {
int64_t dst_plane = 2*(src_h%2) + (src_w%2);
int64_t dst_index = (src_h/2)*dst_kw_sz + (src_w/2);
memcpy(
@ -109,8 +110,8 @@ vTensor pack_biases(
float* const dst_bias_ptr = v_bias_payload.get();
memset(dst_bias_ptr, 0, v_bias.nbytes());
for (int64_t src_h = 0; src_h < src_kh_sz; ++src_h) {
for (int64_t src_w = 0; src_w < src_kw_sz; ++src_w) {
for (const auto src_h : c10::irange(src_kh_sz)) {
for (const auto src_w : c10::irange(src_kw_sz)) {
int64_t dst_plane = 2*(src_h%2) + (src_w%2);
int64_t dst_index = (src_h/2)*dst_kw_sz + (src_w/2);
memcpy(

View File

@ -1,4 +1,5 @@
#include <ATen/native/vulkan/ops/Common.h>
#include <c10/util/irange.h>
#include <torch/library.h>
namespace at {
@ -35,7 +36,7 @@ Tensor reflection_pad2d(const Tensor& self_arg, IntArrayRef padding) {
const vTensor& v_self = convert(self);
c10::SmallVector<int64_t, 4> output_size(input_dim);
for (size_t d = 0; d < input_dim; ++d) {
for (const auto d : c10::irange(input_dim)) {
if (d == input_dim - 1) {
output_size[d] = input_size[d] + pad_right + pad_left;
} else if (d == input_dim - 2) {

View File

@ -7,6 +7,7 @@
#include <ATen/native/utils/Factory.h>
#include <ATen/native/utils/ParamUtils.h>
#include <ATen/native/xnnpack/Convolution.h>
#include <c10/util/irange.h>
namespace at {
namespace native {
@ -150,11 +151,11 @@ const Tensor reorder_weights_for_transpose_conv(const Tensor& weight_nhwc,
float* in_ptr = weight_nhwc.data_ptr<float>();
int out_index = 0;
for (int g = 0; g < num_groups; g++) {
for (int o = 0; o < output_channels_per_group; o++) {
for (int w = 0; w < kernel_width; w++) {
for (int h = 0; h < kernel_height; h++) {
for (int i = 0; i < input_channels_per_group; i++) {
for (const auto g : c10::irange(num_groups)) {
for (const auto o : c10::irange(output_channels_per_group)) {
for (const auto w : c10::irange(kernel_width)) {
for (const auto h : c10::irange(kernel_height)) {
for (const auto i : c10::irange(input_channels_per_group)) {
int in_index = (g*g_offset) + (i*i_offset) + (h*h_offset) + (w*w_offset) + (o*o_offset);
out_ptr[out_index] = in_ptr[in_index];
out_index++;
@ -210,7 +211,7 @@ ContextConv2D create(
if (transposed) {
const Tensor weight_reordered = reorder_weights_for_transpose_conv(weight_nhwc, groups);
for (int i = 0; i < 4; i++) {
for (const auto i : c10::irange(4)) {
weight_sizes[i] = weight_reordered.size(i);
}
create_status = xnn_create_deconvolution2d_nhwc_f32(
@ -238,7 +239,7 @@ ContextConv2D create(
0u, // flags
&convolution_op); // operator
} else {
for (int i = 0; i < 4; i++) {
for (const auto i : c10::irange(4)) {
weight_sizes[i] = weight_nhwc.size(i);
}
create_status = xnn_create_convolution2d_nhwc_f32(

View File

@ -4,6 +4,7 @@
#include <ATen/nnapi/nnapi_bind.h>
#include <ATen/nnapi/nnapi_wrapper.h>
#include <ATen/nnapi/nnapi_model_loader.h>
#include <c10/util/irange.h>
namespace torch {
namespace nnapi {
@ -103,7 +104,7 @@ void NnapiCompilation::run(
TORCH_CHECK((int32_t)inputs.size() == num_inputs_);
TORCH_CHECK((int32_t)outputs.size() == num_outputs_);
for (size_t i = 0; i < inputs.size(); i++) {
for (const auto i : c10::irange(inputs.size())) {
auto& t = inputs[i];
// TODO: Check contiguous and dtype.
ANeuralNetworksOperandType op_type;
@ -117,7 +118,7 @@ void NnapiCompilation::run(
t.nbytes());
}
for (size_t i = 0; i < outputs.size(); i++) {
for (const auto i : c10::irange(outputs.size())) {
auto& t = outputs[i];
// TODO: Check contiguous and dtype.
check_nnapi->Execution_setOutput(
@ -131,7 +132,7 @@ void NnapiCompilation::run(
check_nnapi->Execution_compute(execution);
// TODO: Maybe skip this for fixed-size outputs?
for (size_t i = 0; i < outputs.size(); i++) {
for (const auto i : c10::irange(outputs.size())) {
auto& t = outputs[i];
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
uint32_t rank;

View File

@ -3,6 +3,7 @@
#include <ATen/ATen.h>
#include <ATen/CPUApplyUtils.h>
#include <ATen/test/test_assert.h>
#include <c10/util/irange.h>
#include <iostream>
using namespace std;
@ -10,7 +11,7 @@ using namespace at;
void fill_tensor(int64_t scalar, Tensor& t_) {
auto t = t_.view(-1);
for (int64_t i = 0; i < t.numel(); i++) {
for (const auto i : c10::irange(t.numel())) {
t[i] = (i + 1) * scalar;
}
}
@ -42,7 +43,7 @@ void test(DeprecatedTypeProperties& type, IntArrayRef shape, int64_t a = 0, int6
auto a4 = at::empty({0}, at::TensorOptions(kCPU).dtype(kDouble));
std::vector<Tensor> tensors({a0, a1, a2, a3, a4});
for (size_t i = 0; i < tensors.size(); i++) {
for (const auto i : c10::irange(tensors.size())) {
tensors[i].resize_(shape);
fill_tensor(i + 1, tensors[i]);
if (a >= 0 && b >= 0) {
@ -55,7 +56,7 @@ void test(DeprecatedTypeProperties& type, IntArrayRef shape, int64_t a = 0, int6
a0, a1, [](scalar_t& y, const scalar_t& x) { y = x * x; });
CPU_tensor_apply2<double, scalar_t>(
a4, a1, [](double& y, scalar_t x) { y = (double)(x * x); });
for (int64_t i = 0; i < a0.numel(); i++) {
for (const auto i : c10::irange(a0.numel())) {
auto target = a1.data_ptr<scalar_t>()[i] * a1.data_ptr<scalar_t>()[i];
ASSERT(a0.data_ptr<scalar_t>()[i] == target);
ASSERT(a4.data_ptr<double>()[i] == target);
@ -71,7 +72,7 @@ void test(DeprecatedTypeProperties& type, IntArrayRef shape, int64_t a = 0, int6
a4, a1, a2, [](double& y, const scalar_t& x, const scalar_t& z) {
y = (double)(x * x + z);
});
for (int64_t i = 0; i < a0.numel(); i++) {
for (const auto i : c10::irange(a0.numel())) {
auto target = a1.data_ptr<scalar_t>()[i] * a1.data_ptr<scalar_t>()[i];
target = target + a2.data_ptr<scalar_t>()[i];
ASSERT(a0.data_ptr<scalar_t>()[i] == target);
@ -97,7 +98,7 @@ void test(DeprecatedTypeProperties& type, IntArrayRef shape, int64_t a = 0, int6
[](double& y, const scalar_t& x, const scalar_t& z, const scalar_t& a) {
y = (double)(x * x + z * a);
});
for (int64_t i = 0; i < a0.numel(); i++) {
for (const auto i : c10::irange(a0.numel())) {
auto target = a1.data_ptr<scalar_t>()[i] * a1.data_ptr<scalar_t>()[i];
target = target + a2.data_ptr<scalar_t>()[i] * a3.data_ptr<scalar_t>()[i];
ASSERT(a0.data_ptr<scalar_t>()[i] == target);


@ -1,6 +1,7 @@
#include <gtest/gtest.h>
#include <ATen/ATen.h>
#include <c10/util/irange.h>
#include <iostream>
using namespace std;
@ -102,7 +103,7 @@ void trace() {
auto foo_a = foo.accessor<float, 2>();
float trace = 0;
for (int i = 0; i < foo_a.size(0); i++) {
for (const auto i : c10::irange(foo_a.size(0))) {
trace += foo_a[i][i];
}
@ -237,8 +238,8 @@ TEST_F(atest, atest) {
// foo = foo[3];
auto foo_v = foo.accessor<uint8_t, 2>();
for (int i = 0; i < foo_v.size(0); i++) {
for (int j = 0; j < foo_v.size(1); j++) {
for (const auto i : c10::irange(foo_v.size(0))) {
for (const auto j : c10::irange(foo_v.size(1))) {
foo_v[i][j]++;
}
}


@ -4,6 +4,7 @@
#include <ATen/core/Reduction.h>
#include <torch/cuda.h>
#include <ATen/test/test_assert.h>
#include <c10/util/irange.h>
// for TH compat test only...
struct THFloatTensor;
@ -84,7 +85,8 @@ void TestAdd(DeprecatedTypeProperties& type) {
void TestZeros(DeprecatedTypeProperties& type) {
auto begin = std::chrono::high_resolution_clock::now();
Tensor a = zeros({1024, 1024}, type);
for (int i = 1; i < 1000; ++i) {
for (const auto i : c10::irange(1, 1000)) {
(void)i; // Suppress unused variable warning
a = zeros({128, 128}, type);
}
auto end = std::chrono::high_resolution_clock::now();
@ -102,7 +104,8 @@ void TestLoadsOfAdds(DeprecatedTypeProperties& type) {
auto begin = std::chrono::high_resolution_clock::now();
Tensor d = ones({3, 4}, type);
Tensor r = zeros({3, 4}, type);
for (auto i = 0; i < 100000; i++) {
for (const auto i : c10::irange(100000)) {
(void)i; // Suppress unused variable warning
add_out(r, r, d);
}
auto end = std::chrono::high_resolution_clock::now();
@ -119,7 +122,8 @@ void TestLoadOfAddsWithCopy(DeprecatedTypeProperties& type) {
auto begin = std::chrono::high_resolution_clock::now();
Tensor d = ones({3, 4}, type);
Tensor r = zeros({3, 4}, type);
for (auto i = 0; i < 100000; i++) {
for (const auto i : c10::irange(100000)) {
(void)i; // Suppress unused variable warning
r = add(r, d);
}
auto end = std::chrono::high_resolution_clock::now();
@ -176,7 +180,7 @@ void TestCopyBroadcasting(DeprecatedTypeProperties& type) {
Tensor a = zeros({4, 3}, type);
Tensor e = rand({3}, type);
a.copy_(e);
for (int i = 0; i < 4; ++i) {
for (const auto i : c10::irange(4)) {
ASSERT_TRUE(a[i].equal(e));
}
}
@ -247,13 +251,13 @@ void TestToString() {
void TestIndexingByScalar() {
Tensor tensor = arange(0, 10, kInt);
Tensor one = ones({}, kInt);
for (int64_t i = 0; i < tensor.numel(); ++i) {
for (const auto i : c10::irange(tensor.numel())) {
ASSERT_TRUE(tensor[i].equal(one * i));
}
for (size_t i = 0; i < static_cast<uint64_t>(tensor.numel()); ++i) {
ASSERT_TRUE(tensor[i].equal(one * static_cast<int64_t>(i)));
}
for (int i = 0; i < tensor.numel(); ++i) {
for (const auto i : c10::irange(tensor.numel())) {
ASSERT_TRUE(tensor[i].equal(one * i));
}
// NOLINTNEXTLINE(bugprone-too-small-loop-variable)
@ -272,7 +276,7 @@ void TestIndexingByScalar() {
void TestIndexingByZerodimTensor() {
Tensor tensor = arange(0, 10, kInt);
Tensor one = ones({}, kInt);
for (int i = 0; i < tensor.numel(); ++i) {
for (const auto i : c10::irange(tensor.numel())) {
ASSERT_TRUE(tensor[one * i].equal(one * i));
}
// Throw StartsWith(


@ -4,6 +4,7 @@
#include <ATen/Utils.h>
#include <ATen/CPUGeneratorImpl.h>
#include <ATen/core/PhiloxRNGEngine.h>
#include <c10/util/irange.h>
#include <thread>
#include <limits>
#include <random>
@ -160,7 +161,8 @@ TEST(CPUGeneratorImpl, TestPhiloxEngineOffset1) {
// So if you want to skip 8 values, offset would
// be 2, since 2*4=8.
at::Philox4_32_10 engine2(123, 1, 2);
for(int i = 0; i < 8; i++){
for (const auto i : c10::irange(8)) {
(void)i; // Suppress unused variable warning
// Note: instead of using the engine() call 8 times
// we could have achieved the same functionality by
// calling the incr() function twice.
@ -221,14 +223,16 @@ TEST(CPUGeneratorImpl, TestMT19937EngineReproducibility) {
// test with zero seed
at::mt19937 engine1(0);
std::mt19937 engine2(0);
for(int i = 0; i < 10000; i++) {
for (const auto i : c10::irange(10000)) {
(void)i; // Suppress unused variable warning
ASSERT_EQ(engine1(), engine2());
}
// test with large seed
engine1 = at::mt19937(2147483647);
engine2 = std::mt19937(2147483647);
for(int i = 0; i < 10000; i++) {
for (const auto i : c10::irange(10000)) {
(void)i; // Suppress unused variable warning
ASSERT_EQ(engine1(), engine2());
}
@ -237,7 +241,8 @@ TEST(CPUGeneratorImpl, TestMT19937EngineReproducibility) {
auto seed = rd();
engine1 = at::mt19937(seed);
engine2 = std::mt19937(seed);
for(int i = 0; i < 10000; i++) {
for (const auto i : c10::irange(10000)) {
(void)i; // Suppress unused variable warning
ASSERT_EQ(engine1(), engine2());
}


@ -2,6 +2,7 @@
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
#include <c10/util/irange.h>
#include <caffe2/core/init.h>
#include <caffe2/core/operator.h>
#include <caffe2/core/context_gpu.h>
@ -34,7 +35,7 @@ TEST(CUDACaffe2ToPytorch, SimpleLegacy) {
auto at_cpu = at_tensor.cpu();
auto it = at_cpu.data_ptr<int64_t>();
for (int64_t i = 0; i < 16; i++) {
for (const auto i : c10::irange(16)) {
ASSERT_EQ(it[i], 777);
}
}
@ -53,7 +54,7 @@ TEST(CUDACaffe2ToPytorch, Simple) {
auto at_cpu = at_tensor.cpu();
auto it = at_cpu.data_ptr<int64_t>();
for (int64_t i = 0; i < 16; i++) {
for (const auto i : c10::irange(16)) {
ASSERT_EQ(it[i], 777);
}
}
@ -109,7 +110,7 @@ TEST(CUDAPytorchToCaffe2, Op) {
ASSERT_EQ(result.GetDeviceType(), caffe2::CUDA);
auto data = result.data<float>();
for (int64_t i = 0; i < 25; i++) {
for (const auto i : c10::irange(25)) {
ASSERT_EQ(cuda_get(data + i), 3.0);
}
at::Tensor at_result(result);


@ -3,6 +3,7 @@
#include <gtest/gtest.h>
#include <torch/torch.h>
#include <c10/util/intrusive_ptr.h>
#include <c10/util/irange.h>
#include <ATen/core/Dict.h>
// Snippets for checking assembly.
@ -643,7 +644,7 @@ TEST(IValueTest, IdentityComparisonAndHashing) {
auto moreSampleIValues = makeMoreSampleIValues();
ASSERT_EQ(sampleIValues.size(), moreSampleIValues.size());
for (int ii = 0; ii < sampleIValues.size(); ++ii) {
for (const auto ii : c10::irange(sampleIValues.size())) {
if (sampleIValues[ii].isComplexDouble() ||
sampleIValues[ii].isBlob() ||
sampleIValues[ii].isList() ||


@ -2,6 +2,7 @@
#include <ATen/ATen.h>
#include <ATen/CPUFunctions.h>
#include <c10/util/irange.h>
using namespace at;
@ -115,7 +116,7 @@ TEST(MathKernelTest, MishBackward) {
TEST(MathKernelTest, NarrowCopy) {
auto x = rand({5, 8, 7});
for (int64_t dim = 0; dim < 3; ++dim) {
for (const auto dim : c10::irange(3)) {
const int64_t start = 1, length = 4;
auto y_ref = x.narrow(dim, start, length);
auto y_test = at::native::narrow_copy_dense(x, dim, start, length);


@ -1,6 +1,7 @@
#include <gtest/gtest.h>
#include <ATen/ATen.h>
#include <c10/util/irange.h>
using namespace at;
@ -16,7 +17,7 @@ using namespace at;
void requireEqualTensorList(TensorList t1, TensorList t2) {
ASSERT_EQ(t1.size(), t2.size());
for (size_t i = 0; i < t1.size(); ++i) {
for (const auto i : c10::irange(t1.size())) {
ASSERT_EQUAL(t1[i], t2[i]);
}
}
@ -74,7 +75,7 @@ void TestStack(TensorOptions T, Tensor& t) {
auto z = rand({2, 3, 4});
auto inputs = {x, y, z};
for (int64_t dim = 0; dim < 4; ++dim) {
for (const auto dim : c10::irange(4)) {
_test_stack(inputs, dim, at::stack);
}
}
@ -85,7 +86,7 @@ void TestStack(TensorOptions T, Tensor& t) {
auto z = rand({2, 3, 4});
auto inputs = {x, y, z};
for (int64_t dim = 0; dim < 4; ++dim) {
for (const auto dim : c10::irange(4)) {
_test_stack(inputs, dim, at::native::_stack);
}
}
@ -96,7 +97,7 @@ void TestStack(TensorOptions T, Tensor& t) {
auto z = rand({2, 3, 4});
auto inputs = {x, y, z};
for (int64_t dim = 0; dim < 4; ++dim) {
for (const auto dim : c10::irange(4)) {
_test_stack(inputs, dim, at::native::_stack_cpu);
}
}


@ -1,6 +1,7 @@
#include <ATen/Operators.h>
#include <ATen/test/test_assert.h>
#include <c10/util/Exception.h>
#include <c10/util/irange.h>
#include <gtest/gtest.h>
#include <ATen/ATen.h>
@ -34,7 +35,7 @@ TEST(PackedtensoraccessorTest, TransposeTest) {
t = rand({size}, CPU(kFloat));
auto original_1d = t.packed_accessor64<float, 1, DefaultPtrTraits>();
auto transposed_1d = original_1d.transpose(0, 0);
for (int i = 0; i < size; i++){
for (const auto i : c10::irange(size)) {
ASSERT_EQ(original_1d[i], transposed_1d[i]);
}


@ -1,6 +1,7 @@
#include <gtest/gtest.h>
#include <ATen/native/Pow.h>
#include <c10/util/irange.h>
#include <torch/types.h>
#include <torch/utils.h>
@ -203,7 +204,7 @@ void tensor_pow_tensor(const Vals vals, c10::ScalarType vals_dtype, Pows pows, c
std::cout.precision(dbl::max_digits10);
const auto vals_tensor = torch::tensor(vals, vals_dtype);
for (size_t shift = 0; shift < pows.size(); shift++) {
for (const auto shift : c10::irange(pows.size())) {
const auto pows_tensor = torch::tensor(pows, pows_dtype);
const auto actual_pow = vals_tensor.pow(pows_tensor);


@ -11,6 +11,7 @@
// For quantize_val
#include <ATen/native/quantized/affine_quantizer.h>
#include <c10/core/ScalarType.h>
#include <c10/util/irange.h>
#include <ATen/quantized/Quantizer.h>
using namespace at;
@ -30,14 +31,14 @@ TEST(TestQTensor, QuantDequantAPIs) {
// int_repr
Tensor int_repr = qr.int_repr();
auto* int_repr_data = int_repr.data_ptr<uint8_t>();
for (auto i = 0; i < num_elements; ++i) {
for (const auto i : c10::irange(num_elements)) {
ASSERT_EQ(int_repr_data[i], 3);
}
// Check for correct quantization
auto r_data = r.data_ptr<float>();
auto qr_data = qr.data_ptr<quint8>();
for (auto i = 0; i < num_elements; ++i) {
for (const auto i : c10::irange(num_elements)) {
ASSERT_EQ(
native::quantize_val<quint8>(scale, zero_point, r_data[i]).val_,
qr_data[i].val_);
@ -46,10 +47,10 @@ TEST(TestQTensor, QuantDequantAPIs) {
// Check for correct dequantization
Tensor rqr = qr.dequantize();
auto rqr_data = rqr.data_ptr<float>();
for (auto i = 0; i < num_elements; ++i) {
for (const auto i : c10::irange(num_elements)) {
ASSERT_EQ(r_data[i], rqr_data[i]);
}
for (auto i = 0; i < num_elements; ++i) {
for (const auto i : c10::irange(num_elements)) {
ASSERT_EQ(
r_data[i],
native::dequantize_val(qr.q_scale(), qr.q_zero_point(), qr_data[i]));
@ -60,7 +61,7 @@ TEST(TestQTensor, QuantDequantAPIs) {
int64_t new_zero_point = 1;
Tensor reqr = at::quantize_per_tensor(r, new_scale, new_zero_point, kQInt8);
auto reqr_data = reqr.data_ptr<qint8>();
for (auto i = 0; i < num_elements; ++i) {
for (const auto i : c10::irange(num_elements)) {
reqr_data[i].val_ =
native::requantize_val<quint8, qint8>(
scale, zero_point, new_scale, new_zero_point, qr_data[i])
@ -85,7 +86,7 @@ TEST(TestQTensor, RoundingMode) {
Tensor qx = at::quantize_per_tensor(x, /*scale=*/1.0, zero_point, kQUInt8);
auto qx_data = qx.data_ptr<quint8>();
for (size_t idx = 0; idx < x_values.size(); ++idx) {
for (const auto idx : c10::irange(x_values.size())) {
ASSERT_EQ(qx_expect[idx], qx_data[idx].val_)
<< "Tie breaking during rounding element " << idx << " failed!";
}
@ -108,14 +109,14 @@ TEST(TestQTensor, EmptyQuantized) {
{numel}, at::device(at::kCPU).dtype(kQUInt8), scale, zero_point);
// Assigning to QTensor
auto* q_data = q.data_ptr<quint8>();
for (int i = 0; i < numel; ++i) {
for (const auto i : c10::irange(numel)) {
q_data[i].val_ = val;
}
// dequantize
auto r = q.dequantize();
auto* r_data = r.data_ptr<float>();
for (int i = 0; i < numel; ++i) {
for (const auto i : c10::irange(numel)) {
ASSERT_EQ(r_data[i], (val - zero_point) * scale);
}
}
@ -134,14 +135,14 @@ TEST(TestQTensor, EmptyPerchannelQuantized) {
at::device(at::kCPU).dtype(kQUInt8));
// Assigning to QTensor
auto* q_data = q.data_ptr<quint8>();
for (int i = 0; i < numel; ++i) {
for (const auto i : c10::irange(numel)) {
q_data[i].val_ = val;
}
// dequantize
auto r = q.dequantize();
auto* r_data = r.data_ptr<float>();
for (int i = 0; i < numel; ++i) {
for (const auto i : c10::irange(numel)) {
ASSERT_EQ(
r_data[i],
(val - zero_points[i].item().to<int>()) * scales[i].item().to<float>());
@ -222,7 +223,7 @@ TEST(TestQTensor, FromBlobQuantizedPerTensor) {
custom_vec->reserve(numel);
uint8_t* custom_data = custom_vec->data();
for (auto i = 0; i < numel; ++i) {
for (const auto i : c10::irange(numel)) {
custom_data[i] = i;
}
bool customDataDeleted{false};
@ -236,7 +237,7 @@ TEST(TestQTensor, FromBlobQuantizedPerTensor) {
Tensor qtensor = at::from_blob_quantized_per_tensor_affine(custom_data, shape, deleter, scale, zero_point, options);
uint8_t* q_data = (uint8_t*)qtensor.data_ptr<quint8>();
for (auto i = 0; i < numel; ++i) {
for (const auto i : c10::irange(numel)) {
ASSERT_EQ((int)custom_data[i], (int)q_data[i]);
}
ASSERT_EQ((float)qtensor.q_scale(), (float)scale);
@ -258,7 +259,7 @@ TEST(TestQTensor, FromBlobQuantizedPerChannel) {
custom_vec->reserve(numel);
uint8_t* custom_data = custom_vec->data();
for (auto i = 0; i < numel; ++i) {
for (const auto i : c10::irange(numel)) {
custom_data[i] = i;
}
bool customDataDeleted{false};
@ -271,7 +272,7 @@ TEST(TestQTensor, FromBlobQuantizedPerChannel) {
{
Tensor qtensor = at::from_blob_quantized_per_channel_affine(custom_data, shape, deleter, scales, zero_points, ch_axis, options);
uint8_t* q_data = (uint8_t*)qtensor.data_ptr<quint8>();
for (auto i = 0; i < numel; ++i) {
for (const auto i : c10::irange(numel)) {
ASSERT_EQ((int)custom_data[i], (int)q_data[i]);
}
ASSERT_TRUE(at::allclose(qtensor.q_per_channel_scales(), scales));


@ -1,6 +1,7 @@
#include <gtest/gtest.h>
#include <ATen/ATen.h>
#include <c10/util/irange.h>
#include <caffe2/core/init.h>
#include <caffe2/core/operator.h>
@ -8,13 +9,13 @@ TEST(Caffe2ToPytorch, SimpleLegacy) {
caffe2::Tensor c2_tensor(caffe2::CPU);
c2_tensor.Resize(4, 4);
auto data = c2_tensor.mutable_data<int64_t>();
for (int64_t i = 0; i < 16; i++) {
for (const auto i : c10::irange(16)) {
data[i] = i;
}
at::Tensor at_tensor(c2_tensor);
auto it = at_tensor.data_ptr<int64_t>();
for (int64_t i = 0; i < 16; i++) {
for (const auto i : c10::irange(16)) {
ASSERT_EQ(it[i], i);
}
}
@ -22,13 +23,13 @@ TEST(Caffe2ToPytorch, SimpleLegacy) {
TEST(Caffe2ToPytorch, Simple) {
caffe2::Tensor c2_tensor = caffe2::empty({4, 4}, at::kLong);
auto data = c2_tensor.mutable_data<int64_t>();
for (int64_t i = 0; i < 16; i++) {
for (const auto i : c10::irange(16)) {
data[i] = i;
}
at::Tensor at_tensor(c2_tensor);
auto it = at_tensor.data_ptr<int64_t>();
for (int64_t i = 0; i < 16; i++) {
for (const auto i : c10::irange(16)) {
ASSERT_EQ(it[i], i);
}
}
@ -37,7 +38,7 @@ TEST(Caffe2ToPytorch, ExternalData) {
caffe2::Tensor c2_tensor = caffe2::empty({4, 4}, at::kLong);
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays,cppcoreguidelines-avoid-magic-numbers)
int64_t buf[16];
for (int64_t i = 0; i < 16; i++) {
for (const auto i : c10::irange(16)) {
buf[i] = i;
}
c2_tensor.ShareExternalPointer(buf, 16 * sizeof(int64_t));
@ -48,7 +49,7 @@ TEST(Caffe2ToPytorch, ExternalData) {
at_tensor.permute({1, 0});
at_tensor.permute({1, 0});
auto it = at_tensor.data_ptr<int64_t>();
for (int64_t i = 0; i < 16; i++) {
for (const auto i : c10::irange(16)) {
ASSERT_EQ(it[i], i);
}
ASSERT_FALSE(at_tensor.storage().resizable());
@ -60,7 +61,7 @@ TEST(Caffe2ToPytorch, Op) {
caffe2::Tensor c2_tensor(caffe2::CPU);
c2_tensor.Resize(3, 3);
auto data = c2_tensor.mutable_data<int64_t>();
for (int64_t i = 0; i < 9; i++) {
for (const auto i : c10::irange(9)) {
data[i] = i;
}
at::Tensor at_tensor(c2_tensor);
@ -107,7 +108,7 @@ TEST(Caffe2ToPytorch, PartiallyInitialized) {
TEST(Caffe2ToPytorch, MutualResizes) {
caffe2::Tensor c2_tensor = caffe2::empty({5, 5}, at::kFloat);
auto data = c2_tensor.mutable_data<float>();
for (int64_t i = 0; i < 25; i++) {
for (const auto i : c10::irange(25)) {
data[i] = 0;
}
@ -171,7 +172,7 @@ TEST(PytorchToCaffe2, Op) {
auto result = XBlobGetMutableTensor(workspace.CreateBlob("d"), {5, 5}, at::kCPU);
auto it = result.data<float>();
for (int64_t i = 0; i < 25; i++) {
for (const auto i : c10::irange(25)) {
ASSERT_EQ(it[i], 3.0);
}
at::Tensor at_result(result);
@ -202,7 +203,7 @@ TEST(PytorchToCaffe2, SharedStorageRead) {
auto result = XBlobGetMutableTensor(workspace.CreateBlob("c"), {5, 5}, at::kCPU);
auto it = result.data<float>();
for (int64_t i = 0; i < 25; i++) {
for (const auto i : c10::irange(25)) {
ASSERT_EQ(it[i], 2.0);
}
at::Tensor at_result(result);
@ -259,7 +260,7 @@ TEST(PytorchToCaffe2, Strided) {
ASSERT_ANY_THROW(caffe2::Tensor c2_tensor(at_tensor));
// but calling contiguous is fine
caffe2::Tensor c2_tensor(at_tensor.contiguous());
for (int64_t i = 0; i < 25; i++) {
for (const auto i : c10::irange(25)) {
ASSERT_EQ(c2_tensor.data<float>()[i], 1.0);
}
}


@ -1,5 +1,6 @@
#include <ATen/ATen.h>
#include <ATen/Parallel.h>
#include <c10/util/irange.h>
#include <test/cpp/tensorexpr/test_base.h>
#include <thread>
@ -13,7 +14,8 @@ void test(int given_num_threads) {
ASSERT_TRUE(given_num_threads >= 0);
ASSERT_EQ(at::get_num_threads(), given_num_threads);
auto t_sum = t.sum();
for (int i = 0; i < 1000; ++i) {
for (const auto i : c10::irange(1000)) {
(void)i; // Suppress unused variable warning
t_sum = t_sum + t.sum();
}
}


@ -1,4 +1,5 @@
#include <ATen/test/vec_test_all_types.h>
#include <c10/util/irange.h>
namespace {
#if GTEST_HAS_TYPED_TEST
template <typename T>
@ -455,7 +456,7 @@ namespace {
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
CACHE_ALIGN VT expected_vals[vec::size()];
auto vals = 1 << (vec::size());
for (int val = 0; val < vals; ++val) {
for (const auto val : c10::irange(vals)) {
for (int i = 0; i < vec::size(); ++i) {
if (val & (1 << i)) {
test_vals[i] = std::numeric_limits<VT>::quiet_NaN();
@ -747,7 +748,7 @@ namespace {
CACHE_ALIGN VT test_vals[vec::size()];
//all sets will be within 0 2^(n-1)
auto power_sets = 1 << (vec::size());
for (int expected = 0; expected < power_sets; expected++) {
for (const auto expected : c10::irange(power_sets)) {
// generate test_val based on expected
for (int i = 0; i < vec::size(); ++i)
{
@ -894,7 +895,7 @@ namespace {
void blend_init(T(&a)[N], T(&b)[N]) {
a[0] = (T)1.0;
b[0] = a[0] + (T)N;
for (int i = 1; i < N; i++) {
for (const auto i : c10::irange(1, N)) {
a[i] = a[i - 1] + (T)(1.0);
b[i] = b[i - 1] + (T)(1.0);
}
@ -905,7 +906,7 @@ namespace {
auto add = Complex<float>(1., 100.);
a[0] = Complex<float>(1., 100.);
b[0] = Complex<float>(5., 1000.);
for (int i = 1; i < 4; i++) {
for (const auto i : c10::irange(1, 4)) {
a[i] = a[i - 1] + add;
b[i] = b[i - 1] + add;
}
@ -1051,7 +1052,8 @@ namespace {
float minv = static_cast<float>(static_cast<double>(min_val) * 2.0);
float maxv = static_cast<float>(static_cast<double>(max_val) * 2.0);
ValueGen<float> gen(minv, maxv, seed.add(2));
for (int i = 0; i < trials; i++) {
for (const auto i : c10::irange(trials)) {
(void)i; // Suppress unused variable warning
float scale = generator_sc.get();
float inv_scale = 1.0f / static_cast<float>(scale);
auto zero_point_val = generator_zp.get();
@ -1088,7 +1090,8 @@ namespace {
ValueGen<int> generator(min_val, max_val, seed.add(1));
//scale
ValueGen<float> generator_sc(1.f, 15.f, seed.add(2));
for (int i = 0; i < trials; i++) {
for (const auto i : c10::irange(trials)) {
(void)i; // Suppress unused variable warning
float scale = generator_sc.get();
int32_t zero_point_val = generator.get();
float scale_zp_premul = -(scale * zero_point_val);
@ -1135,7 +1138,8 @@ namespace {
ValueGen<int32_t> generator(min_val, max_val, seed);
//scale
ValueGen<float> generator_sc(1.f, 15.f, seed.add(1));
for (int i = 0; i < trials; i++) {
for (const auto i : c10::irange(trials)) {
(void)i; // Suppress unused variable warning
float multiplier = 1.f / (generator_sc.get());
auto zero_point_val = generator.get();
int index = 0;
@ -1172,7 +1176,8 @@ namespace {
typename vec::int_vec_return_type expected_int_ret;
auto seed = TestSeed();
ValueGen<underlying> generator(min_val, max_val, seed);
for (int i = 0; i < trials; i++) {
for (const auto i : c10::irange(trials)) {
(void)i; // Suppress unused variable warning
//generate vals
for (int j = 0; j < vec::size(); j++) {
qint_vals[j] = generator.get();
@ -1251,7 +1256,7 @@ namespace {
CACHE_ALIGN VT ref_y[N];
auto seed = TestSeed();
ValueGen<VT> generator(VT(-100), VT(100), seed);
for (int64_t i = 0; i < N; i++) {
for (const auto i : c10::irange(N)) {
x1[i] = generator.get();
x2[i] = generator.get();
x3[i] = generator.get();
@ -1263,19 +1268,19 @@ namespace {
};
// test map: y = x1
at::vec::map<VT>([](vec x) { return x; }, y, x1, N);
for (int64_t i = 0; i < N; i++) { ref_y[i] = x1[i]; }
for (const auto i : c10::irange(N)) { ref_y[i] = x1[i]; }
cmp(y, ref_y);
// test map2: y = x1 + x2
at::vec::map2<VT>([](vec x1, vec x2) { return x1 + x2; }, y, x1, x2, N);
for (int64_t i = 0; i < N; i++) { ref_y[i] = x1[i] + x2[i]; }
for (const auto i : c10::irange(N)) { ref_y[i] = x1[i] + x2[i]; }
cmp(y, ref_y);
// test map3: y = x1 + x2 + x3
at::vec::map3<VT>([](vec x1, vec x2, vec x3) { return x1 + x2 + x3; }, y, x1, x2, x3, N);
for (int64_t i = 0; i < N; i++) { ref_y[i] = x1[i] + x2[i] + x3[i]; }
for (const auto i : c10::irange(N)) { ref_y[i] = x1[i] + x2[i] + x3[i]; }
cmp(y, ref_y);
// test map4: y = x1 + x2 + x3 + x4
at::vec::map4<VT>([](vec x1, vec x2, vec x3, vec x4) { return x1 + x2 + x3 + x4; }, y, x1, x2, x3, x4, N);
for (int64_t i = 0; i < N; i++) { ref_y[i] = x1[i] + x2[i] + x3[i] + x4[i]; }
for (const auto i : c10::irange(N)) { ref_y[i] = x1[i] + x2[i] + x3[i] + x4[i]; }
cmp(y, ref_y);
}
TYPED_TEST(FunctionalBF16Tests, Reduce) {
@ -1294,7 +1299,7 @@ namespace {
CACHE_ALIGN VT x_b3[N];
auto seed = TestSeed();
ValueGen<RT> generator(RT(-1), RT(1), seed);
for (int64_t i = 0; i < N; i++) {
for (const auto i : c10::irange(N)) {
x_f1[i] = generator.get();
x_f2[i] = generator.get();
x_f3[i] = generator.get();
@ -1362,7 +1367,7 @@ namespace {
CACHE_ALIGN VT y_b[N];
auto seed = TestSeed();
ValueGen<RT> generator(RT(-1), RT(1), seed);
for (int64_t i = 0; i < N; i++) {
for (const auto i : c10::irange(N)) {
x_f1[i] = generator.get();
x_f2[i] = generator.get();
x_f3[i] = generator.get();
@ -1379,7 +1384,7 @@ namespace {
for (int64_t len = 1; len <= N; len++) {
at::vec::map<RT>([](auto x) { return x; }, y_f, x_f1, len);
at::vec::map<VT>([](auto x) { return x; }, y_b, x_b1, len);
for (int64_t i = 0; i < len; i++) {
for (const auto i : c10::irange(len)) {
ASSERT_TRUE(cmp(y_f[i], y_b[i])) << "Failure Details:\nTest Seed to reproduce: " << seed
<< "\nmap, Length: " << len << "; index: " << i << "; fp32 reference: " << y_f[i] << "; bf16 value: " << RT(y_b[i]);
}
@ -1388,7 +1393,7 @@ namespace {
for (int64_t len = 1; len <= N; len++) {
at::vec::map2<RT>([](auto x, auto y) { return x + y; }, y_f, x_f1, x_f2, len);
at::vec::map2<VT>([](auto x, auto y) { return x + y; }, y_b, x_b1, x_b2, len);
for (int64_t i = 0; i < len; i++) {
for (const auto i : c10::irange(len)) {
ASSERT_TRUE(cmp(y_f[i], y_b[i])) << "Failure Details:\nTest Seed to reproduce: " << seed
<< "\nmap2, Length: " << len << "; index: " << i << "; fp32 reference: " << y_f[i] << "; bf16 value: " << RT(y_b[i]);
}
@ -1397,7 +1402,7 @@ namespace {
for (int64_t len = 1; len <= N; len++) {
at::vec::map3<RT>([](auto x, auto y, auto z) { return x + y * z; }, y_f, x_f1, x_f2, x_f3, len);
at::vec::map3<VT>([](auto x, auto y, auto z) { return x + y * z; }, y_b, x_b1, x_b2, x_b3, len);
for (int64_t i = 0; i < len; i++) {
for (const auto i : c10::irange(len)) {
ASSERT_TRUE(cmp(y_f[i], y_b[i])) << "Failure Details:\nTest Seed to reproduce: " << seed
<< "\nmap3, Length: " << len << "; index: " << i << "; fp32 reference: " << y_f[i] << "; bf16 value: " << RT(y_b[i]);
}
@ -1406,7 +1411,7 @@ namespace {
for (int64_t len = 1; len <= N; len++) {
at::vec::map4<RT>([](auto x, auto y, auto z, auto w) { return x + y * z - w; }, y_f, x_f1, x_f2, x_f3, x_f4, len);
at::vec::map4<VT>([](auto x, auto y, auto z, auto w) { return x + y * z - w; }, y_b, x_b1, x_b2, x_b3, x_b4, len);
for (int64_t i = 0; i < len; i++) {
for (const auto i : c10::irange(len)) {
ASSERT_TRUE(cmp(y_f[i], y_b[i])) << "Failure Details:\nTest Seed to reproduce: " << seed
<< "\nmap4, Length: " << len << "; index: " << i << "; fp32 reference: " << y_f[i] << "; bf16 value: " << RT(y_b[i]);
}


@ -1,6 +1,7 @@
#pragma once
#include <ATen/cpu/vec/vec.h>
#include <ATen/cpu/vec/functional.h>
#include <c10/util/irange.h>
#include <gtest/gtest.h>
#include <chrono>
#include <exception>
@ -869,8 +870,7 @@ public:
act.store(actArr);
if (bitwise)
{
for (int i = 0; i < sizeX; i++)
{
for (const auto i : c10::irange(sizeX)) {
BVT b_exp = bit_cast<BVT>(expArr[i]);
BVT b_act = bit_cast<BVT>(actArr[i]);
EXPECT_EQ(b_exp, b_act) << getDetail(i / unitStorageCount);
@ -880,8 +880,7 @@ public:
}
else if (checkWithTolerance)
{
for (int i = 0; i < sizeX; i++)
{
for (const auto i : c10::irange(sizeX)) {
EXPECT_EQ(nearlyEqual<UVT>(expArr[i], actArr[i], absErr), true) << expArr[i] << "!=" << actArr[i] << "\n" << getDetail(i / unitStorageCount);
if (::testing::Test::HasFailure())
return true;
@ -889,8 +888,7 @@ public:
}
else
{
for (int i = 0; i < sizeX; i++)
{
for (const auto i : c10::irange(sizeX)) {
if (std::is_same<UVT, float>::value)
{
if (!check_both_nan(expArr[i], actArr[i])) {
@ -952,8 +950,9 @@ void test_unary(
UVT start = dmn_argc > 0 ? dmn.ArgsDomain[0].start : default_start;
UVT end = dmn_argc > 0 ? dmn.ArgsDomain[0].end : default_end;
ValueGen<VT> generator(start, end, seed.add(changeSeedBy));
for (int trial = 0; trial < trialCount; trial++) {
for (int k = 0; k < el_count; k++) {
for (const auto trial : c10::irange(trialCount)) {
(void)trial; // Suppress unused variable warning
for (const auto k : c10::irange(el_count)) {
vals[k] = generator.get();
call_filter(filter, vals[k]);
//map operator
@ -1011,8 +1010,9 @@ void test_binary(
UVT end1 = dmn_argc > 1 ? dmn.ArgsDomain[1].end : default_end;
ValueGen<VT> generator0(start0, end0, seed.add(changeSeedBy));
ValueGen<VT> generator1(start1, end1, seed.add(changeSeedBy + 1));
for (int trial = 0; trial < trialCount; trial++) {
for (int k = 0; k < el_count; k++) {
for (const auto trial : c10::irange(trialCount)) {
(void)trial; // Suppress unused variable warning
for (const auto k : c10::irange(el_count)) {
vals0[k] = generator0.get();
vals1[k] = generator1.get();
call_filter(filter, vals0[k], vals1[k]);
@ -1076,8 +1076,9 @@ void test_ternary(
ValueGen<VT> generator1(start1, end1, seed.add(changeSeedBy + 1));
ValueGen<VT> generator2(start2, end2, seed.add(changeSeedBy + 2));
for (int trial = 0; trial < trialCount; trial++) {
for (int k = 0; k < el_count; k++) {
for (const auto trial : c10::irange(trialCount)) {
(void)trial; // Suppress unused variable warning
for (const auto k : c10::irange(el_count)) {
vals0[k] = generator0.get();
vals1[k] = generator1.get();
vals2[k] = generator2.get();


@ -3,6 +3,7 @@
#include <ATen/ATen.h>
#include <ATen/core/Vitals.h>
#include <c10/util/irange.h>
#include <cstdlib>
using namespace at::vitals;
@ -62,7 +63,7 @@ TEST(Vitals, MultiString) {
}
TEST(Vitals, OnAndOff) {
for (auto i = 0; i < 2; ++i) {
for (const auto i : c10::irange(2)) {
std::stringstream buffer;
std::streambuf* sbuf = std::cout.rdbuf();


@ -3,6 +3,7 @@
#include <ATen/ATen.h>
#include <ATen/BatchedTensorImpl.h>
#include <ATen/VmapTransforms.h>
#include <c10/util/irange.h>
using namespace at;
@ -55,7 +56,7 @@ TEST(VmapTest, TestBatchedTensor) {
// returns {{lvl=0,dim=0}, {lvl=1,dim=1}, ..., {lvl=kVmapNumLevels-1,dim=kVmapNumLevels-1}};
static BatchDims maxBatchDimsAtFront() {
BatchDims result;
for (int64_t lvl = 0; lvl < kVmapNumLevels; lvl++) {
for (const auto lvl : c10::irange(kVmapNumLevels)) {
result.emplace_back(lvl, /*dim=*/lvl);
}
return result;
@ -169,7 +170,8 @@ TEST(VmapTest, TestBatchedTensorActualDim) {
{
// ActualDim on kVmapMaxTensorDims sized underlying tensor
auto tensor = ones({});
for (int64_t i = 0; i < kVmapMaxTensorDims; i++) {
for (const auto i : c10::irange(kVmapMaxTensorDims)) {
(void)i; // Suppress unused variable warning
tensor = tensor.unsqueeze(0);
}
ASSERT_EQ(tensor.dim(), kVmapMaxTensorDims);
@ -260,7 +262,7 @@ TEST(VmapTest, TestMultiBatchVmapTransform) {
BatchDims batch_dims = {
{0, 2}, {1, 1}, {2, kVmapNumLevels - 1}, {3, 5}, {4, 0}, {5, 3}, {6, 4}
};
for (int64_t level = 7; level < kVmapNumLevels; level++ ) {
for (const auto level : c10::irange(7, kVmapNumLevels)) {
batch_dims.emplace_back(level, /*dim=*/level - 1);
}
auto tensor = ones(sizes);
@ -303,7 +305,7 @@ TEST(VmapTest, TestVmapPhysicalViewGetPhysicalDims) {
static void checkBatchDimsEqual(BatchDimsRef bdims, BatchDimsRef expected_bdims) {
ASSERT_EQ(bdims.size(), expected_bdims.size());
for (int64_t idx = 0; idx < bdims.size(); idx++) {
for (const auto idx : c10::irange(bdims.size())) {
ASSERT_EQ(bdims[idx].dim(), expected_bdims[idx].dim());
ASSERT_EQ(bdims[idx].level(), expected_bdims[idx].level());
}
@ -394,7 +396,7 @@ TEST(VmapTest, TestBatchedTensorSum) {
static void checkBroadcastingVmapTransform(TensorList inputs, TensorList expected_outputs) {
auto outputs = BroadcastingVmapTransform::logicalToPhysical(inputs);
ASSERT_EQ(outputs.size(), expected_outputs.size());
for (int64_t idx = 0; idx < outputs.size(); idx++) {
for (const auto idx : c10::irange(outputs.size())) {
const auto& output = outputs[idx].tensor();
ASSERT_EQ(output.data_ptr(), expected_outputs[idx].data_ptr());
ASSERT_TRUE(at::allclose(output, expected_outputs[idx]));
@ -878,7 +880,7 @@ TEST(VmapTest, TestBatchedTensorPermute) {
static void checkMultiBatchVmapTransform(TensorList inputs, TensorList expected_outputs) {
auto outputs = MultiBatchVmapTransform::logicalToPhysical(inputs);
ASSERT_EQ(outputs.size(), expected_outputs.size());
for (int64_t idx = 0; idx < outputs.size(); idx++) {
for (const auto idx : c10::irange(outputs.size())) {
const auto& output = outputs[idx].tensor();
ASSERT_EQ(output.data_ptr(), expected_outputs[idx].data_ptr());
ASSERT_EQ(output.sizes(), expected_outputs[idx].sizes());


@ -5,6 +5,7 @@
#include <ATen/ATen.h>
#include <ATen/core/dispatch/Dispatcher.h>
#include <ATen/vulkan/Context.h>
#include <c10/util/irange.h>
bool checkRtol(const at::Tensor& diff, const std::vector<at::Tensor> inputs) {
double maxValue = 0.0;
@ -145,7 +146,7 @@ TEST(VulkanTest, addScalar) {
auto t_in = at::rand({3, 2, 2, 3}, at::device(at::kCPU).dtype(at::kFloat));
float* data = t_in.data_ptr<float>();
auto numel = t_in.numel();
for (int i = 0; i < numel; i++) {
for (const auto i : c10::irange(numel)) {
// NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions)
data[i] = i;
}
@ -772,7 +773,7 @@ TEST(VulkanTest, tensor5d_transpose) {
at::empty({1, 2, 3, 2, 1}, at::TensorOptions(at::kCPU).dtype(at::kFloat));
float* data = t_in.data_ptr<float>();
auto numel = t_in.numel();
for (int i = 0; i < numel; i++) {
for (const auto i : c10::irange(numel)) {
// NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions)
data[i] = i;
}
@ -816,7 +817,7 @@ TEST(VulkanTest, slice) {
at::empty({1, 4, 2, 2}, at::TensorOptions(at::kCPU).dtype(at::kFloat));
float* data = t_in.data_ptr<float>();
auto numel = t_in.numel();
for (int i = 0; i < numel; i++) {
for (const auto i : c10::irange(numel)) {
// NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions)
data[i] = i;
}
@ -841,7 +842,7 @@ TEST(VulkanTest, select) {
at::empty({1, 4, 2, 2}, at::TensorOptions(at::kCPU).dtype(at::kFloat));
float* data = t_in.data_ptr<float>();
auto numel = t_in.numel();
for (int i = 0; i < numel; i++) {
for (const auto i : c10::irange(numel)) {
// NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions)
data[i] = i;
}
@ -866,7 +867,7 @@ TEST(VulkanTest, unsqueeze) {
at::empty({1, 2, 2}, at::TensorOptions(at::kCPU).dtype(at::kFloat));
float* data = t_in.data_ptr<float>();
auto numel = t_in.numel();
for (int i = 0; i < numel; i++) {
for (const auto i : c10::irange(numel)) {
// NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions)
data[i] = i;
}


@ -1,4 +1,5 @@
#include <benchmark/benchmark.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/tensorexpr/ir_simplifier.h>
#include <torch/csrc/jit/tensorexpr/llvm_codegen.h>
#include <torch/csrc/jit/tensorexpr/loopnest.h>
@ -15,14 +16,14 @@ class ConcatBench : public benchmark::Fixture {
input_sizes_ = std::move(input_sizes);
concat_dim_ = concat_dim;
inputs_.resize(input_sizes_.size());
for (size_t i = 0; i < input_sizes_.size(); ++i) {
for (const auto i : c10::irange(input_sizes_.size())) {
inputs_[i] = torch::ones({input_sizes_[i][0], input_sizes_[i][1]});
}
output_size_.resize(input_sizes_.front().size());
for (size_t i = 0; i < output_size_.size(); ++i) {
for (const auto i : c10::irange(output_size_.size())) {
if (i == static_cast<size_t>(concat_dim_)) {
output_size_[i] = 0;
for (size_t j = 0; j < input_sizes_.size(); ++j) {
for (const auto j : c10::irange(input_sizes_.size())) {
output_size_[i] += input_sizes_[j][i];
}
} else {
@ -64,7 +65,7 @@ class ConcatBench : public benchmark::Fixture {
[&](const VarHandle& m, const VarHandle& n) {
int d = 0;
std::vector<int> cumulative_concat_dim_sizes(num_inputs);
for (size_t i = 0; i < num_inputs; ++i) {
for (const auto i : c10::irange(num_inputs)) {
cumulative_concat_dim_sizes[i] = d;
d += input_sizes_[i][concat_dim_];
}
@ -119,7 +120,7 @@ class ConcatBench : public benchmark::Fixture {
{input_sizes_[i][0], input_sizes_[i][1]},
kFloat));
std::vector<VarPtr> for_vars(num_inputs);
for (size_t d = 0; d < num_dims; ++d) {
for (const auto d : c10::irange(num_dims)) {
for_vars[d] =
alloc<Var>("i" + std::to_string(i) + "_" + std::to_string(d), kInt);
}


@ -1,5 +1,6 @@
#include <benchmark/benchmark.h>
#include <c10/core/InferenceMode.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/codegen/fuser/interface.h>
#include <torch/torch.h>
@ -22,7 +23,8 @@ static void FusedOverhead(benchmark::State& state) {
auto z = torch::ones({1});
// Warmup.
for (int i = 0; i < 8; i++) {
for (const auto i : c10::irange(8)) {
(void)i; // Suppress unused variable warning
m.run_method("two_adds", x, y, z);
}
@ -43,7 +45,8 @@ static void UnfusedOverhead(benchmark::State& state) {
auto z = torch::ones({1});
// Warmup.
for (int i = 0; i < 8; i++) {
for (const auto i : c10::irange(8)) {
(void)i; // Suppress unused variable warning
m.run_method("two_adds", x, y, z);
}


@ -1,4 +1,5 @@
#include <benchmark/benchmark.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/tensorexpr/analysis.h>
#include <torch/csrc/jit/tensorexpr/ir_simplifier.h>
#include <torch/csrc/jit/tensorexpr/llvm_codegen.h>
@ -53,7 +54,7 @@ BENCHMARK_DEFINE_F(ParallelAdd, Simple)(benchmark::State& state) {
float* c_ptr = C.data_ptr<float>();
std::vector<void*> args({c_ptr, a_ptr, b_ptr});
cg.value<int>(args);
for (int i = 0; i < M; i++) {
for (const auto i : c10::irange(M)) {
float diff = fabs(a_ptr[i] + b_ptr[i] - c_ptr[i]);
TORCH_CHECK(diff < 1e-5);
}


@ -1,4 +1,5 @@
#include <benchmark/benchmark.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/tensorexpr/analysis.h>
#include <torch/csrc/jit/tensorexpr/ir_simplifier.h>
#include <torch/csrc/jit/tensorexpr/llvm_codegen.h>
@ -78,7 +79,7 @@ static void reduce1d_naive(at::Tensor& A, at::Tensor& B) {
int size = A.numel();
TORCH_CHECK(B.numel() == 1);
*pB = 0.;
for (int i = 0; i < size; i++) {
for (const auto i : c10::irange(size)) {
*pB += pA[i];
}
}
@ -101,18 +102,18 @@ static void reduce1d_native_rfactor(at::Tensor& A, at::Tensor& B) {
TORCH_CHECK(size % kChunkSize == 0);
*pB = 0.;
float temp[kChunkSize];
for (int j = 0; j < kChunkSize; j++) {
for (const auto j : c10::irange(kChunkSize)) {
temp[j] = 0;
}
int chunk_count = size / kChunkSize;
for (int i = 0; i < chunk_count; i++) {
for (int j = 0; j < kChunkSize; j++) {
for (const auto i : c10::irange(chunk_count)) {
for (const auto j : c10::irange(kChunkSize)) {
temp[j] += pA[i * kChunkSize + j];
}
}
for (int j = 0; j < kChunkSize; j++) {
for (const auto j : c10::irange(kChunkSize)) {
*pB += temp[j];
}
}
@ -163,7 +164,7 @@ static void reduce1d_native_vector(at::Tensor& A, at::Tensor& B) {
temp = _mm256_setzero_ps();
int tile_count = size / kChunkSize;
for (int i = 0; i < tile_count; i++) {
for (const auto i : c10::irange(tile_count)) {
__m256 data = _mm256_load_ps(pA + i * kChunkSize);
temp = _mm256_add_ps(temp, data);
}
@ -196,7 +197,7 @@ static void reduce1d_native_tiled(at::Tensor& A, at::Tensor& B) {
kChunkSize,
" ! = 0");
__m256 t[kTileSize];
for (int j = 0; j < kTileSize; j++) {
for (const auto j : c10::irange(kTileSize)) {
t[j] = _mm256_setzero_ps();
}
@ -211,7 +212,7 @@ static void reduce1d_native_tiled(at::Tensor& A, at::Tensor& B) {
}
float result = sum_f32x8(t[0]);
for (int j = 1; j < kTileSize; j++) {
for (const auto j : c10::irange(1, kTileSize)) {
result += sum_f32x8(t[j]);
}
*pB = result;
@ -540,16 +541,16 @@ BENCHMARK_DEFINE_F(Reduce2DRow, Hand)(benchmark::State& state) {
for (int m_outer = 0; m_outer < M; m_outer += Mb) {
float bregs[Mb][Nb] = {0.0f};
for (int n_outer = 0; n_outer < N; n_outer += Nb) {
for (int m_inner = 0; m_inner < Mb; m_inner++) {
for (int n_inner = 0; n_inner < Nb; n_inner++) {
for (const auto m_inner : c10::irange(Mb)) {
for (const auto n_inner : c10::irange(Nb)) {
bregs[m_inner][n_inner] +=
a[(m_outer + m_inner) * N + n_outer + n_inner];
}
}
}
for (int m_inner = 0; m_inner < Mb; m_inner++) {
for (const auto m_inner : c10::irange(Mb)) {
b[m_outer + m_inner] = 0.f;
for (int n_inner = 0; n_inner < Nb; n_inner++) {
for (const auto n_inner : c10::irange(Nb)) {
b[m_outer + m_inner] += bregs[m_inner][n_inner];
}
}


@ -24,6 +24,7 @@
#include "caffe2/core/operator.h"
#include "caffe2/utils/string_utils.h"
#include "c10/util/string_utils.h"
#include <c10/util/irange.h>
using std::map;
using std::shared_ptr;
@ -55,12 +56,12 @@ void writeTextOutput(
int dims_size = tensor_proto.dims_size();
long long elem_dim_size =
dims_size > 1 ? tensor_proto.dims(1) : tensor_proto.dims(0);
for (int i = 2; i < dims_size; i++) {
for (const auto i : c10::irange(2, dims_size)) {
elem_dim_size *= tensor_proto.dims(i);
}
std::vector<std::string> lines;
std::string dims;
for (int i = 0; i < dims_size; i++) {
for (const auto i : c10::irange(dims_size)) {
int dim = tensor_proto.dims(i);
if (i > 0) {
dims += ", ";


@ -2,6 +2,7 @@
#include <c10/core/DeviceType.h>
#include <c10/mobile/CPUCachingAllocator.h>
#include <c10/mobile/CPUProfilingAllocator.h>
#include <c10/util/irange.h>
// TODO: rename flags to C10
C10_DEFINE_bool(
@ -30,7 +31,7 @@ void memset_junk(void* data, size_t num) {
int32_t int64_count = num / sizeof(kJunkPattern64);
int32_t remaining_bytes = num % sizeof(kJunkPattern64);
int64_t* data_i64 = reinterpret_cast<int64_t*>(data);
for (int i = 0; i < int64_count; i++) {
for (const auto i : c10::irange(int64_count)) {
data_i64[i] = kJunkPattern64;
}
if (remaining_bytes > 0) {


@ -5,6 +5,7 @@
#include <c10/core/WrapDimMinimal.h>
#include <c10/core/impl/LocalDispatchKeySet.h>
#include <c10/util/Optional.h>
#include <c10/util/irange.h>
C10_DEFINE_bool(
caffe2_keep_on_shrink,
@ -335,7 +336,7 @@ bool TensorImpl::compute_non_overlapping_and_dense() const {
}
SmallVector<int64_t, 5> perm;
perm.resize(dim());
for (int64_t i = 0; i < dim(); i++) {
for (const auto i : c10::irange(dim())) {
perm[i] = i;
}
// Sort by strides, leaving 0 and 1 sized dims at the end of the array
@ -349,7 +350,7 @@ bool TensorImpl::compute_non_overlapping_and_dense() const {
sizes_and_strides_.stride_at_unchecked(b);
});
auto require_stride = 1;
for (int64_t i = 0; i < dim(); i++) {
for (const auto i : c10::irange(dim())) {
const auto size_perm_i = sizes_and_strides_.size_at_unchecked(perm[i]);
if (size_perm_i < 2) {
return true;


@ -19,6 +19,7 @@
#include <c10/util/Logging.h>
#include <c10/util/Optional.h>
#include <c10/util/accumulate.h>
#include <c10/util/irange.h>
#include <c10/util/python_stub.h>
// A global boolean variable to control whether we free memory when a Tensor
@ -68,7 +69,7 @@ inline std::vector<int64_t> ToVectorint64_t(ArrayRef<int> src) {
*/
inline int64_t size_from_dim_(int k, IntArrayRef dims) {
int64_t r = 1;
for (size_t i = k; i < dims.size(); ++i) {
for (const auto i : c10::irange(k, dims.size())) {
r *= dims[i];
}
return r;
@ -78,7 +79,7 @@ inline int64_t size_from_dim_(int k, IntArrayRef dims) {
inline int64_t size_to_dim_(int k, IntArrayRef dims) {
TORCH_CHECK((unsigned)k <= dims.size());
int64_t r = 1;
for (int i = 0; i < k; ++i) {
for (const auto i : c10::irange(k)) {
r *= dims[i];
}
return r;
@ -2163,7 +2164,7 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
auto old_numel = numel_;
sizes_and_strides_.resize(src.size());
int64_t new_numel = 1;
for (size_t i = 0; i < src.size(); ++i) {
for (const auto i : c10::irange(src.size())) {
new_numel *= src[i];
sizes_and_strides_.size_at_unchecked(i) = src[i];
}


@ -2,6 +2,7 @@
#include <c10/core/impl/InlineDeviceGuard.h>
#include <c10/util/ArrayRef.h>
#include <c10/util/irange.h>
namespace c10 {
namespace impl {
@ -237,7 +238,7 @@ class InlineMultiStreamGuard {
static DeviceType getDeviceTypeOfStreams(ArrayRef<Stream> streams) {
TORCH_INTERNAL_ASSERT(!streams.empty());
DeviceType type = streams[0].device_type();
for (size_t idx = 1; idx < streams.size(); idx++) {
for (const auto idx : c10::irange(1, streams.size())) {
TORCH_CHECK_VALUE(
streams[idx].device_type() == type,
"Streams have a mix of device types: stream 0 is on ",


@ -201,7 +201,7 @@ static SizesAndStrides makeBig(int offset = 0) {
static void checkSmall(const SizesAndStrides& sm, int offset = 0) {
std::vector<int64_t> sizes(3), strides(3);
for (int ii = 0; ii < 3; ++ii) {
for (const auto ii : c10::irange(3)) {
sizes[ii] = ii + 1 + offset;
strides[ii] = 2 * (ii + 1 + offset);
}
@ -210,7 +210,7 @@ static void checkSmall(const SizesAndStrides& sm, int offset = 0) {
static void checkBig(const SizesAndStrides& big, int offset = 0) {
std::vector<int64_t> sizes(8), strides(8);
for (int ii = 0; ii < 8; ++ii) {
for (const auto ii : c10::irange(8)) {
sizes[ii] = ii - 1 + offset;
strides[ii] = 2 * (ii - 1 + offset);
}


@ -1,6 +1,7 @@
#include <gtest/gtest.h>
#include <c10/util/Bitset.h>
#include <c10/util/irange.h>
using c10::utils::bitset;
@ -37,7 +38,7 @@ TEST(BitsetTest, givenEmptyBitset_whenSettingBit_thenIsSet) {
TEST(BitsetTest, givenEmptyBitset_whenSettingBit_thenOthersStayUnset) {
bitset b;
b.set(6);
for (size_t i = 0; i < 6; ++i) {
for (const auto i : c10::irange(6)) {
EXPECT_FALSE(b.get(i));
}
for (size_t i = 7; i < bitset::NUM_BITS(); ++i) {
@ -56,10 +57,10 @@ TEST(BitsetTest, givenNonemptyBitset_whenSettingBit_thenOthersStayAtOldValue) {
bitset b;
b.set(6);
b.set(30);
for (size_t i = 0; i < 6; ++i) {
for (const auto i : c10::irange(6)) {
EXPECT_FALSE(b.get(i));
}
for (size_t i = 7; i < 30; ++i) {
for (const auto i : c10::irange(7, 30)) {
EXPECT_FALSE(b.get(i));
}
for (size_t i = 31; i < bitset::NUM_BITS(); ++i) {
@ -82,7 +83,7 @@ TEST(
b.set(6);
b.set(30);
b.unset(6);
for (size_t i = 0; i < 30; ++i) {
for (const auto i : c10::irange(30)) {
EXPECT_FALSE(b.get(i));
}
EXPECT_TRUE(b.get(30));
@ -100,7 +101,7 @@ struct IndexCallbackMock final {
void expect_was_called_for_indices(std::vector<size_t> expected_indices) {
EXPECT_EQ(expected_indices.size(), called_for_indices.size());
for (size_t i = 0; i < expected_indices.size(); ++i) {
for (const auto i : c10::irange(expected_indices.size())) {
EXPECT_EQ(expected_indices[i], called_for_indices[i]);
}
}


@ -1,6 +1,7 @@
// clang-format off
#include <c10/util/BFloat16.h>
#include <c10/util/BFloat16-math.h>
#include <c10/util/irange.h>
// clang-format on
#include <gtest/gtest.h>
@ -24,7 +25,7 @@ float float_from_bytes(uint32_t sign, uint32_t exponent, uint32_t fraction) {
TEST(BFloat16Conversion, FloatToBFloat16AndBack) {
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,cppcoreguidelines-avoid-magic-numbers,modernize-avoid-c-arrays)
float in[100];
for (int i = 0; i < 100; ++i) {
for (const auto i : c10::irange(100)) {
// NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions,cppcoreguidelines-avoid-magic-numbers)
in[i] = i + 1.25;
}
@ -34,7 +35,7 @@ TEST(BFloat16Conversion, FloatToBFloat16AndBack) {
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,cppcoreguidelines-avoid-magic-numbers,modernize-avoid-c-arrays)
float out[100];
for (int i = 0; i < 100; ++i) {
for (const auto i : c10::irange(100)) {
bfloats[i].x = c10::detail::bits_from_f32(in[i]);
out[i] = c10::detail::f32_from_bits(bfloats[i].x);
@ -47,7 +48,7 @@ TEST(BFloat16Conversion, FloatToBFloat16AndBack) {
TEST(BFloat16Conversion, FloatToBFloat16RNEAndBack) {
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,cppcoreguidelines-avoid-magic-numbers,modernize-avoid-c-arrays)
float in[100];
for (int i = 0; i < 100; ++i) {
for (const auto i : c10::irange(100)) {
// NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions,cppcoreguidelines-avoid-magic-numbers)
in[i] = i + 1.25;
}
@ -57,7 +58,7 @@ TEST(BFloat16Conversion, FloatToBFloat16RNEAndBack) {
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,cppcoreguidelines-avoid-magic-numbers,modernize-avoid-c-arrays)
float out[100];
for (int i = 0; i < 100; ++i) {
for (const auto i : c10::irange(100)) {
bfloats[i].x = c10::detail::round_to_nearest_even(in[i]);
out[i] = c10::detail::f32_from_bits(bfloats[i].x);


@ -4,6 +4,7 @@
#include <c10/macros/Macros.h>
#include <c10/util/Exception.h>
#include <c10/util/irange.h>
#include <c10/util/order_preserving_flat_hash_map.h>
#include <gtest/gtest.h>
@ -15,14 +16,15 @@ using dict_int_int =
ska_ordered::order_preserving_flat_hash_map<int64_t, int64_t>;
dict_int_int test_dict(dict_int_int& dict) {
for (int64_t i = 0; i < 100; ++i) {
for (const auto i : c10::irange(100)) {
dict[i] = i + 1;
}
int64_t i = 0;
int64_t entry_i = 0;
for (auto entry : dict) {
TORCH_INTERNAL_ASSERT(entry.first == i && entry.second == i + 1);
++i;
TORCH_INTERNAL_ASSERT(
entry.first == entry_i && entry.second == entry_i + 1);
++entry_i;
}
// erase a few entries by themselves
@ -33,29 +35,32 @@ dict_int_int test_dict(dict_int_int& dict) {
// erase via iterators
auto begin = dict.begin();
for (size_t i = 0; i < 20; ++i)
for (const auto i : c10::irange(20)) {
(void)i; // Suppress unused variable warning
begin++;
}
auto end = begin;
for (size_t i = 0; i < 20; ++i) {
for (const auto i : c10::irange(20)) {
(void)i; // Suppress unused variable warning
erase_set.insert(end->first);
end++;
}
dict.erase(begin, end);
std::vector<size_t> order;
for (size_t i = 0; i < 100; ++i) {
for (const auto i : c10::irange(100)) {
if (!erase_set.count(i)) {
order.push_back(i);
}
}
i = 0;
entry_i = 0;
for (auto entry : dict) {
TORCH_INTERNAL_ASSERT(order[i] == entry.first);
TORCH_INTERNAL_ASSERT(dict[order[i]] == entry.second);
TORCH_INTERNAL_ASSERT(entry.second == order[i] + 1);
i++;
TORCH_INTERNAL_ASSERT(order[entry_i] == entry.first);
TORCH_INTERNAL_ASSERT(dict[order[entry_i]] == entry.second);
TORCH_INTERNAL_ASSERT(entry.second == order[entry_i] + 1);
entry_i++;
}
TORCH_INTERNAL_ASSERT(dict.size() == order.size());
return dict;
@ -113,12 +118,12 @@ TEST(OrderedPreservingDictTest, DictCollisions) {
for (auto init_dict_size : {27, 34, 41}) {
bad_hash_dict dict;
for (int64_t i = 0; i < init_dict_size; ++i) {
for (const auto i : c10::irange(init_dict_size)) {
dict[i] = i + 1;
}
int64_t i = 0;
for (auto entry : dict) {
for (const auto& entry : dict) {
TORCH_INTERNAL_ASSERT(entry.first == i && entry.second == i + 1);
++i;
}
@ -131,20 +136,22 @@ TEST(OrderedPreservingDictTest, DictCollisions) {
// erase a few entries via iterator
auto begin = dict.begin();
for (size_t i = 0; i < 10; ++i) {
for (const auto j : c10::irange(10)) {
(void)j; // Suppress unused variable warning
begin++;
}
auto end = begin;
for (size_t i = 0; i < 7; ++i) {
for (const auto j : c10::irange(7)) {
(void)j; // Suppress unused variable warning
erase_set.insert(end->first);
end++;
}
dict.erase(begin, end);
std::vector<int64_t> order;
for (int64_t i = 0; i < init_dict_size; ++i) {
if (!erase_set.count(i)) {
order.push_back(i);
for (const auto j : c10::irange(init_dict_size)) {
if (!erase_set.count(j)) {
order.push_back(j);
}
}
@ -167,7 +174,7 @@ TEST(OrderedPreservingDictTest, test_range_insert) {
// check values
const int nb_values = 1000;
std::vector<std::pair<int, int>> values;
for (int i = 0; i < nb_values; i++) {
for (const auto i : c10::irange(nb_values)) {
// NOLINTNEXTLINE(modernize-use-emplace,performance-inefficient-vector-operation)
values.push_back(std::make_pair(i, i + 1));
}
@ -190,7 +197,7 @@ TEST(OrderedPreservingDictTest, test_range_erase_all) {
// insert x values, delete all
const std::size_t nb_values = 1000;
dict_int_int map;
for (size_t i = 0; i < nb_values; ++i) {
for (const auto i : c10::irange(nb_values)) {
map[i] = i + 1;
}
auto it = map.erase(map.begin(), map.end());
@ -206,7 +213,7 @@ TEST(OrderedPreservingDictTest, test_range_erase) {
const std::size_t nb_values = 1000;
HMap map;
for (size_t i = 0; i < nb_values; ++i) {
for (const auto i : c10::irange(nb_values)) {
map[c10::guts::to_string(i)] = i;
auto begin = map.begin();
for (size_t j = 0; j <= i; ++j, begin++) {
@ -305,7 +312,7 @@ TEST(OrderedPreservingDictTest, test_copy_constructor_and_operator) {
const std::size_t nb_values = 100;
HMap map;
for (size_t i = 0; i < nb_values; ++i) {
for (const auto i : c10::irange(nb_values)) {
map[c10::guts::to_string(i)] = c10::guts::to_string(i);
}


@ -1,6 +1,7 @@
#include <c10/util/Backtrace.h>
#include <c10/util/Optional.h>
#include <c10/util/Type.h>
#include <c10/util/irange.h>
#include <functional>
#include <memory>
@ -281,8 +282,7 @@ std::string get_backtrace(
// Toggles to true after the first skipped python frame.
bool has_skipped_python_frames = false;
for (size_t frame_number = 0; frame_number < callstack.size();
++frame_number) {
for (const auto frame_number : c10::irange(callstack.size())) {
const auto frame = parse_frame_information(symbols[frame_number]);
if (skip_python_frames && frame && is_python_frame(*frame)) {


@ -27,6 +27,7 @@
#include <c10/util/flat_hash_map.h>
#include <c10/core/ScalarType.h>
#include <c10/util/irange.h>
/*
* TypeIdentifier is a small type containing an id.
@ -170,7 +171,7 @@ struct TypeMetaData final {
template <typename T>
inline void _PlacementNew(void* ptr, size_t n) {
T* typed_ptr = static_cast<T*>(ptr);
for (size_t i = 0; i < n; ++i) {
for (const auto i : c10::irange(n)) {
new (typed_ptr + i) T;
}
}
@ -234,7 +235,7 @@ template <typename T>
inline void _Copy(const void* src, void* dst, size_t n) {
const T* typed_src = static_cast<const T*>(src);
T* typed_dst = static_cast<T*>(dst);
for (size_t i = 0; i < n; ++i) {
for (const auto i : c10::irange(n)) {
typed_dst[i] = typed_src[i];
}
}
@ -274,7 +275,7 @@ inline constexpr TypeMetaData::Copy* _PickCopy() {
template <typename T>
inline void _PlacementDelete(void* ptr, size_t n) {
T* typed_ptr = static_cast<T*>(ptr);
for (size_t i = 0; i < n; ++i) {
for (const auto i : c10::irange(n)) {
typed_ptr[i].~T();
}
}


@ -3,6 +3,7 @@
#include <string>
#include <ATen/ATen.h>
#include <c10/macros/Macros.h>
#include <c10/util/irange.h>
#include <caffe2/core/context.h>
#include <caffe2/core/operator.h>
#include <caffe2/utils/math.h>
@ -130,7 +131,7 @@ private:
void assignListStartingAt(
size_t offset,
const std::vector<at::Tensor>& tensors) {
for (size_t i = 0; i < tensors.size(); i++) {
for (const auto i : c10::irange(tensors.size())) {
assignTo(Output(offset + i), tensors[i]);
}
}
@ -176,7 +177,7 @@ private:
std::stringstream descriptor;
descriptor << op;
std::vector<std::string> attrs;
for(size_t i = 0; i < operator_def.arg_size(); i++) {
for (const auto i : c10::irange(operator_def.arg_size())) {
auto & attr = operator_def.arg(i);
if(attr.name() == "operator" || attr.name() == "type" )
continue;
@ -223,7 +224,7 @@ private:
std::vector<int64_t> ints =
OperatorBase::GetRepeatedArgument<int64_t>(name, {});
std::array<bool, N> result;
for (size_t i = 0; i < N; ++i) {
for (const auto i : c10::irange(N)) {
result[i] = ints.at(i);
}
return result;


@ -118,8 +118,8 @@ class Fp16FCAccOp final : public Operator<Context> {
if (!W_fbgemm->packed()) {
float* W_fp16_trans = new float[W_size];
fbgemm::Float16ToFloat_avx2(W_fbgemm->pmat(), W_fp16_trans, W_size);
for (int i = 0; i < N; i++) {
for (int j = 0; j < K; j++) {
for (const auto i : c10::irange(N)) {
for (const auto j : c10::irange(K)) {
W_fp16_[j * N + i] = W_fp16_trans[i * K + j];
}
}
@ -136,8 +136,8 @@ class Fp16FCAccOp final : public Operator<Context> {
const auto& W = Input(1);
W_data = W.template data<T_W>();
// Transpose W
for (int i = 0; i < N; i++) {
for (int j = 0; j < K; j++) {
for (const auto i : c10::irange(N)) {
for (const auto j : c10::irange(K)) {
W_fp16_[j * N + i] = W_data[i * K + j];
}
}
@ -352,7 +352,7 @@ class Fp16FCAccOp final : public Operator<Context> {
#ifdef LOG_LEVEL_FOR_FBFCPACKEDACC16_ACCURACY_LOG
float compute_L2_norm(float* A, int size) {
float square_sum = 0.0;
for (int i = 0; i < size; i++) {
for (const auto i : c10::irange(size)) {
square_sum += A[i] * A[i];
}
return std::sqrt(square_sum);
@ -360,7 +360,7 @@ class Fp16FCAccOp final : public Operator<Context> {
float compute_relative_error(float* A, float* A_ref, int size) {
float error = 0.0;
for (int i = 0; i < size; i++) {
for (const auto i : c10::irange(size)) {
error += (A[i] - A_ref[i]) * (A[i] - A_ref[i]);
}
error = std::sqrt(error);


@ -22,7 +22,7 @@ void Int8DequantizeNNPI(
const float X_scale,
const int32_t X_offset) {
float X_scale_fp32 = 1.0f / X_scale;
for (auto i = 0; i < N; ++i) {
for (const auto i : c10::irange(N)) {
out[i] = (float)(static_cast<int32_t>(in[i]) - X_offset) / X_scale_fp32;
}
} // namespace


@ -53,12 +53,12 @@ void Int8QuantizeNNPI(
std::vector<float> inv_scalev(N, inv_scale_fp16);
std::vector<float> offsetv(N, -offset_tmp);
fake_fp16::fma_fp16(N, in_fp16.data(), inv_scalev.data(), offsetv.data());
for (int i = 0; i < N; i++) {
for (const auto i : c10::irange(N)) {
offsetv[i] = round(offsetv[i]);
}
fbgemm::RoundToFloat16(
offsetv.data(), offsetv.data(), N, false /* no clamping */);
for (int i = 0; i < N; i++) {
for (const auto i : c10::irange(N)) {
float halfRes = offsetv[i];
if (std::isinf(halfRes)) {
if (halfRes > 0) {


@ -29,7 +29,7 @@ void SwishFakeInt8NNPI(
int32_t quant_val = 0;
uint8_t result = 0;
for (auto i = 0; i < N; ++i) {
for (const auto i : c10::irange(N)) {
deq_val = (static_cast<uint8_t>(in[i]) - X_offset) / X_scale_fp32;
deq_swish = deq_val / (1 + exp(-deq_val));
quant_val = round(deq_swish / Y_scale + Y_offset);


@ -129,7 +129,7 @@ class LayerNormFakeFp16Op final : public Operator<CPUContext> {
FLAGS_caffe2_fbgemm_fake_fp16_clamp,
false /*USE_ACC_FP16*/);
for (int i = 0; i < M; ++i) {
for (const auto i : c10::irange(M)) {
// fma_fp16(A, B, Out) -> Out = A * B + Out
std::vector<float> out(N);
std::memcpy(out.data(), bias_data.data(), sizeof(float) * N);
@ -169,7 +169,7 @@ class LayerNormFakeFp16Op final : public Operator<CPUContext> {
const int32_t qmin = std::numeric_limits<uint8_t>::min();
const int32_t qmax = std::numeric_limits<uint8_t>::max();
for (int i = 0; i < Nout; i++) {
for (const auto i : c10::irange(Nout)) {
float halfRes = offsetv[i];
halfRes = round(halfRes);
if (std::isinf(halfRes)) {

View File

@ -85,7 +85,7 @@ class SparseLengthsFused4BitRowwiseFakeFP16Op final : public Operator<Context> {
const auto scale_bias_offset = 2 * sizeof(at::Half);
const int64_t input_fused_block_size = input_block_size + scale_bias_offset;
int64_t current = 0;
for (int m = 0; m < output_size; ++m) {
for (const auto m : c10::irange(output_size)) {
if (!use_fp16_for_embedding_only) {
memset(rowTempSums[0].data(), 0, sizeof(float) * output_block_size);
memset(rowTempSums[1].data(), 0, sizeof(float) * output_block_size);
@ -135,7 +135,7 @@ class SparseLengthsFused4BitRowwiseFakeFP16Op final : public Operator<Context> {
// Unpack int4 elements
std::vector<float> input_rounded(output_block_size);
int k = 0;
for (int j = 0; j < input_block_size; j++) {
for (const auto j : c10::irange(input_block_size)) {
input_rounded[k++] =
input[input_fused_block_size * indices_data[current] + j] & 0x0f;
input_rounded[k++] =
@ -150,7 +150,7 @@ class SparseLengthsFused4BitRowwiseFakeFP16Op final : public Operator<Context> {
input_rounded.data(),
product_rounded.data());
for (int j = 0; j < output_block_size; ++j) {
for (const auto j : c10::irange(output_block_size)) {
product_rounded[j] += bias;
}
@ -190,7 +190,7 @@ class SparseLengthsFused4BitRowwiseFakeFP16Op final : public Operator<Context> {
}
if (!use_fp16_for_embedding_only) {
for (int j = 0; j < output_block_size; ++j) {
for (const auto j : c10::irange(output_block_size)) {
out[j] = rowTempSums[0][j] + rowTempSums[1][j];
}
fbgemm::RoundToFloat16(

View File

@ -84,7 +84,7 @@ class SparseLengthsFused8BitRowwiseFakeFP16Op final : public Operator<Context> {
const auto scale_bias_offset = 8 / sizeof(uint8_t);
const int64_t fused_block_size = block_size + scale_bias_offset;
int64_t current = 0;
for (int m = 0; m < output_size; ++m) {
for (const auto m : c10::irange(output_size)) {
memset(out, 0, sizeof(float) * block_size);
memset(rowTempSums[0].data(), 0, sizeof(float) * block_size);
memset(rowTempSums[1].data(), 0, sizeof(float) * block_size);
@ -152,7 +152,7 @@ class SparseLengthsFused8BitRowwiseFakeFP16Op final : public Operator<Context> {
// Fake fp16 rounding of input/ it is already ints
std::vector<float> input_rounded(block_size);
for (int j = 0; j < block_size; ++j) {
for (const auto j : c10::irange(block_size)) {
input_rounded[j] =
input[fused_block_size * indices_data[current] + j];
}
@ -164,7 +164,7 @@ class SparseLengthsFused8BitRowwiseFakeFP16Op final : public Operator<Context> {
TypedAxpy<float, float>(
block_size, scale, input_rounded.data(), product_rounded.data());
for (int j = 0; j < block_size; ++j) {
for (const auto j : c10::irange(block_size)) {
product_rounded[j] += bias;
}
@ -215,7 +215,7 @@ class SparseLengthsFused8BitRowwiseFakeFP16Op final : public Operator<Context> {
block_size,
FLAGS_caffe2_fbgemm_fake_fp16_clamp);
for (int j = 0; j < block_size; ++j) {
for (const auto j : c10::irange(block_size)) {
product_rounded[j] += bias;
}
// Fake fp16 rounding of w x scale x input + w x bias
@ -239,7 +239,7 @@ class SparseLengthsFused8BitRowwiseFakeFP16Op final : public Operator<Context> {
block_size,
FLAGS_caffe2_fbgemm_fake_fp16_clamp);
} else if (use_acc_fp32) {
for (int j = 0; j < block_size; ++j) {
for (const auto j : c10::irange(block_size)) {
float deqVal = fake_fp16::fmafp32_avx_emulation(
scale,
input_rounded[j],
@ -256,7 +256,7 @@ class SparseLengthsFused8BitRowwiseFakeFP16Op final : public Operator<Context> {
TypedAxpy<float, float>(block_size, scale, input_rounded.data(), out);
for (int j = 0; j < block_size; ++j) {
for (const auto j : c10::irange(block_size)) {
out[j] += bias;
}
}
@ -264,7 +264,7 @@ class SparseLengthsFused8BitRowwiseFakeFP16Op final : public Operator<Context> {
}
if (use_nnpi_fma || use_acc_fp32) {
for (int j = 0; j < block_size; ++j) {
for (const auto j : c10::irange(block_size)) {
out[j] = rowTempSums[0][j] + rowTempSums[1][j];
}
}

View File

@ -94,7 +94,7 @@ class SparseLengthsReductionFakeFp16Op final : public Operator<CPUContext> {
float* out = out_data;
int64_t current = 0;
for (int m = 0; m < output_size; ++m) {
for (const auto m : c10::irange(output_size)) {
memset(out, 0, sizeof(float) * block_size);
if (current + lengths[m] > index_size) {
return false;

View File

@ -39,7 +39,7 @@ class TanhInt8QuantizeNNPIOp final : public Operator<CPUContext> {
Y_scale = 1.0f / Y_scale;
// create table once
for (int i = 0; i < lutSize; i++) {
for (const auto i : c10::irange(lutSize)) {
short input = i + tanhLUTMinOffset;
float x = _cvtsh_ss(input);
float tanh_x = tanh(x);
@ -54,7 +54,7 @@ class TanhInt8QuantizeNNPIOp final : public Operator<CPUContext> {
}
const float* X_data = X.template data<float>();
for (int i = 0; i < X.numel(); i++) {
for (const auto i : c10::irange(X.numel())) {
short val = _cvtss_sh(X_data[i], 0);
unsigned short max16BitPositive = 0x7FFF;
unsigned short input16Bit = (*(unsigned short*)& val);

View File

@ -159,7 +159,7 @@ class SpatialBNFakeLoweredFp16Op : public Operator<CPUContext> {
const int stride = C * HxW;
const float* X_ptr = X;
float* Y_ptr = Y;
for (int i = 0; i < N; ++i) {
for (const auto i : c10::irange(N)) {
EigenArrayMap<float>(Y_ptr, HxW, C) =
ConstEigenArrayMap<float>(X_ptr, HxW, C).rowwise() -
mean_arr.transpose();
@ -356,9 +356,9 @@ class SpatialBNFakeFp16Op : public Operator<CPUContext> {
float* Y_ptr = Y;
// Do Y = X * scale + bias
for (int i = 0; i < N; i++) {
for (int j = 0; j < C; j++) {
for (int k = 0; k < HxW; k++) {
for (const auto i : c10::irange(N)) {
for (const auto j : c10::irange(C)) {
for (const auto k : c10::irange(HxW)) {
Y_ptr[HxW * j + k] = bias[j];
}

View File

@ -18,7 +18,7 @@ class SumFP16FP16AccOp : public Operator<Context> {
size_t N = input0.numel();
auto* output = Output(0, input0.sizes(), at::dtype<float>());
// Dimension checking
for (int i = 1; i < InputSize(); ++i) {
for (const auto i : c10::irange(1, InputSize())) {
if (output->sizes() != Input(i).sizes()) {
CAFFE_THROW(
"Check failed: output->sizes() == Input(i).sizes().",
@ -37,7 +37,7 @@ class SumFP16FP16AccOp : public Operator<Context> {
std::vector<float> t1(N);
std::vector<float> t2(N);
for (auto i = 0; i < InputSize(); i++) {
for (const auto i : c10::irange(InputSize())) {
fbgemm::RoundToFloat16(
Input(i).template data<float>(),
t1.data(),

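For loops that do not start at zero, such as the `c10::irange(1, InputSize())` dimension check above, the two-argument overload `c10::irange(begin, end)` is used. A hedged sketch with illustrative names (only `c10::irange` is taken from the diff):

```cpp
#include <c10/util/irange.h>

// Sums vals[1] .. vals[n-1], skipping element 0 the way the shape check above
// skips Input(0).
int sum_tail(const int* vals, int n) {
  int total = 0;
  // Before: for (int i = 1; i < n; ++i)
  for (const auto i : c10::irange(1, n)) {
    total += vals[i];
  }
  return total;
}
```
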
View File

@ -85,13 +85,13 @@ class AllgatherOp final : public Operator<Context> {
// Verify tensors all have same size
size_t size = Input(1).numel();
for (auto i = 2; i < InputSize(); i++) {
for (const auto i : c10::irange(2, InputSize())) {
CAFFE_ENFORCE_EQ(Input(i).numel(), size);
}
// Verify tensors all have same type
TypeMeta meta = Input(1).dtype();
for (auto i = 2; i < InputSize(); i++) {
for (const auto i : c10::irange(2, InputSize())) {
CAFFE_ENFORCE(Input(i).dtype() == meta);
}
@ -113,7 +113,7 @@ class AllgatherOp final : public Operator<Context> {
params.inputs.resize(InputSize() - 1);
params.size = Input(1).numel();
params.meta = Input(1).dtype();
for (auto i = 0; i < params.inputs.size(); i++) {
for (const auto i : c10::irange(params.inputs.size())) {
params.inputs[i] = Input(i + 1).raw_data();
}
params.outputs.resize(OutputSize());

View File

@ -65,19 +65,19 @@ class AllreduceOp final : public Operator<Context> {
// Verify inputs == outputs
CAFFE_ENFORCE_EQ(init_.inputs.size(), init_.outputs.size());
for (auto i = 0U; i < init_.inputs.size(); i++) {
for (const auto i : c10::irange(0U, init_.inputs.size())) {
CAFFE_ENFORCE_EQ(init_.inputs[i], init_.outputs[i]);
}
// Verify tensors all have same size
auto size = Input(1).numel();
for (auto i = 2; i < InputSize(); i++) {
for (const auto i : c10::irange(2, InputSize())) {
CAFFE_ENFORCE_EQ(Input(i).numel(), size);
}
// Verify tensors all have same type
TypeMeta meta = Input(1).dtype();
for (auto i = 2; i < InputSize(); i++) {
for (const auto i : c10::irange(2, InputSize())) {
CAFFE_ENFORCE(Input(i).dtype() == meta);
}
@ -115,7 +115,7 @@ class AllreduceOp final : public Operator<Context> {
params.context = OperatorBase::Input<std::shared_ptr<::gloo::Context>>(0);
params.inputs.resize(InputSize() - 1);
params.outputs.resize(OutputSize());
for (auto i = 0U; i < params.inputs.size(); i++) {
for (const auto i : c10::irange(0U, params.inputs.size())) {
params.inputs[i] = Input(i + 1).raw_data();
params.outputs[i] = Output(i)->raw_mutable_data();
}

View File

@ -60,19 +60,19 @@ class BroadcastOp final : public Operator<Context> {
// Verify inputs == outputs
CAFFE_ENFORCE_EQ(init_.inputs.size(), init_.outputs.size());
for (auto i = 0; i < init_.inputs.size(); i++) {
for (const auto i : c10::irange(init_.inputs.size())) {
CAFFE_ENFORCE_EQ(init_.inputs[i], init_.outputs[i]);
}
// Verify tensors all have same size
size_t size = Input(1).numel();
for (auto i = 2; i < InputSize(); i++) {
for (const auto i : c10::irange(2, InputSize())) {
CAFFE_ENFORCE_EQ(Input(i).numel(), size);
}
// Verify tensors all have same size
TypeMeta meta = Input(1).dtype();
for (auto i = 2; i < InputSize(); i++) {
for (const auto i : c10::irange(2, InputSize())) {
CAFFE_ENFORCE(Input(i).dtype() == meta);
}
@ -94,7 +94,7 @@ class BroadcastOp final : public Operator<Context> {
params.context = OperatorBase::Input<std::shared_ptr<::gloo::Context>>(0);
params.inputs.resize(InputSize() - 1);
params.outputs.resize(OutputSize());
for (auto i = 0; i < params.inputs.size(); i++) {
for (const auto i : c10::irange(params.inputs.size())) {
params.inputs[i] = Input(i + 1).raw_data();
params.outputs[i] = Output(i)->raw_mutable_data();
}

View File

@ -75,7 +75,7 @@ class ReduceScatterOp final : public Operator<Context> {
// Verify inputs == outputs
CAFFE_ENFORCE_EQ(init_.inputs.size(), init_.outputs.size());
for (auto i = 0; i < init_.inputs.size(); i++) {
for (const auto i : c10::irange(init_.inputs.size())) {
CAFFE_ENFORCE_EQ(init_.inputs[i], init_.outputs[i]);
}
@ -107,7 +107,7 @@ class ReduceScatterOp final : public Operator<Context> {
params.context = OperatorBase::Input<std::shared_ptr<::gloo::Context>>(0);
params.inputs.resize(InputSize() - 2);
params.outputs.resize(OutputSize() - 1);
for (auto i = 0; i < params.inputs.size(); i++) {
for (const auto i : c10::irange(params.inputs.size())) {
params.inputs[i] = Input(i + 1).raw_data();
params.outputs[i] = Output(i)->raw_mutable_data();
}

View File

@ -1241,7 +1241,7 @@ inline cl_int getInfoHelper(Func f, cl_uint name, size_t<N>* param, long)
return err;
}
for(int i = 0; i < N; ++i) {
for (const auto i : c10::irange(N)) {
(*param)[i] = value[i];
}

View File

@ -9,6 +9,8 @@
#include "caffe2/core/blob.h"
#include "caffe2/core/blob_serializer_base.h"
#include "caffe2/core/tensor.h"
#include <c10/util/irange.h>
#include <c10/util/typeid.h>
#include "caffe2/core/types.h"
#include "caffe2/utils/simple_queue.h"
@ -201,7 +203,7 @@ void ExtendRepeatedField(
#else
// We unfortunately do still need to support old protobuf versions in some
// build configurations.
for (size_t i = 0; i < size; ++i) {
for (const auto i : c10::irange(size)) {
field->Add(0);
}
#endif
@ -236,7 +238,7 @@ inline void CopyToProtoWithCast(
context->template CopyToCPU<SrcType>(size, src, buffer.get());
context->FinishDeviceComputation();
field->Reserve(size);
for (size_t i = 0; i < size; ++i) {
for (const auto i : c10::irange(size)) {
field->Add(static_cast<DstType>(buffer[i]));
}
}
@ -267,7 +269,7 @@ inline void CopyFromProtoWithCast(
// CPUContext. Remove it if it is performance critical.
unique_ptr<DstType[]> buffer(new DstType[size]);
const SrcType* src = field.data();
for (size_t i = 0; i < size; ++i) {
for (const auto i : c10::irange(size)) {
buffer[i] = static_cast<DstType>(src[i]);
}
context->template CopyFromCPU<DstType>(size, buffer.get(), dst);

View File

@ -17,6 +17,7 @@
#if !defined(CAFFE2_IS_XPLAT_BUILD) && !defined(C10_MOBILE)
#include <c10/core/GeneratorImpl.h>
#include <c10/util/irange.h>
#include <ATen/core/DistributionsHelper.h>
#include <ATen/core/MT19937RNGEngine.h>
#else
@ -155,7 +156,7 @@ class TORCH_API CPUContext final : public BaseContext {
static_cast<const void*>(src),
static_cast<void*>(dst));
} else {
for (size_t i = 0; i < n; ++i) {
for (const auto i : c10::irange(n)) {
dst[i] = src[i];
}
}

View File

@ -4,6 +4,7 @@
#include <mutex>
#include <c10/util/Registry.h>
#include <c10/util/irange.h>
#include <c10/util/string_view.h>
#include "caffe2/core/blob_serialization.h"
#include "caffe2/proto/caffe2_pb.h"
@ -248,7 +249,8 @@ class TORCH_API DBReader {
*value = cursor_->value();
// In sharded mode, each read skips num_shards_ records
for (uint32_t s = 0; s < num_shards_; s++) {
for (const auto s : c10::irange(num_shards_)) {
(void)s; // Suppress unused variable
cursor_->Next();
if (!cursor_->Valid()) {
MoveToBeginning();
@ -292,7 +294,8 @@ class TORCH_API DBReader {
void MoveToBeginning() const {
cursor_->SeekToFirst();
for (uint32_t s = 0; s < shard_id_; s++) {
for (const auto s : c10::irange(shard_id_)) {
(void)s; // Suppress unused variable
cursor_->Next();
CAFFE_ENFORCE(
cursor_->Valid(), "Db has fewer rows than shard id: ", s, shard_id_);

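The DBReader hunks above also show the suppression pattern used when the old loop read its index only as a repetition counter: the new range-for variable would otherwise be unused, so a `(void)` cast is added to silence unused-variable warnings. A minimal sketch with assumed names (`advance` stands in for whatever per-iteration call the real loop makes):

```cpp
#include <c10/util/irange.h>

#include <cstdint>
#include <functional>

// Repeats `advance` num_shards times; the index s itself is never read.
void skip_shards(uint32_t num_shards, const std::function<void()>& advance) {
  for (const auto s : c10::irange(num_shards)) {
    (void)s; // Suppress unused variable warning
    advance();
  }
}
```
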
View File

@ -12,6 +12,7 @@
#include <c10/util/C++17.h>
#include <c10/util/Metaprogramming.h>
#include "caffe2/core/export_caffe2_op_to_c10.h"
#include <c10/util/irange.h>
namespace caffe2 {
@ -136,7 +137,7 @@ class C10OperatorWrapper final : public Operator<Context> {
void popOutputs_() {
AT_ASSERT(stack_.size() == op_.schema().returns().size());
for (size_t i = 0; i < op_.schema().returns().size(); ++i) {
for (const auto i : c10::irange(op_.schema().returns().size())) {
OperatorBase::SetOutputTensor(i, Tensor(std::move(stack_[i]).toTensor()));
}
stack_.clear();
@ -146,7 +147,7 @@ class C10OperatorWrapper final : public Operator<Context> {
c10::List<at::Tensor> result;
result.reserve(InputSize());
// NOLINTNEXTLINE(clang-diagnostic-sign-compare)
for (size_t i = 0; i < InputSize(); ++i) {
for (const auto i : c10::irange(InputSize())) {
result.emplace_back(Input(i));
}
return result;
@ -156,7 +157,7 @@ class C10OperatorWrapper final : public Operator<Context> {
c10::List<at::Tensor> result;
result.reserve(OutputSize());
// NOLINTNEXTLINE(clang-diagnostic-sign-compare)
for (size_t i = 0; i < OutputSize(); ++i) {
for (const auto i : c10::irange(OutputSize())) {
result.emplace_back(OperatorBase::OutputTensorOrUndefined(i));
}
return result;

View File

@ -9,6 +9,7 @@
#include <ATen/core/op_registration/op_registration.h>
#include <torch/csrc/jit/frontend/function_schema_parser.h>
#include <c10/core/CompileTimeFunctionPointer.h>
#include <c10/util/irange.h>
#include <torch/library.h>
#include <vector>
@ -94,7 +95,7 @@ inline void _call_caffe2_op_from_c10(
// We should not unwrap the list if we expect tensor list in the schema.
torch::jit::push(*stack, outputs);
} else {
for (size_t i = 0; i < outputs.size(); ++i) {
for (const auto i : c10::irange(outputs.size())) {
torch::jit::push(*stack, outputs.extract(i));
}
}

View File

@ -1,6 +1,7 @@
#ifndef NOM_CONVERTERS_DOT_H
#define NOM_CONVERTERS_DOT_H
#include "c10/util/irange.h"
#include "nomnigraph/Graph/Algorithms.h"
#include "nomnigraph/Graph/Graph.h"
#include "nomnigraph/Support/Casting.h"
@ -42,7 +43,7 @@ class DotGenerator {
for (const auto& node : sg.getNodes()) {
generateNode(node, sg, output);
}
for (size_t i = 0; i < subgraphs.size(); ++i) {
for (const auto i : c10::irange(subgraphs.size())) {
const auto& subgraph = subgraphs[i];
output << "subgraph cluster" << i << " {\n";
output << "style=dotted;\n";

View File

@ -1,6 +1,7 @@
#ifndef NOM_TRANFORMATIONS_SUBGRAPH_MATCHER_H
#define NOM_TRANFORMATIONS_SUBGRAPH_MATCHER_H
#include "c10/util/irange.h"
#include "caffe2/core/common.h"
#include "nomnigraph/Graph/Graph.h"
@ -240,8 +241,7 @@ class MatchGraph : public Graph<MatchPredicate<GraphType>> {
// criteria in the given order.
int currentEdgeIdx = 0;
for (int criteriaIdx = 0; criteriaIdx < numChildrenCriteria;
criteriaIdx++) {
for (const auto criteriaIdx : c10::irange(numChildrenCriteria)) {
auto childrenCriteriaRef = invertGraphTraversal
? criteriaEdges[criteriaIdx]->tail()
: criteriaEdges[criteriaIdx]->head();

View File

@ -9,13 +9,14 @@
#include <unordered_map>
#include <vector>
#include "c10/util/Registry.h"
#include "caffe2/core/common.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/types.h"
#include "caffe2/proto/caffe2_pb.h"
#include "caffe2/utils/filler.h"
#include "caffe2/utils/proto_utils.h"
#include <c10/util/irange.h>
#include <c10/util/Registry.h>
#include <caffe2/core/common.h>
#include <caffe2/core/logging.h>
#include <caffe2/core/types.h>
#include <caffe2/proto/caffe2_pb.h>
#include <caffe2/utils/filler.h>
#include <caffe2/utils/proto_utils.h>
namespace caffe2 {
@ -519,7 +520,7 @@ inline uint64_t nElemFromDim(const TensorShape& X, int dim = 0) {
CAFFE_ENFORCE_GE(dim, 0, "Invalid maximum index specified");
uint64_t nElem = 1;
for (int i = dim; i < X.dims_size(); ++i) {
for (const auto i : c10::irange(dim, X.dims_size())) {
nElem *= X.dims(i);
}
return nElem;
@ -531,7 +532,7 @@ inline uint64_t nElemBetweenDim(const TensorShape& X, int start, int stop) {
CAFFE_ENFORCE_LE(stop, X.dims_size(), "Invalid maximum index specified");
uint64_t nElem = 1;
for (int i = start; i < stop; ++i) {
for (const auto i : c10::irange(start, stop)) {
nElem *= X.dims(i);
}
return nElem;
@ -560,7 +561,7 @@ OpSchema::Cost PointwiseCostInference(
const TensorShape X = inputs[0];
uint64_t nElemX = nElemFromDim(X);
uint64_t nElemRead = 0;
for (size_t i = 0; i < inputs.size(); ++i) {
for (const auto i : c10::irange(inputs.size())) {
nElemRead += nElemFromDim(inputs[i]);
}

View File

@ -5,6 +5,7 @@
#include "caffe2/core/context.h"
#include "caffe2/core/tensor.h"
#include <c10/util/accumulate.h>
#include <c10/util/irange.h>
#include <c10/util/typeid.h>
#include <algorithm>
@ -218,7 +219,7 @@ class C10_EXPORT QTensor {
*/
inline int64_t size_from_dim(int k) const {
int64_t r = 1;
for (int i = k; i < dims_.size(); ++i) {
for (const auto i : c10::irange(k, dims_.size())) {
r *= dims_[i];
}
return r;
@ -230,7 +231,7 @@ class C10_EXPORT QTensor {
inline int64_t size_to_dim(int k) const {
CAFFE_ENFORCE(k < dims_.size());
int64_t r = 1;
for (int i = 0; i < k; ++i) {
for (const auto i : c10::irange(k)) {
r *= dims_[i];
}
return r;

View File

@ -46,7 +46,7 @@ void QTensorSerializer<Context>::Serialize(
blob_proto.set_type(kQTensorBlobQType);
QTensorProto& proto = *blob_proto.mutable_qtensor();
proto.set_name(name);
for (int i = 0; i < qtensor.ndim(); ++i) {
for (const auto i : c10::irange(qtensor.ndim())) {
proto.add_dims(qtensor.dim32(i));
}
proto.set_precision(qtensor.precision());

View File

@ -73,7 +73,7 @@ TORCH_API ExportedStatMap toMap(const ExportedStatList& stats);
* int main() {
* MyCaffeClass a("first");
* MyCaffeClass b("second");
* for (int i = 0; i < 10; ++i) {
* for (const auto i : c10::irange(10)) {
* a.run(10);
* b.run(5);
* }

View File

@ -6,6 +6,7 @@
#include "caffe2/utils/proto_utils.h"
#include <c10/macros/Macros.h>
#include <c10/util/irange.h>
#include <cmath>
#include <string>
@ -34,7 +35,7 @@ void assertTensorEquals(
float epsilon = 0.1f) {
CAFFE_ENFORCE(tensor.IsType<T>());
CAFFE_ENFORCE_EQ(tensor.numel(), data.size());
for (auto idx = 0; idx < tensor.numel(); ++idx) {
for (const auto idx : c10::irange(tensor.numel())) {
if (tensor.IsType<float>()) {
assertNear(tensor.data<T>()[idx], data[idx], epsilon);
} else {
@ -88,7 +89,7 @@ void randomFill(
std::mt19937 gen(42);
std::uniform_real_distribution<RealType> dis(
static_cast<RealType>(min), static_cast<RealType>(max));
for (size_t i = 0; i < size; i++) {
for (const auto i : c10::irange(size)) {
data[i] = dis(gen);
}
}

View File

@ -120,7 +120,7 @@ inline std::string GetUniqueName() {
std::stringstream ss;
ss << "_cuda_kernel_";
for (int i = 0; i < len; ++i) {
for (const auto i : c10::irange(len)) {
ss << alpha[rand() % (sizeof(alpha) - 1)];
}
return ss.str();

View File

@ -32,7 +32,7 @@ template <int N>
const std::vector<int64_t>& shape(Shape<N> vs) {
static thread_local std::vector<int64_t> cache;
cache.resize(vs.size());
for (auto i = 0; i < vs.size(); ++i) {
for (const auto i : c10::irange(vs.size())) {
cache[i] = vs[i];
}
return cache;
@ -86,7 +86,7 @@ void MaskMatrix_Inc<float, CPUContext>(
int /*N*/,
int seq_len,
float target) {
for (int i = 0; i < seq_len; ++i) {
for (const auto i : c10::irange(seq_len)) {
// assume that the mask_seq is smaller than size
// Although it seems that random access gets bad performance,
// we make sure that seq is in order;

View File

@ -35,7 +35,7 @@ template <int N>
const std::vector<int64_t>& shape(Shape<N> vs) {
static thread_local std::vector<int64_t> cache;
cache.resize(vs.size());
for (auto i = 0; i < vs.size(); ++i) {
for (const auto i : c10::irange(vs.size())) {
cache[i] = vs[i];
}
return cache;
@ -71,8 +71,8 @@ void trans_mat<float, CPUContext>(
int m,
int n,
CPUContext* /*context*/) {
for (int i = 0; i < m; ++i) {
for (int j = 0; j < n; ++j) {
for (const auto i : c10::irange(m)) {
for (const auto j : c10::irange(n)) {
t[j * m + i] = o[i * n + j];
}
}

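Nested index loops, such as the matrix transpose above, get one `irange` per dimension. A sketch with illustrative names (assumed, not from the diff):

```cpp
#include <c10/util/irange.h>

// Transposes a rows x cols row-major matrix into dst (cols x rows), mirroring
// the o -> t transpose loop above.
void transpose(const float* src, float* dst, int rows, int cols) {
  for (const auto i : c10::irange(rows)) {
    for (const auto j : c10::irange(cols)) {
      dst[j * rows + i] = src[i * cols + j];
    }
  }
}
```
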
View File

@ -67,7 +67,7 @@ class FunHashOp : public Operator<Context> {
int64_t n_segments = num_segments_;
if (num_segments_ == -1) {
for (int64_t i = 0; i < num_nz_ent; ++i) {
for (const auto i : c10::irange(num_nz_ent)) {
if (seg_data[i] > n_segments) {
n_segments = seg_data[i];
}
@ -86,14 +86,14 @@ class FunHashOp : public Operator<Context> {
const auto* val_data = val.template data<T>();
const auto* key_data = key.template data<int64_t>();
for (int64_t j = 0; j < num_nz_ent; ++j) {
for (const auto j : c10::irange(num_nz_ent)) {
int64_t cur_seg = seg_data[j];
int64_t cur_key = key_data[j];
T cur_val = val_data[j];
int64_t output_stride = cur_seg * num_outputs_;
for (int64_t i = 0; i < num_outputs_; ++i) {
for (const auto i : c10::irange(num_outputs_)) {
T sum = 0;
for (int64_t k = 0; k < num_alpha; ++k) {
for (const auto k : c10::irange(num_alpha)) {
uint64_t hash;
// The hash function takes as input four integers:
// 1. feature index
@ -186,14 +186,14 @@ class FunHashGradientOp : public Operator<Context> {
memset(grad_weight_data, 0, sizeof(T) * num_weight);
for (int64_t j = 0; j < num_nz_ent; ++j) {
for (const auto j : c10::irange(num_nz_ent)) {
int64_t cur_seg = seg_data[j];
int64_t cur_key = key_data[j];
T cur_val = val_data[j];
int64_t grad_out_stride = cur_seg * num_outputs_;
for (int64_t i = 0; i < num_outputs_; ++i) {
for (const auto i : c10::irange(num_outputs_)) {
T grad_out_scale = grad_out_data[grad_out_stride + i] * cur_val;
for (int64_t k = 0; k < num_alpha; ++k) {
for (const auto k : c10::irange(num_alpha)) {
uint64_t hash;
hash_data[0] = cur_key;
hash_data[1] = i;

View File

@ -66,7 +66,7 @@ class SparseFunHashOp : public Operator<Context> {
int64_t n_segments = num_segments_;
if (num_segments_ == -1) {
for (int64_t i = 0; i < num_nz_ent; ++i) {
for (const auto i : c10::irange(num_nz_ent)) {
if (seg_data[i] > n_segments) {
n_segments = seg_data[i];
}
@ -85,14 +85,14 @@ class SparseFunHashOp : public Operator<Context> {
const auto* val_data = val.template data<T>();
const auto* key_data = key.template data<int64_t>();
for (int64_t j = 0; j < num_nz_ent; ++j) {
for (const auto j : c10::irange(num_nz_ent)) {
int64_t cur_seg = seg_data[j];
int64_t cur_key = key_data[j];
T cur_val = val_data[j];
int64_t output_stride = cur_seg * num_outputs_;
for (int64_t i = 0; i < num_outputs_; ++i) {
for (const auto i : c10::irange(num_outputs_)) {
T sum = 0;
for (int64_t k = 0; k < num_alpha; ++k) {
for (const auto k : c10::irange(num_alpha)) {
// The hash function takes as input three integers:
// 1. feature index
// 2. output index
@ -190,14 +190,14 @@ class SparseFunHashGradientOp : public Operator<Context> {
const auto* key_data = key.template data<int64_t>();
int64_t w_ind = 0;
for (int64_t j = 0; j < num_nz_ent; ++j) {
for (const auto j : c10::irange(num_nz_ent)) {
int64_t cur_seg = seg_data[j];
int64_t cur_key = key_data[j];
T cur_val = val_data[j];
int64_t grad_out_stride = cur_seg * num_outputs_;
for (int64_t i = 0; i < num_outputs_; ++i) {
for (const auto i : c10::irange(num_outputs_)) {
T grad_out_scale = grad_out_data[grad_out_stride + i] * cur_val;
for (int64_t k = 0; k < num_alpha; ++k) {
for (const auto k : c10::irange(num_alpha)) {
hash_data[0] = cur_key;
hash_data[1] = i;
hash_data[2] = k;

View File

@ -111,7 +111,7 @@ class SparseMatrixReshapeOp : public Operator<Context> {
auto* new_col_data = new_col->template mutable_data<int64_t>();
auto* new_row_data = new_row->template mutable_data<int>();
for (int i = 0; i < nnz; ++i) {
for (const auto i : c10::irange(nnz)) {
int64_t offset = old_row_data[i] * old_stride_ + old_col_data[i];
new_row_data[i] = offset / new_stride_;
new_col_data[i] = offset % new_stride_;