Mirror of https://github.com/pytorch/pytorch.git, synced 2025-10-21 05:34:18 +08:00
Revert D31705359: use irange for loops 8

Test Plan: revert-hammer

Differential Revision: D31705359 (17e5200441)

Original commit changeset: c9ea2fbc0f9c

fbshipit-source-id: 08fff2d12beca953ad30dd0baabf86e39ac84f14

committed by: Facebook GitHub Bot
parent: 97750e03a4
commit: f587267dc7
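The diff below is one mechanical change repeated across many files: each hunk removes a range-based `c10::irange` loop and restores the explicit index loop it had replaced. For context, here is a minimal sketch (not part of the commit) of the two equivalent styles, assuming only `<c10/util/irange.h>` from the PyTorch tree:

```cpp
#include <c10/util/irange.h>

#include <vector>

// Both loops visit i = 0, 1, ..., v.size() - 1 and scale each element.
void scale(std::vector<float>& v, float s) {
  // The style this commit removes: c10::irange(n) is a lazy half-open
  // range [0, n) whose index type is deduced from n (std::size_t here),
  // so no signed/unsigned comparison ever happens.
  for (const auto i : c10::irange(v.size())) {
    v[i] *= s;
  }

  // The style this commit restores: a plain index loop. Comparing a
  // signed int against the unsigned v.size() triggers -Wsign-compare,
  // which is why many of the restored loops in this diff sit under a
  // NOLINTNEXTLINE(clang-diagnostic-sign-compare) comment.
  // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
  for (int i = 0; i < v.size(); ++i) {
    v[i] *= s;
  }
}
```

`c10::irange` also has a two-argument form covering `[start, end)`, which is why hunks such as the one in SumOp revert `c10::irange(1, InputSize())` back to `for (int i = 1; i < InputSize(); ++i)`.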
@@ -97,7 +97,7 @@ class ReshapeOp : public Operator<Context> {
     }

     int unknown_idx = -1;
-    for (const auto i : c10::irange(actual_new_shape.size())) {
+    for (int i = 0; i < actual_new_shape.size(); ++i) {
       const auto dim = actual_new_shape[i];
       if (dim == -1) {
         CAFFE_ENFORCE(
@@ -153,7 +153,7 @@ class ReshapeOp : public Operator<Context> {
     old_shape->Resize(input.sizes().size());
     T* old_shape_data = old_shape->template mutable_data<T>();
     std::vector<T> old_shape_vector(input.sizes().begin(), input.sizes().end());
-    for (const auto i : c10::irange(old_shape_vector.size())) {
+    for (int i = 0; i < old_shape_vector.size(); ++i) {
       old_shape_data[i] = old_shape_vector[i];
     }
@@ -62,7 +62,7 @@ class ReversePackedSegsOp final : public Operator<Context> {
     context_.FinishDeviceComputation();

     T* rev_data_ptr = output->template mutable_data<T>();
-    for (const auto i : c10::irange(batch_size)) {
+    for (int64_t i = 0; i < batch_size; i++) {
       const auto& seg_length = lengths_host[i];
       CAFFE_ENFORCE_LE(seg_length, max_length);
       int64_t j = 0;
@@ -32,7 +32,7 @@ class RecurrentNetworkBlobFetcherOp final : public Operator<Context> {
     std::vector<std::string> blob_names_vector = {};

     // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
-    for (const auto i : c10::irange(stepWorkspaces.size())) {
+    for (int64_t i = 0; i < stepWorkspaces.size(); i++) {
       Workspace* currentStepWorkspace = stepWorkspaces[i].get();
       std::vector<std::string> blob_names = currentStepWorkspace->LocalBlobs();

@@ -38,7 +38,7 @@ class RecurrentNetworkExecutorBase {
         recurrent_input_map_(recurrent_input_map),
         timestep_blob_(timestep_blob) {
     const bool net_def_has_device_option = step_net_def_.has_device_option();
-    for (const auto i : c10::irange(step_net_def_.op_size())) {
+    for (int i = 0; i < step_net_def_.op_size(); i++) {
       if (net_def_has_device_option) {
         // In the case when net def specifies device option, final device option
         // will be equal to merge of operator and net def device options, with
@@ -86,7 +86,7 @@ class RecurrentNetworkExecutorBase {
     for (auto& rnn_op : timestep_ops_template_) {
       rnn_op.has_timestep_blob = false;
       const OperatorDef& op = step_net_def_.op(rnn_op.order);
-      for (const auto i : c10::irange(op.input_size())) {
+      for (int i = 0; i < op.input_size(); i++) {
        if (op.input(i) == timestep_blob_) {
          rnn_op.has_timestep_blob = true;
          break;
@@ -137,7 +137,7 @@ class RecurrentNetworkExecutorBase {
       if (rnn_op.has_timestep_blob) {
         OperatorDef op_copy = step_net_def_.op(rnn_op.order);

-        for (const auto i : c10::irange(op_copy.input_size())) {
+        for (int i = 0; i < op_copy.input_size(); i++) {
           if (op_copy.input(i) == timestep_blob_) {
             op_copy.set_input(i, this_timestep_blob);
           }
@@ -283,7 +283,7 @@ class RecurrentNetworkExecutorBase {
       int opidx,
       std::vector<RNNNetOperator>& rnn_ops,
       std::unordered_set<int>* dep_ops) {
-    for (const auto i : c10::irange(rnn_ops.size())) {
+    for (int i = 0; i < rnn_ops.size(); i++) {
       if (i == opidx) {
         continue;
       }
@@ -315,7 +315,7 @@ class RecurrentNetworkExecutorBase {
    * for each timestep.
    */
   void CalculateInternalDependencies() {
-    for (const auto i : c10::irange(step_net_def_.op_size())) {
+    for (int i = 0; i < step_net_def_.op_size(); i++) {
       timestep_ops_template_.push_back(RNNNetOperator(step_net_def_.op(i), i));
     }
     // Then see which outputs appear as inputs, and those are
@@ -103,7 +103,7 @@ void repeatCopy(
     T* dst,
     Context* context) {
   // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
-  for (const auto i : c10::irange(repeat_n)) {
+  for (int i = 0; i < repeat_n; ++i) {
     context->template CopySameDevice<T>(n, src, dst + i * n);
   }
 }
@@ -228,7 +228,7 @@ class RecurrentNetworkOp final : public Operator<Context> {
     CAFFE_ENFORCE_EQ(states.size(), inputs.size(), "states/inputs mismatch");
     std::vector<detail::RecurrentInput> ris;
     // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
-    for (const auto i : c10::irange(states.size())) {
+    for (auto i = 0; i < states.size(); ++i) {
       // States need to be "global" (since they are shared between
       // forward and backward).
       sharedWs->CreateBlob(states[i]);
@@ -254,7 +254,7 @@ class RecurrentNetworkOp final : public Operator<Context> {
         dst.size() == offset.size(), "alias_dst/alias_offset mismatch");
     std::vector<detail::OffsetAlias> aliases;
     // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
-    for (const auto i : c10::irange(src.size())) {
+    for (auto i = 0; i < src.size(); ++i) {
       detail::OffsetAlias oc;
       oc.src = src[i];
       oc.dst = dst[i];
@@ -343,7 +343,7 @@ class RecurrentNetworkOp final : public Operator<Context> {
       stepWorkspaces.resize(num_workspaces_on_fwd_only);
     }

-    for (const auto t : c10::irange(seqLen)) {
+    for (auto t = 0; t < seqLen; ++t) {
       auto& currentStepWorkspace =
           (has_backward_pass ? stepWorkspaces[t] :
               stepWorkspaces[t % num_workspaces_on_fwd_only]);
@@ -472,7 +472,7 @@ class RecurrentNetworkGradientOp final : public Operator<Context> {
   }

   void renameOpInputOutput(std::string from_name, std::string to_name) {
-    for (const auto j : c10::irange(stepNetDef_.op_size())) {
+    for (int j = 0; j < stepNetDef_.op_size(); j++) {
       auto* op = stepNetDef_.mutable_op(j);
       for (int i = 0; i < op->input_size(); i++) {
         if (op->input(i) == from_name) {
@@ -498,7 +498,7 @@ class RecurrentNetworkGradientOp final : public Operator<Context> {
         " != ",
         param_grads.size());
     // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
-    for (const auto i : c10::irange(param.size())) {
+    for (int i = 0; i < param.size(); ++i) {
       detail::Param p;
       // Forward inputs come after [outputs_with_grads] gradient inputs
       p.param = operator_def.input(param[i] + gradInputs_.size());
@@ -526,17 +526,17 @@ class RecurrentNetworkGradientOp final : public Operator<Context> {
         this->template GetRepeatedArgument<int32_t>("alias_offset");

     // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
-    for (const auto i : c10::irange(recurrent.size())) {
+    for (auto i = 0; i < recurrent.size(); ++i) {
       detail::RecurrentGradient rg;
       rg.param = recurrent[i];
       rg.grad = remappedName(recurrent[i] + "_grad");

-      for (const auto j : c10::irange(alias_src.size())) {
+      for (int j = 0; j < alias_src.size(); ++j) {
         if (alias_src[j] != recurrent[i]) {
           continue;
         }
         int idx = -1;
-        for (const auto k : c10::irange(gradInputs_.size())) {
+        for (int k = 0; k < gradInputs_.size(); ++k) {
           if (gradInputs_[k] == j) {
             idx = k;
           }
@@ -575,7 +575,7 @@ class RecurrentNetworkGradientOp final : public Operator<Context> {
         "",
         &links);
     // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
-    for (const auto i : c10::irange(links.size())) {
+    for (int i = 0; i < links.size(); i++) {
       links[i] = remappedLink(links[i]);
     }
     return links;
@@ -715,7 +715,7 @@ class RecurrentNetworkGradientOp final : public Operator<Context> {
     // This code assumes that there are several inputs
     // sequences. Actually it is not supported by the rest of the code,
     // and numSequences_ is a constant, equal to 1.
-    for (const auto i : c10::irange(numSequences_)) {
+    for (int i = 0; i < numSequences_; ++i) {
       // Offseting as the first gradInputs_.size() inputs of the op
       // are from GO. Then all I(0..N).
       const int gradientInputIndex = i + gradInputs_.size();
@@ -790,7 +790,7 @@ class RecurrentNetworkGradientOp final : public Operator<Context> {

     CAFFE_ENFORCE_EQ(recurrentInputIds_.size(), recurrentGradients_.size());
     // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
-    for (const auto i : c10::irange(recurrentInputIds_.size())) {
+    for (int i = 0; i < recurrentInputIds_.size(); ++i) {
       // See GetRecurrentNetworkGradient to understand offseting here
       // Outputs of the gradient are inputs of the forward pass.
       // So we need to offset on all inputs that go before recurrent
@@ -32,9 +32,9 @@ class RowMulOp : public Operator<Context> {
         "Length of w should be equal to the first dim of mat");

     auto block_size = mat.size_from_dim(1);
-    for (const auto i : c10::irange(w.numel())) {
+    for (int i = 0; i < w.numel(); i++) {
       size_t offset = i * block_size;
-      for (const auto j : c10::irange(block_size)) {
+      for (int j = 0; j < block_size; j++) {
         output_data[offset + j] = mat_data[offset + j] * w_data[i];
       }
     }
@@ -60,10 +60,10 @@ class ReduceTailSumOp : public Operator<Context> {
     T* output_data = output->template mutable_data<T>();
     const T* mat_data = mat.template data<T>();

-    for (const auto i : c10::irange(N)) {
+    for (int i = 0; i < N; i++) {
       output_data[i] = 0;
       size_t offset = i * block_size;
-      for (const auto j : c10::irange(block_size)) {
+      for (int j = 0; j < block_size; j++) {
         output_data[i] += mat_data[offset + j];
       }
     }
@@ -20,7 +20,7 @@ class ScaleBlobsOp final : public Operator<Context> {
   bool DoRunWithType() {
     int batchSize = InputSize();

-    for (const auto i : c10::irange(batchSize)) {
+    for (int i = 0; i < batchSize; ++i) {
       const auto& X = Input(i);
       auto* Y = Output(i, X.sizes(), at::dtype<T>());
       math::Scale<float, T, Context>(
@@ -34,7 +34,7 @@ class ScaleBlobsOp final : public Operator<Context> {
   }

   bool RunOnDevice() override {
-    for (const auto i : c10::irange(InputSize())) {
+    for (int i = 0; i < InputSize(); ++i) {
       auto& input = this->template Input<Tensor>(i, CPU);
       auto* output = this->template Output<Tensor>(i, CPU);
       output->ResizeLike(input);
@@ -2,7 +2,6 @@
 #define CAFFE2_OPERATORS_SEGMENT_REDUCTION_OP_H_

 #include "caffe2/core/export_caffe2_op_to_c10.h"
-#include <c10/util/irange.h>
 #include "caffe2/core/context.h"
 #include "caffe2/core/logging.h"
 #include "caffe2/core/operator.h"
@@ -336,7 +335,7 @@ class AbstractReduceFrontOrBackOp : public Operator<Context> {
     const int num_blocks = block_size > 0 ? data.numel() / block_size : 0;

     Reducer r(ctx, out, &context_);
-    for (const auto i : c10::irange(num_blocks)) {
+    for (int64_t i = 0; i < num_blocks; ++i) {
       r.template process<FixedSize>(
           ctx, inputAccessor_.getBlockPtr(block_size, i), i, &context_);
     }
@@ -407,7 +406,7 @@ class AbstractReduceFrontOrBackGradientOp : public Operator<Context> {
     T* out = data_grads->template mutable_data<T>();

     ReducerGradient r(ctx, r_grad, &context_);
-    for (const auto i : c10::irange(block_num)) {
+    for (int64_t i = 0; i < block_num; ++i) {
       r.template fillGrad<FixedSize>(
           ctx,
           out + block_size * i,
@@ -1071,7 +1070,7 @@ class AbstractUnsortedSegmentOp : public Operator<Context> {
       K = num_segments_;
     } else {
       K = 0;
-      for (const auto i : c10::irange(N)) {
+      for (int64_t i = 0; i < N; ++i) {
         K = std::max(K, s_ids[i] + 1);
       }
     }
@@ -1087,11 +1086,11 @@ class AbstractUnsortedSegmentOp : public Operator<Context> {

     reducers_.clear();
     reducers_.reserve(K);
-    for (const auto i : c10::irange(K)) {
+    for (int64_t i = 0; i < K; ++i) {
       reducers_.emplace_back(ctx, out + out_block_size * i, &context_);
     }

-    for (const auto i : c10::irange(N)) {
+    for (int64_t i = 0; i < N; ++i) {
       auto s_id = s_ids[i];
       CAFFE_ENFORCE(
           0 <= s_id && s_id < K,
@@ -1115,7 +1114,7 @@ class AbstractUnsortedSegmentOp : public Operator<Context> {
           ctx, inputAccessor_.getBlockPtr(in_block_size, idx), i, &context_);
     }

-    for (const auto i : c10::irange(K)) {
+    for (int64_t i = 0; i < K; ++i) {
       reducers_[i].template finish<FixedSize>(ctx, &context_);
     }
     // call reducers destructors (if there is any)
@@ -1189,7 +1188,7 @@ class AbstractUnsortedSegmentGradientOp : public Operator<Context> {

     if (ReducerGradient::computeLength()) {
       segment_length_.resize(K, 0);
-      for (const auto i : c10::irange(N)) {
+      for (int i = 0; i < N; ++i) {
         auto s_id = s_ids[i];
         CAFFE_ENFORCE(
             0 <= s_id && s_id < K,
@@ -1207,7 +1206,7 @@ class AbstractUnsortedSegmentGradientOp : public Operator<Context> {
       reducers_.emplace_back(ctx, s_grads + s_block_size * i, &context_);
     }

-    for (const auto i : c10::irange(N)) {
+    for (int64_t i = 0; i < N; ++i) {
       auto s_id = s_ids[i];
       if (ReducerGradient::computeLength()) {
         reducers_[s_id].template fillGrad<FixedSize>(
@@ -1463,7 +1462,7 @@ class AbstractLengthsOp : public Operator<Context> {
     TData* out = output->template mutable_data<TData>();

     int64_t dataIndex = 0;
-    for (const auto rangeIndex : c10::irange(outputSize)) {
+    for (int64_t rangeIndex = 0; rangeIndex < outputSize; ++rangeIndex) {
       Reducer reducer(ctx, out + out_block_size * rangeIndex, &context_);
       for (int64_t start = dataIndex; dataIndex < start + lengths[rangeIndex];
            ++dataIndex) {
@@ -1552,7 +1551,7 @@ class AbstractLengthsGradientOp : public Operator<Context> {
     CAFFE_ENFORCE(segmentGradsInput.dim() > 0);
     CAFFE_ENFORCE(numSegments == segmentGradsInput.size(0));
     const TLengths* lengths = lengthsInput.template data<TLengths>();
-    for (const auto i : c10::irange(numSegments)) {
+    for (int64_t i = 0; i < numSegments; ++i) {
       reducedDataSize += lengths[i];
     }

@@ -1581,7 +1580,7 @@ class AbstractLengthsGradientOp : public Operator<Context> {
     T* dataGrads = dataGradsOutput->template mutable_data<T>();

     int64_t dataIndex = 0;
-    for (const auto rangeIndex : c10::irange(numSegments)) {
+    for (int64_t rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) {
       ReducerGradient reducer(
           ctx, segmentGrads + segmentBlockSize * rangeIndex, &context_);
       for (int64_t start = dataIndex; dataIndex < start + lengths[rangeIndex];
@@ -1691,7 +1690,7 @@ class AbstractLengthsWithMainInputGradientOp : public Operator<Context> {

     const Tembedding* data = dataInput.template data<Tembedding>();
     int64_t dataIndex = 0;
-    for (const auto rangeIndex : c10::irange(numSegments)) {
+    for (int64_t rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) {
       ReducerGradient reducer(
           ctx, segmentGrads + segmentBlockSize * rangeIndex, &context_);
       for (int64_t start = dataIndex; dataIndex < start + lengths[rangeIndex];
@@ -1789,7 +1788,7 @@ class AbstractLengthsWithMainInputAndForwardOutputGradientOp
     const T* data = dataInput.template data<T>();

     int64_t dataIndex = 0;
-    for (const auto rangeIndex : c10::irange(numSegments)) {
+    for (int64_t rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) {
       ReducerGradient reducer(
           ctx, segmentGrads + segmentBlockSize * rangeIndex, &context_);
       for (int64_t start = dataIndex; dataIndex < start + lengths[rangeIndex];
@@ -59,12 +59,12 @@ class SelfBinningHistogramOp final : public Operator<Context> {
     T max = 0;
     T min = 0;
     int64_t total_count = 0;
-    for (const auto input_idx : c10::irange(InputSize())) {
+    for (int input_idx = 0; input_idx < InputSize(); input_idx++) {
       const auto& x = Input(input_idx);
       const int64_t N = x.numel();
       total_count += N;
       const auto* x_data = x.template data<T>();
-      for (const auto data_idx : c10::irange(N)) {
+      for (int64_t data_idx = 0; data_idx < N; data_idx++) {
         const T val = this->abs_ ? abs(x_data[data_idx]) : x_data[data_idx];
         if (!first_seen) {
           max = val;
@@ -91,7 +91,7 @@ class SelfBinningHistogramOp final : public Operator<Context> {
       scaled_max = min + (max - min) * RANGE_SCALING;
       T scaled_range = (scaled_max - min);
       // Avoid underflow by calculating advancement through multiplication.
-      for (const auto i : c10::irange(num_edges_)) {
+      for (int i = 0; i < num_edges_; i++) {
         T advancement_ratio = T(i) / num_bins_;
         histogram_values_data[i] = min + advancement_ratio * scaled_range;
       }
@@ -112,7 +112,7 @@ class SelfBinningHistogramOp final : public Operator<Context> {
       T log_multiplier_numerator = log(scaled_max) - log(min);
       // Avoid underflow by:
       // - Calculating each advancement separately for each i.
-      for (const auto i : c10::irange(num_edges_)) {
+      for (int i = 0; i < num_edges_; i++) {
         T advancement_ratio = T(i)/num_bins_;
         histogram_values_data[i] = min * exp(log_multiplier_numerator * advancement_ratio);
       }
@@ -127,11 +127,11 @@ class SelfBinningHistogramOp final : public Operator<Context> {
       histogram_counts_data[0] = total_count;
     }
     else {
-      for (const auto input_idx : c10::irange(InputSize())) {
+      for (int input_idx = 0; input_idx < InputSize(); input_idx++) {
         const auto& x = Input(input_idx);
         const int64_t N = x.numel();
         const auto* x_data = x.template data<T>();
-        for (const auto data_idx : c10::irange(N)) {
+        for (int64_t data_idx = 0; data_idx < N; data_idx++) {
           const T val = this->abs_ ? abs(x_data[data_idx]) : x_data[data_idx];
           const auto bisection_it = std::upper_bound(
               histogram_values_data,
@@ -163,7 +163,7 @@ class SelfBinningHistogramOp final : public Operator<Context> {

   void CheckInputs() {
     const auto& input_zero = Input(0);
-    for (const auto i : c10::irange(1, InputSize())) {
+    for (int i = 1; i < InputSize(); i++) {
       CAFFE_ENFORCE_EQ(
           Input(i).dtype(),
           input_zero.dtype(),
@@ -34,7 +34,7 @@ class ShapeOp : public Operator<Context> {
     auto* output = Output(0, {numAxes}, at::dtype<int64_t>());
     auto src = reinterpret_cast<const char*>(data.sizes().data());
     auto out = reinterpret_cast<char*>(output->template mutable_data<int64_t>());
-    for (const auto i : c10::irange(numAxes)) {
+    for (int i = 0; i < numAxes; i++) {
       auto axis = axes_[i];
       CAFFE_ENFORCE_LT(axis, numDims, "Axis out of range");
       CAFFE_ENFORCE_GE(axis, 0, "Each axis should be non-negative");
@@ -51,7 +51,7 @@ class SinusoidPositionEncodingOp : public Operator<Context> {
     float max_alpha_pow =
         ((float)embedding_size_ - 1.0f) / (float)embedding_size_;

-    for (const auto i : c10::irange(M)) {
+    for (int i = 0; i < M; ++i) {
       float pos = (float)idxs[i * K];

       // Compute the embedding for position i, example 0 first
@@ -72,7 +72,7 @@ class SinusoidPositionEncodingOp : public Operator<Context> {
       row_array = amplitude_ * row_array.sin().eval();

       // Copy the embedding to position i in the other examples
-      for (const auto j : c10::irange(1, K)) {
+      for (int j = 1; j < K; ++j) {
         int base = i * K * embedding_size_;
         std::copy(
             &out[base],
@@ -30,7 +30,7 @@ bool SliceImpl(
   std::vector<SIndex> ends_idx(data.dim());
   std::vector<SIndex> dst_sizes(data.dim());

-  for (const auto i : c10::irange(data.dim())) {
+  for (int i = 0; i < data.dim(); ++i) {
     if (i >= starts.numel()) {
       starts_idx[i] = 0;
       ends_idx[i] = data.size(i);
@@ -78,7 +78,7 @@ bool SliceImpl(
   }
   // for now only supports slicing in 1 dimension
   int dim = -1;
-  for (const auto i : c10::irange(data.dim())) {
+  for (int i = 0; i < data.dim(); ++i) {
     if (starts_idx[i] > 0 || ends_idx[i] < data.size(i)) {
       CAFFE_ENFORCE_EQ(
           dim, -1, "Currently only possible to slice in 1 dimension.");
@@ -131,7 +131,7 @@ bool SliceImpl(

     char* src_offset_bytes = src_bytes + itemsize * src_offset;
     char* dst_offset_bytes = dst_bytes;
-    for (const auto i : c10::irange(num_blocks)) {
+    for (size_t i = 0; i < num_blocks; ++i) {
       char* local_src_offset_bytes =
           src_offset_bytes + i * src_block_size_bytes;
       char* local_dst_offset_bytes =
@@ -177,7 +177,7 @@ bool SliceImpl(
       return true;
     }

-    for (const auto i : c10::irange(num_blocks)) {
+    for (size_t i = 0; i < num_blocks; ++i) {
       char* local_src_offset_bytes =
           src_offset_bytes + i * src_block_size_bytes;
       char* local_dst_offset_bytes =
@@ -29,14 +29,14 @@ void spaceToBatch(
   const int input_height = input.dim32(2);
   const int input_width = input.dim32(3);

-  for (const auto out_b : c10::irange(output_batch)) {
+  for (int out_b = 0; out_b < output_batch; ++out_b) {
     const int in_b = out_b % input_batch;
     const int offset_w = (out_b / input_batch) % block_size;
     const int offset_h = (out_b / input_batch) / block_size;
-    for (const auto d : c10::irange(input_depth)) {
-      for (const auto out_h : c10::irange(output_height)) {
+    for (int d = 0; d < input_depth; ++d) {
+      for (int out_h = 0; out_h < output_height; ++out_h) {
         const int in_h = out_h * block_size + offset_h - pad_t;
-        for (const auto out_w : c10::irange(output_width)) {
+        for (int out_w = 0; out_w < output_width; ++out_w) {
           const int in_w = out_w * block_size + offset_w - pad_l;
           const auto output_offset =
               ((out_b * output_depth + d) * output_height + out_h) *
@@ -80,14 +80,14 @@ void batchToSpace(
   const int input_width = input.dim32(3);

   CAFFE_ENFORCE(input_depth == output_depth);
-  for (const auto in_b : c10::irange(input_batch)) {
+  for (int in_b = 0; in_b < input_batch; ++in_b) {
     const int out_b = in_b % output_batch;
     const int offset_w = (in_b / output_batch) % block_size;
     const int offset_h = (in_b / output_batch) / block_size;
-    for (const auto d : c10::irange(input_depth)) {
-      for (const auto in_h : c10::irange(input_height)) {
+    for (int d = 0; d < input_depth; ++d) {
+      for (int in_h = 0; in_h < input_height; ++in_h) {
         const int out_h = in_h * block_size + offset_h - pad_t;
-        for (const auto in_w : c10::irange(input_width)) {
+        for (int in_w = 0; in_w < input_width; ++in_w) {
           const int out_w = in_w * block_size + offset_w - pad_l;
           if (out_h >= 0 && out_w >= 0 && out_h < output_height &&
               out_w < output_width) {
@@ -6,7 +6,6 @@
 #include <vector>
 #include "caffe2/core/context.h"
 #include "caffe2/core/export_caffe2_op_to_c10.h"
-#include <c10/util/irange.h>
 #include "caffe2/core/operator.h"
 #include "caffe2/core/tensor.h"
 #include "caffe2/utils/math.h"
@@ -30,7 +29,7 @@ class SparseToDenseMaskBase : public Operator<Context> {
     auto biggest = *std::max_element(mask.begin(), mask.end());
     dense_.assign(std::min(kMaxDenseSize, biggest + 1), -1);
     // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
-    for (const auto i : c10::irange(mask.size())) {
+    for (int i = 0; i < mask.size(); i++) {
       int64_t id = mask[i];
       CAFFE_ENFORCE_GE(id, 0, "Only positive IDs are allowed.");
       if (id >= kMaxDenseSize) {
@@ -156,7 +155,7 @@ class SparseToDenseMaskOp : public SparseToDenseMaskBase<Context> {
     }

     int64_t offset = 0;
-    for (const auto r : c10::irange(rows)) {
+    for (int r = 0; r < rows; r++) {
       bool skippedSparseIndex = false;
       for (int c = 0; c < lengths_vec[r]; c++) {
         const auto sparse_index = sparse_indices_vec[offset + c];
@@ -273,7 +272,7 @@ class SparseToDenseMaskGradientOp : public SparseToDenseMaskBase<Context> {
     // SparseToDenseMask is not injective; gradient_used records
     // if the gradient is used for other input value from the same row
     vector<bool> gradient_used(cols, false);
-    for (const auto r : c10::irange(rows)) {
+    for (int r = 0; r < rows; r++) {
       std::fill(gradient_used.begin(), gradient_used.end(), false);
       for (int c = lengths_vec[r] - 1; c >= 0; c--) {
         int idx = this->getFeatureIdx(sparse_indices_vec[offset + c]);
@@ -89,7 +89,7 @@ class SparseToDenseOp final : public Operator<Context> {
     const auto block_nitems = sparse_values.size_from_dim(1);
     const TData* sparse_values_vec = sparse_values.template data<TData>();

-    for (const auto i : c10::irange(sparse_indices_len)) {
+    for (int32_t i = 0; i < sparse_indices_len; i++) {
       const TInd idx = sparse_indices_vec[i];
       CAFFE_ENFORCE_GE(idx, 0);
       CAFFE_ENFORCE_LT(idx, output_first_dim);
@@ -41,7 +41,7 @@ class SquareRootDivideOp final : public Operator<Context> {
     auto* scalePtr = scale.template data<TScale>();
     auto* dataPtr = data.template data<TData>();
     auto* yPtr = Y->template mutable_data<TData>();
-    for (const auto i : c10::irange(0U, batchSize)) {
+    for (auto i = 0U; i < batchSize; ++i) {
       auto scale = scalePtr[i];
       CAFFE_ENFORCE(scale >= 0, scale, " < 0");
       auto multiplier = scale == 0 ? 1.0 : 1 / std::sqrt(scale);
@@ -20,7 +20,7 @@ struct ForEach {

   template <typename In, typename Out, typename Context>
   bool operator()(int n, const In* in, Out* out, Context* /*c*/) {
-    for (const auto i : c10::irange(n)) {
+    for (int i = 0; i < n; ++i) {
       out[i] = functor(in[i]);
     }
     return true;
@@ -51,7 +51,7 @@ bool TensorProtosDBInput<Context>::Prefetch() {
     TensorProtos protos;
     CAFFE_ENFORCE(protos.ParseFromString(value_));
     CAFFE_ENFORCE(protos.protos_size() == OutputSize());
-    for (const auto i : c10::irange(protos.protos_size())) {
+    for (int i = 0; i < protos.protos_size(); ++i) {
       if (protos.protos(i).has_device_detail()) {
         protos.mutable_protos(i)->clear_device_detail();
       }
@@ -62,14 +62,14 @@ bool TensorProtosDBInput<Context>::Prefetch() {
       // CPU));
     }
   } else {
-    for (const auto item_id : c10::irange(batch_size_)) {
+    for (int item_id = 0; item_id < batch_size_; ++item_id) {
       reader.Read(&key_, &value_);
       TensorProtos protos;
       CAFFE_ENFORCE(protos.ParseFromString(value_));
       CAFFE_ENFORCE(protos.protos_size() == OutputSize());
       // Note: shape_inferred_ is ignored, we'll always get dimensions from
       // proto
-      for (const auto i : c10::irange(protos.protos_size())) {
+      for (int i = 0; i < protos.protos_size(); ++i) {
        vector<int64_t> dims(
            protos.protos(i).dims().begin(), protos.protos(i).dims().end());
        dims.insert(dims.begin(), batch_size_);
@@ -94,7 +94,7 @@ bool TensorProtosDBInput<Context>::Prefetch() {

 template <class Context>
 bool TensorProtosDBInput<Context>::CopyPrefetched() {
-  for (const auto i : c10::irange(OutputSize())) {
+  for (int i = 0; i < OutputSize(); ++i) {
     OperatorBase::template Output<Tensor>(i, Context::GetDeviceType())
         ->CopyFrom(
             prefetched_blobs_[i].template Get<TensorCPU>(), /* async */ true);
@@ -113,12 +113,12 @@ class TileOp final : public Operator<Context> {
   bool DoTile(const int outer_size, const int inner_size, const T* X, T* Y) {
     if (inner_size == 1) {
       EigenArrayMap<T> Y_arr(Y, tiles_, outer_size);
-      for (const auto i : c10::irange(outer_size)) {
+      for (int i = 0; i < outer_size; ++i) {
         Y_arr.col(i) = X[i];
       }
     } else {
       ConstEigenArrayMap<T> X_arr(X, inner_size, outer_size);
-      for (const auto i : c10::irange(outer_size)) {
+      for (int i = 0; i < outer_size; ++i) {
         EigenArrayMap<T>(Y + i * tiles_ * inner_size, inner_size, tiles_)
             .colwise() = X_arr.col(i);
       }
@@ -245,10 +245,10 @@ class TileGradientOp final : public Operator<Context> {
         dX,
         inner_size,
         &context_);
-    for (const auto i : c10::irange(outer_size)) {
+    for (int i = 0; i < outer_size; ++i) {
       const T* dY_ptr = dY + i * tiles_ * inner_size;
       T* dX_ptr = dX + i * inner_size;
-      for (const auto j : c10::irange(1, tiles_)) {
+      for (int j = 1; j < tiles_; ++j) {
         math::Add<T, Context>(
             inner_size, dX_ptr, dY_ptr + j * inner_size, dX_ptr, &context_);
       }
@@ -49,7 +49,7 @@ class TransposeOp : public Operator<Context> {
     }
     const at::IntArrayRef X_dims = X.sizes();
     std::vector<std::int64_t> Y_dims(ndim);
-    for (const auto i : c10::irange(ndim)) {
+    for (int i = 0; i < ndim; ++i) {
       Y_dims[i] = X_dims[axes_[i]];
     }
     Y->Resize(Y_dims);
@@ -127,7 +127,7 @@ class TTLinearOp final : public Operator<Context> {
     // Check that output size of Y is the element-wise product of out_sizes
     int prod_out_sizes = 1;
     // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
-    for (const auto i : c10::irange(out_sizes_.size())) {
+    for (int i = 0; i < out_sizes_.size(); i++) {
       prod_out_sizes *= out_sizes_[i];
     }
     CAFFE_ENFORCE(
@@ -3,7 +3,6 @@

 #include "caffe2/core/context.h"
 #include "caffe2/core/export_caffe2_op_to_c10.h"
-#include <c10/util/irange.h>
 #include "caffe2/core/operator.h"


@@ -17,7 +16,7 @@ class UnsafeCoalesceOp final : public Operator<Context> {

   bool RunOnDevice() override {
     size_t coalesced_size = 0;
-    for (const auto i : c10::irange(InputSize())) {
+    for (int i = 0; i < InputSize(); ++i) {
       // For now only float type is supported
       CAFFE_ENFORCE(
           Input(i).dtype().template Match<float>(),
@@ -25,14 +24,14 @@ class UnsafeCoalesceOp final : public Operator<Context> {
           i);
     }

-    for (const auto i : c10::irange(InputSize())) {
+    for (int i = 0; i < InputSize(); ++i) {
       coalesced_size += Input(i).numel();
     }
     auto* coalesced = Output(OutputSize() - 1, coalesced_size, at::dtype<float>());
     auto coalesced_data = coalesced->template mutable_data<float>();

     size_t coalesced_offset = 0;
-    for (const auto i : c10::irange(InputSize())) {
+    for (auto i = 0; i < InputSize(); ++i) {
       const auto num_elems = Input(i).numel();
       auto input_sizes = Input(i).sizes().vec();
       // Don't do anything if both tensors are already pointing on the same data
@@ -8,7 +8,6 @@
 #include "caffe2/core/common_omp.h"
 #include "caffe2/core/context.h"
 #include "caffe2/core/export_caffe2_op_to_c10.h"
-#include <c10/util/irange.h>
 #include "caffe2/core/logging.h"
 #include "caffe2/core/operator.h"
 #include "caffe2/core/types.h"
@@ -65,7 +64,7 @@ class IsNanOp final : public Operator<Context> {
     const auto* X_data = X.template data<T>();
     uint8_t* Y_data = Y->template mutable_data<uint8_t>();
     // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
-    for (const auto i : c10::irange(X.numel())) {
+    for (size_t i = 0; i < X.numel(); i++) {
       Y_data[i] = (uint8_t)(std::isnan(X_data[i]));
     }
     return true;
@@ -300,7 +299,7 @@ class SumOp : public Operator<Context> {
     auto* output = Output(0, input0.sizes(), at::dtype<T>());
     T* output_data = output->template mutable_data<T>();
     // Dimension checking
-    for (const auto i : c10::irange(1, InputSize())) {
+    for (int i = 1; i < InputSize(); ++i) {
       if (output->sizes() != Input(i).sizes()) {
         CAFFE_THROW(
             "Check failed: output->sizes() == Input(i).sizes().",
@@ -321,7 +320,7 @@ class SumOp : public Operator<Context> {
         output_data,
         &context_);
     // Add remaining.
-    for (const auto i : c10::irange(2, InputSize())) {
+    for (int i = 2; i < InputSize(); ++i) {
       math::Add(
           output->numel(),
           output_data,
@@ -578,7 +577,7 @@ class ScatterWeightedSumOp : public Operator<Context> {
     float w0 = *weight0.template data<float>();
     // It's most likely a constant so exact comparison is fine
     if (w0 != 1.0) {
-      for (const auto i : c10::irange(K)) {
+      for (int i = 0; i < K; ++i) {
        Index idx = idxs[i];
        CAFFE_ENFORCE(
            0 <= idx && idx < N,
@@ -601,7 +600,7 @@ class ScatterWeightedSumOp : public Operator<Context> {
       CAFFE_ENFORCE_EQ(weight.numel(), 1);
       const T* x_data = X.template data<T>();
       float w = *weight.template data<float>();
-      for (const auto i : c10::irange(K)) {
+      for (int i = 0; i < K; ++i) {
        Index idx = idxs[i];
        // double-checking the indices, but it's fine as it's DCHECK only
        DCHECK(0 <= idx && idx < N)
@@ -747,7 +746,7 @@ class ScatterAssignOp : public Operator<Context> {
       int64_t N,
       int64_t K,
       int64_t block_size) {
-    for (const auto i : c10::irange(K)) {
+    for (int i = 0; i < K; ++i) {
       Index idx = idxs[i];
       // double-checking the indices, but it's fine as it's DCHECK only
       DCHECK(0 <= idx && idx < N)
@@ -839,9 +838,11 @@ class ScatterOp : public Operator<CPUContext> {
     // dst should have the same rank as idxs and src, but the dimension of dim
     // axis can be different. That is why in the above equation, there is the
     // difference of J_src and J_dst.
-    for (const auto outer_batch : c10::irange(outer_dims_product)) {
-      for (const auto i : c10::irange(N)) {
-        for (const auto inner_batch : c10::irange(idxs_block_size)) {
+    for (int64_t outer_batch = 0; outer_batch < outer_dims_product;
+         ++outer_batch) {
+      for (int64_t i = 0; i < N; ++i) {
+        for (int64_t inner_batch = 0; inner_batch < idxs_block_size;
+             ++inner_batch) {
           auto idxs_elem_idx =
               outer_batch * idxs_batch_size + i * idxs_block_size + inner_batch;
           auto src_elem_idx =
@@ -866,7 +867,7 @@ class ScatterOp : public Operator<CPUContext> {
       const IndexType* indices,
       int64_t n,
       IndexType indexing_axis_dim) {
-    for (const auto i : c10::irange(n)) {
+    for (auto i = 0; i < n; ++i) {
       auto idx = indices[i];
       CAFFE_ENFORCE(
           0 <= idx && idx < indexing_axis_dim,
@@ -899,7 +900,7 @@ class LengthsToSegmentIdsOp : public Operator<Context> {
     output->Resize(total_length);
     auto* output_data = output->template mutable_data<int32_t>();

-    for (const auto i : c10::irange(input.numel())) {
+    for (int i = 0; i < input.numel(); ++i) {
       auto len = input_data[i];
       std::fill(output_data, output_data + len, i);
       output_data += len;
@@ -926,7 +927,7 @@ class LengthsToRangesOp : public Operator<Context> {
     auto* output_data = output->template mutable_data<int32_t>();

     int32_t offset = 0;
-    for (const auto i : c10::irange(size)) {
+    for (int i = 0; i < size; ++i) {
       auto len = input_data[i];
       output_data[i * 2] = offset;
       output_data[i * 2 + 1] = len;
@@ -960,7 +961,7 @@ class LengthsToOffsetsOp : public Operator<Context> {
     auto* output_data = output->template mutable_data<int32_t>();

     int32_t offset = 0;
-    for (const auto i : c10::irange(size)) {
+    for (int i = 0; i < size; ++i) {
       auto len = input_data[i];
       output_data[i] = offset;
       offset += len;
@@ -1017,7 +1018,7 @@ class SegmentIdsToLengthsOp : public Operator<Context> {
     }
     std::fill(output_data, output_data + num_segments, 0);
     Index prev = 0; // Assume that segment_id >= 0.
-    for (const auto i : c10::irange(input_size)) {
+    for (int64_t i = 0; i < input_size; i++) {
       CAFFE_ENFORCE(
           prev <= input_data[i],
           "Segment ids must be sorted: ",
@@ -1068,7 +1069,7 @@ class SegmentIdsToRangesOp : public Operator<Context> {
     }
     std::fill(output_data, output_data + num_segments * 2, 0);
     Index prev = input_data[0];
-    for (const auto i : c10::irange(input_size)) {
+    for (int64_t i = 0; i < input_size; i++) {
       CAFFE_ENFORCE(
           prev <= input_data[i],
           "Segment ids must be sorted: ",
@@ -1108,7 +1109,7 @@ class LengthsToWeightsOp : public Operator<Context> {
     auto* output = Output(0);

     int64_t output_size = 0;
-    for (const auto i : c10::irange(input_size)) {
+    for (auto i = 0; i < input_size; i++) {
       CAFFE_ENFORCE_GE(input_data[i], 0, "unexpected negative length value");
       output_size += input_data[i];
     }
@@ -1131,7 +1132,7 @@ class LengthsToWeightsOp : public Operator<Context> {
     output->Resize(output_size);
     auto* output_data = output->template mutable_data<float>();
     int64_t cnt = 0;
-    for (const auto i : c10::irange(input_size)) {
+    for (auto i = 0; i < input_size; i++) {
       auto len = input_data[i];
       if (len == 0) {
         continue;
@@ -1158,7 +1159,7 @@ class HasElementsOp : public Operator<Context> {

   bool RunOnDevice() override {
     bool res = false;
-    for (const auto i : c10::irange(InputSize())) {
+    for (auto i = 0; i < InputSize(); ++i) {
       const auto& input = Input(i);
       res = res || input.numel() > 0;
     }
@@ -1207,7 +1208,7 @@ class LengthsToShapeOp : public Operator<Context> {
     auto size = input.numel();
     auto first = input_data[0];

-    for (const auto i : c10::irange(1, size)) {
+    for (int i = 1; i < size; i++) {
       CAFFE_ENFORCE(
           input_data[i] == first, "All elements of input must be same ");
     }
@@ -1254,7 +1255,7 @@ class GatherRangesOp : public Operator<Context> {
     size_t start = 0;
     size_t blockSize = ranges.size_from_dim(1);
     // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
-    for (const auto i : c10::irange(batchSize)) {
+    for (size_t i = 0; i < batchSize; ++i) {
       auto end = start + blockSize;
       outputLengthsPtr[i] = accumulate(rangesData, start, end);
       start = end;
@@ -1328,7 +1329,7 @@ class LengthsGatherOp : public Operator<Context> {

     int64_t total_length = 0;
     // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
-    for (const auto i : c10::irange(indices.numel())) {
+    for (size_t i = 0; i < indices.numel(); ++i) {
       auto idx = indices_data[i];
       CAFFE_ENFORCE_LT(idx, lengths.numel());
       total_length += lengths_data[idx];
@@ -1340,7 +1341,7 @@ class LengthsGatherOp : public Operator<Context> {
     offsets_.clear();
     int64_t running_offset = 0;
     offsets_.reserve(lengths.numel());
-    for (const auto i : c10::irange(lengths.numel())) {
+    for (size_t i = 0; i < lengths.numel(); ++i) {
       offsets_.push_back(running_offset);
       running_offset += lengths_data[i];
     }
@@ -1354,7 +1355,7 @@ class LengthsGatherOp : public Operator<Context> {
     auto block_bytesize = block_size * items.itemsize();
     auto out = static_cast<char*>(output->raw_mutable_data(items.dtype()));

-    for (const auto i : c10::irange(indices.numel())) {
+    for (size_t i = 0; i < indices.numel(); ++i) {
       auto idx = indices_data[i];
       auto length = lengths_data[idx];
       context_.CopyItemsSameDevice(
@@ -1405,7 +1406,7 @@ class AccumulateHistogramOp : public Operator<Context> {
     math::Set<int64_t, Context>(
         num_output_buckets_, 0, cur_hist_data, &context_);

-    for (const auto i : c10::irange(N)) {
+    for (int i = 0; i < N; i++) {
       int bucket_index = -1;
       if (X_data[i] < lower_bound_) {
         bucket_index = 0;
@@ -1418,7 +1419,7 @@ class AccumulateHistogramOp : public Operator<Context> {
       accumulate_hist_[bucket_index] += 1;
     }

-    for (const auto i : c10::irange(num_output_buckets_)) {
+    for (int i = 0; i < num_output_buckets_; i++) {
       acc_hist_data[i] = accumulate_hist_[i];
     }

@@ -1463,7 +1464,7 @@ class RangeOp : public Operator<Context> {
     T start = 0;
     T step = 1;

-    for (const auto i : c10::irange(InputSize())) {
+    for (int i = 0; i < InputSize(); ++i) {
       CAFFE_ENFORCE_EQ(
           Input(i).numel(), 1, "All inputs must be scalar/1D tensor.");
     }
@@ -17,7 +17,7 @@ void VariableLengthSequencePadding(
     const int32_t* seqLengths,
     const T padValue,
     Context* /*context*/) {
-  for (const auto j : c10::irange(B)) {
+  for (int j = 0; j < B; j++) {
     for (int i = seqLengths[j]; i < N; i++) {
       EigenVectorArrayMap<T>(X + B * M * i + M * j, M).setConstant(padValue);
     }
@@ -54,7 +54,7 @@ class ConcatAddMulReplaceNaNClipOp final : public Operator<Context> {
     }
     int before = 1, after = 1;
     vector<int64_t> output_dims(concat_input_0.sizes().vec());
-    for (const auto i : c10::irange(concat_input_0.dim())) {
+    for (int i = 0; i < concat_input_0.dim(); ++i) {
       if (i == canonical_axis) {
         continue;
       }
@@ -65,7 +65,7 @@ class ConcatAddMulReplaceNaNClipOp final : public Operator<Context> {
         after *= dim;
       }
       // check the input dims are compatible.
-      for (const auto j : c10::irange(concat_input_start, InputSize())) {
+      for (int j = concat_input_start; j < InputSize(); ++j) {
         int dim_j = Input(j).dim32(i);
         CAFFE_ENFORCE(
             dim == dim_j,
@@ -93,7 +93,7 @@ class ConcatAddMulReplaceNaNClipOp final : public Operator<Context> {
         "Cannot handle fused concat with dim > 2, please update your fusion logic");

     int output_channels = 0;
-    for (const auto i : c10::irange(concat_input_start, InputSize())) {
+    for (int i = concat_input_start; i < InputSize(); ++i) {
       axis_data[i - concat_input_start] = Input(i).dim32(canonical_axis);
       output_channels += Input(i).dim32(canonical_axis);
     }
@@ -101,7 +101,7 @@ class ConcatAddMulReplaceNaNClipOp final : public Operator<Context> {
     auto* output = Output(0, output_dims, at::dtype<float>());

     size_t output_offset = 0;
-    for (const auto i : c10::irange(concat_input_start, InputSize())) {
+    for (int i = concat_input_start; i < InputSize(); ++i) {
       auto& input = Input(i);
       auto axis_dim = input.dim32(canonical_axis);
       math::CopyMatrix<Context>(
@@ -127,7 +127,7 @@ class ConcatAddMulReplaceNaNClipOp final : public Operator<Context> {
     const auto _zeros = _mm256_set1_ps(0.f);

     output_offset = 0;
-    for (const auto outer : c10::irange(before)) {
+    for (auto outer = 0; outer < before; ++outer) {
       auto axis_dim = output->dim32(canonical_axis);
       size_t inner_size = axis_dim * after;
       auto inner = 0;
@@ -148,7 +148,7 @@ class ConcatAddMulReplaceNaNClipOp final : public Operator<Context> {
         _mm256_storeu_ps(&output_data[output_offset + inner], out_val);
       }

-      for (const auto inner_omp : c10::irange(inner, inner_size)) {
+      for (auto inner_omp = inner; inner_omp < inner_size; ++inner_omp) {
        float elem = output_data[output_offset + inner_omp];
        float add_elem = add_input_data[inner_omp];
        float mul_elem = mul_input_data[inner_omp];
@@ -1,5 +1,4 @@
 #pragma once
-#include "c10/util/irange.h"
 #include <iostream>
 #include <string>
 #include <vector>
@@ -21,7 +20,8 @@ struct ASTExpr {
     return starInputsFlag;
   }
   void dump(int level = 0) const {
-    for (const auto i : c10::irange(level))std::cout << " ";
+    for (int i = 0; i < level; i++)
+      std::cout << " ";
     if (!isCall())
       std::cout << "Var: " << name << std::endl;
     else {
@@ -41,7 +41,8 @@ struct ASTStmt {
     delete rhs;
   }
   void dump(int level = 0) const {
-    for (const auto i : c10::irange(level))std::cout << " ";
+    for (int i = 0; i < level; i++)
+      std::cout << " ";
     std::cout << "LHS:" << std::endl;
     for (auto s : lhs) {
       for (int i = 0; i < level + 1; i++)
@@ -6,7 +6,6 @@

 #include <c10/util/Exception.h>
 #include <c10/util/SmallVector.h>
-#include <c10/util/irange.h>
 #include "caffe2/core/context.h"
 #include "caffe2/core/logging.h"
 #include "caffe2/core/operator.h"
@@ -139,7 +138,7 @@ class OnnxifiOp final : public Operator<Context> {

     if (use_passed_output_shapes_) {
       // Populate output_shapes_per_bs_
-      for (const auto bs : c10::irange(1, max_batch_size_)) {
+      for (int bs = 1; bs < max_batch_size_; ++bs) {
         auto output_shapes_tp = helper.GetRepeatedArgument<TensorProto>("output_shapes_bs_" + caffe2::to_string(bs));
         auto output_qshapes_tp = helper.GetRepeatedArgument<TensorProto>("output_qshapes_bs_" + caffe2::to_string(bs));
         CAFFE_ENFORCE_EQ(output_names_.size(), output_shapes_tp.size() + output_qshapes_tp.size());
@@ -268,7 +267,7 @@ class OnnxifiOp final : public Operator<Context> {
         ONNXIFI_STATUS_SUCCESS);

     // Release unused backend ids.
-    for (const auto i : c10::irange(num_backends)) {
+    for (size_t i = 0; i < num_backends; ++i) {
       if (i == static_cast<size_t>(backend_index)) {
         continue;
       }
@@ -288,7 +287,7 @@ class OnnxifiOp final : public Operator<Context> {

     // Extra weight shapes
     std::unordered_map<std::string, ShapeInfo> weight_shape_info;
-    for (const auto i : c10::irange(weight_names.size())) {
+    for (size_t i = 0; i < weight_names.size(); ++i) {
       TensorShape shape;
       const auto& shape0 = weight_shapes[i];
       for (const auto d : shape0) {
@@ -6,7 +6,6 @@
 #include <immintrin.h>
 #endif
 #include <c10/util/Half.h>
-#include <c10/util/irange.h>

 namespace caffe2 {

@@ -27,7 +26,7 @@ static inline void adagrad_update_base_inlined(
     float epsilon,
     float lr,
     float weight_decay = 0.f) {
-  for (const auto i : c10::irange(N)) {
+  for (auto i = 0; i < N; ++i) {
     float gi = std::fma(weight_decay, w[i], g[i]);
     float hi = decay * h[i] + gi * gi;
     nh[i] = hi;
@@ -2,7 +2,6 @@
 #include <string.h>
 #include <cmath>
 #include <cstdint>
-#include "c10/util/irange.h"
 #include "caffe2/utils/conversions.h"

 #if (ENABLE_VECTORIZATION > 0) && !defined(_DEBUG) && !defined(DEBUG)
@@ -54,7 +53,7 @@ inline void LstmUnitImpl(
     T* H,
     const float forget_bias) {
   const T forgetBias = convert::To<float, T>(forget_bias);
-  for (const auto n : c10::irange(N)) {
+  for (int n = 0; n < N; ++n) {
     const bool valid = seqLengths == nullptr || t < seqLengths[n];
     if (!valid) {
       if (drop_states) {
@@ -68,7 +67,7 @@ inline void LstmUnitImpl(
       const T* X_D = &X[D];
       const T* X_2D = &X[2 * D];
       const T* X_3D = &X[3 * D];
-      VECTOR_LOOP for (const auto d : c10::irange(D)) {
+      VECTOR_LOOP for (int d = 0; d < D; ++d) {
        const T i = sigmoid(X[d]);
        const T f = sigmoid(X_D[d] + forgetBias);
        const T o = sigmoid(X_2D[d]);
@@ -106,7 +105,7 @@ inline void LstmUnitGradientImpl(
     T* X_diff,
     const float forget_bias) {
   const T localForgetBias = convert::To<float, T>(forget_bias);
-  for (const auto n : c10::irange(N)) {
+  for (int n = 0; n < N; ++n) {
     const bool valid = seqLengths == nullptr || t < seqLengths[n];

     if (!valid) {
@@ -119,7 +118,7 @@ inline void LstmUnitGradientImpl(
       }
       memset(X_diff, 0, 4 * sizeof(T) * D);
     } else {
-      VECTOR_LOOP for (const auto d : c10::irange(D)) {
+      VECTOR_LOOP for (int d = 0; d < D; ++d) {
        T* c_prev_diff = C_prev_diff + d;
        T* h_prev_diff = H_prev_diff + d;
        T* i_diff = X_diff + d;
@@ -59,12 +59,12 @@ class DataNetFiller : public Filler {
       : init_net_(init_net), data_net_(data_net) {
     // The output of the data_net_ will be served as the input
     int op_size = data_net_.op_size();
-    for (const auto i : c10::irange(op_size)) {
+    for (int i = 0; i < op_size; ++i) {
       OperatorDef op_def = data_net_.op(i);
       // We rely on Fill op to generate inputs
       CAFFE_ENFORCE(op_def.type().find("Fill") != std::string::npos);
       int output_size = op_def.output_size();
-      for (const auto j : c10::irange(output_size)) {
+      for (int j = 0; j < output_size; ++j) {
         input_names_.push_back(op_def.output(j));
       }
     }
@@ -105,7 +105,7 @@ class DataRandomFiller : public Filler {
       int input_index,
       const std::vector<std::vector<int64_t>>& input_dims) {
     Workspace ws;
-    for (const auto i : c10::irange(op_def.input_size())) {
+    for (int i = 0; i < op_def.input_size(); ++i) {
       // CreateOperator requires all input blobs present
       ws.CreateBlob(op_def.input(i));
     }
@@ -153,12 +153,12 @@ class TensorFetcher : public BlobFetcherBase {
     if (numpy_type == NPY_OBJECT) {
       PyObject** outObj = reinterpret_cast<PyObject**>(outPtr);
       auto* str = tensor.template data<std::string>();
-      for (const auto i : c10::irange(tensor.numel())) {
+      for (int i = 0; i < tensor.numel(); ++i) {
         outObj[i] = PyBytes_FromStringAndSize(str->data(), str->size());
         str++;
         // cleanup on failure
         if (outObj[i] == nullptr) {
-          for (const auto j : c10::irange(i)) {
+          for (int j = 0; j < i; ++j) {
             Py_DECREF(outObj[j]);
           }
           CAFFE_THROW("Failed to allocate string for ndarray of strings.");
@@ -212,7 +212,7 @@ class TensorFeeder : public BlobFeederBase {
     int ndim = PyArray_NDIM(array);
     npy_intp* npy_dims = PyArray_DIMS(array);
     std::vector<int64_t> dims;
-    for (const auto i : c10::irange(ndim)) {
+    for (int i = 0; i < ndim; ++i) {
      dims.push_back(npy_dims[i]);
    }

@@ -229,7 +229,7 @@ class TensorFeeder : public BlobFeederBase {
             dims, at::dtype<std::string>().device(Context::GetDeviceType()));
       }
       auto* outPtr = tensor.template mutable_data<std::string>();
-      for (const auto i : c10::irange(tensor.numel())) {
+      for (int i = 0; i < tensor.numel(); ++i) {
         char* str;
         Py_ssize_t strSize;
         if (PyBytes_Check(input[i])) {
@@ -375,7 +375,7 @@ class PythonOpBase : public Operator<Context> {

     std::vector<py::object> inputs;
     inputs.reserve(InputSize());
-    for (const auto i : c10::irange(InputSize())) {
+    for (auto i = 0; i < InputSize(); ++i) {
       const auto* blob = &InputBlob(i);
       // Allow CPU tensors in addition to operator context's tensors
       py::object py_obj;
@@ -395,7 +395,7 @@ class PythonOpBase : public Operator<Context> {
     }
     std::vector<py::object> outputs;
     outputs.reserve(OutputSize());
-    for (const auto i : c10::irange(OutputSize())) {
+    for (auto i = 0; i < OutputSize(); ++i) {
       auto* blob = OutputBlob(i);

       // Python op is always used with CPUContext only and treats inputs and
@@ -127,7 +127,7 @@ class BinaryElementwiseDNNLowPOp : public DNNLowPOp<T, FP32_OP> {
       size_t n, \
       size_t post, \
       CPUContext*) { \
-    for (const auto i : c10::irange(pre)) { \
+    for (int i = 0; i < pre; ++i) { \
      EigenArrayMap<R>(out + i * n * post, post, n) = eigen_op( \
          (ConstEigenArrayMap<T>(a + i * n * post, post, n).rowwise()), \
          (Eigen::Map<const Eigen::Array<T, 1, Eigen::Dynamic>>(b, n))); \
@@ -50,7 +50,7 @@ static void Im2ColNCHW(
       auto* dst = data_col + nip * (kernel_h * kernel_w * output_h * output_w) +
           kh * (kernel_w * output_h * output_w) + kw * (output_h * output_w);
       const auto* src = data_im + nip * (height * width);
-      for (const auto y : c10::irange(output_h)) {
+      for (auto y = 0; y < output_h; y++) {
         const auto iy = y * stride_h + kh;
         const auto ix = kw;
         if (stride_w == 1) {
@@ -59,7 +59,7 @@ static void Im2ColNCHW(
               src + (iy * width + ix),
               sizeof(T) * output_w);
         } else {
-          for (const auto x : c10::irange(output_w)) {
+          for (auto x = 0; x < output_w; x++) {
             memcpy(
                 dst + (y * output_w + x),
                 src + (iy * width + ix + x * stride_w),
@@ -78,8 +78,8 @@ static void Im2ColNCHW(
   const int pad_w = pad_l;
   const int channel_size = height * width;
   for (int channel = channels; channel--; data_im += channel_size) {
-    for (const auto kernel_row : c10::irange(kernel_h)) {
-      for (const auto kernel_col : c10::irange(kernel_w)) {
+    for (int kernel_row = 0; kernel_row < kernel_h; kernel_row++) {
+      for (int kernel_col = 0; kernel_col < kernel_w; kernel_col++) {
         int input_row = -pad_h + kernel_row * dilation_h;
         for (int output_rows = output_h; output_rows; output_rows--) {
           if (!utils::IsAGeZeroAndALtB(input_row, height)) {
@@ -113,12 +113,12 @@ static void Im2ColNCHW(
   int width_col = (width + pad_l + pad_r - dkernel_w) / stride_w + 1;

   int channels_col = channels * kernel_h * kernel_w;
-  for (const auto c : c10::irange(channels_col)) {
+  for (int c = 0; c < channels_col; ++c) {
     int w_offset = c % kernel_w;
     int h_offset = (c / kernel_w) % kernel_h;
     int c_im = c / kernel_h / kernel_w;
-    for (const auto h : c10::irange(height_col)) {
-      for (const auto w : c10::irange(width_col)) {
+    for (int h = 0; h < height_col; ++h) {
+      for (int w = 0; w < width_col; ++w) {
         int h_pad = h * stride_h - pad_t + h_offset * dilation_h;
         int w_pad = w * stride_w - pad_l + w_offset * dilation_w;
         if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
@@ -152,20 +152,20 @@ static void Im2ColNdNCHW(
       kernel_shape, kernel_shape + N, 1, std::multiplies<int>());
   std::vector<int> d_offset(N, 0);
   std::vector<int> d_iter(N, 0);
-  for (const auto i : c10::irange(outer_size)) {
+  for (int i = 0; i < outer_size; ++i) {
     // Loop over spatial axes in reverse order to compute a per-axis offset.
     int offset = i;
     for (int d_i = N - 1; d_i >= 0; --d_i) {
       d_offset[d_i] = offset % kernel_shape[d_i];
       offset /= kernel_shape[d_i];
     }
-    for (const auto j : c10::irange(inner_size)) {
+    for (int j = 0; j < inner_size; ++j) {
       // Loop over spatial axes in forward order to compute the indices in the
       // image and column, and whether the index lies in the padding.
       const int col_index = i * inner_size + j;
       int img_index = i / kernel_size;
       bool is_padding = false;
-      for (const auto d_i : c10::irange(N)) {
+      for (int d_i = 0; d_i < N; ++d_i) {
        const int d_img = d_iter[d_i] * stride[d_i] - pad[d_i] +
            d_offset[d_i] * dilation[d_i];
        is_padding |= d_img < 0 || d_img >= img_shape[d_i + 1];
@@ -216,13 +216,13 @@ static void Im2ColNHWC(
     T* data_col_temp =
         data_col + h * width_col * kernel_h * kernel_w * channels;
     int w_pad = -pad_l;
-    for (const auto w : c10::irange(width_col)) {
+    for (int w = 0; w < width_col; ++w) {
       int r = 0;
       for (int ih = h_pad; ih < h_pad + dkernel_h; ih += dilation_h, ++r) {
         int s = 0;
         for (int iw = w_pad; iw < w_pad + dkernel_w; iw += dilation_w, ++s) {
           if (ih >= 0 && ih < height && iw >= 0 && iw < width) {
-            for (const auto g : c10::irange(groups)) {
+            for (int g = 0; g < groups; ++g) {
               memcpy(
                   data_col_temp +
                       ((g * kernel_h + r) * kernel_w + s) * (channels / groups),
@@ -232,7 +232,7 @@ static void Im2ColNHWC(
             }
           } else {
             // This should be simply padded with zero.
-            for (const auto g : c10::irange(groups)) {
+            for (int g = 0; g < groups; ++g) {
               for (int i = 0; i < channels / groups; ++i) {
                 data_col_temp
                     [(((g * kernel_h + r) * kernel_w) + s) *
@@ -293,12 +293,12 @@ static void Im2Col3DNHWC(
 #endif
   for (int t = 0; t < frame_col; ++t) {
     int t_pad = -pad_p + t * stride_t;
-    for (const auto h : c10::irange(height_col)) {
+    for (int h = 0; h < height_col; ++h) {
       int h_pad = -pad_t + h * stride_h;
       T* data_col_temp = data_col +
           (t * height_col + h) * width_col * kernel_t * kernel_h * kernel_w *
              channels;
-      for (const auto w : c10::irange(width_col)) {
+      for (int w = 0; w < width_col; ++w) {
        int w_pad = -pad_l + w * stride_w;
        int q = 0;
        for (int it = t_pad; it < t_pad + dkernel_t; it += dilation_t, ++q) {
@@ -309,7 +309,7 @@ static void Im2Col3DNHWC(
                iw += dilation_w, ++s) {
             if (it >= 0 && it < num_frames && ih >= 0 && ih < height &&
                 iw >= 0 && iw < width) {
-              for (const auto g : c10::irange(groups)) {
+              for (int g = 0; g < groups; ++g) {
                 memcpy(
                     data_col_temp +
                         (((g * kernel_t + q) * kernel_h + r) * kernel_w + s) *
@@ -320,7 +320,7 @@ static void Im2Col3DNHWC(
               }
             } else {
               // This should be simply padded with zero.
-              for (const auto g : c10::irange(groups)) {
+              for (int g = 0; g < groups; ++g) {
                for (int i = 0; i < channels / groups; ++i) {
                  data_col_temp
                      [((((g * kernel_t + q) * kernel_h + r) * kernel_w) +
@ -36,8 +36,8 @@ void StoreMatrixInMatrixMarketFormat(
}
fprintf(fp, "%d %d\n", m, n);
// matrix market array format uses column-major order
for (const auto j : c10::irange(n)) {
for (const auto i : c10::irange(m)) {
for (int j = 0; j < n; ++j) {
for (int i = 0; i < m; ++i) {
if (is_integral<T>::value) {
// NOLINTNEXTLINE(clang-analyzer-core.NullDereference)
fprintf(fp, "%d\n", static_cast<int>(a[j * m + i]));

@ -54,7 +54,7 @@ class GatherDNNLowPOp final : public GatherOp<CPUContext> {
const Index* idxs = indices.template data<Index>();
auto out = static_cast<char*>(output->raw_mutable_data(data.dtype()));

for (const auto i : c10::irange(N)) {
for (int i = 0; i < N; ++i) {
auto idx = idxs[i];
CAFFE_ENFORCE(
0 <= idx && idx < data.size(0),

@ -149,7 +149,7 @@ class SafeDequeueBlobsOp final : public Operator<Context> {
}

const int kTensorGrowthPct = 40;
for (const auto i : c10::irange(numRecords_)) {
for (int i = 0; i < numRecords_; ++i) {
if (!queue->blockingRead(blobPtrs_)) {
// if we read at least one record, status is still true
return i > 0;

@ -32,7 +32,7 @@ class EnqueueRebatchingQueueOp : public Operator<CPUContext> {
CAFFE_ENFORCE_EQ(InputSize(), queue->numBlobs() + 1);
std::vector<const Tensor*> inputTensors;
inputTensors.reserve(InputSize() - 1);
for (const auto i : c10::irange(1, InputSize())) {
for (int i = 1; i < InputSize(); ++i) {
inputTensors.push_back(&Input(i));
}

@ -56,7 +56,7 @@ class DequeueRebatchingQueueOp : public Operator<CPUContext> {

std::vector<Tensor*> outputTensors;
outputTensors.reserve(OutputSize());
for (const auto i : c10::irange(OutputSize())) {
for (int i = 0; i < OutputSize(); ++i) {
outputTensors.push_back(Output(i));
}

@ -18,7 +18,7 @@ void AdadeltaUpdate(
float* nh,
float* nd,
Context* /*context*/) {
for (const auto i : c10::irange(N)) {
for (int i = 0; i < N; ++i) {
float gi = g[i];
float di = d[i];
float hi = nh[i] = decay * h[i] + (1.0f - decay) * gi * gi;
@ -120,7 +120,7 @@ class SparseAdadeltaOp final : public Operator<Context> {
}

auto block_size = Input(GRAD).numel() / n;
for (const auto i : c10::irange(n)) {
for (int i = 0; i < n; ++i) {
auto idx = indices[i];
if (block_size == 1) {
float gi = gradIn[i];

@ -82,8 +82,8 @@ class SparseAdagradFusedWithSparseLengthsSumGradientOp final
auto* grad_buffer_data =
is_mean ? grad_buffer_.template mutable_data<T>() : NULL;
if (is_mean) {
for (const auto rangeIndex : c10::irange(numSegments)) {
for (const auto tmpIndex : c10::irange(block_size)) {
for (auto rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) {
for (auto tmpIndex = 0; tmpIndex < block_size; ++tmpIndex) {
auto offsetI = rangeIndex * block_size;
grad_buffer_data[offsetI + tmpIndex] = lengths[rangeIndex] > 0
? gradIn[offsetI + tmpIndex] / lengths[rangeIndex]
@ -92,7 +92,7 @@ class SparseAdagradFusedWithSparseLengthsSumGradientOp final
}
}

for (const auto rangeIndex : c10::irange(numSegments)) {
for (auto rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) {
for (auto start = dataIndex; dataIndex < start + lengths[rangeIndex];
++dataIndex) {
std::size_t idx = indices[dataIndex];
@ -243,7 +243,7 @@ class SparseAdagradFusedWithSparseLengthsWeightedSumGradientOp final
// ignores this dependency and fuses these two loops.
std::vector<T> temp_grad(block_size);
int dataIndex = 0;
for (const auto rangeIndex : c10::irange(numSegments)) {
for (auto rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) {
for (auto start = dataIndex; dataIndex < start + lengths[rangeIndex];
++dataIndex) {
std::size_t idx = indices[dataIndex];
@ -277,7 +277,7 @@ class SparseAdagradFusedWithSparseLengthsWeightedSumGradientOp final
CAFFE_ENFORCE_EQ(dataIndex, n);

dataIndex = 0;
for (const auto rangeIndex : c10::irange(numSegments)) {
for (auto rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) {
for (auto start = dataIndex; dataIndex < start + lengths[rangeIndex];
++dataIndex) {
std::size_t idx = indices[dataIndex];
@ -285,7 +285,7 @@ class SparseAdagradFusedWithSparseLengthsWeightedSumGradientOp final
auto offsetIdx = idx * block_size;
auto localOffset = dataIndex - start;

for (const auto i : c10::irange(block_size)) {
for (int i = 0; i < block_size; ++i) {
temp_grad[i] = auxParamIn[localOffset] * gradIn[offsetI + i];
}

@ -409,7 +409,7 @@ class SparseAdagradFusedWithSparseLengthsWeightedSumGradientApproxOp final

std::vector<T> temp_grad(block_size);
int dataIndex = 0;
for (const auto rangeIndex : c10::irange(numSegments)) {
for (auto rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) {
for (auto start = dataIndex; dataIndex < start + lengths[rangeIndex];
++dataIndex) {
std::size_t idx = indices[dataIndex];
@ -440,7 +440,7 @@ class SparseAdagradFusedWithSparseLengthsWeightedSumGradientApproxOp final
auxGrad + dataIndex,
&context_);

for (const auto i : c10::irange(block_size)) {
for (int i = 0; i < block_size; ++i) {
temp_grad[i] = auxParamIn[localOffset] * gradIn[offsetI + i];
}

@ -39,7 +39,7 @@ void adagrad_update_output_effective_lr(
const float* lr,
Context* /*context*/,
float weight_decay = 0.f) {
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
float grad = std::fma(weight_decay, paramIn[i], gradIn[i]);
float moment = momentOut[i] = decay * momentIn[i] + grad * grad;
float effective_lr = effectiveLROut[i] =
@ -63,7 +63,7 @@ void adagrad_update_output_effective_lr_and_update(
const float* lr,
Context* /*context*/,
float weight_decay = 0.f) {
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
float grad = std::fma(weight_decay, paramIn[i], gradIn[i]);
float moment = momentOut[i] = decay * momentIn[i] + grad * grad;
float effective_lr = effectiveLROut[i] =
@ -300,7 +300,7 @@ class SparseAdagradOp final : public Operator<CPUContext> {
const auto* momentIn = Input(MOMENT_1).template data<float>();

std::vector<float> grad(block_size);
for (const auto i : c10::irange(n)) {
for (auto i = 0; i < n; ++i) {
auto idx = indices[i];
auto offsetI = i * block_size;
auto offsetIdx = idx * block_size;
@ -504,7 +504,7 @@ class RowWiseSparseAdagradOp final : public Operator<Context> {
#else
VLOG(1) << "using plain adagrad updates in RowWiseSparseAdagradOp";

for (const auto i : c10::irange(n)) {
for (auto i = 0; i < n; ++i) {
auto idx = indices[i];
float freq = (counter_halflife_ > 0 && count[idx] > 0)
? counter_halflife_ / count[idx]
@ -542,13 +542,13 @@ class RowWiseSparseAdagradOp final : public Operator<Context> {
const float* g = gradIn + offsetI;
float* h = moment + idx;
float hs = 0.;
for (const auto j : c10::irange(block_size)) {
for (auto j = 0; j < block_size; ++j) {
float gj = std::fma(weight_decay_ * freq, w[j], g[j]);
hs += gj * gj;
}
float hi = h[0] = h[0] + hs / block_size;
float step = lr[0] / (std::sqrt(hi) + epsilon_);
for (const auto j : c10::irange(block_size)) {
for (auto j = 0; j < block_size; ++j) {
float gj = std::fma(weight_decay_ * freq, w[j], g[j]);
w[j] = w[j] + gj * step;
}

@ -21,7 +21,7 @@ void adam_update(
float correction,
const float* lr,
Context* /*context*/) {
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
float gi = g[i];
float mi = nm[i] = m[i] * beta1 + gi * (1 - beta1);
float vi = nv[i] = v[i] * beta2 + gi * gi * (1 - beta2);
@ -45,7 +45,7 @@ void adam_compute(
float correction,
const float* lr,
Context* /*context*/) {
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
float gi = g[i];
float mi = nm[i] = m[i] * beta1 + gi * (1 - beta1);
float vi = nv[i] = v[i] * beta2 + gi * gi * (1 - beta2);
@ -74,7 +74,7 @@ void adam_compute_smart_decay(
Context* /*context*/) {
float k = (float)(t - lastSeenIn[0]);
lastSeenOut[0] = t;
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
float gi = g[i];
// The number of steps since this param was last seen.
// We don't need integer precision for k. Float is fine and it's faster to convert here.
@ -107,7 +107,7 @@ void adam_compute_output_grad(
float correction,
const float* lr,
Context* /*context*/) {
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
float gi = g[i];
float mi = nm[i] = m[i] * beta1 + gi * (1 - beta1);
float vi = nv[i] = v[i] * beta2 + gi * gi * (1 - beta2);
@ -135,7 +135,7 @@ void radam_update(
float r_correction,
const float* lr,
Context* /*context*/) {
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
float gi = g[i];
float mi = nm[i] = m[i] * beta1 + gi * (1 - beta1);
float vi = nv[i] = v[i] * beta2 + gi * gi * (1 - beta2);
@ -169,7 +169,7 @@ void radam_compute(
float r_correction,
const float* lr,
Context* /*context*/) {
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
float gi = g[i];
float mi = nm[i] = m[i] * beta1 + gi * (1 - beta1);
float vi = nv[i] = v[i] * beta2 + gi * gi * (1 - beta2);
@ -204,7 +204,7 @@ void radam_compute_output_grad(
float r_correction,
const float* lr,
Context* /*context*/) {
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
float gi = g[i];
float mi = nm[i] = m[i] * beta1 + gi * (1 - beta1);
float vi = nv[i] = v[i] * beta2 + gi * gi * (1 - beta2);
@ -350,7 +350,7 @@ class SparseAdamOp final : public Operator<Context> {
auto* moment2Out = Output(OUTPUT_MOMENT_2)->template mutable_data<T>();

if (OutputSize() == 3) {
for (const auto i : c10::irange(n)) {
for (auto i = 0; i < n; ++i) {
auto idx = indices[i];

if (block_size == 1) {
@ -444,7 +444,7 @@ class SparseAdamOp final : public Operator<Context> {
} else {
Output(OUTPUT_GRAD)->ResizeLike(Input(GRAD));
auto* gradOut = Output(OUTPUT_GRAD)->template mutable_data<T>();
for (const auto i : c10::irange(n)) {
for (auto i = 0; i < n; ++i) {
auto idx = indices[i];

if (block_size == 1) {
@ -593,7 +593,7 @@ class SmartDecaySparseAdamOp final : public Operator<Context> {
auto* moment2Out = Output(OUTPUT_MOMENT_2)->template mutable_data<T>();
int64_t* lastSeenOut = Output(OUTPUT_LAST_SEEN)->template mutable_data<int64_t>();

for (const auto i : c10::irange(n)) {
for (auto i = 0; i < n; ++i) {
auto idx = indices[i];
auto offsetI = i * block_size;
auto offsetIdx = idx * block_size;
@ -673,7 +673,7 @@ class RowWiseSparseAdamOp final : public Operator<Context> {
auto* moment2Out = Output(OUTPUT_MOMENT_2)->template mutable_data<T>();

if (OutputSize() == 3) {
for (const auto i : c10::irange(n)) {
for (auto i = 0; i < n; ++i) {
auto idx = indices[i];

if (block_size == 1) {
@ -719,13 +719,13 @@ class RowWiseSparseAdamOp final : public Operator<Context> {
float* nm2 = moment2Out + idx;

float m2_sum = 0.;
for (const auto j : c10::irange(block_size)) {
for (auto j = 0; j < block_size; ++j) {
float gj = g[j];
m2_sum += gj * gj;
}
float vi = nm2[0] =
m2[0] * beta2_ + (m2_sum / block_size) * (1 - beta2_);
for (const auto j : c10::irange(block_size)) {
for (auto j = 0; j < block_size; ++j) {
float mi = nm1[j] = m1[j] * beta1_ + g[j] * (1 - beta1_);
nw[j] = w[j] + lr[0] * correction * mi / (std::sqrt(vi) + epsilon_);
}
@ -734,7 +734,7 @@ class RowWiseSparseAdamOp final : public Operator<Context> {
} else {
Output(OUTPUT_GRAD)->ResizeLike(Input(GRAD));
auto* gradOut = Output(OUTPUT_GRAD)->template mutable_data<T>();
for (const auto i : c10::irange(n)) {
for (auto i = 0; i < n; ++i) {
auto idx = indices[i];

if (block_size == 1) {
@ -781,13 +781,13 @@ class RowWiseSparseAdamOp final : public Operator<Context> {
float* ng = gradOut + offsetI;

float m2_sum = 0.;
for (const auto j : c10::irange(block_size)) {
for (auto j = 0; j < block_size; ++j) {
float gj = g[j];
m2_sum += gj * gj;
}
float vi = nm2[0] =
m2[0] * beta2_ + (m2_sum / block_size) * (1 - beta2_);
for (const auto j : c10::irange(block_size)) {
for (auto j = 0; j < block_size; ++j) {
float mi = nm1[j] = m1[j] * beta1_ + g[j] * (1 - beta1_);
float ngi = ng[j] = correction * mi / (std::sqrt(vi) + epsilon_);
nw[j] = w[j] + lr[0] * ngi;

@ -21,7 +21,7 @@ void lr_update(
float x = 0;
float y = 0, z = 0;
const float kEps = 1e-12f;
for (const auto i : c10::irange(n)) {
for (auto i = 0; i < n; i++) {
x += grad[i] * effgrad[i];
if (normalized_lr_adaption) {
y += grad[i] * grad[i];

@ -5,7 +5,6 @@
#include <cmath>
#include "caffe2/core/context.h"
#include "caffe2/core/export_caffe2_op_to_c10.h"
#include <c10/util/irange.h>
#include "caffe2/core/operator.h"
#include "caffe2/sgd/learning_rate_functors.h"

@ -163,7 +162,7 @@ class LearningRateOp final : public Operator<Context> {
sub_policy_num_iters.size(),
0,
"Must specify at least one sub learning rate policy.");
for (const auto i : c10::irange(sub_policy_num_iters.size())) {
for (size_t i = 0; i < sub_policy_num_iters.size(); ++i) {
CAFFE_ENFORCE_GT(
sub_policy_num_iters[i],
0,

@ -17,7 +17,7 @@ void momentum_sgd_update(
float* param,
Context* /*context*/) {
const float LR = lr[0];
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
if (!nesterov) {
const float adjusted_gradient = LR * g[i] + momentum * m[i];
nm[i] = adjusted_gradient;
@ -154,7 +154,7 @@ class SparseMomentumSGDUpdateOp final : public Operator<Context> {
auto* momentumOut = Output(OUTPUT_MOMENTUM)->template mutable_data<T>();
auto* paramOut = Output(OUTPUT_PARAM)->template mutable_data<T>();

for (const auto i : c10::irange(n)) {
for (auto i = 0; i < n; ++i) {
auto idx = indices[i];
auto offsetI = i * block_size;
auto offsetIdx = idx * block_size;

@ -217,8 +217,8 @@ class RowWiseSparseAdagradFusedWithSparseLengthsSumGradientOp final
auto* grad_buffer_data =
is_mean ? grad_buffer_.template mutable_data<T>() : NULL;
if (is_mean) {
for (const auto rangeIndex : c10::irange(numSegments)) {
for (const auto tmpIndex : c10::irange(block_size)) {
for (auto rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) {
for (auto tmpIndex = 0; tmpIndex < block_size; ++tmpIndex) {
auto offsetI = rangeIndex * block_size;
grad_buffer_data[offsetI + tmpIndex] = lengths[rangeIndex] > 0
? gradIn[offsetI + tmpIndex] / lengths[rangeIndex]
@ -269,7 +269,7 @@ class RowWiseSparseAdagradFusedWithSparseLengthsSumGradientOp final
T counter_halflife,
rowWiseAdagradT& kernel) {
int dataIndex = 0;
for (const auto rangeIndex : c10::irange(numSegments)) {
for (auto rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) {
auto offsetI = rangeIndex * block_size;
const float* g = gradIn + offsetI;

@ -557,7 +557,7 @@ class RowWiseSparseAdagradFusedWithSparseLengthsWeightedSumGradientOp final
// ignores this dependency and fuses these two loops.
std::vector<T> temp_grad(block_size);
int dataIndex = 0;
for (const auto rangeIndex : c10::irange(numSegments)) {
for (auto rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) {
for (auto start = dataIndex; dataIndex < start + lengths[rangeIndex];
++dataIndex) {
std::size_t idx = indices[dataIndex];
@ -591,7 +591,7 @@ class RowWiseSparseAdagradFusedWithSparseLengthsWeightedSumGradientOp final
CAFFE_ENFORCE_EQ(dataIndex, n);

dataIndex = 0;
for (const auto rangeIndex : c10::irange(numSegments)) {
for (auto rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) {
auto offsetI = rangeIndex * block_size;
const float* g = gradIn + offsetI;

@ -606,7 +606,7 @@ class RowWiseSparseAdagradFusedWithSparseLengthsWeightedSumGradientOp final
auto offsetIdx = idx * block_size;
auto localOffset = dataIndex - start;

for (const auto i : c10::irange(block_size)) {
for (int i = 0; i < block_size; ++i) {
temp_grad[i] = auxParamIn[localOffset] * g[i];
}

@ -839,7 +839,7 @@ class RowWiseSparseAdagradFusedWithSparseLengthsWeightedSumGradientApproxOp

std::vector<T> temp_grad(block_size);
int dataIndex = 0;
for (const auto rangeIndex : c10::irange(numSegments)) {
for (auto rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) {
auto offsetI = rangeIndex * block_size;
const float* g = gradIn + offsetI;

@ -902,7 +902,7 @@ class RowWiseSparseAdagradFusedWithSparseLengthsWeightedSumGradientApproxOp

alignas(64) float temp[VLEN];
_mm256_store_ps(temp, acc_v);
for (const auto j : c10::irange(VLEN)) {
for (int j = 0; j < VLEN; ++j) {
acc += temp[j];
}
#endif

@ -40,7 +40,7 @@ class RowWiseCounterOp final : public Operator<CPUContext> {
return true;
}

for (const auto i : c10::irange(n)) {
for (auto i = 0; i < n; ++i) {
const std::size_t idx = indices[i];
CAFFE_ENFORCE_GE(
Input(COUNTER).numel(),

@ -19,7 +19,7 @@ void storm_update(
const float beta,
Context* /*context*/) {
float gradSqSumTmp = 0.0;
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
const float gi = gradIn[i];
gradSqSumTmp += gi * gi;
}
@ -27,7 +27,7 @@ void storm_update(

const float nlr = lr[0] * std::pow(beta + gradSqSumOut[0], -1.0 / 3.0);
const float alpha = momentum * nlr * nlr;
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
const float gi = gradIn[i];
const float mi = momentIn[i];
float new_mi = momentOut[i] = gi + (1.0 - alpha) * (mi - gi);
@ -120,7 +120,7 @@ class SparseStormOp final : public Operator<Context> {
}

float gradSqSumTmp = 0.0;
for (const auto i : c10::irange(Input(GRAD).numel())) {
for (auto i = 0; i < Input(GRAD).numel(); ++i) {
const float gi = gradIn[i];
gradSqSumTmp += gi * gi;
}
@ -130,7 +130,7 @@ class SparseStormOp final : public Operator<Context> {
const float alpha = momentum_ * nlr * nlr;
const auto block_size = Input(GRAD).numel() / n;

for (const auto i : c10::irange(n)) {
for (auto i = 0; i < n; ++i) {
auto idx = indices[i];
if (block_size == 1) {
const float gi = gradIn[i];
@ -162,7 +162,7 @@ class SparseStormOp final : public Operator<Context> {
i);
#endif

for (const auto j : c10::irange(block_size)) {
for (auto j = 0; j < block_size; ++j) {
const float gi = gradIn[offsetI + j];
const float mi = momentIn[offsetIdx + j];
float new_mi = momentOut[offsetIdx + j] =

@ -15,12 +15,12 @@ void wngrad_update(
float epsilon,
const float* lr,
Context* /*context*/) {
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
float gi = g[i];
nw[i] = w[i] + lr[0] * gi / (h[0] + epsilon);
}
float nhTmp = 0.0;
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
float gi = g[i];
nhTmp += gi * gi;
}
@ -42,13 +42,13 @@ void wngrad_update_output_effective_lr(
Context* /*context*/) {
effectiveLROut[0] = lr[0] / (seqBIn[0] + epsilon);
float seqBTmp = 0.0;
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
float gi = gradIn[i];
seqBTmp += gi * gi;
}
seqBTmp /= (seqBIn[0] + epsilon);
seqBOut[0] = seqBIn[0] + seqBTmp;
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
float grad = gradIn[i];
paramOut[i] = paramIn[i] + effectiveLROut[0] * grad;
}
@ -69,14 +69,14 @@ void wngrad_update_output_effective_lr_and_update(
Context* /*context*/) {
effectiveLROut[0] = lr[0] / (seqBIn[0] + epsilon);
float seqBTmp = 0.0;
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
float gi = gradIn[i];
seqBTmp += gi * gi;
}
seqBTmp /= (seqBIn[0] + epsilon);
seqBOut[0] = seqBIn[0] + seqBTmp;

for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
float grad = gradIn[i];
float update = updateOut[i] = effectiveLROut[0] * grad;
paramOut[i] = paramIn[i] + update;
@ -193,7 +193,7 @@ class SparseWngradOp final : public Operator<Context> {

auto block_size = Input(GRAD).numel() / n;

for (const auto i : c10::irange(n)) {
for (auto i = 0; i < n; ++i) {
auto idx = indices[i];
if (block_size == 1) {
float gi = gradIn[i];
@ -222,7 +222,7 @@ class SparseWngradOp final : public Operator<Context> {
" for input i:",
i);
#endif
for (const auto j : c10::irange(block_size)) {
for (auto j = 0; j < block_size; ++j) {
float gi = gradIn[offsetI + j];
paramOut[offsetIdx + j] =
paramIn[offsetIdx + j] + lr[0] * gi / (seqBIn[0] + epsilon_);
@ -230,7 +230,7 @@ class SparseWngradOp final : public Operator<Context> {
}
}
float seqBTmp = 0.0;
for (const auto i : c10::irange(Input(GRAD).numel())) {
for (auto i = 0; i < Input(GRAD).numel(); ++i) {
float gi = gradIn[i];
seqBTmp += gi * gi;
}

@ -133,7 +133,7 @@ CAFFE_ENFORCE_EQ(param_tensor.dim(), moment_tensor.dim());
CAFFE_ENFORCE_EQ(param_tensor.dim(), g_avg_tensor.dim());
CAFFE_ENFORCE_EQ(param_tensor.dim(), g2_avg_tensor.dim());
CAFFE_ENFORCE_EQ(param_tensor.dim(), grad_tensor.dim());
for (const auto i : c10::irange(param_tensor.dim())) {
for (int i = 0; i < param_tensor.dim(); ++i) {
CAFFE_ENFORCE_EQ(param_tensor.dim32(i), moment_tensor.dim32(i));
CAFFE_ENFORCE_EQ(param_tensor.dim32(i), g_avg_tensor.dim32(i));
CAFFE_ENFORCE_EQ(param_tensor.dim32(i), g2_avg_tensor.dim32(i));

@ -28,7 +28,7 @@ class TORCH_API PatternNetTransform : public Transform {
"External outputs do not match!");
ordered_ops_ = GetPatternTraversalOrder(p_);
inverse_ops_.resize(ordered_ops_.size());
for (const auto i : c10::irange(ordered_ops_.size())) {
for (size_t i = 0; i < ordered_ops_.size(); i++) {
inverse_ops_[ordered_ops_[i]] = i;
}
}

@ -9,7 +9,6 @@

#include <c10/util/Logging.h>
#include <c10/util/string_view.h>
#include <c10/util/irange.h>

#include "caffe2/utils/proto_wrap.h"
#include "caffe2/proto/caffe2_pb.h"

@ -4,7 +4,6 @@
#include <condition_variable>
#include <thread>
#include "c10/util/thread_name.h"
#include <c10/util/irange.h>
#include "caffe2/core/common.h"
#include "caffe2/core/logging.h"

@ -340,7 +339,7 @@ class WorkersPool {
CreateWorkers(workers_count);
DCHECK_LE(workers_count, (int)workers_.size());
counter_to_decrement_when_ready_.Reset(workers_count);
for (const auto task : c10::irange(1, tasks.size())) {
for (size_t task = 1; task < tasks.size(); ++task) {
workers_[task - 1]->StartWork(tasks[task].get());
}
// Execute the remaining workload immediately on the current thread.

@ -8,7 +8,6 @@
#include <string>

#include <c10/core/thread_pool.h>
#include <c10/util/irange.h>
#include <caffe2/core/db.h>
#include <caffe2/core/logging.h>
#include <caffe2/operators/prefetch_op.h>
@ -226,7 +225,7 @@ void VideoInputOp<Context>::CheckParamsAndPrint() {
if (random_sampling_rate_) {
LOG(INFO) << "random sampling with max:" << random_sampling_rate_;
}
for (const auto i : c10::irange(channels_rgb_)) {
for (int i = 0; i < channels_rgb_; i++) {
LOG(INFO) << " RGB " << i << "-th channel mean: " << mean_rgb_[i]
<< " std: " << 1.f / inv_std_rgb_[i];
}
@ -238,7 +237,7 @@ void VideoInputOp<Context>::CheckParamsAndPrint() {
<< "and a sampling rate of 1:" << sampling_rate_of_
<< " flow_data_type_: " << flow_data_type_
<< " flow_alg_type_: " << flow_alg_type_;
for (const auto i : c10::irange(channels_of_)) {
for (int i = 0; i < channels_of_; i++) {
LOG(INFO) << " Optical flow" << i
<< "-th channel mean: " << mean_of_[i]
<< " std: " << 1.f / inv_std_of_[i];
@ -258,7 +257,7 @@ void VideoInputOp<Context>::CheckParamsAndPrint() {
if (video_res_type_ == VideoResType::USE_SHORT_EDGE) {
if (jitter_scales_.size() > 0) {
LOG(INFO) << "Using scale jittering:";
for (const auto idx : c10::irange(jitter_scales_.size())) {
for (int idx = 0; idx < jitter_scales_.size(); idx++) {
LOG(INFO) << "scale " << idx << ": " << jitter_scales_[idx];
}
} else {
@ -391,7 +390,7 @@ VideoInputOp<Context>::VideoInputOp(
}

channels_rgb_ = 3;
for (const auto i : c10::irange(4, 7)) {
for (int i = 4; i < 7; i++) {
mean_rgb_.push_back(InputDataMean[i]);
inv_std_rgb_.push_back(1.f / InputDataStd[i]);
}
@ -404,7 +403,7 @@ VideoInputOp<Context>::VideoInputOp(
get_optical_flow_ = false;
get_rgb_ = true;
sampling_rate_rgb_ = 1;
for (const auto i : c10::irange(4, 7)) {
for (int i = 4; i < 7; i++) {
mean_rgb_.push_back(InputDataMean[i]);
inv_std_rgb_.push_back(1.f / InputDataStd[i]);
}
@ -421,7 +420,7 @@ VideoInputOp<Context>::VideoInputOp(
switch (flow_data_type_) {
case FlowDataType::Flow2C:
channels_of_ = 2;
for (const auto i : c10::irange(channels_of_)) {
for (int i = 0; i < channels_of_; i++) {
mean_of_.push_back(InputDataMean[i]);
inv_std_of_.push_back(1.f / InputDataStd[i]);
}
@ -429,7 +428,7 @@ VideoInputOp<Context>::VideoInputOp(

case FlowDataType::Flow3C:
channels_of_ = 3;
for (const auto i : c10::irange(channels_of_)) {
for (int i = 0; i < channels_of_; i++) {
mean_of_.push_back(InputDataMean[i]);
inv_std_of_.push_back(1.f / InputDataStd[i]);
}
@ -438,7 +437,7 @@ VideoInputOp<Context>::VideoInputOp(
// early fusion with gray
case FlowDataType::FlowWithGray:
channels_of_ = 3;
for (const auto i : c10::irange(2)) {
for (int i = 0; i < 2; i++) {
mean_of_.push_back(InputDataMean[i]);
inv_std_of_.push_back(1.f / InputDataStd[i]);
}
@ -449,11 +448,11 @@ VideoInputOp<Context>::VideoInputOp(
// early fusion with RGB
case FlowDataType::FlowWithRGB:
channels_of_ = 5;
for (const auto i : c10::irange(2)) {
for (int i = 0; i < 2; i++) {
mean_of_.push_back(InputDataMean[i]);
inv_std_of_.push_back(1.f / InputDataStd[i]);
}
for (const auto i : c10::irange(4, 7)) {
for (int i = 4; i < 7; i++) {
mean_of_.push_back(InputDataMean[i]);
inv_std_of_.push_back(1.f / InputDataStd[i]);
}
@ -528,15 +527,15 @@ void VideoInputOp<Context>::GetLabelsFromProto(
int* label_data) {
int num_clips = clip_per_video_ * crop_per_clip_;
if (!do_multi_label_) {
for (const auto i : c10::irange(num_clips)) {
for (int i = 0; i < num_clips; i++) {
label_data[i] = label_proto.int32_data(0);
}
} else {
// For multiple label case, output label is a binary vector
// where presented concepts are marked 1
memset(label_data, 0, sizeof(int) * num_of_class_ * num_clips);
for (const auto i : c10::irange(num_clips)) {
for (const auto j : c10::irange(label_proto.int32_data_size())) {
for (int i = 0; i < num_clips; i++) {
for (int j = 0; j < label_proto.int32_data_size(); j++) {
CAFFE_ENFORCE_LT(
label_proto.int32_data(j),
num_of_class_,
@ -660,7 +659,7 @@ bool VideoInputOp<Context>::GetClipsAndLabelsFromDBValue(
const TensorProto& start_frm_proto = protos.protos(curr_proto_idx++);
start_frm = start_frm_proto.int32_data(0);
if (get_start_frame_) {
for (const auto i : c10::irange(num_clips)) {
for (int i = 0; i < num_clips; i++) {
start_frame_data[i] = start_frm;
}
}
@ -670,7 +669,7 @@ bool VideoInputOp<Context>::GetClipsAndLabelsFromDBValue(
CAFFE_ENFORCE_GE(
protos.protos_size(), curr_proto_idx + 1, "Video Id not provided");
const TensorProto& video_id_proto = protos.protos(curr_proto_idx);
for (const auto i : c10::irange(num_clips)) {
for (int i = 0; i < num_clips; i++) {
video_id_data[i] = video_id_proto.int64_data(0);
}
}
@ -775,7 +774,7 @@ void VideoInputOp<Context>::DecodeAndTransform(
int clip_offset_of = channels_of_ * length_of_ * crop_size_ * crop_size_;
for (int i = 0; i < std::min(clip_per_video_, int(buffer_rgb.size()));
i++) {
for (const auto j : c10::irange(crop_per_clip_)) {
for (int j = 0; j < crop_per_clip_; j++) {
// get the rectangle for cropping
int h_off = 0;
int w_off = 0;
@ -858,7 +857,7 @@ void VideoInputOp<Context>::DecodeAndTransform(
}
}
if (buffer_rgb.size() > 0) {
for (const auto i : c10::irange(buffer_rgb.size())) {
for (int i = 0; i < buffer_rgb.size(); i++) {
unsigned char* buff = buffer_rgb[i];
delete[] buff;
}
@ -887,12 +886,12 @@ bool VideoInputOp<Context>::Prefetch() {
// Prefetching handled with a thread pool of "decode_threads" threads.
std::mt19937 meta_randgen(time(nullptr));
std::vector<std::mt19937> randgen_per_thread;
for (const auto i : c10::irange(num_decode_threads_)) {
for (int i = 0; i < num_decode_threads_; ++i) {
randgen_per_thread.emplace_back(meta_randgen());
}

std::bernoulli_distribution mirror_this_clip(0.5);
for (const auto item_id : c10::irange(batch_size_)) {
for (int item_id = 0; item_id < batch_size_; ++item_id) {
std::mt19937* randgen =
&randgen_per_thread[item_id % num_decode_threads_];

@ -5,7 +5,6 @@
#include <test/cpp/api/support.h>

#include <c10/util/ArrayRef.h>
#include <c10/util/irange.h>
#include <c10/util/tempfile.h>

#include <algorithm>
@ -174,7 +173,7 @@ TEST(DataTest, InfiniteStreamDataset) {
for (auto& batch : *data_loader) {
ASSERT_LT(batch_index, 3);
ASSERT_EQ(batch.size(), kBatchSize);
for (const auto j : c10::irange(kBatchSize)) {
for (size_t j = 0; j < kBatchSize; ++j) {
ASSERT_EQ(batch.at(j), 1 + (batch_index * kBatchSize) + j);
}
batch_index += 1;
@ -838,7 +837,7 @@ TEST(DataTest, CanUseCustomTypeAsIndexType) {

size_t i = 0;
for (auto batch : *data_loader) {
for (const auto j : c10::irange(kBatchSize)) {
for (int j = 0; j < kBatchSize; ++j) {
ASSERT_EQ(batch.at(j), 10 + j);
}
i += 1;
@ -858,7 +857,7 @@ TEST(DataTest, DistributedRandomSamplerSingleReplicaProduceCorrectSamples) {
ASSERT_EQ(res.size(), sample_count);

std::sort(res.begin(), res.end());
for (const auto i : c10::irange(res.size())) {
for (size_t i = 0; i < res.size(); ++i) {
ASSERT_EQ(res[i], i);
}
}
@ -873,14 +872,14 @@ TEST(DataTest, DistributedRandomSamplerMultiReplicaProduceCorrectSamples) {
size_t batch_size) {
std::vector<std::unique_ptr<samplers::DistributedRandomSampler>> samplers;

for (const auto i : c10::irange(num_replicas)) {
for (size_t i = 0; i < num_replicas; ++i) {
samplers.emplace_back(
torch::make_unique<samplers::DistributedRandomSampler>(
sample_count, num_replicas, i, allow_duplicates));
}

std::vector<size_t> res;
for (const auto i : c10::irange(num_replicas)) {
for (size_t i = 0; i < num_replicas; ++i) {
(*samplers[i]).reset();
torch::optional<std::vector<size_t>> idx;
while ((idx = (*samplers[i]).next(batch_size)).has_value()) {
@ -954,7 +953,7 @@ TEST(DataTest, DistributedSequentialSamplerSingleReplicaProduceCorrectSamples) {
ASSERT_EQ(res.size(), sample_count);

std::sort(res.begin(), res.end());
for (const auto i : c10::irange(res.size())) {
for (size_t i = 0; i < res.size(); ++i) {
ASSERT_EQ(res[i], i);
}
}
@ -970,14 +969,14 @@ TEST(DataTest, DistributedSequentialSamplerMultiReplicaProduceCorrectSamples) {
std::vector<std::unique_ptr<samplers::DistributedSequentialSampler>>
samplers;

for (const auto i : c10::irange(num_replicas)) {
for (size_t i = 0; i < num_replicas; ++i) {
samplers.emplace_back(
torch::make_unique<samplers::DistributedSequentialSampler>(
sample_count, num_replicas, i, allow_duplicates));
}

std::vector<size_t> res;
for (const auto i : c10::irange(num_replicas)) {
for (size_t i = 0; i < num_replicas; ++i) {
(*samplers[i]).reset();
torch::optional<std::vector<size_t>> idx;
while ((idx = (*samplers[i]).next(batch_size)).has_value()) {
@ -1491,7 +1490,7 @@ TEST(DataLoaderTest, StatefulDatasetWithNoWorkers) {

auto data_loader = torch::data::make_data_loader(D{});

for (const auto i : c10::irange(10)) {
for (size_t i = 0; i < 10; ++i) {
const auto number_of_iterations =
std::distance(data_loader->begin(), data_loader->end());
ASSERT_EQ(
@ -1532,7 +1531,7 @@ TEST(DataLoaderTest, StatefulDatasetWithManyWorkers) {
torch::data::datasets::make_shared_dataset<D>(),
DataLoaderOptions().workers(kNumberOfWorkers));

for (const auto i : c10::irange(10)) {
for (size_t i = 0; i < 10; ++i) {
const auto number_of_iterations =
std::distance(data_loader->begin(), data_loader->end());
ASSERT_EQ(
@ -1575,7 +1574,7 @@ TEST(DataLoaderTest, StatefulDatasetWithMap) {
})),
DataLoaderOptions{});

for (const auto i : c10::irange(10)) {
for (size_t i = 0; i < 10; ++i) {
const auto number_of_iterations =
std::distance(data_loader->begin(), data_loader->end());
ASSERT_EQ(
@ -1676,8 +1675,7 @@ TEST(DataLoaderTest, ChunkDataSetGetBatch) {
dataset,
DataLoaderOptions(batch_size).workers(dataloader_worker_count));

for (const auto epoch_index : c10::irange(epoch_count)) {
(void)epoch_index; // Suppress unused variable warning
for (int epoch_index = 0; epoch_index < epoch_count; ++epoch_index) {
std::vector<bool> result(total_example_count, false);
int iteration_count = 0;
for (auto iterator = data_loader->begin();
@ -1689,11 +1687,11 @@ TEST(DataLoaderTest, ChunkDataSetGetBatch) {
// When prefetch_count is equal to 1 and no worker thread, the batch
// order is deterministic. So we can verify elements in each batch.
if (prefetch_count == 1 && dataloader_worker_count == 0) {
for (const auto j : c10::irange(batch_size)) {
for (size_t j = 0; j < batch_size; ++j) {
ASSERT_EQ(batch[j], iteration_count * batch_size + j);
}
}
for (const auto j : c10::irange(batch_size)) {
for (size_t j = 0; j < batch_size; ++j) {
result[batch[j]] = true;
}
}
@ -1980,8 +1978,7 @@ TEST(DataLoaderTest, ChunkDatasetSave) {
dataset,
DataLoaderOptions(batch_size).workers(dataloader_worker_count));

for (const auto epoch_index : c10::irange(epoch_count)) {
(void)epoch_index; // Suppress unused variable warning
for (int epoch_index = 0; epoch_index < epoch_count; ++epoch_index) {
int iteration_count = 0;
for (auto iterator = data_loader->begin(); iterator != data_loader->end();
++iterator, ++iteration_count) {
@ -2082,7 +2079,7 @@ TEST(DataLoaderTest, ChunkDatasetLoad) {
auto data_loader = torch::data::make_data_loader(
dataset, DataLoaderOptions(batch_size).workers(dataloader_worker_count));

for (const auto epoch_index : c10::irange(epoch_count)) {
for (int epoch_index = 0; epoch_index < epoch_count; ++epoch_index) {
int iteration_count = 0;

// For the first epoch, the returned batch should be returned from the
@ -2131,7 +2128,7 @@ TEST(DataLoaderTest, ChunkDatasetCrossChunkShuffle) {
size_t index = 0;

// Repeatly sample every 5 indices.
for (const auto i : c10::irange(batch_size)) {
for (size_t i = 0; i < batch_size; ++i) {
for (size_t j = 0; j < size_ / batch_size; ++j) {
indices_[index++] = i + batch_size * j;
}
@ -2225,11 +2222,11 @@ TEST(DataLoaderTest, ChunkDatasetCrossChunkShuffle) {
// construct expected result
int offset = 0;

for (const auto i : c10::irange((chunk_count + cross_chunk_shuffle_count - 1) /
cross_chunk_shuffle_count)) {
for (const auto j : c10::irange(chunk_size)) {
(void)j; // Suppress unused variable warning
for (const auto k : c10::irange(cross_chunk_shuffle_count)) {
for (int i = 0; i < (chunk_count + cross_chunk_shuffle_count - 1) /
cross_chunk_shuffle_count;
i++) {
for (int j = 0; j < chunk_size; ++j) {
for (int k = 0; k < cross_chunk_shuffle_count; ++k) {
if (i * cross_chunk_shuffle_count + k < chunk_count) {
expected_result.push_back(i * cross_chunk_shuffle_count + k);
}

@ -2,7 +2,6 @@

#include <torch/torch.h>
#include <ATen/native/Pow.h>
#include <c10/util/irange.h>
#include <torch/types.h>
#include <torch/utils.h>
#include <test/cpp/api/support.h>
@ -25,7 +24,7 @@ TEST_F(DispatchTest, TestAVX2) {
setenv("ATEN_CPU_CAPABILITY", "avx2", 1);
#endif
const auto actual_pow_avx2 = vals_tensor.pow(pows_tensor);
for (const auto i : c10::irange(4)) {
for (int i = 0; i < 4; i++) {
ASSERT_EQ(result[i], actual_pow_avx2[i].item<int>());
}
}
@ -41,7 +40,7 @@ TEST_F(DispatchTest, TestAVX512) {
setenv("ATEN_CPU_CAPABILITY", "avx512", 1);
#endif
const auto actual_pow_avx512 = vals_tensor.pow(pows_tensor);
for (const auto i : c10::irange(4)) {
for (int i = 0; i < 4; i++) {
ASSERT_EQ(result[i], actual_pow_avx512[i].item<int>());
}
}
@ -57,7 +56,7 @@ TEST_F(DispatchTest, TestDefault) {
setenv("ATEN_CPU_CAPABILITY", "default", 1);
#endif
const auto actual_pow_default = vals_tensor.pow(pows_tensor);
for (const auto i : c10::irange(4)) {
for (int i = 0; i < 4; i++) {
ASSERT_EQ(result[i], actual_pow_default[i].item<int>());
}
}

@ -1,6 +1,5 @@
#include <gtest/gtest.h>

#include <c10/util/irange.h>
#include <torch/torch.h>

#include <test/cpp/api/support.h>
@ -14,7 +13,7 @@ struct ExpandingArrayTest : torch::test::SeedingFixture {};
TEST_F(ExpandingArrayTest, CanConstructFromInitializerList) {
torch::ExpandingArray<5> e({1, 2, 3, 4, 5});
ASSERT_EQ(e.size(), 5);
for (const auto i : c10::irange(e.size())) {
for (size_t i = 0; i < e.size(); ++i) {
ASSERT_EQ((*e)[i], i + 1);
}
}
@ -22,7 +21,7 @@ TEST_F(ExpandingArrayTest, CanConstructFromInitializerList) {
TEST_F(ExpandingArrayTest, CanConstructFromVector) {
torch::ExpandingArray<5> e(std::vector<int64_t>{1, 2, 3, 4, 5});
ASSERT_EQ(e.size(), 5);
for (const auto i : c10::irange(e.size())) {
for (size_t i = 0; i < e.size(); ++i) {
ASSERT_EQ((*e)[i], i + 1);
}
}
@ -30,7 +29,7 @@ TEST_F(ExpandingArrayTest, CanConstructFromVector) {
TEST_F(ExpandingArrayTest, CanConstructFromArray) {
torch::ExpandingArray<5> e(std::array<int64_t, 5>({1, 2, 3, 4, 5}));
ASSERT_EQ(e.size(), 5);
for (const auto i : c10::irange(e.size())) {
for (size_t i = 0; i < e.size(); ++i) {
ASSERT_EQ((*e)[i], i + 1);
}
}
@ -38,7 +37,7 @@ TEST_F(ExpandingArrayTest, CanConstructFromArray) {
TEST_F(ExpandingArrayTest, CanConstructFromSingleValue) {
torch::ExpandingArray<5> e(5);
ASSERT_EQ(e.size(), 5);
for (const auto i : c10::irange(e.size())) {
for (size_t i = 0; i < e.size(); ++i) {
ASSERT_EQ((*e)[i], 5);
}
}

@ -1,6 +1,5 @@
#include <gtest/gtest.h>

#include <c10/util/irange.h>
#include <torch/torch.h>
#include <test/cpp/api/support.h>

@ -15,15 +14,15 @@ torch::Tensor naive_dft(torch::Tensor x, bool forward=true) {
// Roots of unity, exp(-2*pi*j*n/N) for n in [0, N), reversed for inverse transform
std::vector<c10::complex<double>> roots(len);
const auto angle_base = (forward ? -2.0 : 2.0) * M_PI / len;
for (const auto i : c10::irange(len)) {
for (int64_t i = 0; i < len; ++i) {
auto angle = i * angle_base;
roots[i] = c10::complex<double>(std::cos(angle), std::sin(angle));
}

const auto in = x.data_ptr<c10::complex<double>>();
const auto out = out_tensor.data_ptr<c10::complex<double>>();
for (const auto i : c10::irange(len)) {
for (const auto j : c10::irange(len)) {
for (int64_t i = 0; i < len; ++i) {
for (int64_t j = 0; j < len; ++j) {
out[i] += roots[(j * i) % len] * in[j];
}
}

@ -1,6 +1,5 @@
#include <gtest/gtest.h>

#include <c10/util/irange.h>
#include <torch/torch.h>

#include <test/cpp/api/support.h>
@ -1128,7 +1127,7 @@ TEST_F(FunctionalTest, GumbelSoftmax) {
int dims[] = {1, -1};
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays,cppcoreguidelines-avoid-magic-numbers)
int expected[] = {5*3, 5*4};
for (const auto i : c10::irange(2)) {
for(auto i=0; i<2; i++) {
auto logits = torch::randn({5, 4, 3});
int expected_count = expected[i];
auto y_draw = F::gumbel_softmax(logits, F::GumbelSoftmaxFuncOptions().hard(true).dim(dims[i]));
@ -1150,8 +1149,7 @@ TEST_F(FunctionalTest, GumbelSoftmax) {

auto counts = torch::zeros_like(logits);
torch::Tensor y_draw;
for (const auto i : c10::irange(num_draws)) {
(void)i; // Suppress unused variable warning
for (auto i=0; i<num_draws; i++) {
y_draw = F::gumbel_softmax(logits, F::GumbelSoftmaxFuncOptions().hard(true));
counts += y_draw;
}
@ -1177,7 +1175,7 @@ TEST_F(FunctionalTest, Softmax) {
auto output = F::softmax(input, /*dim=*/1);
auto sum = torch::sum(torch::exp(input), 1);

for (const auto i : c10::irange(2)) {
for (int i = 0; i < 2; i++) {
auto expected = torch::exp(input[i]) / sum[i];
ASSERT_TRUE(torch::allclose(output[i], expected));
}
@ -1189,7 +1187,7 @@ TEST_F(FunctionalTest, Softmin) {
auto output = F::softmin(input, /*dim=*/1);
auto sum = torch::sum(torch::exp(-input), 1);

for (const auto i : c10::irange(2)) {
for (int i = 0; i < 2; i++) {
auto expected = torch::exp(-input[i]) / sum[i];
ASSERT_TRUE(torch::allclose(output[i], expected));
}
@ -1201,7 +1199,7 @@ TEST_F(FunctionalTest, LogSoftmax) {
auto output = F::log_softmax(input, /*dim=*/1);
auto sum = torch::sum(torch::exp(input), 1);

for (const auto i : c10::irange(2)) {
for (int i = 0; i < 2; i++) {
auto expected = torch::log(torch::exp(input[i]) / sum[i]);
ASSERT_TRUE(torch::allclose(output[i], expected));
}

@ -1,6 +1,5 @@
#include <gtest/gtest.h>

#include <c10/util/irange.h>
#include <torch/torch.h>

#include <test/cpp/api/init_baseline.h>
@ -15,7 +14,7 @@ void check_exact_values(
const std::vector<std::vector<torch::Tensor>>& expected_parameters) {
ASSERT_EQ(parameters.size(), expected_parameters.size());

for (const auto i : c10::irange(parameters.size())) {
for (size_t i = 0; i < parameters.size(); i++) {
auto layerParameters = parameters[i];
auto expectedLayerParameters = expected_parameters[i];

@ -28,7 +27,7 @@ void check_exact_values(
ASSERT_TRUE(false);
}

for (const auto p : c10::irange(layerParameters.size(0))) {
for (size_t p = 0; p < layerParameters.size(0); p++) {
// Always compare using double dtype, regardless of the original dtype of the tensors
auto tensor = layerParameters[p].to(torch::kFloat64);
auto expectedTensor = expectedLayerParameters[p].to(torch::kFloat64);

@ -1,6 +1,5 @@
#include <gtest/gtest.h>

#include <c10/util/irange.h>
#include <torch/torch.h>

#include <test/cpp/api/support.h>
@ -123,12 +122,10 @@ bool test_mnist(
torch::Device device(with_cuda ? torch::kCUDA : torch::kCPU);
model->to(device);

for (const auto epoch : c10::irange(number_of_epochs)) {
(void)epoch; // Suppress unused variable warning
for (size_t epoch = 0; epoch < number_of_epochs; epoch++) {
// NOLINTNEXTLINE(performance-for-range-copy)
for (torch::data::Example<> batch : *data_loader) {
auto data = batch.data.to(device);
auto targets = batch.target.to(device);
auto data = batch.data.to(device), targets = batch.target.to(device);
torch::Tensor prediction = forward_op(std::move(data));
// NOLINTNEXTLINE(performance-move-const-arg)
torch::Tensor loss = torch::nll_loss(prediction, std::move(targets));
@ -199,7 +196,7 @@ TEST_F(IntegrationTest, CartPole) {

std::vector<torch::Tensor> policy_loss;
std::vector<torch::Tensor> value_loss;
for (const auto i : c10::irange(0U, saved_log_probs.size())) {
for (auto i = 0U; i < saved_log_probs.size(); i++) {
auto advantage = r_t[i] - saved_values[i].item<float>();
policy_loss.push_back(-advantage * saved_log_probs[i]);
value_loss.push_back(

@ -1,6 +1,5 @@
#include <gtest/gtest.h>

#include <c10/util/irange.h>
#include <torch/torch.h>

#include <test/cpp/api/support.h>
@ -705,7 +704,7 @@ TEST_F(ModuleTest, ModulesReturnsExpectedSubmodulesForFlatModel) {
std::vector<std::shared_ptr<torch::nn::Module>> expected = {
model.ptr(), model[0], model[1], model[2]};
ASSERT_EQ(modules.size(), expected.size());
for (const auto i : c10::irange(expected.size())) {
for (size_t i = 0; i < expected.size(); ++i) {
// Assert pointer equality.
ASSERT_EQ(modules[i].get(), expected[i].get());
}
@ -718,7 +717,7 @@ TEST_F(ModuleTest, ModulesExcludesSelfWhenIncludeSelfSetToFalse) {
std::vector<std::shared_ptr<torch::nn::Module>> expected = {
model[0], model[1], model[2]};
ASSERT_EQ(modules.size(), expected.size());
for (const auto i : c10::irange(expected.size())) {
for (size_t i = 0; i < expected.size(); ++i) {
// Assert pointer equality.
ASSERT_EQ(modules[i].get(), expected[i].get());
}
@ -731,7 +730,7 @@ TEST_F(ModuleTest, NamedModulesReturnsExpectedNamedSubmodulesForFlatModel) {
std::vector<std::shared_ptr<torch::nn::Module>> expected = {
model.ptr(), model[0], model[1], model[2]};
ASSERT_EQ(modules.size(), expected.size());
for (const auto i : c10::irange(expected.size())) {
for (size_t i = 0; i < expected.size(); ++i) {
// Assert pointer equality.
ASSERT_EQ(modules[i].key(), i ? std::to_string(i - 1) : std::string());
ASSERT_EQ(modules[i].value().get(), expected[i].get());
@ -746,7 +745,7 @@ TEST_F(ModuleTest, NamedModulesExcludesSelfWhenIncludeSelfSetToFalse) {
std::vector<std::shared_ptr<torch::nn::Module>> expected = {
model[0], model[1], model[2]};
ASSERT_EQ(modules.size(), expected.size());
for (const auto i : c10::irange(expected.size())) {
for (size_t i = 0; i < expected.size(); ++i) {
// Assert pointer equality.
ASSERT_EQ(modules[i].key(), std::to_string(i));
ASSERT_EQ(modules[i].value().get(), expected[i].get());
@ -759,7 +758,7 @@ TEST_F(ModuleTest, ChildrenReturnsExpectedSubmodulesForFlatModel) {
std::vector<std::shared_ptr<torch::nn::Module>> expected = {
model[0], model[1], model[2]};
ASSERT_EQ(modules.size(), expected.size());
for (const auto i : c10::irange(expected.size())) {
for (size_t i = 0; i < expected.size(); ++i) {
// Assert pointer equality.
ASSERT_EQ(modules[i].get(), expected[i].get());
}
@ -775,7 +774,7 @@ TEST_F(ModuleTest, NamedChildrenReturnsExpectedNamedSubmodulesForFlatModel) {
std::vector<std::shared_ptr<torch::nn::Module>> expected = {
model[0], model[1], model[2]};
ASSERT_EQ(modules.size(), expected.size());
for (const auto i : c10::irange(expected.size())) {
for (size_t i = 0; i < expected.size(); ++i) {
// Assert pointer equality.
ASSERT_EQ(modules[i].key(), std::to_string(i));
ASSERT_EQ(modules[i].value().get(), expected[i].get());
@ -823,7 +822,7 @@ TEST_F(ModuleTest, NamedBuffersReturnsExpectedTensorsForFlatModel) {
struct TestContainer : torch::nn::Module {
TestContainer(int64_t number, std::vector<TestContainer> modules = {})
: tensor(torch::tensor(number)) {
for (const auto i : c10::irange(modules.size())) {
for (size_t i = 0; i < modules.size(); ++i) {
register_module(
std::to_string(i),
std::make_shared<TestContainer>(std::move(modules[i])));
@ -867,7 +866,7 @@ TEST_F(ModuleTest, ModulesReturnsExpectedSubmodulesForDeepModel) {
std::vector<std::shared_ptr<torch::nn::Module>> modules = model->modules();

ASSERT_EQ(modules.size(), 10);
for (const auto i : c10::irange(modules.size())) {
for (size_t i = 0; i < modules.size(); ++i) {
ASSERT_EQ(get_test_container_item(modules[i]), i);
}
}
@ -880,7 +879,7 @@ TEST_F(ModuleTest, NamedModulesReturnsExpectedNamedSubmodulesForDeepModel) {

ASSERT_EQ(modules.size(), expected.size());

for (const auto i : c10::irange(expected.size())) {
for (size_t i = 0; i < expected.size(); ++i) {
ASSERT_EQ(modules[i].key(), expected[i].first);
ASSERT_EQ(get_test_container_item(modules[i].value()), expected[i].second);
}

@ -1,6 +1,5 @@
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <c10/util/irange.h>
|
||||
#include <torch/torch.h>
|
||||
|
||||
#include <algorithm>
|
||||
@ -119,7 +118,7 @@ TEST_F(ModuleListTest, AccessWithAt) {
|
||||
ASSERT_EQ(list->size(), 3);
|
||||
|
||||
// returns the correct module for a given index
|
||||
for (const auto i : c10::irange(modules.size())) {
|
||||
for (size_t i = 0; i < modules.size(); ++i) {
|
||||
ASSERT_EQ(&list->at<M>(i), modules[i].get());
|
||||
}
|
||||
|
||||
@ -144,7 +143,7 @@ TEST_F(ModuleListTest, AccessWithPtr) {
|
||||
ASSERT_EQ(list->size(), 3);
|
||||
|
||||
// returns the correct module for a given index
|
||||
for (const auto i : c10::irange(modules.size())) {
|
||||
for (size_t i = 0; i < modules.size(); ++i) {
|
||||
ASSERT_EQ(list->ptr(i).get(), modules[i].get());
|
||||
ASSERT_EQ(list[i].get(), modules[i].get());
|
||||
ASSERT_EQ(list->ptr<M>(i).get(), modules[i].get());
|
||||
|
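A sketch of the accessors exercised above, assuming concrete torch::nn::Linear entries (the test's M is its own local module type):

#include <torch/torch.h>
#include <memory>

void modulelist_access() {
  torch::nn::ModuleList list(torch::nn::Linear(3, 4), torch::nn::Linear(4, 5));
  // at<T>(i) returns a T& to the concrete module implementation.
  auto& first = list->at<torch::nn::LinearImpl>(0);
  // ptr(i) returns the type-erased shared_ptr; ptr<T>(i) downcasts it.
  std::shared_ptr<torch::nn::Module> erased = list->ptr(0);
  std::shared_ptr<torch::nn::LinearImpl> typed =
      list->ptr<torch::nn::LinearImpl>(0);
  (void)first;
  (void)erased;
  (void)typed;
}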
@ -1,6 +1,5 @@
#include <gtest/gtest.h>

#include <c10/util/irange.h>
#include <torch/torch.h>

#include <test/cpp/api/support.h>
@ -1149,7 +1148,7 @@ TEST_F(ModulesTest, LayerNorm) {
s.backward();
ASSERT_EQ(y.ndimension(), 2);
ASSERT_EQ(s.ndimension(), 0);
for (const auto i : c10::irange(2)) {
for (auto i = 0; i < 2; i++) {
ASSERT_EQ(y.size(i), 2);
}

@ -1167,7 +1166,7 @@ TEST_F(ModulesTest, GroupNorm) {
s.backward();
ASSERT_EQ(y.ndimension(), 2);
ASSERT_EQ(s.ndimension(), 0);
for (const auto i : c10::irange(2)) {
for (auto i = 0; i < 2; i++) {
ASSERT_EQ(y.size(i), 2);
}

@ -2596,7 +2595,7 @@ TEST_F(ModulesTest, Softmax) {
auto output = m(input);
auto sum = torch::sum(torch::exp(input), 1);

for (const auto i : c10::irange(2)) {
for (int i = 0; i < 2; i++) {
auto expected = torch::exp(input[i]) / sum[i];
ASSERT_TRUE(torch::allclose(output[i], expected));
}
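What the loop above checks, as a self-contained sketch (shapes are illustrative; torch::softmax is the functional equivalent of the Softmax module with dim=1):

#include <torch/torch.h>

void softmax_rowwise_check() {
  auto input = torch::randn({2, 5});
  auto output = torch::softmax(input, /*dim=*/1);
  auto sum = torch::sum(torch::exp(input), 1);
  for (int i = 0; i < 2; i++) {
    // Row-wise definition: softmax(x)_ij = exp(x_ij) / sum_j exp(x_ij).
    auto expected = torch::exp(input[i]) / sum[i];
    TORCH_CHECK(torch::allclose(output[i], expected));
  }
}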
@ -2608,7 +2607,7 @@ TEST_F(ModulesTest, Softmin) {
auto output = m(input);
auto sum = torch::sum(torch::exp(-input), 1);

for (const auto i : c10::irange(2)) {
for (int i = 0; i < 2; i++) {
auto expected = torch::exp(-input[i]) / sum[i];
ASSERT_TRUE(torch::allclose(output[i], expected));
}
@ -2620,7 +2619,7 @@ TEST_F(ModulesTest, LogSoftmax) {
auto output = m(input);
auto sum = torch::sum(torch::exp(input), 1);

for (const auto i : c10::irange(2)) {
for (int i = 0; i < 2; i++) {
auto expected = torch::log(torch::exp(input[i]) / sum[i]);
ASSERT_TRUE(torch::allclose(output[i], expected));
}
@ -2657,7 +2656,7 @@ TEST_F(ModulesTest, AdaptiveLogSoftmaxWithLoss) {
auto logprob_out = asfm->log_prob(x);
NLLLoss nll_loss;

for (const auto v : c10::irange(4)) {
for (int64_t v = 0; v < 4; ++v) {
auto y = torch::full({4}, v, torch::kLong);
auto asm_out = asfm(x, y);
auto out = asm_out.output;
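A hedged usage sketch for the module under test (sizes and the cutoff are illustrative: with 4 classes and cutoffs {2}, classes 0-1 sit in the head and 2-3 in a tail cluster):

#include <torch/torch.h>

void adaptive_softmax_usage() {
  torch::nn::AdaptiveLogSoftmaxWithLoss asfm(
      torch::nn::AdaptiveLogSoftmaxWithLossOptions(8, 4, {2}));
  auto x = torch::randn({4, 8});
  auto y = torch::zeros({4}, torch::kLong);
  auto result = asfm(x, y);          // result.output: per-sample target log-probs
  auto loss = result.loss;           // mean negative log-likelihood
  auto logprob = asfm->log_prob(x);  // full [batch, n_classes] log-prob table
  (void)loss;
  (void)logprob;
}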
@ -2676,10 +2675,10 @@ TEST_F(ModulesTest, Softmax2d) {
auto output = m(input);
auto sum = torch::sum(torch::exp(input), 1);

for (const auto i : c10::irange(1)) {
for (const auto j : c10::irange(2)) {
for (const auto k : c10::irange(3)) {
for (const auto l : c10::irange(4)) {
for (int i = 0; i < 1; i++) {
for (int j = 0; j < 2; j++) {
for (int k = 0; k < 3; k++) {
for (int l = 0; l < 4; l++) {
auto expected = torch::exp(input[i][j][k][l]) / sum[i][k][l];
ASSERT_TRUE(torch::allclose(output[i][j][k][l], expected));
}
@ -3390,8 +3389,8 @@ namespace detail {
TORCH_INTERNAL_ASSERT(a.size(0) == b.size(0));
TORCH_INTERNAL_ASSERT(a.size(1) == b.size(1));
auto retval = torch::zeros({a.size(0), a.size(1), a.size(2), b.size(3)}, torch::kFloat32);
for (const auto i : c10::irange(a.size(0))) {
for (const auto j : c10::irange(a.size(1))) {
for (int i = 0; i < a.size(0); i++) {
for (int j = 0; j < a.size(1); j++) {
retval[i][j] = torch::matmul(a[i][j], b[i][j]);
}
}
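The helper above is the loop form of a batched matrix multiply; torch::matmul already treats the leading dimensions as batch dimensions, so a direct call agrees with the loop (a sketch with illustrative shapes):

#include <torch/torch.h>

void batched_matmul_check() {
  auto a = torch::randn({2, 3, 4, 5});
  auto b = torch::randn({2, 3, 5, 6});
  auto direct = torch::matmul(a, b);  // shape [2, 3, 4, 6]
  auto loop = torch::zeros({2, 3, 4, 6});
  for (int i = 0; i < 2; i++) {
    for (int j = 0; j < 3; j++) {
      loop[i][j] = torch::matmul(a[i][j], b[i][j]);
    }
  }
  TORCH_CHECK(torch::allclose(direct, loop));
}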
@ -3400,9 +3399,9 @@ namespace detail {

torch::Tensor _softmax(const torch::Tensor& x) {
auto output = torch::zeros(x.sizes());
for (const auto i : c10::irange(x.size(0))) {
for (const auto j : c10::irange(x.size(1))) {
for (const auto k : c10::irange(x.size(2))) {
for (int i = 0; i < x.size(0); i++) {
for (int j = 0; j < x.size(1); j++) {
for (int k = 0; k < x.size(2); k++) {
const auto& x_curr = x[i][j][k];
const auto e_x = torch::exp(x_curr - torch::max(x_curr));
output[i][j][k] = e_x / torch::sum(e_x);
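The max subtraction in _softmax above is the standard overflow guard: softmax is invariant to shifting its input, so subtracting max(x) leaves the result unchanged while capping the largest exponent at zero. The same idea in plain C++:

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

std::vector<double> stable_softmax(const std::vector<double>& x) {
  if (x.empty()) return {};
  double mx = x[0];
  for (double v : x) mx = std::max(mx, v);  // shift so the peak maps to exp(0)
  std::vector<double> out(x.size());
  double denom = 0.0;
  for (size_t i = 0; i < x.size(); ++i) {
    out[i] = std::exp(x[i] - mx);
    denom += out[i];
  }
  for (double& v : out) v /= denom;
  return out;
}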
@ -3425,10 +3424,10 @@ namespace detail {
const auto s1 = QKT.size(2);
const auto s2 = QKT.size(3);
if (unseen_mask.defined() || key_padding_mask.defined()) {
for (const auto i : c10::irange(b1)) {
for (const auto j : c10::irange(b2)) {
for (const auto m : c10::irange(s1)) {
for (const auto n : c10::irange(s2)) {
for (int i = 0; i < b1; i++) {
for (int j = 0; j < b2; j++) {
for (int m = 0; m < s1; m++) {
for (int n = 0; n < s2; n++) {
if (unseen_mask.defined() && unseen_mask[m][n].item<double>() == 0) {
QKT[i][j][m][n] = -std::numeric_limits<double>::infinity();
}
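Writing -infinity into masked logits is what zeroes their attention weight, since exp(-inf) == 0 under the subsequent softmax; a minimal sketch (1-D for brevity):

#include <torch/torch.h>
#include <limits>

void mask_then_softmax() {
  auto scores = torch::randn({4});
  auto mask = torch::tensor({1, 1, 0, 0});  // 0 marks a masked position
  scores.masked_fill_(mask == 0, -std::numeric_limits<float>::infinity());
  auto weights = torch::softmax(scores, /*dim=*/0);
  TORCH_CHECK(weights[2].item<double>() == 0.0);  // masked => exactly zero
}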
@ -3476,8 +3475,7 @@ namespace detail {
std::uniform_int_distribution<int> d_2_10(2, 10);
std::uniform_int_distribution<int> d_3_10(3, 10);
bool registration_checked = false;
for (const auto i : c10::irange(100)) {
(void)i; // Suppress unused variable warning
for (int i = 0; i < 100; i++) {
const auto batch_sz = d_2_10(generator);
const auto seq_len = d_2_10(generator);
const auto d_head = d_3_10(generator);

@ -1,6 +1,5 @@
#include <gtest/gtest.h>

#include <c10/util/irange.h>
#include <torch/torch.h>

#include <test/cpp/api/support.h>
@ -41,7 +40,7 @@ TEST_F(NNUtilsTest, ClipGradNorm) {
auto compare_scaling =
[&](const std::vector<torch::Tensor>& grads) -> torch::Tensor {
std::vector<torch::Tensor> p_scale;
for (const auto i : c10::irange(grads.size())) {
for (int i = 0; i < grads.size(); i++) {
auto param = l->parameters()[i];
auto grad = grads[i];
p_scale.push_back(param.grad().data().div(grad).view(-1));
@ -62,7 +61,7 @@ TEST_F(NNUtilsTest, ClipGradNorm) {
std::numeric_limits<float>::infinity(),
};
for (auto norm_type : norm_types) {
for (const auto i : c10::irange(grads.size())) {
for (int i = 0; i < grads.size(); i++) {
l->parameters()[i].mutable_grad() =
grads[i].clone().view_as(l->parameters()[i].data());
}
@ -81,7 +80,7 @@ TEST_F(NNUtilsTest, ClipGradNorm) {
torch::ones(10).div(500),
};
for (auto norm_type : norm_types) {
for (const auto i : c10::irange(grads.size())) {
for (int i = 0; i < grads.size(); i++) {
l->parameters()[i].grad().data().copy_(grads[i]);
}
auto norm_before = compute_norm(norm_type);
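A hedged sketch of the call these hunks exercise: utils::clip_grad_norm_ rescales every gradient in place when their combined norm exceeds max_norm (values here are illustrative):

#include <torch/torch.h>

void clip_grad_norm_example() {
  torch::nn::Linear l(10, 10);
  l(torch::randn({4, 10})).sum().backward();
  // In-place rescaling of each parameter's .grad so the total 2-norm is at
  // most 1.0; the return value is the pre-clip total norm.
  auto total_norm = torch::nn::utils::clip_grad_norm_(l->parameters(), 1.0);
  (void)total_norm;
}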
@ -228,7 +227,7 @@ TEST_F(NNUtilsTest, ClipGradNormErrorIfNonfinite) {
// NOLINTNEXTLINE(cppcoreguidelines-avoid-goto,hicpp-avoid-goto)
EXPECT_THROW(utils::clip_grad_norm_(parameters, 1., norm_type, true), std::exception) << msg;
// Grads should not change if error is thrown
for (const auto p_idx : c10::irange(parameters.size())) {
for (int64_t p_idx = 0; p_idx < parameters.size(); p_idx++) {
ASSERT_TRUE(torch::allclose(parameters[p_idx].grad(), grads_before[p_idx], 1.0, 0.0, /*equal_nan*/ true)) << msg;
}
} else {
@ -286,7 +285,7 @@ TEST_F(NNUtilsTest, ClipGradValue) {
std::vector<std::vector<torch::Tensor>> grad_lists = {
{grad_w, grad_b}, {grad_w, torch::Tensor()}};
for (auto grad_list : grad_lists) {
for (const auto i : c10::irange(grad_list.size())) {
for (int i = 0; i < grad_list.size(); i++) {
auto p = l->parameters()[i];
auto g = grad_list[i];
p.mutable_grad() = g.defined() ? g.clone().view_as(p.data()) : g;
@ -336,7 +335,7 @@ TEST_F(NNUtilsTest, ConvertParameters) {
};

utils::vector_to_parameters(vector, zero_parameters);
for (const auto i : c10::irange(zero_parameters.size())) {
for (int i = 0; i < zero_parameters.size(); ++i) {
ASSERT_TRUE(zero_parameters[i].allclose(parameters[i]));
}

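The ConvertParameters test round-trips through the flattening pair; a sketch of that pair (assuming torch::nn::utils::parameters_to_vector as the inverse of the vector_to_parameters call seen above):

#include <torch/torch.h>

void convert_parameters_roundtrip() {
  torch::nn::Linear l(3, 2);
  // Flatten all parameters into one 1-D tensor...
  auto flat = torch::nn::utils::parameters_to_vector(l->parameters());
  // ...then scatter the values back into the parameter tensors in place.
  torch::nn::utils::vector_to_parameters(flat, l->parameters());
}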
@ -369,8 +368,7 @@ int64_t PackedSequenceTest_max_length = 6;
std::vector<torch::Tensor> PackedSequenceTest_ordered_sequence(torch::ScalarType tensor_type) {
std::vector<torch::Tensor> seqs;
seqs.reserve(PackedSequenceTest_batch_size);
for (const auto i : c10::irange(PackedSequenceTest_batch_size)) {
(void)i; // Suppress unused variable warning
for (int64_t i = 0; i < PackedSequenceTest_batch_size; i++) {
seqs.emplace_back(torch::empty({
torch::randint(1, PackedSequenceTest_max_length, {1}).item<int64_t>()
}, tensor_type));
@ -392,7 +390,7 @@ std::tuple<torch::Tensor, torch::Tensor> PackedSequenceTest_padded_sequence(torc
// Create Tensor of random padded sequences
auto ordered = PackedSequenceTest_ordered_sequence(tensor_type);
auto lengths = torch::empty({(int64_t)ordered.size()}, torch::kInt64);
for (const auto i : c10::irange(ordered.size())) {
for (int64_t i = 0; i < ordered.size(); i++) {
lengths[i] = ordered[i].size(0);
}
auto padded_tensor = rnn_utils::pad_sequence(ordered);
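A hedged sketch of the padding/packing pipeline these helpers feed (rnn_utils refers to torch::nn::utils::rnn, as used above; lengths must be in decreasing order unless enforce_sorted is disabled):

#include <torch/torch.h>
#include <vector>

namespace rnn_utils = torch::nn::utils::rnn;

void pad_then_pack() {
  std::vector<torch::Tensor> seqs = {
      torch::randn({5}), torch::randn({3}), torch::randn({2})};
  auto lengths = torch::tensor({5, 3, 2}, torch::kInt64);
  auto padded = rnn_utils::pad_sequence(seqs);  // [max_len, batch] = [5, 3]
  auto packed = rnn_utils::pack_padded_sequence(padded, lengths);
  auto unpacked = rnn_utils::pad_packed_sequence(packed);  // (tensor, lengths)
  (void)unpacked;
}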
@ -621,9 +619,9 @@ TEST_F(NNUtilsTest, PackPaddedSequence) {
}
auto padded = torch::cat(tensors_to_be_cat, 1);
std::vector<torch::Tensor> expected_data_vec;
for (const auto n : c10::irange(batch_sizes.size(0))) {
for (int64_t n = 0; n < batch_sizes.size(0); n++) {
int64_t batch_size = batch_sizes[n].item<int64_t>();
for (const auto i : c10::irange(batch_size)) {
for (int64_t i = 0; i < batch_size; i++) {
expected_data_vec.emplace_back(torch::arange(1., 6) + (i + 1) * 100 + 5 * n);
}
}
@ -633,7 +631,7 @@ TEST_F(NNUtilsTest, PackPaddedSequence) {
if (should_shuffle) {
// Shuffle the padded sequence to create an unsorted sequence
std::vector<int64_t> permutation;
for (const auto i : c10::irange(sorted_lengths.size())) {
for (int64_t i = 0; i < sorted_lengths.size(); i++) {
permutation.emplace_back(i);
}
std::shuffle(
@ -704,7 +702,7 @@ TEST_F(NNUtilsTest, PackPaddedSequence) {
if (batch_first) {
grad_output.transpose_(0, 1);
}
for (const auto i : c10::irange(lengths.size(0))) {
for (int64_t i = 0; i < lengths.size(0); i++) {
int64_t l = lengths[i].item<int64_t>();
ASSERT_TRUE(torch::allclose(
padded.grad().narrow(0, 0, l).select(1, i),