irange for size_t (#55320)

Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/55320

Test Plan: Sandcastle

Reviewed By: ngimel

Differential Revision: D27572577

fbshipit-source-id: 97710fd2bb1303006b05828a0d1343b0b59ccb03
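
The change is mechanical: hand-rolled size_t index loops are replaced with range-for loops over c10::irange(n), which yields the indices 0 through n-1 with the index type deduced from the bound. Below is a minimal sketch of the before/after pattern, assuming a PyTorch build where <c10/util/irange.h> is on the include path; the function name sum_sizes is only illustrative.

#include <c10/util/irange.h>
#include <cstddef>
#include <cstdint>
#include <vector>

int64_t sum_sizes(const std::vector<int64_t>& sizes) {
  int64_t total = 0;
  // Before: the index type is spelled out by hand and can drift from the
  // container's size_type, which is the inconsistency this commit cleans up.
  for (size_t i = 0; i < sizes.size(); i++) {
    total += sizes[i];
  }
  // After: the loop variable's type is deduced from sizes.size(), the bound is
  // evaluated once, and the loop body is unchanged.
  for (const auto i : c10::irange(sizes.size())) {
    total += sizes[i];
  }
  return total;
}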
Author: Richard Barnes
Date: 2021-06-03 01:03:11 -07:00
Committed by: Facebook GitHub Bot
Parent: f914ab193e
Commit: 3979cb0656
105 changed files with 437 additions and 331 deletions
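
For readers unfamiliar with the helper, c10::irange behaves like Python's range over integers. The stand-in below is an illustration only: integer_range and make_irange are hypothetical names, and the real helper in c10/util/irange.h is more general (for instance, it also accepts a start value). It shows why the replacement is a drop-in for an index loop.

#include <cstddef>

// Illustrative integer-range view; a sketch, not the c10 implementation.
template <typename T>
class integer_range {
 public:
  class iterator {
   public:
    explicit iterator(T value) : value_(value) {}
    T operator*() const { return value_; }
    iterator& operator++() { ++value_; return *this; }
    bool operator!=(const iterator& other) const { return value_ != other.value_; }
   private:
    T value_;
  };

  explicit integer_range(T end) : end_(end) {}
  iterator begin() const { return iterator(T(0)); }
  iterator end() const { return iterator(end_); }

 private:
  T end_;
};

// make_irange(v.size()) deduces T as size_t, so `const auto i` in a range-for
// is a size_t index running from 0 to v.size() - 1.
template <typename T>
integer_range<T> make_irange(T end) {
  return integer_range<T>(end);
}

The hunks that follow all apply this pattern; aside from adding #include <c10/util/irange.h> where needed, the only textual change in each hunk is the loop header.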


@ -181,7 +181,7 @@ std::vector<int64_t> ConvTransposeNdImpl<D, Derived>::_output_padding(
max_sizes.push_back(min_sizes[d] + (*stride)[d] - 1);
}
for (size_t i = 0; i < output_size_.value().size(); i++) {
for(const auto i : c10::irange(output_size_.value().size())) {
int64_t size = output_size_.value()[i];
int64_t min_size = min_sizes[i];
int64_t max_size = max_sizes[i];


@ -127,13 +127,13 @@ void RNNImplBase<Derived>::reset() {
layer_params.emplace_back(w_hr);
param_names.emplace_back("weight_hr_l{layer}{suffix}");
}
for (size_t i = 0; i < param_names.size(); i++) { // NOLINT(modernize-loop-convert)
for(const auto i : c10::irange(param_names.size())) { // NOLINT(modernize-loop-convert)
std::string x = std::regex_replace(param_names[i], std::regex("\\{layer\\}"), c10::str(layer));
x = std::regex_replace(x, std::regex("\\{suffix\\}"), c10::str(suffix));
param_names[i] = x;
}
for (size_t i = 0; i < param_names.size(); i++) {
for(const auto i : c10::irange(param_names.size())) {
auto name = param_names[i];
auto param = layer_params[i];
this->register_parameter(name, param);


@ -6,6 +6,7 @@
#include <torch/optim/serialize.h>
#include <ATen/ATen.h>
#include <c10/util/irange.h>
#include <functional>
@ -136,7 +137,7 @@ void Adagrad::load(serialize::InputArchive& archive) {
torch::optim::serialize(archive, "step_buffers", step_buffers);
// since there were no param_groups prior to version 1.5.0, assuming all tensors are now in one param_group
std::vector<Tensor> params = param_groups_.at(0).params();
for (size_t idx = 0; idx < params.size(); idx++) {
for(const auto idx : c10::irange(params.size())) {
auto state = std::make_unique<AdagradParamState>();
state->step(step_buffers[idx]);
state->sum(sum_buffers[idx]);


@ -6,6 +6,7 @@
#include <torch/utils.h>
#include <ATen/ATen.h>
#include <c10/util/irange.h>
#include <cmath>
#include <functional>
@ -161,7 +162,7 @@ void Adam::load(serialize::InputArchive& archive) {
torch::optim::serialize(archive, "max_exp_average_sq_buffers", max_exp_average_sq_buffers);
// since there were no param_groups prior to version 1.5.0, assuming all tensors are now in one param_group
std::vector<Tensor> params = param_groups_.at(0).params();
for (size_t idx = 0; idx < step_buffers.size(); idx++) {
for(const auto idx : c10::irange(step_buffers.size())) {
auto state = std::make_unique<AdamParamState>();
state->step(step_buffers.at(idx));
state->exp_avg(exp_average_buffers.at(idx));


@ -6,6 +6,7 @@
#include <torch/utils.h>
#include <ATen/ATen.h>
#include <c10/util/irange.h>
#include <cmath>
#include <functional>
@ -163,7 +164,7 @@ void AdamW::load(serialize::InputArchive& archive) {
torch::optim::serialize(archive, "max_exp_average_sq_buffers", max_exp_average_sq_buffers);
// since there were no param_groups prior to version 1.5.0, assuming all tensors are now in one param_group
std::vector<Tensor> params = param_groups_.at(0).params();
for (size_t idx = 0; idx < step_buffers.size(); idx++) {
for(const auto idx : c10::irange(step_buffers.size())) {
auto state = std::make_unique<AdamWParamState>();
state->step(step_buffers.at(idx));
state->exp_avg(exp_average_buffers.at(idx));


@ -59,7 +59,7 @@ void LBFGSOptions::set_lr(const double lr) {
template <typename T>
bool if_container_equal(T lhs, T rhs) {
if (!(lhs.size() == rhs.size())) return false;
for (size_t i = 0; i < lhs.size(); i++) {
for(const auto i : c10::irange(lhs.size())) {
if (!torch::equal(lhs.at(i), rhs.at(i))) return false;
}
return true;
@ -154,7 +154,7 @@ void LBFGS::_add_grad(const double step_size, const Tensor& update) {
void LBFGS::_set_param(const std::vector<Tensor>& params_data) {
auto& _params = param_groups_.at(0).params();
TORCH_INTERNAL_ASSERT(params_data.size() == _params.size());
for (size_t i = 0; i < _params.size(); i++) {
for(const auto i : c10::irange(_params.size())) {
_params.at(i).copy_(params_data.at(i));
}
}


@ -5,6 +5,7 @@
#include <torch/utils.h>
#include <ATen/ATen.h>
#include <c10/util/irange.h>
#include <functional>
@ -159,7 +160,7 @@ void RMSprop::load(serialize::InputArchive& archive) {
torch::optim::serialize(archive, "grad_average_buffers", grad_average_buffers);
// since there were no param_groups prior to version 1.5.0, assuming all tensors are now in one param_group
std::vector<Tensor> params = param_groups_.at(0).params();
for (size_t idx = 0; idx < square_average_buffers.size(); idx++) {
for(const auto idx : c10::irange(square_average_buffers.size())) {
auto state = std::make_unique<RMSpropParamState>();
state->square_avg(square_average_buffers[idx]);
if(idx < momentum_buffers.size()) {


@ -8,6 +8,7 @@
#include <torch/utils.h>
#include <ATen/ATen.h>
#include <c10/util/irange.h>
#include <functional>
@ -123,7 +124,7 @@ void SGD::load(serialize::InputArchive& archive) {
torch::optim::serialize(archive, "momentum_buffers", momentum_buffers);
// since there were no param_groups prior to version 1.5.0, assuming all tensors are now in one param_group
std::vector<Tensor> params = param_groups_.at(0).params();
for (size_t idx = 0; idx < momentum_buffers.size(); idx++) {
for(const auto idx : c10::irange(momentum_buffers.size())) {
auto state = std::make_unique<SGDParamState>();
state->momentum_buffer(momentum_buffers[idx]);
state_[c10::guts::to_string(params[idx].unsafeGetTensorImpl())] = std::move(state);


@ -339,7 +339,7 @@ Tensor permute_backwards(const Tensor & grad, IntArrayRef fwd_dims) {
// invert the permutation
auto ndims = fwd_dims.size();
std::vector<int64_t> dims(ndims);
for (size_t i = 0; i < ndims; i++) {
for(const auto i : c10::irange(ndims)) {
dims[at::maybe_wrap_dim(fwd_dims[i], ndims)] = i;
}
return grad.permute(dims);
@ -358,7 +358,7 @@ Tensor deg2rad_backward(const Tensor& grad) {
Tensor unsqueeze_multiple(const Tensor & t, IntArrayRef dim, size_t n_dims) {
auto dims_to_unsqueeze = at::dim_list_to_bitset(dim, n_dims);
Tensor res = t;
for (size_t i = 0; i < n_dims; i++){
for(const auto i : c10::irange(n_dims)){
if (dims_to_unsqueeze[i]) {
res = res.unsqueeze(i);
}


@ -6,6 +6,8 @@
#include <torch/csrc/autograd/function.h>
#include <torch/csrc/autograd/functions/basic_ops.h>
#include <c10/util/irange.h>
namespace torch {
namespace autograd {
@ -74,7 +76,7 @@ variable_list run_backward(
size_t num_tensors = outputs.size();
edge_list roots;
roots.reserve(num_tensors);
for (size_t i = 0; i < num_tensors; i++) {
for(const auto i : c10::irange(num_tensors)) {
const Variable& output = outputs[i];
auto gradient_edge = impl::gradient_edge(output);
TORCH_CHECK(


@ -15,6 +15,7 @@
#include <c10/core/Stream.h>
#include <c10/core/Event.h>
#include <c10/core/DeviceGuard.h>
#include <c10/util/irange.h>
#include <c10/util/Optional.h>
#include <c10/util/ThreadLocal.h>
#include <c10/core/StreamGuard.h>
@ -602,7 +603,7 @@ void set_device(int device) {
// Don't use DeviceGuard here because its destructor may be called before the
// device is reset. This is fine because the device is thread local.
if (device != CPU_DEVICE) {
for (size_t i = 0; i < static_cast<size_t>(c10::DeviceType::COMPILE_TIME_MAX_DEVICE_TYPES); i++) {
for(const auto i : c10::irange(static_cast<size_t>(c10::DeviceType::COMPILE_TIME_MAX_DEVICE_TYPES))) {
auto* impl = c10::impl::device_guard_impl_registry[i].load();
if (impl && device < impl->deviceCount()) {
impl->setDevice(at::Device(static_cast<c10::DeviceType>(i), device));
@ -622,7 +623,7 @@ void validate_outputs(
ss << edges.size() << ", but got " << grads.size();
AT_ERROR(format_error(ss.str()));
}
for (size_t i = 0; i < grads.size(); i++) {
for(const auto i : c10::irange(grads.size())) {
const auto& edge = edges[i];
if (!edge.is_valid()) continue;


@ -6,6 +6,7 @@
#include <torch/csrc/autograd/variable.h>
#include <ATen/ATen.h>
#include <c10/util/irange.h>
#include <cstddef>
#include <memory>
@ -92,7 +93,7 @@ auto CopySlices::apply(variable_list&& inputs) -> variable_list {
auto res = (*fn)({ grad_slice.clone(at::MemoryFormat::Contiguous) });
variable_list grad_inputs(num_outputs());
for (size_t i = 0; i < res.size(); i++) {
for(const auto i : c10::irange(res.size())) {
if (should_compute_output(i)) {
AT_ASSERT(res[i].defined());
if (i == 0) {


@ -1,7 +1,6 @@
#include <torch/csrc/autograd/python_hook.h>
#include <sstream>
#include <c10/util/irange.h>
#include <pybind11/pybind11.h>
#include <torch/csrc/THP.h>
#include <torch/csrc/autograd/python_variable.h>
@ -9,6 +8,8 @@
#include <torch/csrc/utils/python_strings.h>
#include <torch/csrc/Exceptions.h>
#include <sstream>
using torch::autograd::variable_list;
using torch::autograd::Variable;
@ -113,7 +114,7 @@ static PyObject *wrap_variables(const variable_list& c_variables)
static variable_list unwrap_variables(PyObject* py_variables) {
variable_list results(PyTuple_GET_SIZE(py_variables));
for (size_t i = 0; i < results.size(); i++) {
for(const auto i : c10::irange(results.size())) {
PyObject* item = PyTuple_GET_ITEM(py_variables, i);
if (item == Py_None) {
continue;


@ -11,6 +11,7 @@
#include <ATen/WrapDimUtils.h>
#include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDAGuard.h>
#include <c10/util/irange.h>
#include <c10/util/Optional.h>
#include <torch/csrc/autograd/variable.h>
@ -73,7 +74,7 @@ static inline std::vector<Tensor>& _broadcast_out_impl(
std::vector<Tensor>& broadcast_out(
const Tensor& tensor,
std::vector<Tensor>& out_tensors) {
for (size_t i = 0; i < out_tensors.size(); i++) {
for(const auto i : c10::irange(out_tensors.size())) {
TORCH_CHECK(
out_tensors[i].is_cuda(),
"Expected all output tensors to be CUDA tensors, but output tensor at index ",
@ -240,7 +241,7 @@ std::vector<at::Tensor>& scatter_out(
int64_t total_size = 0;
std::vector<int64_t> chunk_sizes;
chunk_sizes.reserve(out_tensors.size());
for (size_t i = 0; i < out_tensors.size(); i++) {
for(const auto i : c10::irange(out_tensors.size())) {
TORCH_CHECK(
out_tensors[i].is_cuda(),
"Expected all output tensors to be CUDA tensors, but output tensor at index ",
@ -283,7 +284,7 @@ std::vector<at::Tensor>& scatter_out(
auto chunks =
tensor.split_with_sizes(/*split_sizes=*/chunk_sizes, /*dim=*/dim);
at::cuda::OptionalCUDAStreamGuard cuda_guard;
for (size_t i = 0; i < chunks.size(); i++) {
for(const auto i : c10::irange(chunks.size())) {
if (i < (streams ? streams->size() : 0U) && (*streams)[i]) {
const auto device_index =
static_cast<int16_t>(out_tensors[i].get_device());
@ -379,7 +380,7 @@ static inline at::Tensor& _gather_out_impl(
}
auto chunks =
out_tensor.split_with_sizes(/*split_sizes=*/chunk_sizes, /*dim=*/dim);
for (size_t i = 0; i < tensors.size(); i++) {
for(const auto i : c10::irange(tensors.size())) {
chunks[i].copy_(tensors[i], /*non_blocking=*/out_tensor.is_cuda());
}
return out_tensor;
@ -395,7 +396,7 @@ at::Tensor& gather_out(
const auto first_size = first.sizes();
dim = at::maybe_wrap_dim(dim, first);
std::vector<int64_t> expected_size(first_size.begin(), first_size.end());
for (size_t i = 0; i < tensors.size(); i++) {
for(const auto i : c10::irange(tensors.size())) {
const auto& tensor = tensors[i];
TORCH_CHECK(
tensor.is_cuda(),
@ -450,7 +451,7 @@ at::Tensor gather(
dim = at::maybe_wrap_dim(dim, first);
std::vector<int64_t> expected_size(first_size.begin(), first_size.end());
auto memory_format = first.suggest_memory_format();
for (size_t i = 0; i < tensors.size(); i++) {
for(const auto i : c10::irange(tensors.size())) {
const auto& tensor = tensors[i];
TORCH_CHECK(
tensor.is_cuda(),


@ -258,7 +258,7 @@ void check_inputs(
int64_t numel = inputs[0].numel();
auto dtype = inputs[0].scalar_type();
for (size_t i = 0; i < len; i++) {
for(const auto i : c10::irange(len)) {
auto input = inputs[i];
auto output = outputs[i];
@ -289,7 +289,7 @@ void check_inputs(
int64_t numel = inputs[0].numel();
auto dtype = inputs[0].scalar_type();
for (size_t i = 0; i < len; i++) {
for(const auto i : c10::irange(len)) {
auto input = inputs[i];
check_tensor(
@ -465,7 +465,7 @@ void reduce(
AutoNcclGroup nccl_group_guard;
at::cuda::OptionalCUDAGuard device_guard;
for (size_t i = 0; i < len; i++) {
for(const auto i : c10::irange(len)) {
int device = inputs[i].device().index();
device_guard.set_index(device);
// Default to the current stream
@ -517,7 +517,7 @@ void all_reduce(
AutoNcclGroup nccl_group_guard;
at::cuda::OptionalCUDAGuard device_guard;
for (size_t i = 0; i < len; i++) {
for(const auto i : c10::irange(len)) {
int device = inputs[i].device().index();
device_guard.set_index(device);
// Default to the current stream
@ -559,7 +559,7 @@ void reduce_scatter(
AutoNcclGroup nccl_group_guard;
at::cuda::OptionalCUDAGuard device_guard;
for (size_t i = 0; i < len; i++) {
for(const auto i : c10::irange(len)) {
int device = inputs[i].device().index();
device_guard.set_index(device);
// Default to the current stream
@ -600,7 +600,7 @@ void all_gather(
AutoNcclGroup nccl_group_guard;
at::cuda::OptionalCUDAGuard device_guard;
for (size_t i = 0; i < len; i++) {
for(const auto i : c10::irange(len)) {
int device = inputs[i].device().index();
device_guard.set_index(device);
// Default to the current stream
@ -728,7 +728,7 @@ void all2all(std::vector<at::Tensor>& outputTensors,
auto comm = to_nccl_comm(_comm);
NCCL_CHECK(ncclGroupStart());
for (size_t r = 0; r < outputTensors.size(); r++) {
for(const auto r : c10::irange(outputTensors.size())) {
at::Tensor &input = inputTensors[r];
at::Tensor &output = outputTensors[r];
if (input.numel() != 0) {


@ -1,8 +1,11 @@
#include <ATen/Parallel.h>
#include <gtest/gtest.h>
#include <c10/util/irange.h>
#include <torch/csrc/deploy/deploy.h>
#include <torch/script.h>
#include <torch/torch.h>
#include <future>
#include <iostream>
#include <string>
@ -82,7 +85,7 @@ TEST(TorchpyTest, MultiSerialSimpleModel) {
size_t ninterp = 3;
std::vector<at::Tensor> outputs;
for (size_t i = 0; i < ninterp; i++) {
for (const auto i : c10::irange(ninterp)) {
outputs.push_back(model({input.alias()}).toTensor());
}
@ -90,7 +93,7 @@ TEST(TorchpyTest, MultiSerialSimpleModel) {
auto ref_output = ref_model.forward({input.alias()}).toTensor();
// Compare all to reference
for (size_t i = 0; i < ninterp; i++) {
for (const auto i : c10::irange(ninterp)) {
ASSERT_TRUE(ref_output.equal(outputs[i]));
}
@ -121,7 +124,7 @@ TEST(TorchpyTest, ThreadedSimpleModel) {
std::vector<at::Tensor> outputs;
std::vector<std::future<at::Tensor>> futures;
for (size_t i = 0; i < nthreads; i++) {
for (const auto i : c10::irange(nthreads)) {
futures.push_back(std::async(std::launch::async, [&model]() {
auto input = torch::ones({10, 20});
for (int i = 0; i < 100; ++i) {
@ -131,7 +134,7 @@ TEST(TorchpyTest, ThreadedSimpleModel) {
return result;
}));
}
for (size_t i = 0; i < nthreads; i++) {
for (const auto i : c10::irange(nthreads)) {
outputs.push_back(futures[i].get());
}
@ -139,7 +142,7 @@ TEST(TorchpyTest, ThreadedSimpleModel) {
auto ref_output = ref_model.forward({input.alias()}).toTensor();
// Compare all to reference
for (size_t i = 0; i < nthreads; i++) {
for (const auto i : c10::irange(nthreads)) {
ASSERT_TRUE(ref_output.equal(outputs[i]));
}
}


@ -2,6 +2,7 @@
#include <ATen/Parallel.h>
#include <c10/core/Event.h>
#include <c10/util/irange.h>
#include <torch/csrc/autograd/functions/accumulate_grad.h>
#include <torch/csrc/autograd/input_buffer.h>
#include <torch/csrc/distributed/autograd/context/container.h>
@ -97,7 +98,7 @@ void DistEngine::globalCpuThread(
variables =
InputBuffer::variables(std::move(task.inputs_))]() mutable {
InputBuffer inputs(variables.size());
for (size_t i = 0; i < variables.size(); i++) {
for(const auto i : c10::irange(variables.size())) {
inputs.add(i, std::move(variables[i]), c10::nullopt, c10::nullopt);
}
execute_graph_task_until_ready_queue_empty(


@ -1,5 +1,6 @@
#include <torch/csrc/distributed/autograd/functions/recvrpc_backward.h>
#include <ATen/core/functional.h>
#include <c10/util/irange.h>
#include <torch/csrc/distributed/autograd/rpc_messages/propagate_gradients_req.h>
#include <torch/csrc/distributed/rpc/rpc_agent.h>
@ -23,7 +24,7 @@ RecvRpcBackward::RecvRpcBackward(
variable_list RecvRpcBackward::apply(variable_list&& grads) {
std::vector<Variable> outputGrads;
for (size_t i = 0; i < grads.size(); i++) {
for(const auto i : c10::irange(grads.size())) {
const auto& grad = grads[i];
if (grad.defined()) {
outputGrads.emplace_back(grad);


@ -2,6 +2,8 @@
#include <torch/csrc/distributed/rpc/rpc_agent.h>
#include <torch/csrc/jit/serialization/pickle.h>
#include <c10/util/irange.h>
namespace torch {
namespace distributed {
namespace autograd {
@ -74,7 +76,7 @@ std::unique_ptr<PropagateGradientsReq> PropagateGradientsReq::fromMessage(
// Retrieve the gradient tensors.
std::vector<Variable> grads(tupleElements.size());
for (size_t i = 0; i < tupleElements.size(); i++) {
for(const auto i : c10::irange(tupleElements.size())) {
grads[i] = tupleElements[i].toTensor();
}


@ -23,6 +23,8 @@
#include <torch/csrc/jit/serialization/pickler.h>
#include <torch/csrc/jit/serialization/unpickler.h>
#include <c10/util/irange.h>
using namespace torch::autograd::profiler;
namespace torch {
@ -377,7 +379,7 @@ std::string wireSerialize(
pickler.stop();
tensorData = pickler.tensorData();
entries.push_back({kMeta, metaEntry.data(), metaEntry.size()});
for (size_t i = 0; i < tensorData.size(); i++) {
for (const auto i : c10::irange(tensorData.size())) {
// Construct WritableTensorData for each tensor in the pickler tensorData
// Since tensorData is in function scope, and getWritableTensorData just
// record the tensors, the data() pointers stay valid for CPU tensors


@ -1,6 +1,7 @@
#include <torch/csrc/jit/codegen/cuda/arith.h>
#include <c10/util/Exception.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/codegen/cuda/ir_all_nodes.h>
#include <torch/csrc/jit/codegen/cuda/type.h>
@ -61,7 +62,7 @@ TensorView* newOutputTV(const std::vector<Val*>& vals, DataType dtype) {
dom.size(),
" dimensions but expected ",
out_domain.size());
for (size_t i = 0; i < dom.size(); i++) {
for (const auto i : c10::irange(dom.size())) {
if (out_domain[i] != nullptr)
continue;
if (dom[i]->isBroadcast())
@ -69,7 +70,7 @@ TensorView* newOutputTV(const std::vector<Val*>& vals, DataType dtype) {
out_domain[i] = new IterDomain(dom[i]->start(), dom[i]->extent());
}
}
for (size_t dim_i = 0; dim_i < out_domain.size(); dim_i++) {
for (const auto dim_i : c10::irange(out_domain.size())) {
if (out_domain[dim_i] == nullptr) {
IterType itype = IterType::BroadcastWithoutStride;
for (const auto tv : tvs) {
@ -103,7 +104,7 @@ std::vector<Val*> maybeBroadcast(const std::vector<Val*>& vals) {
}
}
for (size_t i = 0; i < vals.size(); i++) {
for (const auto i : c10::irange(vals.size())) {
if (vals[i]->getValType().value() == ValType::TensorView) {
auto tv = vals[i]->as<TensorView>();
size_t tv_dims = TensorDomain::noReductions(tv->getRootDomain()).size();
@ -413,7 +414,7 @@ static TensorView* newForReduction(
"Error setting up reduction, reduction axis is outside nDims. Keep in mind reductions are relative to root domains, not modified views.");
auto axis_iter = axes_set.begin();
for (size_t dim = 0; dim < orig_domain.size(); dim++) {
for (const auto dim : c10::irange(orig_domain.size())) {
bool isReduction = false;
if (axis_iter != axes_set.end() && *axis_iter == dim) {
isReduction = true;


@ -6,6 +6,8 @@
#include <torch/csrc/jit/codegen/cuda/transform_iter.h>
#include <torch/csrc/jit/codegen/cuda/transform_replay.h>
#include <c10/util/irange.h>
namespace torch {
namespace jit {
namespace fuser {
@ -135,7 +137,7 @@ T1 tvIterable(const T2& val_iterable) {
std::deque<std::deque<TensorView*>> tvChains(
std::deque<std::deque<Val*>> val_chains) {
std::deque<std::deque<TensorView*>> tv_chains(val_chains.size());
for (size_t i = 0; i < val_chains.size(); i++) {
for (const auto i : c10::irange(val_chains.size())) {
tv_chains[i] = tvIterable<std::deque<TensorView*>>(val_chains[i]);
}
return tv_chains;


@ -14,6 +14,7 @@
#include <c10/core/DeviceGuard.h>
#include <c10/cuda/CUDAFunctions.h>
#include <c10/cuda/CUDAStream.h>
#include <c10/util/irange.h>
#include <cstdlib>
@ -413,7 +414,7 @@ std::vector<at::Tensor> FusionExecutor::runFusion(
at::AutoDispatchBelowADInplaceOrView non_variable_type_mode;
// take the short-cut for launch if we see a recorded input set again;
launch_params = executor_entry->launch_params;
for (size_t i = 0; i < executor_entry->output_sizes.size(); i++) {
for (const auto i : c10::irange(executor_entry->output_sizes.size())) {
alloced_outputs.push_back(at::native::empty_cuda(
executor_entry->output_sizes[i],
executor_entry->output_types[i],
@ -421,7 +422,8 @@ std::vector<at::Tensor> FusionExecutor::runFusion(
options_.device,
c10::nullopt));
}
for (size_t i = 0; i < executor_entry->empty_buffer_sizes.size(); i++) {
for (const auto i :
c10::irange(executor_entry->empty_buffer_sizes.size())) {
global_buffers.empty_buffers.push_back(at::native::empty_cuda(
executor_entry->empty_buffer_sizes[i],
executor_entry->empty_buffer_types[i],
@ -430,7 +432,7 @@ std::vector<at::Tensor> FusionExecutor::runFusion(
c10::nullopt));
}
}
for (size_t i = 0; i < executor_entry->zero_buffer_sizes.size(); i++) {
for (const auto i : c10::irange(executor_entry->zero_buffer_sizes.size())) {
auto tensor_options = at::TensorOptions()
.dtype(executor_entry->zero_buffer_types[i])
.device(options_.device);


@ -79,7 +79,7 @@ void KernelArgumentHolder::push(const uint64_t& val) {
void** KernelArgumentHolder::getBuffer() {
if (changed_) {
void_ptrs_ = std::vector<void*>(arguments_.size(), nullptr);
for (size_t i = 0; i < arguments_.size(); i++) {
for (const auto i : c10::irange(arguments_.size())) {
void_ptrs_[i] = static_cast<void*>(arguments_[i]->arg());
}
changed_ = false;


@ -3,6 +3,7 @@
#include <ATen/cuda/nvrtc_stub/ATenNVRTC.h>
#include <c10/cuda/CUDACachingAllocator.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/codegen/cuda/executor_utils.h>
#include <torch/csrc/jit/codegen/cuda/instrumentation.h>
@ -230,7 +231,7 @@ StatefulExpressionEvaluator statefulBindInputs(
// This should probably move to EvaluationContext as we may want to bind
// input values frequently. Bind fusion input values to runtime values.
for (size_t i = 0; i < fusion->inputs().size(); i++) {
for (const auto i : c10::irange(fusion->inputs().size())) {
if (fusion->inputs()[i]->getValType() == ValType::TensorView) {
TensorView* cg_tensor = fusion->inputs()[i]->as<TensorView>();
@ -244,7 +245,7 @@ StatefulExpressionEvaluator statefulBindInputs(
aten_tensor.ndimension() == (int64_t)root_dom.size(),
"Something went wrong configuring launch. Inputs no longer match.");
for (size_t dim = 0; dim < root_dom.size(); dim++) {
for (const auto dim : c10::irange(root_dom.size())) {
evaluator.safeBind(
root_dom[dim]->extent(), aten_tensor.sizes()[dim], lower);
}


@ -1,6 +1,7 @@
#include <torch/csrc/jit/passes/cuda_graph_fuser.h>
#include <c10/util/Exception.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/codegen/cuda/instrumentation.h>
#include <torch/csrc/jit/codegen/cuda/interface.h>
#include <torch/csrc/jit/codegen/cuda/partition.h>
@ -509,7 +510,7 @@ struct CudaGraphFuser {
WithInsertPoint guard(bchunk->next());
std::vector<Value*> producer_chunk_outputs;
for (size_t i = 0; i < nchunks; i++) {
for (const auto i : c10::irange(nchunks)) {
producer_chunk_outputs.push_back(
bchunk->output(nchunks * producer_index + i));
}
@ -579,7 +580,7 @@ struct CudaGraphFuser {
}
bchunk->removeInput(producer_index);
for (size_t i = 0; i < nchunks; i++) {
for (const auto i : c10::irange(nchunks)) {
bchunk->eraseOutput(nchunks * producer_index);
}


@ -1,6 +1,7 @@
#include <torch/csrc/jit/codegen/cuda/index_compute.h>
#include <c10/util/Exception.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/codegen/cuda/arith.h>
#include <torch/csrc/jit/codegen/cuda/instrumentation.h>
#include <torch/csrc/jit/codegen/cuda/ir_all_nodes.h>
@ -181,7 +182,7 @@ class ContigIDs : public OptInDispatch {
" != ",
root_contiguity_.size());
for (size_t i = 0; i < root_domain_.size(); i++) {
for (const auto i : c10::irange(root_domain_.size())) {
if (root_contiguity_[i]) {
auto kir_root_domain_i =
GpuLower::lowerValue(root_domain_[i])->as<kir::IterDomain>();
@ -794,7 +795,7 @@ kir::TensorIndex* Index::getGlobalProducerIndex(
// Global striding
int64_t stride_i = 0;
std::vector<Val*> strided_inds;
for (size_t i = 0; i < root_dom.size(); i++) {
for (const auto i : c10::irange(root_dom.size())) {
if (root_dom[i]->isReduction() ||
root_dom[i]->getIterType() == IterType::BroadcastWithoutStride) {
continue;
@ -918,7 +919,7 @@ kir::TensorIndex* Index::getProducerIndex_impl(
std::vector<Val*> strided_inds;
for (size_t i = 0; i < root_dom.size(); i++) {
for (const auto i : c10::irange(root_dom.size())) {
if (root_dom[i]->isReduction() || root_dom[i]->isBroadcast()) {
continue;
}
@ -1023,7 +1024,7 @@ kir::TensorIndex* Index::getGlobalConsumerIndex(
int64_t stride_i = 0;
std::vector<Val*> strided_inds;
for (size_t i = 0; i < root_dom.size(); i++) {
for (const auto i : c10::irange(root_dom.size())) {
if (root_dom[i]->isReduction() ||
root_dom[i]->getIterType() == IterType::BroadcastWithoutStride) {
continue;
@ -1089,7 +1090,7 @@ kir::TensorIndex* Index::getConsumerIndex_impl(
auto root_dom = consumer_tv->getMaybeRFactorDomain();
std::vector<Val*> strided_inds;
for (size_t i = 0; i < root_dom.size(); i++) {
for (const auto i : c10::irange(root_dom.size())) {
if (root_dom[i]->isReduction() || root_dom[i]->isBroadcast()) {
continue;
}
@ -1267,7 +1268,7 @@ std::pair<std::vector<Val*>, bool> Index::getConsumerRootPredIndices(
: consumer_tv->getRootDomain();
std::vector<Val*> root_inds(root_dom.size(), ir_builder.create<kir::Int>(0));
for (size_t i = 0; i < root_dom.size(); i++) {
for (const auto i : c10::irange(root_dom.size())) {
if (root_dom[i]->isBroadcast()) {
continue;
}


@ -1,6 +1,7 @@
#include <torch/csrc/jit/codegen/cuda/interface.h>
#include <ATen/core/dispatch/OperatorOptions.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/runtime/custom_operator.h>
#include <torch/csrc/jit/runtime/register_ops_utils.h>
@ -103,7 +104,7 @@ bool complyWith(
const auto& t_sizes = tensor.sizes();
const auto& t_strides = tensor.strides();
int inner_dim = -1;
for (size_t j = 0; j < *guard_tensor_type->dim(); j++) {
for (const auto j : c10::irange(*guard_tensor_type->dim())) {
// check b. for stride check, we go along dimensions from fastest stride to
// slowest stride
int sorted_index = stride_properties[j]->stride_index_
@ -210,7 +211,7 @@ RegisterOperators reg_guard({
return;
}
for (size_t i = 0; i < num_inputs; i++) {
for (const auto i : c10::irange(num_inputs)) {
const c10::TensorTypePtr& guard_tensor_type =
types[i]->cast<TensorType>();


@ -8,6 +8,7 @@
#include <torch/csrc/jit/ir/ir.h>
#include <c10/util/Exception.h>
#include <c10/util/irange.h>
#include <iostream>
#include <stdexcept>
@ -227,7 +228,7 @@ bool Expr::sameAs(const Expr* const other) const {
if (inputs().size() != other->inputs().size() ||
outputs().size() != other->outputs().size())
return false;
for (size_t i = 0; i < inputs().size(); i++) {
for (const auto i : c10::irange(inputs().size())) {
if (!input(i)->sameAs(other->input(i)))
return false;
}


@ -5,6 +5,8 @@
#include <torch/csrc/jit/codegen/cuda/ir_all_nodes.h>
#include <torch/csrc/jit/codegen/cuda/lower_utils.h>
#include <c10/util/irange.h>
namespace torch {
namespace jit {
namespace fuser {
@ -51,7 +53,7 @@ void IrPrinter::handle(const TensorDomain* td) {
return;
}
os_ << "[ ";
for (size_t i = 0; i < td->nDims(); i++) {
for (const auto i : c10::irange(td->nDims())) {
handle(td->axis(i));
if (i != td->nDims() - 1)
os_ << ", ";


@ -1,5 +1,3 @@
#include <c10/util/irange.h>
#include <torch/csrc/jit/codegen/cuda/arith.h>
#include <torch/csrc/jit/codegen/cuda/ir_cloner.h>
#include <torch/csrc/jit/codegen/cuda/ir_interface_nodes.h>
@ -9,6 +7,8 @@
#include <torch/csrc/jit/codegen/cuda/transform_iter.h>
#include <torch/csrc/jit/codegen/cuda/transform_rfactor.h>
#include <c10/util/irange.h>
#include <sstream>
namespace torch {
@ -1239,7 +1239,7 @@ void ConcretizeDomain::concretizePwOp(Expr* e) {
TensorDomain::noReductions(i->getMaybeRFactorDomain());
TORCH_INTERNAL_ASSERT(ii.size() == io.size());
for (size_t it = 0; it < ii.size(); it++) {
for (const auto it : c10::irange(ii.size())) {
if (!canConcretize(io[it]))
continue;
@ -1338,7 +1338,7 @@ class ProveValEqual : private IterVisitor {
std::vector<IterDomain*> ii =
TensorDomain::noReductions(i->getMaybeRFactorDomain());
for (size_t it = 0; it < ii.size(); it++)
for (const auto it : c10::irange(ii.size()))
if (cd_.canConcretize(ii[it]) && cd_.canConcretize(io[it]))
proveId(cd_.concretized(ii[it]), cd_.concretized(io[it]));
}


@ -408,20 +408,20 @@ std::vector<at::Tensor> FusionExecutorCache::runFusionWithInputs(
bool GraphCache::requiresPermutation() {
const size_t input_rank = input_permutation_.size();
for (size_t i = 0; i < input_rank; i++) {
for (const auto i : c10::irange(input_rank)) {
if (input_permutation_[i] != (long)i) {
return true;
}
}
// Check if output agrees
const size_t pw_output_rank = pw_output_permutation_.size();
for (size_t i = 0; i < pw_output_rank; i++) {
for (const auto i : c10::irange(pw_output_rank)) {
TORCH_INTERNAL_ASSERT(
pw_output_permutation_[i] == (long)i,
"permutation of output and input is not consistent");
}
const size_t reduction_output_rank = reduction_output_permutation_.size();
for (size_t i = 0; i < reduction_output_rank; i++) {
for (const auto i : c10::irange(reduction_output_rank)) {
TORCH_INTERNAL_ASSERT(
reduction_output_permutation_[i] == (long)i,
"permutation of output and input is not consistent");
@ -505,7 +505,7 @@ void GraphCache::createFusion(const std::shared_ptr<Graph>& graph) {
std::vector<int64_t> adjusted_reduction_axes;
for (const auto dim : dims_list->vec()) {
// adjust reduction axis to be the permuted axis;
for (size_t j = 0; j < input_permutation_.size(); j++) {
for (const auto j : c10::irange(input_permutation_.size())) {
// follow the permutation to resolve the new reduction axes;
if (input_permutation_[j] == dim) {
adjusted_reduction_axes.emplace_back(j);


@ -7,6 +7,8 @@
#include <torch/csrc/jit/codegen/cuda/lower2device.h>
#include <torch/csrc/jit/codegen/cuda/lower_utils.h>
#include <c10/util/irange.h>
namespace torch {
namespace jit {
namespace fuser {
@ -103,7 +105,7 @@ void avoidRedundantWritesToSmem(
TensorView* out_tv,
ir_utils::ParallelTypeBitmap& pred) {
if (out_tv->getMemoryType() == MemoryType::Shared) {
for (size_t i = 0; i < out_tv->nDims(); i++) {
for (const auto i : c10::irange(out_tv->nDims())) {
auto id = out_tv->getComputeAtAxis(i).first;
if (out_tv->axis(i)->isBroadcast() && id->isThreadDim()) {
pred.set(id->getParallelType(), true);
@ -159,7 +161,7 @@ void ThreadPredicateMap::updateBitSet(Expr* expr) {
}
// Validate the combination of ptypes, reductions, bcasts
for (size_t i = 0; i < ir_utils::ParallelTypeBitmap::num_p_type; i++) {
for (const auto i : c10::irange(ir_utils::ParallelTypeBitmap::num_p_type)) {
if (input_reductions[i]) {
if (id_ptypes[i]) {
TORCH_INTERNAL_ASSERT(


@ -10,6 +10,8 @@
#include <torch/csrc/jit/codegen/cuda/lower_utils.h>
#include <torch/csrc/jit/codegen/cuda/transform_iter.h>
#include <c10/util/irange.h>
namespace torch {
namespace jit {
namespace fuser {
@ -43,7 +45,7 @@ std::vector<kir::Bool*> PredicateCompute::computePredicates(
std::vector<kir::Bool*> preds(root.size(), true_bool);
Val* extent = nullptr;
for (size_t i = 0; i < indices.size(); i++) {
for (const auto i : c10::irange(indices.size())) {
const bool zero_ind = indices[i]->isZeroInt();
const bool simple_ind = indices[i]->getOrigin() == nullptr;
@ -257,7 +259,7 @@ void UnrollPredicate::predicateOn(Expr* tv_expr) {
all_preds.size() == root_dom.size(),
"Predicates should be produced for every dimension, even if it's simply set as true.");
for (size_t i = 0; i < all_preds.size(); i++) {
for (const auto i : c10::irange(all_preds.size())) {
if (all_preds[i]->isConst() && all_preds[i]->value().value()) {
continue;
}


@ -10,6 +10,7 @@
#include <torch/csrc/jit/codegen/cuda/parser.h>
#include <ATen/cuda/CUDAContext.h>
#include <c10/util/irange.h>
namespace torch {
namespace jit {
@ -23,7 +24,7 @@ namespace {
std::vector<int> reductionAxes(TensorView* tv) {
size_t n_dims = tv->nDims();
std::vector<int> reduction_axes;
for (size_t i = 0; i < n_dims; i++) {
for (const auto i : c10::irange(n_dims)) {
if (tv->axis(i)->isReduction()) {
reduction_axes.emplace_back(i);
}


@ -1,5 +1,6 @@
#include <torch/csrc/jit/codegen/cuda/transform_iter.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/codegen/cuda/ir_utils.h>
namespace torch {
@ -263,7 +264,7 @@ BestEffortReplay::BestEffortReplay(
std::vector<IterDomain*>(t_inps.size(), nullptr);
// Map t_expr inputs to replay domain directly
for (size_t t_i = 0; t_i < t_inps.size(); t_i++) {
for (const auto t_i : c10::irange(t_inps.size())) {
// There might not be a mapping, that could be okay.
auto it = id_map_.find(t_inps[t_i]);
if (it != id_map_.end())
@ -382,7 +383,7 @@ BestEffortReplay::BestEffortReplay(
}
// Add outputs to map.
for (size_t i = 0; i < t_expr->outputs().size(); i++) {
for (const auto i : c10::irange(t_expr->outputs().size())) {
auto t_out = t_expr->output(i);
auto r_out = r_expr->output(i);
if (t_out->getValType() == ValType::IterDomain &&
@ -420,7 +421,7 @@ int BestEffortReplay::findFirstMismatchedID(
BestEffortReplay ber(td2->domain(), td1->domain(), id_map);
for (size_t i = 0; i < td1->domain().size(); i++) {
for (const auto i : c10::irange(td1->domain().size())) {
if (ber.getReplay().find(td1->axis(i)) == ber.getReplay().end()) {
return i;
}


@ -210,7 +210,7 @@ std::shared_ptr<FusedKernel> compileKernel(
auto graph = spec.graph()->copy();
for (size_t i = 0; i < input_desc.size(); i++) {
for (const auto i : c10::irange(input_desc.size())) {
const auto& desc = input_desc[i];
// TODO: can't get rid of this use of TensorType


@ -1,4 +1,6 @@
#include <torch/csrc/jit/frontend/concrete_module_type.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/python/pybind_utils.h>
namespace torch {
@ -56,7 +58,7 @@ std::shared_ptr<ConcreteModuleType> ConcreteModuleType::fromJitType(
// Populate the builder metadata from the JIT type. This is to ensure
// ConcreteModuleTypes produced from Python and ones produced from a JIT
// type directly behave the same to the rest of the system.
for (size_t i = 0; i < classType->numAttributes(); i++) {
for (const auto i : c10::irange(classType->numAttributes())) {
const auto& attrName = classType->getAttributeName(i);
const auto& attrType = classType->getAttribute(i);
if (attrType->is_module()) {
@ -70,7 +72,7 @@ std::shared_ptr<ConcreteModuleType> ConcreteModuleType::fromJitType(
}
}
for (size_t i = 0; i < classType->numConstants(); i++) {
for (const auto i : c10::irange(classType->numConstants())) {
builder.addConstant(
classType->getConstantName(i), classType->getConstant(i));
}


@ -2,6 +2,7 @@
#include <c10/util/Exception.h>
#include <c10/util/StringUtil.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/api/function_impl.h>
#include <torch/csrc/jit/frontend/canonicalize_modified_loop.h>
#include <torch/csrc/jit/frontend/convert_to_ssa.h>
@ -3985,7 +3986,7 @@ struct to_ir {
rdim =
handle_indexing(subscript_expr, rev_idx, rdim, /*is_reverse=*/true);
}
for (size_t i = 0; i < exprs.size(); i++) {
for (const auto i : c10::irange(exprs.size())) {
if (!exprs[i].has_value()) {
if (subscript_exprs[i].kind() == TK_SLICE_EXPR) {
sliceable = emitSlice(
@ -4451,7 +4452,7 @@ std::vector<Function*> CompilationUnit::define(
this->register_function(std::move(fn));
};
for (size_t i = 0; i < properties.size(); i++) {
for (const auto i : c10::irange(properties.size())) {
PropertyPair property_fns = define_property(
prefix,
properties[i],
@ -4470,7 +4471,7 @@ std::vector<Function*> CompilationUnit::define(
}
}
for (size_t i = 0; i < definitions.size(); i++) {
for (const auto i : c10::irange(definitions.size())) {
auto fn = define(
prefix,
definitions[i],
@ -4549,7 +4550,7 @@ void CompilationUnit::define_hooks(
};
// define hooks
for (size_t i = 0; i < hookDefs.size(); i++) {
for (const auto i : c10::irange(hookDefs.size())) {
// check to see if already defined this hook
auto existing_fn = check_collisions(hookDefs[i]);
if (existing_fn != nullptr) {
@ -4576,7 +4577,7 @@ void CompilationUnit::define_hooks(
}
// define pre_hooks
for (size_t i = 0; i < preHookDefs.size(); i++) {
for (const auto i : c10::irange(preHookDefs.size())) {
// check to see if already defined this hook
auto existing_fn = check_collisions(preHookDefs[i]);
if (existing_fn != nullptr) {


@ -1,5 +1,6 @@
#include <torch/csrc/jit/frontend/sugared_value.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/frontend/schema_matching.h>
#include <torch/csrc/jit/frontend/tree_views.h>
#include <torch/csrc/jit/ir/ir.h>
@ -137,7 +138,7 @@ std::shared_ptr<SugaredValue> SimpleValue::attr(
if (auto tuple_type = value_->type()->cast<TupleType>()) {
if (tuple_type->schema()) {
auto attrs = tuple_type->schema()->arguments();
for (size_t i = 0; i < attrs.size(); i++) {
for (const auto i : c10::irange(attrs.size())) {
if (attrs[i].name() == field) {
auto idx = m.graph()->insertConstant(IValue(static_cast<int64_t>(i)));
auto out_type = tuple_type->elements().at(i);


@ -5,6 +5,7 @@
#include <ATen/core/Dict.h>
#include <ATen/core/functional.h>
#include <c10/util/Exception.h>
#include <c10/util/irange.h>
#include <torch/csrc/autograd/engine.h>
#include <torch/csrc/autograd/function.h>
#include <torch/csrc/autograd/variable.h>
@ -381,7 +382,7 @@ static IValue addInput(
if (input.isTensorList()) {
auto elems = input.toTensorList();
for (size_t i = 0; i < num_elems; i++) {
for (const auto i : c10::irange(num_elems)) {
elems[i] = addInput(
state,
elems.get(i),
@ -392,7 +393,7 @@ static IValue addInput(
return elems;
} else {
auto elems = input.toList();
for (size_t i = 0; i < num_elems; i++) {
for (const auto i : c10::irange(num_elems)) {
elems[i] = addInput(
state,
elems.get(i),


@ -1,5 +1,6 @@
#include <torch/csrc/jit/ir/alias_analysis.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/jit_log.h>
#include <torch/csrc/jit/passes/utils/subgraph_utils.h>
#include <torch/csrc/jit/runtime/operator.h>
@ -609,7 +610,7 @@ void AliasDb::analyzeImpl(Node* node) {
case prim::TypeCheck:
case prim::RequiresGradCheck: {
auto num_inputs = node->inputs().size();
for (size_t i = 0; i < num_inputs; i++) {
for (const auto i : c10::irange(num_inputs)) {
makePointerTo(node->outputs().at(i), node->inputs().at(i));
}
return;
@ -692,7 +693,7 @@ void AliasDb::analyzeImpl(Node* node) {
// Bind the schema's "formal" alias annotation to the actual values those
// schema arguments represent
std::unordered_map<Symbol, Value*> formalToActual;
for (size_t i = 0; i < schema.arguments().size(); i++) {
for (const auto i : c10::irange(schema.arguments().size())) {
const auto& formal = schema.arguments()[i].alias_info();
const auto& actualValue = node->inputs().at(i);
// Skip if there's no alias annotation
@ -743,7 +744,7 @@ void AliasDb::analyzeImpl(Node* node) {
}
// Use the formal-actual mapping to give aliases to the outputs
for (size_t i = 0; i < schema.returns().size(); i++) {
for (const auto i : c10::irange(schema.returns().size())) {
const auto actual = node->outputs().at(i);
const auto& formal = schema.returns()[i].alias_info();
if (!formal) {
@ -820,7 +821,7 @@ void AliasDb::analyzeIf(Node* node) {
analyze(trueBlock);
analyze(falseBlock);
for (size_t i = 0; i < node->outputs().size(); i++) {
for (const auto i : c10::irange(node->outputs().size())) {
const auto nodeOutput = node->outputs()[i];
const auto trueOutput = trueBlock->outputs().at(i);
@ -869,7 +870,7 @@ void AliasDb::analyzeSubgraph(Node* node) {
// subgraph block.
TORCH_INTERNAL_ASSERT(
subgraphBlock->outputs().size() >= node->outputs().size());
for (size_t i = 0; i < node->outputs().size(); i++) {
for (const auto i : c10::irange(node->outputs().size())) {
makePointerTo(node->outputs()[i], subgraphBlock->outputs()[i]);
}
}
@ -1186,7 +1187,7 @@ bool AliasDb::mayContainAlias(
// Make each value in the `from` list point to its partner in the `to` list
void AliasDb::mapAliases(at::ArrayRef<Value*> from, at::ArrayRef<Value*> to) {
TORCH_INTERNAL_ASSERT(to.size() == from.size());
for (size_t i = 0; i < to.size(); i++) {
for (const auto i : c10::irange(to.size())) {
makePointerTo(from[i], to[i]);
}
}


@ -4,6 +4,7 @@
#include <ATen/core/function.h>
#include <c10/util/Exception.h>
#include <c10/util/StringUtil.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/api/function_impl.h>
#include <torch/csrc/jit/frontend/error_report.h>
#include <torch/csrc/jit/frontend/schema_matching.h>
@ -1291,7 +1292,7 @@ void Node::cloneFrom(Node* s) {
void Node::replaceAllUsesWith(Node* n) {
AT_ASSERT(outputs().size() == n->outputs().size());
size_t nOutputs = outputs().size();
for (size_t i = 0; i < nOutputs; i++) {
for (const auto i : c10::irange(nOutputs)) {
outputs()[i]->replaceAllUsesWith(n->outputs()[i]);
}
}
@ -1615,7 +1616,7 @@ Value* Graph::insert(
Node* Graph::create(NodeKind kind, size_t num_outputs) {
// NB: Node constructor adds node to all_nodes
auto n = new Node(this, kind);
for (size_t i = 0; i < num_outputs; i++) {
for (const auto i : c10::irange(num_outputs)) {
n->addOutput();
}
return n;


@ -1,5 +1,7 @@
#include <c10/util/irange.h>
#include <torch/csrc/jit/ir/subgraph_matcher.h>
#include <torch/csrc/jit/jit_log.h>
#include <regex>
#include <stack>
@ -295,12 +297,12 @@ bool SubgraphMatcher::matchNodes(const Node* n1, Node* n2) {
// Add nodes to the map before calling matchValues to avoid infinite
// recursion.
nodes_map_[n1] = n2;
for (size_t i = 0; i < n1->outputs().size(); i++) {
for (const auto i : c10::irange(n1->outputs().size())) {
if (!matchValues(n1->outputs()[i], n2->outputs()[i])) {
return false;
}
}
for (size_t i = 0; i < n1->inputs().size(); i++) {
for (const auto i : c10::irange(n1->inputs().size())) {
if (!matchValues(n1->inputs()[i], n2->inputs()[i])) {
return false;
}


@ -1,6 +1,7 @@
#include <torch/csrc/jit/passes/bailout_graph.h>
#include <ATen/core/function.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/ir/alias_analysis.h>
#include <torch/csrc/jit/ir/ir_views.h>
#include <torch/csrc/jit/jit_log.h>
@ -110,7 +111,7 @@ struct BailOutGraphBuilderForNode {
const at::ArrayRef<Value*> block_outputs,
const at::ArrayRef<Value*> carried_deps) {
TORCH_INTERNAL_ASSERT(block_outputs.size() == carried_deps.size());
for (size_t i = 0; i < block_outputs.size(); i++) {
for (const auto i : c10::irange(block_outputs.size())) {
auto nv = getOrAddInputForValue(block_outputs[i]);
old_to_new_[carried_deps[i]] = nv;
}


@ -1,5 +1,6 @@
#include <torch/csrc/jit/passes/canonicalize.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/ir/ir_views.h>
namespace torch {
@ -33,7 +34,7 @@ std::shared_ptr<Graph> Canonicalize(
r->appendNode(r_node);
auto outputs = node->outputs();
auto r_outputs = r_node->outputs();
for (size_t i = 0; i < outputs.size(); i++) {
for (const auto i : c10::irange(outputs.size())) {
rn_env[outputs.at(i)] = r_outputs.at(i);
}
if (node->hasAttribute(attr::Subgraph)) {


@ -1,5 +1,6 @@
#include <torch/csrc/jit/passes/dead_code_elimination.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/ir/alias_analysis.h>
#include <torch/csrc/jit/ir/ir_views.h>
#include <torch/csrc/jit/jit_log.h>
@ -114,7 +115,7 @@ class DeadCodeEliminator {
outerNode->kind() == c10::onnx::Loop) {
// Special handling to deal with loop carried dependencies.
auto loop = LoopView(outerNode);
for (size_t i = 0; i < loop.carriedOutputs().size(); i++) {
for (const auto i : c10::irange(loop.carriedOutputs().size())) {
if (outerNode->kind() == c10::onnx::Loop) {
// Special handling for onnx loop.
// The number of body carried inputs and outputs are different.
@ -135,7 +136,7 @@ class DeadCodeEliminator {
liveValues_.insert(loop.nextCond());
} else {
AT_ASSERT(outerNode->outputs().size() == node->inputs().size());
for (size_t i = 0; i < outerNode->outputs().size(); i++) {
for (const auto i : c10::irange(outerNode->outputs().size())) {
auto innerOutput = node->inputs()[i];
auto outerOutput = outerNode->outputs()[i];
if (liveValues_.count(outerOutput)) {


@ -1,5 +1,6 @@
#include <torch/csrc/jit/passes/fixup_trace_scope_blocks.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/frontend/schema_matching.h>
#include <torch/csrc/jit/passes/canonicalize.h>
#include <torch/csrc/jit/passes/dead_code_elimination.h>
@ -200,14 +201,14 @@ struct ConvertTracedAttrReferences {
// the proper attribute.
auto attr_atoms = attr_qualname.atoms();
Value* replaced_value = self;
for (size_t i = 0; i < attr_atoms.size(); i++) {
for (const auto i : c10::irange(attr_atoms.size())) {
if (i < prefix_atoms.size()) {
TORCH_INTERNAL_ASSERT(attr_atoms[i] == prefix_atoms[i]);
} else {
replaced_value = n->owningBlock()->owningGraph()->insertGetAttr(
replaced_value, attr_atoms[i]);
} // if (i < prefix_atoms.size())
} // for (size_t i = 0; i < attr_atoms.size(); i++)
} // for(const auto i : c10::irange(attr_atoms.size()))
n->replaceInput(inp_idx, replaced_value);
local_remaps[inp] = replaced_value;
} else {


@ -2,6 +2,7 @@
#include <torch/csrc/jit/jit_log.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/ir/alias_analysis.h>
#include <torch/csrc/jit/passes/clear_profiling.h>
#include <torch/csrc/jit/passes/inliner.h>
@ -312,7 +313,7 @@ class AttributePropagator {
} else if (attr.isList()) {
c10::List<IValue> elems = std::move(attr).toList();
for (size_t i = 0; i < elems.size(); i++) {
for (const auto i : c10::irange(elems.size())) {
elems.set(i, overrideGradient(elems.extract(i)));
}
attr = std::move(elems);


@ -1,3 +1,4 @@
#include <c10/util/irange.h>
#include <torch/csrc/jit/ir/alias_analysis.h>
#include <torch/csrc/jit/ir/ir_views.h>
#include <torch/csrc/jit/passes/frozen_conv_folding.h>
@ -15,7 +16,7 @@ void OptimizeFrozenGraph(
removeDropout(graph);
// run a couple times to capture Conv -> Mul -> Add etc
if (optimize_numerics) {
for (size_t i = 0; i < 2; i++) {
for (const auto i : c10::irange(2)) {
FoldFrozenConvBatchnorm(graph);
FoldFrozenConvAddOrSub(graph);
FoldFrozenConvMulOrDiv(graph);


@ -3,6 +3,7 @@
#include <ATen/core/interned_strings.h>
#include <c10/core/ScalarType.h>
#include <c10/util/Exception.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/ir/alias_analysis.h>
#include <torch/csrc/jit/ir/constants.h>
#include <torch/csrc/jit/ir/ir.h>
@ -193,7 +194,7 @@ void InplaceMKLDNNSubgraph(std::shared_ptr<Graph> graph) {
// the binary operators (add/mul) are commutative and only take tensor
// inputs, so we can inplace either the first or second input
int64_t reusable_value_index = -1;
for (size_t i = 0; i < 2; i++) {
for (const auto i : c10::irange(2)) {
TORCH_INTERNAL_ASSERT(node->inputs().at(i)->type()->cast<TensorType>());
if (!set_liveness[alias_mapping[node->inputs().at(i)]]->isAfter(node)) {
reusable_value_index = i;
@ -905,7 +906,7 @@ class MKLDNNSubgraphSlicer {
if (n->kind() == aten::add || n->kind() == aten::mul) {
// mkldnn doesn't currently support Tensor-Scalar add
for (size_t i = 0; i < 2; i++) {
for (const auto i : c10::irange(2)) {
if (!n->inputs().at(i)->type()->cast<TensorType>()) {
return false;
}


@ -1,6 +1,7 @@
#include <torch/csrc/jit/passes/graph_fuser.h>
#include <c10/util/Exception.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/codegen/fuser/interface.h>
#include <torch/csrc/jit/frontend/ir_emitter.h>
#include <torch/csrc/jit/ir/alias_analysis.h>
@ -600,7 +601,7 @@ struct GraphFuser {
// a_broadcasted, b_broadcasted = listUnpack(output_list)
// `a_broadcasted` should receive the same aliasing info as `a`
TORCH_INTERNAL_ASSERT(unpack_node->outputs().size() == inputs.size());
for (size_t i = 0; i < inputs.size(); i++) {
for (const auto i : c10::irange(inputs.size())) {
Value* original_input = inputs[i];
Value* broadcasted_output = unpack_node->outputs()[i];
aliasDb_->copyValue(original_input, broadcasted_output);
@ -753,7 +754,7 @@ struct GraphFuser {
WithInsertPoint guard(bchunk->next());
std::vector<Value*> producer_chunk_outputs;
for (size_t i = 0; i < nchunks; i++) {
for (const auto i : c10::irange(nchunks)) {
producer_chunk_outputs.push_back(
bchunk->output(nchunks * producer_index + i));
}
@ -828,7 +829,7 @@ struct GraphFuser {
}
bchunk->removeInput(producer_index);
for (size_t i = 0; i < nchunks; i++) {
for (const auto i : c10::irange(nchunks)) {
bchunk->eraseOutput(nchunks * producer_index);
}


@ -2,6 +2,7 @@
#include <ATen/core/functional.h>
#include <c10/util/Exception.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/ir/constants.h>
#include <torch/csrc/jit/passes/dead_code_elimination.h>
@ -229,7 +230,7 @@ static void flattenOutputs(Node* n, Node* insert_point) {
// is placed at the current insertion point
if (TupleTypePtr tt = output->type()->cast<TupleType>()) {
if (supported_ops.count(n->kind()) > 0) {
for (size_t j = 0; j < tt->elements().size(); j++) {
for (const auto j : c10::irange(tt->elements().size())) {
n->insertOutput(i + 1 + j)->setType(tt->elements()[j]);
}
auto new_tup =


@ -2,6 +2,7 @@
#include <ATen/core/functional.h>
#include <c10/util/Exception.h>
#include <c10/util/irange.h>
#include <torch/csrc/autograd/function.h>
#include <torch/csrc/autograd/symbolic.h>
#include <torch/csrc/jit/ir/constants.h>
@ -316,7 +317,7 @@ void NodeToONNX(
auto cloneNode = [&](Node* node) {
auto n_ = new_block->appendNode(
new_block->owningGraph()->createClone(node, envFn));
for (size_t i = 0; i < node->outputs().size(); i++) {
for (const auto i : c10::irange(node->outputs().size())) {
// n_->outputs()[i]->setType(node->outputs()[i]->type());
env[node->output(i)] = n_->output(i);
}


@ -3,6 +3,7 @@
#include <torch/torch.h>
#include <c10/util/Optional.h>
#include <c10/util/irange.h>
#include <algorithm>
namespace torch {
@ -88,8 +89,7 @@ static void fuseConvBatchNorm(Block* b, ValueToParamPairMap& valsToParamsMap) {
bn_scale = bn_scale.div(bn_var);
// Calculate weight
// NOLINTNEXTLINE(clang-diagnostic-sign-compare)
for (size_t i = 0; i < w_conv.size(0); i++) {
for (const auto i : c10::irange(w_conv.size(0))) {
w_conv[i] = w_conv[i].mul(bn_scale[i]);
}


@ -1,6 +1,7 @@
#include <torch/csrc/jit/passes/onnx/fixup_onnx_controlflow.h>
#include <aten/src/ATen/InitialTensorOptions.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/jit_log.h>
#include <torch/csrc/jit/passes/dead_code_elimination.h>
#include <torch/csrc/jit/passes/onnx/peephole.h>
@ -328,7 +329,7 @@ void ONNXFixupUninitializedOutput(Node* node) {
// Infer shape and type for subblock outputs
TORCH_INTERNAL_ASSERT(
then_block->outputs().size() == else_block->outputs().size())
for (size_t i = 0; i < else_block->outputs().size(); i++) {
for (const auto i : c10::irange(else_block->outputs().size())) {
Value* then_block_output = then_block->outputs()[i];
Value* else_block_output = else_block->outputs()[i];


@ -34,7 +34,7 @@ static bool isStaticCondition(Node* node) {
compare_node->kind() == onnx::Less ||
compare_node->kind() == onnx::GreaterOrEqual ||
compare_node->kind() == onnx::LessOrEqual) {
for (size_t i = 0; i < compare_node->inputs().size(); i++) {
for (const auto i : c10::irange(compare_node->inputs().size())) {
auto sym = compare_node->inputs()[i]
->type()
->castRaw<TensorType>()
@ -74,7 +74,7 @@ static c10::optional<int> findIndex(
c10::ArrayRef<torch::jit::Value*> outputs,
Value* input) {
c10::optional<int> idx = c10::nullopt;
for (size_t i = 0; i < outputs.size(); i++) {
for (const auto i : c10::irange(outputs.size())) {
if (input == outputs[i]) {
idx = i;
break;
@ -122,7 +122,7 @@ static bool constantFoldedConditionValue(Node* node) {
TORCH_INTERNAL_ASSERT(compare_node != nullptr);
ScalarTypeAnalysisNodeForONNX(compare_node);
std::vector<at::Tensor> inputs;
for (size_t i = 0; i < compare_node->inputs().size(); i++) {
for (const auto i : c10::irange(compare_node->inputs().size())) {
auto input_node = compare_node->inputs()[i]->node();
if (input_node->kind() == onnx::Constant) {
const at::Tensor& val = input_node->t(attr::value);


@ -1,6 +1,7 @@
#include <torch/csrc/jit/passes/onnx/peephole.h>
#include <c10/util/Exception.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/passes/dead_code_elimination.h>
#include <torch/csrc/jit/passes/onnx/helper.h>
@ -94,7 +95,7 @@ c10::optional<size_t> fusibleExpandTo(
return c10::nullopt;
}
for (size_t i = 0; i < from.size(); i++) {
for (const auto i : c10::irange(from.size())) {
auto fdim = from[from.size() - 1 - i];
auto tdim = to[to.size() - 1 - i];
if (fdim != 1 && fdim != tdim) {
@ -717,7 +718,7 @@ static void fuseListConstructListUnpack(Block* b) {
}
if (it->kind() == prim::ListUnpack &&
it->input()->node()->kind() == prim::ListConstruct) {
for (size_t i = 0; i < it->outputs().size(); i++) {
for (const auto i : c10::irange(it->outputs().size())) {
auto output = it->outputs().at(i);
output->replaceAllUsesWith(it->input()->node()->inputs().at(i));
}


@ -1,5 +1,6 @@
#include <torch/csrc/jit/passes/onnx/preprocess_for_onnx.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/jit_log.h>
#include <torch/csrc/jit/passes/onnx/helper.h>
@ -192,7 +193,7 @@ static void fuseListAndListUnpack(Block* b) {
fuseListAndListUnpack(child_block);
}
if (it->kind() == prim::ListUnpack) {
for (size_t i = 0; i < it->outputs().size(); i++) {
for (const auto i : c10::irange(it->outputs().size())) {
auto output = it->outputs().at(i);
if (it->inputs().size() == 1 &&
it->input()->node()->kind() != prim::ListConstruct &&


@ -9,6 +9,8 @@
#include <torch/csrc/jit/passes/onnx/helper.h>
#include <torch/csrc/jit/passes/onnx/pattern_conversion/pattern_encapsulation.h>
#include <c10/util/irange.h>
#include <limits>
namespace torch {


@ -373,7 +373,7 @@ bool IsBlockReturnTypeSame(Node* n) {
TORCH_INTERNAL_ASSERT(n->kind() == ::c10::onnx::If);
auto then_block = n->blocks()[0];
auto else_block = n->blocks()[1];
for (size_t i = 0; i < n->outputs().size(); i++) {
for (const auto i : c10::irange(n->outputs().size())) {
// check the type
auto then_block_type = then_block->outputs()[i]->type();
auto else_block_type = else_block->outputs()[i]->type();
@ -598,7 +598,7 @@ c10::optional<std::vector<int64_t>> GetValueFromListConstructNode(
Node* lc_node) {
auto rank = lc_node->inputs().size();
std::vector<int64_t> shape_size;
for (size_t i = 0; i < rank; i++) {
for (const auto i : c10::irange(rank)) {
if (TensorTypePtr shape_type =
lc_node->input(i)->type()->cast<TensorType>()) {
if (ConstantValueMap::HasValue(lc_node->input(i)->debugName())) {
@ -1157,7 +1157,7 @@ void SpecialPostProcess(Node* n) {
if (!IsBlockReturnTypeSame(n) && IsStaticConditionONNX(n)) {
auto cond = ConditionValueONNX(n);
auto block_idx = cond ? 0 : 1;
for (size_t i = 0; i < n->outputs().size(); i++) {
for (const auto i : c10::irange(n->outputs().size())) {
n->outputs()[i]->setType(
n->blocks()[block_idx]->outputs()[i]->type());
}


@ -1,6 +1,7 @@
#include <torch/csrc/jit/passes/peephole.h>
#include <ATen/core/jit_type.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/ir/alias_analysis.h>
#include <torch/csrc/jit/ir/ir_views.h>
#include <torch/csrc/jit/jit_log.h>
@ -375,7 +376,7 @@ bool FuseAddMM(Block* block) {
// == 0 for it).
if (node->get<at::Scalar>(attr::alpha).value().toDouble() == 1.) {
// Look for mm from both sides of the add
for (size_t mm_side = 0; mm_side < 2; mm_side++) {
for (const auto mm_side : c10::irange(2)) {
// Add will accept tensors of mismatched scalar types, as long as
// one of them is a scalar. Addmm will throw in that case, so we can
// only perform this fusion if we're sure that it is correct, and


@ -1,6 +1,7 @@
#include <torch/csrc/jit/passes/shape_analysis.h>
#include <c10/util/Exception.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/frontend/error_report.h>
#include <torch/csrc/jit/ir/alias_analysis.h>
#include <torch/csrc/jit/ir/constants.h>
@ -238,7 +239,7 @@ class ShapePropagator {
c10::ScalarType dimmed = c10::ScalarType::Undefined;
c10::ScalarType zerodim = c10::ScalarType::Undefined;
// binary arithmetic ops, more than 2 args is alpha.
for (size_t i = 0; i < 2; i++) {
for (const auto i : c10::irange(2)) {
auto dtt = node->inputs()[i]->type()->expect<TensorType>();
auto inputDtype = dtt->scalarType();
if (!dtt || !inputDtype) {

View File

@ -8,6 +8,7 @@
#include <torch/csrc/jit/runtime/profiling_record.h>
#include <ATen/core/interned_strings.h>
#include <c10/util/irange.h>
namespace torch {
namespace jit {
@ -25,7 +26,7 @@ void insertProfileNodesForSpecializeAutogradZero(
ProfilingRecord* pr) {
for (auto it = block->nodes().begin(); it != block->nodes().end(); ++it) {
auto n = *it;
for (size_t offset = 0; offset < n->inputs().size(); offset++) {
for (const auto offset : c10::irange(n->inputs().size())) {
auto i = n->input(offset);
if (i->type()->cast<OptionalType>() && hasGradSumToSizeUses(i)) {
// here we profile the definition instead of the use,
@ -293,7 +294,7 @@ struct AutogradZeroSpecializer {
graph_->insertNode(versioning_if);
auto ret = graph_->return_node();
for (size_t i = 0; i < ret->inputs().size(); i++) {
for (const auto i : c10::irange(ret->inputs().size())) {
auto ogo = ret->input(i);
auto ngo = versioning_if->output(i);
ngo->copyMetadata(ogo);

View File

@ -3,6 +3,8 @@
#include <torch/csrc/jit/ir/irparser.h>
#include <torch/csrc/jit/ir/subgraph_matcher.h>
#include <c10/util/irange.h>
namespace torch {
namespace jit {
@ -171,7 +173,7 @@ void SubgraphRewriter::rewriteSinglePatternOnGraph(
// Record all planned rewritings
AT_ASSERT(outputs.size() == new_outputs.size());
for (size_t idx = 0; idx < outputs.size(); idx++) {
for (const auto idx : c10::irange(outputs.size())) {
values_to_rewrite.push_back(outputs[idx]);
rewrite_map[outputs[idx]] = new_outputs[idx];
}

View File

@ -4,6 +4,8 @@
#include <torch/csrc/jit/passes/normalize_ops.h>
#include <torch/csrc/jit/runtime/operator.h>
#include <c10/util/irange.h>
namespace torch {
namespace jit {
namespace {
@ -91,8 +93,8 @@ struct AliasAndIValue {
// No inputs should alias each other
void checkInputPreconditions(const Stack& inputs) {
for (size_t i = 0; i < inputs.size(); i++) {
for (size_t j = 0; j < inputs.size(); j++) {
for (const auto i : c10::irange(inputs.size())) {
for (const auto j : c10::irange(inputs.size())) {
if (i == j) {
continue;
}
@ -133,7 +135,7 @@ void checkWrites(
const std::vector<AliasAndIValue>& inputs,
const std::vector<IValue>& deepCopiedInputs) {
AT_ASSERT(inputs.size() == deepCopiedInputs.size());
for (size_t i = 0; i < inputs.size(); i++) {
for (const auto i : c10::irange(inputs.size())) {
const auto& input = inputs[i];
const auto& deepCopiedInput = deepCopiedInputs[i];
if (!input.aliasInfo || !input.aliasInfo->isWrite()) {
@ -242,7 +244,7 @@ void checkAliasAnnotation(
const auto schema = node->schema();
std::vector<AliasAndIValue> inputsToCheck;
for (size_t i = 0; i < schema.arguments().size(); i++) {
for (const auto i : c10::irange(schema.arguments().size())) {
inputsToCheck.emplace_back(
schema.arguments().at(i).alias_info(), stack.at(i));
}
@ -257,7 +259,7 @@ void checkAliasAnnotation(
const auto outputs = std::move(stack);
std::vector<AliasAndIValue> outputsToCheck;
for (size_t i = 0; i < schema.returns().size(); i++) {
for (const auto i : c10::irange(schema.returns().size())) {
outputsToCheck.emplace_back(
schema.returns().at(i).alias_info(), outputs.at(i));
}

View File

@ -2,6 +2,8 @@
#include <torch/csrc/jit/passes/canonicalize.h>
#include <c10/util/irange.h>
namespace torch {
namespace jit {
namespace SubgraphUtils {
@ -300,7 +302,7 @@ void mergeNodeIntoSubgraph(
}
// Add n's outputs to the group node and inner subgraph outputs.
for (size_t i = 0; i < toMerge->outputs().size(); i++) {
for (const auto i : c10::irange(toMerge->outputs().size())) {
auto oldOutput = toMerge->outputs()[i];
auto newOutput = mergedNode->outputs()[i];
subgraph->registerOutput(newOutput);

View File

@ -2,6 +2,8 @@
#include <torch/csrc/jit/python/python_dict.h>
#include <torch/csrc/jit/python/python_ivalue.h>
#include <c10/util/irange.h>
namespace torch {
namespace jit {
@ -182,7 +184,7 @@ IValue toIValue(py::handle obj, const TypePtr& type, c10::optional<int32_t> N) {
c10::StrongTypePtr(cu, classType), numAttrs);
// 2. copy all the contained types
for (size_t slot = 0; slot < numAttrs; slot++) {
for (const auto slot : c10::irange(numAttrs)) {
const auto& attrType = classType->getAttribute(slot);
const auto& attrName = classType->getAttributeName(slot);

View File

@ -2,6 +2,7 @@
#include <ATen/core/ivalue.h>
#include <c10/util/Exception.h>
#include <c10/util/irange.h>
#include <torch/csrc/autograd/grad_mode.h>
#include <torch/csrc/jit/frontend/tracer.h>
#include <torch/csrc/jit/ir/ir.h>
@ -160,7 +161,7 @@ struct CaptureList {
case CAPTURE_LIST: {
c10::List<at::Tensor> lst;
auto size = *size_it++;
for (size_t i = 0; i < size; i++) {
for (const auto i : c10::irange(size)) {
lst.emplace_back(var_capture_it->unpack(saved_for));
var_capture_it++;
}
@ -980,12 +981,12 @@ Node* replaceBlockWithFallbackGraph(Block* b, ArrayRef<Value*> inputs) {
fallback->g_(attr::Subgraph, graph);
b->prependNode(fallback);
for (size_t i = 0; i < inputs.size(); i++) {
for (const auto i : c10::irange(inputs.size())) {
graph->inputs()[i]->setType(inputs[i]->type());
graph->inputs()[i]->copyMetadata(inputs[i]);
}
for (size_t i = 0; i < b->outputs().size(); i++) {
for (const auto i : c10::irange(b->outputs().size())) {
fallback->output(i)->setType(b->outputs()[i]->type());
fallback->output(i)->copyMetadata(b->outputs()[i]);
b->replaceOutput(i, fallback->output(i));

View File

@ -5,6 +5,7 @@
#include <ATen/record_function.h>
#include <c10/core/thread_pool.h>
#include <c10/util/Exception.h>
#include <c10/util/irange.h>
#include <torch/csrc/autograd/edge.h>
#include <torch/csrc/autograd/grad_mode.h>
#include <torch/csrc/autograd/profiler.h>

View File

@ -1,5 +1,6 @@
#include <torch/csrc/jit/runtime/profiling_graph_executor_impl.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/jit_log.h>
#include <torch/csrc/jit/passes/bailout_graph.h>
#include <torch/csrc/jit/passes/batch_mm.h>
@ -132,7 +133,7 @@ static bool needsGradientInProfilingMode(Block* b) {
bool guardDifferentiableGraph(Node* dnode) {
auto gi = dnode->g(attr::Subgraph)->inputs();
bool all_inputs_seen = true;
for (size_t i = 0; i < gi.size(); i++) {
for (const auto i : c10::irange(gi.size())) {
auto ty = gi[i]->type()->cast<TensorType>();
if (ty) {
auto n = gi[i]->uses().at(0).user;
@ -706,7 +707,7 @@ void ProfilingGraphExecutorImpl::replaceFallbackGraphWithFallbackFunction(
WithInsertPoint wip{*it};
auto function_call = insertFallbackFunctionCall(
b->owningGraph(), fallback_func, it->inputs());
for (size_t i = 0; i < function_call->outputs().size(); i++) {
for (const auto i : c10::irange(function_call->outputs().size())) {
it->output(i)->replaceAllUsesWith(function_call->output(i));
}
it.destroyCurrent();

View File

@ -1,6 +1,7 @@
#include <torch/csrc/jit/runtime/profiling_record.h>
#include <ATen/core/interned_strings.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/jit_log.h>
#include <torch/csrc/jit/passes/clear_profiling.h>
#include <torch/csrc/jit/passes/constant_propagation.h>
@ -61,7 +62,7 @@ bool ShapeSymbolTable::bindSymbolicShapes(
if (*sym_shapes.rank() != new_sizes.size()) {
return false;
}
for (size_t i = 0; i < new_sizes.size(); i++) {
for (const auto i : c10::irange(new_sizes.size())) {
auto symbol = (*sym_shapes.sizes())[i];
if (!symbol.is_static()) {
continue;
@ -137,7 +138,7 @@ c10::SymbolicShape ProfilingRecord::mergeSymbolicShapes(
new_sizes.rank().has_value() && sym_shapes.rank().has_value() &&
*new_sizes.rank() == *sym_shapes.rank());
for (size_t i = 0; i < *new_sizes.rank(); i++) {
for (const auto i : c10::irange(*new_sizes.rank())) {
if (!(*sym_shapes.sizes())[i].is_static() ||
!(*new_sizes.sizes())[i].is_static()) {
new_symbols.emplace_back();
@ -260,7 +261,7 @@ void ProfilingRecord::removeProfileCounter(Block* b) {
void ProfilingRecord::instrumentBlock(Block* block) {
for (auto it = block->nodes().begin(); it != block->nodes().end(); ++it) {
auto n = *it;
for (size_t offset = 0; offset < n->inputs().size(); offset++) {
for (const auto offset : c10::irange(n->inputs().size())) {
auto i = n->input(offset);
if (i->type()->kind() == c10::TypeKind::TensorType &&
(needsProfiledInputs(n) || needsProfiledOutput(i->node()))) {

View File

@ -398,7 +398,7 @@ void listMulIntLeftInPlace(Stack* stack) {
} else if (n > 1) {
size_t list_size = list.size();
for (int64_t i = 1; i < n; i++) {
for (size_t j = 0; j < list_size; j++) {
for (const auto j : c10::irange(list_size)) {
list.push_back(list.get(j));
}
}

View File

@ -69,7 +69,7 @@ RegisterOperators reg(
return [rg_props](Stack* stack) {
auto num_inputs = rg_props.size();
// Check every input's shape against profiled (expected) shape.
for (size_t i = 0; i < num_inputs; i++) {
for (const auto i : c10::irange(num_inputs)) {
auto& input = peek(stack, i, num_inputs);
const auto& t = input.toTensor();
if (rg_props[i] != t.requires_grad()) {

View File

@ -689,7 +689,7 @@ std::vector<at::Tensor> StaticRuntime::operator()(
const std::vector<at::Tensor>& inps) {
std::vector<c10::IValue> stack;
stack.resize(inps.size());
for (size_t i = 0; i < inps.size(); i++) {
for (const auto i : c10::irange(inps.size())) {
stack[i] = inps[i];
}
@ -730,11 +730,11 @@ c10::IValue StaticRuntime::operator()(
"with StaticModule(const torch::jit::Module& m) instead.");
std::vector<c10::IValue> s = args;
static_module_.schema()->checkAndNormalizeInputs(s, kwargs);
for (size_t i = 0; i < s.size(); i++) {
for (const auto i : c10::irange(s.size())) {
Input(i) = std::move(s[i]);
}
} else {
for (size_t i = 0; i < args.size(); i++) {
for (const auto i : c10::irange(args.size())) {
Input(i) = args[i];
}
}
@ -797,7 +797,7 @@ void StaticRuntime::benchmark(
IndividualMetrics results =
benchmark_individual_ops(args, kwargs, warmup_runs, main_runs);
for (size_t i = 0; i < nodes_.size(); i++) {
for (const auto i : c10::irange(nodes_.size())) {
const Node* node = nodes_[i].node();
std::cout << "Node #" << i << ": " << results.time_per_node[i]
<< " ms/iter, ";
@ -895,7 +895,7 @@ StaticRuntime::IndividualMetrics StaticRuntime::benchmark_individual_ops(
"with StaticModule(const torch::jit::Module& m) instead.");
static_module_.schema()->checkAndNormalizeInputs(stack, kwargs);
}
for (size_t i = 0; i < stack.size(); i++) {
for (const auto i : c10::irange(stack.size())) {
Input(i) = stack[i];
}
results.setup_time = timer.MilliSeconds();
@ -906,8 +906,8 @@ StaticRuntime::IndividualMetrics StaticRuntime::benchmark_individual_ops(
}
// main runs
for (int k = 0; k < main_runs; k++) {
for (size_t i = 0; i < stack.size(); i++) {
for (const auto k : c10::irange(main_runs)) {
for (const auto i : c10::irange(stack.size())) {
Input(i) = stack[i];
}
timer.Start();
@ -917,7 +917,7 @@ StaticRuntime::IndividualMetrics StaticRuntime::benchmark_individual_ops(
float millis = timer.MilliSeconds();
results.memory_alloc_time += millis;
for (size_t i = 0; i < nodes_.size(); i++) {
for (const auto i : c10::irange(nodes_.size())) {
timer.Start();
nodes_[i].run();
millis = timer.MilliSeconds();
@ -969,7 +969,7 @@ StaticRuntime::IndividualMetrics StaticRuntime::benchmark_individual_ops(
}
// post processing
for (size_t i = 0; i < nodes_.size(); i++) {
for (const auto i : c10::irange(nodes_.size())) {
const Node* node = nodes_[i].node();
std::string kind = std::string(node->kind().toQualString());
results.time_per_node[i] /= static_cast<float>(main_runs);
@ -998,15 +998,15 @@ void StaticRuntime::check_for_memory_leak(bool output_returned) {
}
// check for inputs
for (size_t i = 0; i < inputs_.size(); i++) {
for (const auto i : c10::irange(inputs_.size())) {
TORCH_CHECK(inputs_[i].isNone(), "Input ", i, " was not cleaned up");
}
std::unordered_set<const IValue*> output_ivalues(
outputs_.begin(), outputs_.end());
for (size_t n = 0; n < nodes_.size(); n++) {
for (const auto n : c10::irange(nodes_.size())) {
auto& pnode = nodes_[n];
for (size_t i = 0; i < pnode.outputs().size(); i++) {
for (const auto i : c10::irange(pnode.outputs().size())) {
const IValue* ival = &pnode.Output(i);
const Value* val = pnode.node()->output(i);
const std::string error_msg = "Output " + c10::to_string(i) + ", %" +
@ -1261,7 +1261,7 @@ void ProcessedNode::run() {
std::vector<IValue> stack;
const size_t size = node_->inputs().size();
stack.reserve(size);
for (size_t i = 0; i < size; i++) {
for (const auto i : c10::irange(size)) {
stack.emplace_back(Input(i));
}

View File
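One practical detail visible in the StaticRuntime benchmark hunk above: both the signed int loop over main_runs and the size_t loop over stack.size() convert uniformly, because the index type is deduced from the bound, so behavior is unchanged in either case. A minimal standalone check, assuming a PyTorch/c10 checkout is on the include path (c10::irange is header-only):

    // Assumes <c10/util/irange.h> from a PyTorch/c10 checkout is on the include path.
    #include <c10/util/irange.h>
    #include <cstdio>
    #include <vector>

    int main() {
      const int main_runs = 3;
      std::vector<double> stack{1.0, 2.0, 4.0};
      for (const auto k : c10::irange(main_runs)) {       // k deduced from an int bound
        for (const auto i : c10::irange(stack.size())) {  // i deduced from a size_t bound
          std::printf("run %d, input %zu = %f\n", k, i, stack[i]);
        }
      }
    }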

@ -243,7 +243,7 @@ REGISTER_OPERATOR_FUNCTOR(
const size_t size = p_node->inputs().size();
c10::List<IValue> vals(type.getElementType());
vals.reserve(size);
for (size_t i = 0; i < size; i++) {
for (const auto i : c10::irange(size)) {
vals.push_back(p_node->Input(i));
}
p_node->Output(0) = std::move(vals);
@ -265,7 +265,7 @@ REGISTER_OPERATOR_FUNCTOR(
const size_t size = p_node->inputs().size();
std::vector<IValue> vals;
vals.reserve(size);
for (size_t i = 0; i < size; i++) {
for (const auto i : c10::irange(size)) {
vals.push_back(p_node->Input(i));
}
p_node->Output(0) = c10::ivalue::Tuple::create(std::move(vals));
@ -1035,7 +1035,7 @@ std::function<void(ProcessedNode*)> getNativeOperation(Node* n) {
std::vector<IValue> stack;
const size_t size = p_node->inputs().size();
stack.reserve(size);
for (size_t i = 0; i < size; i++) {
for (const auto i : c10::irange(size)) {
stack.emplace_back(p_node->Input(i));
}
// run op
@ -1055,7 +1055,7 @@ std::function<void(ProcessedNode*)> getNativeOperation(Node* n) {
std::vector<IValue> stack;
const size_t size = p_node->inputs().size();
stack.reserve(size);
for (size_t i = 0; i < size; i++) {
for (const auto i : c10::irange(size)) {
stack.emplace_back(p_node->Input(i));
}
// run op
@ -1088,7 +1088,7 @@ std::function<void(ProcessedNode*)> getNativeOperation(Node* n) {
std::vector<IValue> stack;
const size_t size = p_node->inputs().size();
stack.reserve(size);
for (size_t i = 0; i < size; i++) {
for (const auto i : c10::irange(size)) {
stack.emplace_back(p_node->Input(i));
}
// run op
@ -1105,7 +1105,7 @@ std::function<void(ProcessedNode*)> getNativeOperation(Node* n) {
std::vector<IValue> stack;
const size_t size = p_node->inputs().size();
stack.reserve(size);
for (size_t i = 0; i < size; i++) {
for (const auto i : c10::irange(size)) {
stack.emplace_back(p_node->Input(i));
}
// run op

View File

@ -6,6 +6,7 @@
#include <c10/util/Exception.h>
#include <c10/util/Optional.h>
#include <c10/util/accumulate.h>
#include <c10/util/irange.h>
#include <torch/csrc/autograd/symbolic.h>
#include <torch/csrc/jit/jit_log.h>
#include <torch/csrc/jit/passes/dead_code_elimination.h>
@ -27,6 +28,7 @@
#include <set>
#include <string>
#include <vector>
namespace torch {
namespace jit {
@ -347,7 +349,7 @@ void EncoderBase::EncodeValueInfo(
if (t->dim()) {
onnx::TensorShapeProto* shape = tensor_type->mutable_shape();
auto sizes = t->symbolic_sizes().sizes().value();
for (size_t i = 0; i < sizes.size(); i++) {
for (const auto i : c10::irange(sizes.size())) {
shape->add_dim();
if ((dynamic_axes.find(name) != dynamic_axes.end()) &&
(dynamic_axes.at(name).find(i) != dynamic_axes.at(name).end())) {

View File

@ -3,6 +3,7 @@
#include <ATen/core/functional.h>
#include <ATen/core/ivalue_inl.h>
#include <c10/util/Exception.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/serialization/import_export_helpers.h>
#if !defined(C10_MOBILE) && !defined(C10_DISABLE_LEGACY_IMPORT)
#include <torch/csrc/jit/serialization/import_legacy.h>
@ -39,7 +40,7 @@ using caffe2::serialize::ReadAdapterInterface;
void postSetStateValidate(const IValue& v) {
auto obj = v.toObject();
const auto& objType = obj->type();
for (size_t i = 0; i < objType->numAttributes(); i++) {
for (const auto i : c10::irange(objType->numAttributes())) {
const auto& attrType = objType->getAttribute(i);
const auto& attrName = objType->getAttributeName(i);
const auto& slot = obj->getSlot(i);

View File

@ -16,6 +16,7 @@
#include <caffe2/serialize/inline_container.h>
#include <ATen/ATen.h>
#include <c10/util/irange.h>
namespace torch {
namespace jit {
@ -378,7 +379,7 @@ Module ScriptModuleDeserializer::LEGACY_convertModule(
}
}
for (size_t i = 0; i < numPushed; i++) {
for (const auto i : c10::irange(numPushed)) {
LEGACY_moduleStack_.pop_back();
}
return module;

View File

@ -3,6 +3,7 @@
#include <ATen/core/qualified_name.h>
#include <c10/util/Exception.h>
#include <c10/util/StringUtil.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/api/module.h>
#include <torch/csrc/jit/frontend/error_report.h>
#include <torch/csrc/jit/frontend/versioned_symbols.h>
@ -1336,7 +1337,7 @@ struct PythonPrintImpl {
std::vector<std::string> buffers;
// Populate the __parameters__ field. This tells the importer which
// attributes are parameters.
for (size_t i = 0; i < numAttrs; i++) {
for (const auto i : c10::irange(numAttrs)) {
if (classType->is_parameter(i)) {
params.push_back(classType->getAttributeName(i));
}
@ -1378,7 +1379,7 @@ struct PythonPrintImpl {
}
}
for (size_t i = 0; i < numAttrs; i++) {
for (const auto i : c10::irange(numAttrs)) {
const auto& name = classType->getAttributeName(i);
const auto& type = classType->getAttribute(i);
registerClassDependencies(type);
@ -1406,7 +1407,7 @@ struct PythonPrintImpl {
}
size_t numConstants = classType->numConstants();
for (size_t i = 0; i < numConstants; i++) {
for (const auto i : c10::irange(numConstants)) {
const auto& name = classType->getConstantName(i);
IValue v = classType->getConstant(i);

View File

@ -8,6 +8,8 @@
#include <torch/csrc/jit/tensorexpr/ir_visitor.h>
#include <torch/csrc/jit/tensorexpr/stmt.h>
#include <c10/util/irange.h>
namespace torch {
namespace jit {
namespace tensorexpr {
@ -163,7 +165,7 @@ std::vector<const Expr*> getBoundExtents(
// Find the safe size of the temporary buffer by determining the outer
// extents of a union of all bounds.
for (const TensorAccessBoundsInfo& p : infos) {
for (size_t i = 0; i < p.start.size(); i++) {
for (const auto i : c10::irange(p.start.size())) {
if (starts.size() <= i) {
starts.push_back(p.start[i]);
} else {

View File

@ -158,7 +158,7 @@ class SimpleIREvaluatorImpl : public IRVisitor {
std::vector<T> lhs_v = lhs.as_vec<T>();
std::vector<T> rhs_v = rhs.as_vec<T>();
std::vector<T> result_v(lhs_v.size());
for (size_t i = 0; i < lhs_v.size(); i++) {
for (const auto i : c10::irange(lhs_v.size())) {
switch (op_type) {
case IRNodeType::kAdd:
result_v[i] = lhs_v[i] + rhs_v[i];
@ -197,7 +197,7 @@ class SimpleIREvaluatorImpl : public IRVisitor {
std::vector<T> lhs_v = lhs.as_vec<T>();
std::vector<T> rhs_v = rhs.as_vec<T>();
std::vector<T> result_v(lhs_v.size());
for (size_t i = 0; i < lhs_v.size(); i++) {
for (const auto i : c10::irange(lhs_v.size())) {
switch (op_type) {
case IRNodeType::kAnd:
result_v[i] = lhs_v[i] & rhs_v[i];
@ -224,7 +224,7 @@ class SimpleIREvaluatorImpl : public IRVisitor {
std::vector<T> lhs_v = lhs.as_vec<T>();
std::vector<T> rhs_v = rhs.as_vec<T>();
std::vector<T> result_v(lhs_v.size());
for (size_t i = 0; i < lhs_v.size(); i++) {
for (const auto i : c10::irange(lhs_v.size())) {
switch (op_type) {
case IRNodeType::kLshift: {
typename std::make_unsigned<T>::type a =
@ -255,7 +255,7 @@ class SimpleIREvaluatorImpl : public IRVisitor {
std::vector<R> ret_val1_v = retval1.as_vec<R>();
std::vector<R> ret_val2_v = retval2.as_vec<R>();
std::vector<R> result_v(lhs_v.size());
for (size_t i = 0; i < lhs_v.size(); i++) {
for (const auto i : c10::irange(lhs_v.size())) {
switch (cmp_op) {
case CompareSelectOperation::kEQ:
result_v[i] = (lhs_v[i] == rhs_v[i]) ? ret_val1_v[i] : ret_val2_v[i];
@ -623,7 +623,7 @@ class SimpleIREvaluatorImpl : public IRVisitor {
case ScalarType::Name: { \
Type* ptr##Name = static_cast<Type*>(ptr); \
std::vector<Type> v(index.size()); \
for (size_t i = 0; i < index.size(); i++) { \
for (const auto i : c10::irange(index.size())) { \
v[i] = ptr##Name[index[i]]; \
} \
value_ = Value(v); \
@ -657,7 +657,7 @@ class SimpleIREvaluatorImpl : public IRVisitor {
throw malformed_input("value size mismatch in Store", v); \
} \
Type* ptr##Name = static_cast<Type*>(ptr); \
for (size_t i = 0; i < index.size(); i++) { \
for (const auto i : c10::irange(index.size())) { \
ptr##Name[index[i]] = value[i]; \
} \
} break;
@ -748,11 +748,11 @@ class SimpleIREvaluatorImpl : public IRVisitor {
std::vector<TReturn> result(v1.size(), -1);
if (values.size() == 1ULL) {
for (size_t i = 0; i < v1.size(); i++) {
for (const auto i : c10::irange(v1.size())) {
result[i] = compute_intrinsics<TReturn>(v->op_type(), v1[i]);
}
} else {
for (size_t i = 0; i < v1.size(); i++) {
for (const auto i : c10::irange(v1.size())) {
result[i] = compute_intrinsics<TReturn>(v->op_type(), v1[i], v2[i]);
}
}
@ -987,7 +987,7 @@ void SimpleIREvaluator::call_raw(const std::vector<void*>& args) {
if (args.size() != buffer_args().size()) {
throw malformed_input("bad args in IREvaluator call");
}
for (size_t i = 0; i < args.size(); i++) {
for (const auto i : c10::irange(args.size())) {
bindArg(buffer_args()[i], args[i]);
}
stmt()->accept(&*impl_);

View File

@ -30,7 +30,7 @@ std::vector<at::Tensor> constructTensors(
}
std::vector<at::Tensor> tensors;
for (size_t i = 0; i < buf_data_vec.size(); i++) {
for (const auto i : c10::irange(buf_data_vec.size())) {
auto options = at::TensorOptions()
.dtype(buf_dtypes_vec[i])
.layout(at::kStrided)

View File

@ -2,6 +2,8 @@
#include <torch/csrc/jit/tensorexpr/tensor.h>
#include <c10/util/irange.h>
namespace torch {
namespace jit {
namespace tensorexpr {
@ -93,7 +95,7 @@ const Expr* flatten_index(
}
const Expr* total_index = new IntImm(0);
for (size_t i = 0; i < ndim; i++) {
for (const auto i : c10::irange(ndim)) {
total_index = new Add(total_index, new Mul(indices[i], strides[i]));
}
return total_index;
@ -187,7 +189,7 @@ ExternalCall* ExternalCall::make(
std::vector<const Expr*> ExprHandleVectorToExprVector(
const std::vector<ExprHandle>& v) {
std::vector<const Expr*> result(v.size());
for (size_t i = 0; i < v.size(); i++) {
for (const auto i : c10::irange(v.size())) {
result[i] = v[i].node();
}
return result;
@ -196,7 +198,7 @@ std::vector<const Expr*> ExprHandleVectorToExprVector(
std::vector<ExprHandle> ExprVectorToExprHandleVector(
const std::vector<const Expr*>& v) {
std::vector<ExprHandle> result(v.size());
for (size_t i = 0; i < v.size(); i++) {
for (const auto i : c10::irange(v.size())) {
result[i] = ExprHandle(v[i]);
}
return result;
@ -205,7 +207,7 @@ std::vector<ExprHandle> ExprVectorToExprHandleVector(
std::vector<const Var*> VarHandleVectorToVarVector(
const std::vector<VarHandle>& v) {
std::vector<const Var*> result(v.size());
for (size_t i = 0; i < v.size(); i++) {
for (const auto i : c10::irange(v.size())) {
result[i] = v[i].node();
}
return result;
@ -214,7 +216,7 @@ std::vector<const Var*> VarHandleVectorToVarVector(
std::vector<VarHandle> VarVectorToVarHandleVector(
const std::vector<const Var*>& v) {
std::vector<VarHandle> result(v.size());
for (size_t i = 0; i < v.size(); i++) {
for (const auto i : c10::irange(v.size())) {
result[i] = VarHandle(v[i]);
}
return result;

View File

@ -195,7 +195,7 @@ const Expr* IRMutator::mutate(Buf* v) {
std::vector<const Expr*> dims_old = v->dims();
std::vector<const Expr*> dims_new(dims_old.size());
for (size_t i = 0; i < dims_old.size(); i++) {
for (const auto i : c10::irange(dims_old.size())) {
dims_new[i] = dims_old[i]->accept_mutator(this);
any_change |= (dims_new[i] != dims_old[i]);
}

View File

@ -413,7 +413,7 @@ void IRPrinter::visit(const Allocate* v) {
<< "); // dtype=" << v->dtype().ToCppString();
os() << ", dims=[";
const std::vector<const Expr*>& dims = v->dims();
for (size_t i = 0; i < dims.size(); i++) {
for (const auto i : c10::irange(dims.size())) {
if (i != 0) {
os() << ", ";
}
@ -583,7 +583,7 @@ std::string to_string(const Tensor* t) {
std::ostringstream oss;
// TODO: move this to Buf printer
oss << "Tensor " << t->buf()->name_hint() << "[";
for (size_t i = 0; i < t->buf()->ndim(); i++) {
for (const auto i : c10::irange(t->buf()->ndim())) {
if (i != 0) {
oss << ", ";
}

View File

@ -3,6 +3,7 @@
#include <ATen/ExpandUtils.h>
#include <ATen/TensorGeometry.h>
#include <c10/util/irange.h>
#include <c10/util/string_utils.h>
#include <torch/csrc/jit/jit_log.h>
#include <torch/csrc/jit/passes/utils/subgraph_utils.h>
@ -502,7 +503,7 @@ ArgValue TensorExprKernel::toArg(const torch::jit::Value* v) const {
std::vector<ExprHandle> TensorExprKernel::sizesFromVaryingShape(
const c10::VaryingShape<int64_t>& shape) {
std::vector<ExprHandle> dims;
for (size_t i = 0; i < *shape.size(); i++) {
for (const auto i : c10::irange(*shape.size())) {
dims.push_back(IntImm::make(*shape[i]));
}
return dims;
@ -603,7 +604,7 @@ std::vector<ExprHandle> TensorExprKernel::inferSizesForValue(
case aten::remainder:
case aten::atan2: {
std::vector<std::vector<ExprHandle>> shapes;
for (size_t idx = 0; idx < 2; idx++) {
for (const auto idx : c10::irange(2)) {
torch::jit::Value* inp = v->node()->input(idx);
shapes.push_back(sizesForValue(inp));
}
@ -614,7 +615,7 @@ std::vector<ExprHandle> TensorExprKernel::inferSizesForValue(
case aten::threshold:
case aten::where: {
std::vector<std::vector<ExprHandle>> shapes;
for (size_t idx = 0; idx < 3; idx++) {
for (const auto idx : c10::irange(3)) {
torch::jit::Value* inp = v->node()->input(idx);
shapes.push_back(sizesForValue(inp));
}
@ -623,7 +624,7 @@ std::vector<ExprHandle> TensorExprKernel::inferSizesForValue(
case aten::addcmul: {
std::vector<std::vector<ExprHandle>> shapes;
for (size_t idx = 0; idx < 4; idx++) {
for (const auto idx : c10::irange(4)) {
torch::jit::Value* inp = v->node()->input(idx);
shapes.push_back(sizesForValue(inp));
}

View File

@ -401,7 +401,7 @@ LLVMCodeGenImpl::LLVMCodeGenImpl(
// Emit prototype and bind argument Vars to parameter indices.
llvm::Type* retTy = dtypeToLLVM(dtype);
std::vector<llvm::Type*> params;
for (size_t i = 0; i < args.size(); i++) {
for (const auto i : c10::irange(args.size())) {
auto const& arg = args[i];
if (arg.isVar()) {
params.push_back(dtypeToLLVM(arg.dtype()));
@ -416,7 +416,7 @@ LLVMCodeGenImpl::LLVMCodeGenImpl(
fn_->addAttribute(
llvm::AttributeList::AttrIndex::FunctionIndex,
llvm::Attribute::AlwaysInline);
for (size_t i = 0; i < args.size(); i++) {
for (const auto i : c10::irange(args.size())) {
if (!args[i].isVar()) {
fn_->addParamAttr(i, llvm::Attribute::NoAlias);
}
@ -465,7 +465,7 @@ void LLVMCodeGenImpl::emitWrapper(const std::vector<llvm::Type*>& params) {
auto wrapBB = llvm::BasicBlock::Create(getContext(), "wrapBB", wrapper);
irb_.SetInsertPoint(wrapBB);
llvm::SmallVector<llvm::Value*, 6> wrappedArgs;
for (size_t i = 0; i < params.size(); i++) {
for (const auto i : c10::irange(params.size())) {
auto argp = irb_.CreateGEP(
wrapper->arg_begin(), llvm::ConstantInt::getSigned(IntTy_, i));
if (params[i]->isPointerTy()) {

View File

@ -8,6 +8,7 @@
#include <vector>
#include <c10/util/Logging.h>
#include <c10/util/irange.h>
#include <c10/util/string_utils.h>
#include <ATen/core/functional.h>
@ -22,6 +23,11 @@
#include <torch/csrc/jit/tensorexpr/ir_verifier.h>
#include <torch/csrc/jit/tensorexpr/tensor.h>
#include <stdexcept>
#include <unordered_map>
#include <unordered_set>
#include <vector>
namespace torch {
namespace jit {
namespace tensorexpr {
@ -501,7 +507,7 @@ class FunctionInliner : public IRMutator {
const Expr* mutate_loads(const Buf* buf, std::vector<const Expr*> dims) {
std::vector<const Var*> index_vars;
TORCH_INTERNAL_ASSERT(buf->ndim() == producer_index_vars_.size());
for (size_t i = 0; i < buf->ndim(); i++) {
for (const auto i : c10::irange(buf->ndim())) {
const Var* func_callee_arg = producer_index_vars_.at(i);
const Expr* func_caller_param = dims.at(i);
if (func_callee_arg == nullptr) {
@ -2348,7 +2354,7 @@ class LoopComputeAtRewriter : public IRMutator {
return v;
}
std::vector<const Expr*> new_indices(v->indices().size());
for (size_t i = 0; i < v->indices().size(); i++) {
for (const auto i : c10::irange(v->indices().size())) {
new_indices[i] =
IRSimplifier::simplify(new Sub(v->indices()[i], offsets_[i]));
}
@ -2713,7 +2719,7 @@ void LoopNest::computeAt(Stmt* s, For* f) {
// Generate index variables for 'temp'
std::vector<const Expr*> temp_indices(dims.size());
for (size_t i = 0; i < dims.size(); i++) {
for (const auto i : c10::irange(dims.size())) {
// TODO: Use name-hint of the producer indices instead of 'idx'
temp_indices[i] = new Var(std::string("idx") + c10::to_string(i), kInt);
}
@ -2729,7 +2735,7 @@ void LoopNest::computeAt(Stmt* s, For* f) {
std::vector<std::pair<const Var*, const Expr*>> rewrite_indices_map;
std::vector<const Expr*> offsets;
for (const TensorAccessBoundsInfo& p : bounds_it->second) {
for (size_t i = 0; i < p.start.size(); i++) {
for (const auto i : c10::irange(p.start.size())) {
if (offsets.size() <= i) {
offsets.push_back(p.start[i]);
} else {
@ -2739,7 +2745,7 @@ void LoopNest::computeAt(Stmt* s, For* f) {
}
}
for (size_t i = 0; i < prod_indices.size(); i++) {
for (const auto i : c10::irange(prod_indices.size())) {
rewrite_indices_map.push_back(
{prod_indices[i], new Add(temp_indices[i], offsets[i])});
}
@ -2749,7 +2755,7 @@ void LoopNest::computeAt(Stmt* s, For* f) {
temp_buf, temp_indices, Substitute(st->value(), rewrite_indices_map));
// Construct the loop nest for the temp computation
for (size_t i = 0; i < dims.size(); i++) {
for (const auto i : c10::irange(dims.size())) {
// We're creating loops from innermost to outermost, so we need to access
// dimensions in reversed order.
size_t dim_idx = dims.size() - 1 - i;

View File

@ -1,4 +1,7 @@
#include <torch/csrc/jit/tensorexpr/mem_dependency_checker.h>
#include <c10/util/irange.h>
#include <fstream>
namespace torch {
@ -725,7 +728,7 @@ void MemDependencyChecker::visit(const For* v) {
loopIndicesStride.resize(indices.size());
// index expr must depend on the loop var in some way to have a stride.
for (size_t i = 0; i < indices.size(); i++) {
for (const auto i : c10::irange(indices.size())) {
VarFinder vf;
if (vf.find(indices[i]).count(var) == 0) {
loopIndicesStride[i] = new IntImm(0);

View File

@ -1,6 +1,7 @@
#include <torch/csrc/jit/tensorexpr/tensor.h>
#include <c10/util/Logging.h>
#include <c10/util/irange.h>
#include <torch/csrc/jit/tensorexpr/dim_arg.h>
#include <torch/csrc/jit/tensorexpr/reduction.h>
@ -27,7 +28,7 @@ Stmt* Tensor::constructStmt(
const Expr* init_expr = buf()->initializer();
if (reduce_ndim > 0) {
for (size_t i = 0; i < reduce_ndim; i++) {
for (const auto i : c10::irange(reduce_ndim)) {
// Going in reverse order: from innermost loop to the outermost
size_t dim_index = reduce_ndim - i - 1;
s = new For(
@ -39,7 +40,7 @@ Stmt* Tensor::constructStmt(
}
}
for (size_t i = 0; i < ndim; i++) {
for (const auto i : c10::irange(ndim)) {
// Going in reverse order: from innermost loop to the outermost
size_t dim_index = ndim - i - 1;
s = new For(args[dim_index], new IntImm(0), buf()->dim(dim_index), s);

View File
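The Tensor::constructStmt hunk above keeps a forward irange loop and computes dim_index = ndim - i - 1 in the body, which is the usual way to walk dimensions from innermost to outermost without reintroducing a decrementing unsigned counter. A small sketch of the same idea, where dims is a hypothetical stand-in for the buffer's dimension extents and the include path assumption is as before:

    // Sketch of the reverse-order walk used in Tensor::constructStmt above.
    #include <c10/util/irange.h>   // assumes a PyTorch/c10 checkout on the include path
    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<int> dims{2, 3, 5};  // outermost ... innermost
      const auto ndim = dims.size();
      for (const auto i : c10::irange(ndim)) {
        // Going in reverse order: from the innermost dimension to the outermost.
        const auto dim_index = ndim - i - 1;
        std::printf("building loop over dim %zu (extent %d)\n", dim_index, dims[dim_index]);
      }
    }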

@ -12,16 +12,17 @@
#include <c10/util/Exception.h>
#include <c10/util/Optional.h>
#include <c10/util/StringUtil.h>
#include <c10/util/irange.h>
#include <torch/csrc/WindowsTorchApiMacro.h>
#include <torch/csrc/jit/frontend/source_range.h>
#include <torch/csrc/jit/ir/ir.h>
#include <torch/csrc/jit/testing/file_check.h>
#include <algorithm>
#include <iostream>
#include <sstream>
#include <string>
#include <torch/csrc/jit/ir/ir.h>
#include <torch/csrc/jit/testing/file_check.h>
namespace torch {
namespace jit {

View File

@ -1,10 +1,5 @@
#include <c10/util/irange.h>
#include <torch/csrc/python_headers.h>
#include <cstdarg>
#include <string>
#include <vector>
#include <sstream>
#include <algorithm>
#include <unordered_map>
#include <torch/csrc/THP.h>
#include <torch/csrc/utils/python_strings.h>
#include <torch/csrc/utils/invalid_arguments.h>
@ -32,6 +27,13 @@
#include <torch/csrc/generic/utils.cpp>
#include <TH/THGenerateBoolType.h>
#include <algorithm>
#include <cstdarg>
#include <sstream>
#include <string>
#include <unordered_map>
#include <vector>
int THPUtils_getCallable(PyObject *arg, PyObject **result) {
if (!PyCallable_Check(arg))
return 0;
@ -200,7 +202,7 @@ void THPUtils_invalidArguments(PyObject *given_args, PyObject *given_kwargs,
std::vector<std::string> option_strings;
va_list option_list;
va_start(option_list, num_options);
for (size_t i = 0; i < num_options; i++)
for(const auto i : c10::irange(num_options))
option_strings.emplace_back(va_arg(option_list, const char*));
va_end(option_list);

View File

@ -1,5 +1,7 @@
#include <torch/csrc/utils/byte_order.h>
#include <c10/util/BFloat16.h>
#include <c10/util/irange.h>
#include <cstring>
#include <vector>
@ -121,7 +123,7 @@ THPByteOrder THP_nativeByteOrder()
void THP_decodeInt16Buffer(int16_t* dst, const uint8_t* src, THPByteOrder order, size_t len)
{
for (size_t i = 0; i < len; i++) {
for(const auto i : c10::irange(len)) {
dst[i] = (int16_t)(
order == THP_BIG_ENDIAN ? decodeUInt16BE(src) : decodeUInt16LE(src));
src += sizeof(int16_t);
@ -130,7 +132,7 @@ void THP_decodeInt16Buffer(int16_t* dst, const uint8_t* src, THPByteOrder order,
void THP_decodeInt32Buffer(int32_t* dst, const uint8_t* src, THPByteOrder order, size_t len)
{
for (size_t i = 0; i < len; i++) {
for(const auto i : c10::irange(len)) {
dst[i] = (int32_t)(
order == THP_BIG_ENDIAN ? decodeUInt32BE(src) : decodeUInt32LE(src));
src += sizeof(int32_t);
@ -139,7 +141,7 @@ void THP_decodeInt32Buffer(int32_t* dst, const uint8_t* src, THPByteOrder order,
void THP_decodeInt64Buffer(int64_t* dst, const uint8_t* src, THPByteOrder order, size_t len)
{
for (size_t i = 0; i < len; i++) {
for(const auto i : c10::irange(len)) {
dst[i] = (int64_t)(
order == THP_BIG_ENDIAN ? decodeUInt64BE(src) : decodeUInt64LE(src));
src += sizeof(int64_t);
@ -148,7 +150,7 @@ void THP_decodeInt64Buffer(int64_t* dst, const uint8_t* src, THPByteOrder order,
void THP_decodeHalfBuffer(THHalf* dst, const uint8_t* src, THPByteOrder order, size_t len)
{
for (size_t i = 0; i < len; i++) {
for(const auto i : c10::irange(len)) {
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
union { uint16_t x; THHalf f; };
x = (order == THP_BIG_ENDIAN ? decodeUInt16BE(src) : decodeUInt16LE(src));
@ -159,7 +161,7 @@ void THP_decodeHalfBuffer(THHalf* dst, const uint8_t* src, THPByteOrder order, s
void THP_decodeBFloat16Buffer(at::BFloat16* dst, const uint8_t* src, THPByteOrder order, size_t len)
{
for (size_t i = 0; i < len; i++) {
for(const auto i : c10::irange(len)) {
uint16_t x =
(order == THP_BIG_ENDIAN ? decodeUInt16BE(src) : decodeUInt16LE(src));
std::memcpy(&dst[i], &x, sizeof(dst[i]));
@ -169,14 +171,14 @@ void THP_decodeBFloat16Buffer(at::BFloat16* dst, const uint8_t* src, THPByteOrde
void THP_decodeBoolBuffer(bool* dst, const uint8_t* src, THPByteOrder order, size_t len)
{
for (size_t i = 0; i < len; i++) {
for(const auto i : c10::irange(len)) {
dst[i] = (int)src[i] != 0 ? true : false;
}
}
void THP_decodeFloatBuffer(float* dst, const uint8_t* src, THPByteOrder order, size_t len)
{
for (size_t i = 0; i < len; i++) {
for(const auto i : c10::irange(len)) {
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
union { uint32_t x; float f; };
x = (order == THP_BIG_ENDIAN ? decodeUInt32BE(src) : decodeUInt32LE(src));
@ -187,7 +189,7 @@ void THP_decodeFloatBuffer(float* dst, const uint8_t* src, THPByteOrder order, s
void THP_decodeDoubleBuffer(double* dst, const uint8_t* src, THPByteOrder order, size_t len)
{
for (size_t i = 0; i < len; i++) {
for(const auto i : c10::irange(len)) {
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
union { uint64_t x; double d; };
x = (order == THP_BIG_ENDIAN ? decodeUInt64BE(src) : decodeUInt64LE(src));
@ -198,7 +200,7 @@ void THP_decodeDoubleBuffer(double* dst, const uint8_t* src, THPByteOrder order,
void THP_decodeComplexFloatBuffer(c10::complex<float>* dst, const uint8_t* src, THPByteOrder order, size_t len)
{
for (size_t i = 0; i < len; i++) {
for(const auto i : c10::irange(len)) {
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
union { uint32_t x; float re; };
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
@ -215,7 +217,7 @@ void THP_decodeComplexFloatBuffer(c10::complex<float>* dst, const uint8_t* src,
void THP_decodeComplexDoubleBuffer(c10::complex<double>* dst, const uint8_t* src, THPByteOrder order, size_t len)
{
for (size_t i = 0; i < len; i++) {
for(const auto i : c10::irange(len)) {
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
union { uint32_t x; double re; };
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
@ -234,7 +236,7 @@ void THP_encodeInt16Buffer(uint8_t* dst, const int16_t* src, THPByteOrder order,
{
memcpy(dst, src, sizeof(int16_t) * len);
if (order != THP_nativeByteOrder()) {
for (size_t i = 0; i < len; i++) {
for(const auto i : c10::irange(len)) {
swapBytes16(dst);
dst += sizeof(int16_t);
}
@ -245,7 +247,7 @@ void THP_encodeInt32Buffer(uint8_t* dst, const int32_t* src, THPByteOrder order,
{
memcpy(dst, src, sizeof(int32_t) * len);
if (order != THP_nativeByteOrder()) {
for (size_t i = 0; i < len; i++) {
for(const auto i : c10::irange(len)) {
swapBytes32(dst);
dst += sizeof(int32_t);
}
@ -256,7 +258,7 @@ void THP_encodeInt64Buffer(uint8_t* dst, const int64_t* src, THPByteOrder order,
{
memcpy(dst, src, sizeof(int64_t) * len);
if (order != THP_nativeByteOrder()) {
for (size_t i = 0; i < len; i++) {
for(const auto i : c10::irange(len)) {
swapBytes64(dst);
dst += sizeof(int64_t);
}
@ -267,7 +269,7 @@ void THP_encodeFloatBuffer(uint8_t* dst, const float* src, THPByteOrder order, s
{
memcpy(dst, src, sizeof(float) * len);
if (order != THP_nativeByteOrder()) {
for (size_t i = 0; i < len; i++) {
for(const auto i : c10::irange(len)) {
swapBytes32(dst);
dst += sizeof(float);
}
@ -278,7 +280,7 @@ void THP_encodeDoubleBuffer(uint8_t* dst, const double* src, THPByteOrder order,
{
memcpy(dst, src, sizeof(double) * len);
if (order != THP_nativeByteOrder()) {
for (size_t i = 0; i < len; i++) {
for(const auto i : c10::irange(len)) {
swapBytes64(dst);
dst += sizeof(double);
}
@ -289,7 +291,7 @@ template <typename T>
std::vector<T> complex_to_float(const c10::complex<T>* src, size_t len) {
std::vector<T> new_src;
new_src.reserve(2 * len);
for (size_t i = 0; i < len; i++) {
for(const auto i : c10::irange(len)) {
auto elem = src[i];
new_src.emplace_back(elem.real());
new_src.emplace_back(elem.imag());

View File

@ -251,7 +251,7 @@ std::string _formattedArgDesc(
auto num_args = arguments.size() + kwargs.size();
std::string result = "(";
for (size_t i = 0; i < num_args; i++) {
for(const auto i : c10::irange(num_args)) {
bool is_kwarg = i >= arguments.size();
PyObject *arg = is_kwarg ? kwargs.at(option.arguments[i].name) : arguments[i];

View File

@ -174,7 +174,7 @@ auto combine_self_args(PyObject *self, PyObject *args) -> py::tuple {
size_t n = py_args.size();
auto args_ = py::tuple(n + 1);
args_[0] = py::handle(self);
for (size_t i = 0; i < n; i++) {
for(const auto i : c10::irange(n)) {
args_[i+1] = py_args[i];
}
return args_;
@ -384,8 +384,7 @@ bool is_scalar_list(PyObject* obj) {
}
// NOLINTNEXTLINE(bugprone-branch-clone)
auto size = tuple ? PyTuple_GET_SIZE(obj) : PyList_GET_SIZE(obj);
// NOLINTNEXTLINE(clang-diagnostic-sign-compare)
for (size_t idx = 0; idx < size; idx++) {
for (const auto idx : c10::irange(size)) {
PyObject* iobj = tuple ? PyTuple_GET_ITEM(obj, idx) : PyList_GET_ITEM(obj, idx);
if (!THPUtils_checkScalar(iobj)) {
return false;

View File
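The is_scalar_list hunk above also drops a NOLINTNEXTLINE(clang-diagnostic-sign-compare) suppression: the old loop compared a size_t counter against the signed Py_ssize_t result of PyTuple_GET_SIZE/PyList_GET_SIZE, whereas with irange the index type follows the (signed) bound and the mismatch disappears. A standalone illustration of that effect, where count is a hypothetical stand-in for such a signed size:

    // Illustration of the sign-compare point above; `count` stands in for a
    // signed size such as a Py_ssize_t result.
    #include <c10/util/irange.h>   // assumes a PyTorch/c10 checkout on the include path
    #include <cstdio>

    int main() {
      long count = 4;  // signed, like Py_ssize_t
      // Old form: `for (size_t idx = 0; idx < count; idx++)` mixes unsigned and
      // signed operands and typically needs a -Wsign-compare suppression.
      for (const auto idx : c10::irange(count)) {
        // idx is deduced as long here, so the bound check inside irange is
        // signed-vs-signed and no suppression is required.
        std::printf("%ld\n", idx);
      }
    }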

@ -34,7 +34,7 @@ static void recursive_apply(IntArrayRef sizes, ScalarType scalarType, int64_t di
if (dim == ndim) {
auto args = THPObjectPtr(PyTuple_New(N));
if (!args) throw python_error();
for (size_t i = 0; i < N; i++) {
for(const auto i : c10::irange(N)) {
PyObject* arg = load_scalar(strided_data[i].data, scalarType);
if (!arg) throw python_error();
PyTuple_SET_ITEM(args.get(), i, arg);

View File

@ -74,7 +74,7 @@ static std::vector<npy_intp> to_numpy_shape(IntArrayRef x) {
// shape and stride conversion from int64_t to npy_intp
auto nelem = x.size();
auto result = std::vector<npy_intp>(nelem);
for (size_t i = 0; i < nelem; i++) {
for(const auto i : c10::irange(nelem)) {
result[i] = static_cast<npy_intp>(x[i]);
}
return result;

View File

@ -1,3 +1,4 @@
#include <c10/util/irange.h>
#include <c10d/ProcessGroupGloo.hpp>
#include <c10d/GlooDeviceFactory.hpp>
@ -211,7 +212,7 @@ void band(void* c, const void* a, const void* b, size_t n) {
auto tc = static_cast<T*>(c);
auto ta = static_cast<const T*>(a);
auto tb = static_cast<const T*>(b);
for (size_t i = 0; i < n; i++) {
for(const auto i : c10::irange(n)) {
tc[i] = ta[i] & tb[i];
}
}
@ -224,7 +225,7 @@ void bor(void* c, const void* a, const void* b, size_t n) {
auto tc = static_cast<T*>(c);
auto ta = static_cast<const T*>(a);
auto tb = static_cast<const T*>(b);
for (size_t i = 0; i < n; i++) {
for(const auto i : c10::irange(n)) {
tc[i] = ta[i] | tb[i];
}
}
@ -237,7 +238,7 @@ void bxor(void* c, const void* a, const void* b, size_t n) {
auto tc = static_cast<T*>(c);
auto ta = static_cast<const T*>(a);
auto tb = static_cast<const T*>(b);
for (size_t i = 0; i < n; i++) {
for(const auto i : c10::irange(n)) {
tc[i] = ta[i] ^ tb[i];
}
}
@ -334,7 +335,7 @@ void initializeStreamsEvents(
at::cuda::OptionalCUDAGuard guard;
streams.reserve(tensors.size());
events.resize(tensors.size());
for (size_t i = 0; i < tensors.size(); i++) {
for(const auto i : c10::irange(tensors.size())) {
guard.set_index(tensors[i].device().index());
// Record event on current stream
events[i].record(at::cuda::getCurrentCUDAStream());
@ -390,7 +391,7 @@ void initializeStreamsEvents(
at::cuda::OptionalCUDAGuard guard;
streams.reserve(tensors.size());
events.resize(tensors.size());
for (size_t i = 0; i < tensors.size(); i++) {
for(const auto i : c10::irange(tensors.size())) {
guard.set_index(tensors[i][0].device().index());
// Record event on current stream
events[i].record(at::cuda::getCurrentCUDAStream());
@ -714,7 +715,7 @@ ProcessGroupGloo::ProcessGroupGloo(
// by a single I/O thread.
//
contexts_.reserve(options->devices.size());
for (size_t i = 0; i < options->devices.size(); i++) {
for(const auto i : c10::irange(options->devices.size())) {
auto context = std::make_shared<::gloo::rendezvous::Context>(rank_, size_);
auto store = ::gloo::rendezvous::PrefixStore(std::to_string(i), *store_);
context->setTimeout(options->timeout);
@ -729,7 +730,7 @@ ProcessGroupGloo::ProcessGroupGloo(
workInProgress_.resize(options->threads);
threads_.resize(options->threads);
for (size_t i = 0; i < threads_.size(); i++) {
for(const auto i : c10::irange(threads_.size())) {
threads_[i] = std::thread(&ProcessGroupGloo::runLoop, this, i);
}
}
@ -834,7 +835,7 @@ class AsyncBroadcastWork : public ProcessGroupGloo::AsyncWork {
broadcast(inputs[rootTensor]);
// Copy to non-root tensors
for (size_t i = 0; i < inputs.size(); i++) {
for(const auto i : c10::irange(inputs.size())) {
if (i == static_cast<size_t>(rootTensor)) {
continue;
}
@ -878,7 +879,7 @@ class AsyncBroadcastCUDAWork : public AsyncBroadcastWork {
broadcast(tmp);
// Kick off copy back to the CUDA tensors.
for (size_t i = 0; i < inputs.size(); i++) {
for(const auto i : c10::irange(inputs.size())) {
guard.reset_stream(streams[i]);
inputs[i].copy_(tmp, /* non_blocking */ true);
events[i].record(streams[i]);
@ -889,7 +890,7 @@ class AsyncBroadcastCUDAWork : public AsyncBroadcastWork {
at::cuda::OptionalCUDAGuard guard;
// Synchronize with the copy back to CUDA tensors.
for (size_t i = 0; i < inputs.size(); i++) {
for(const auto i : c10::irange(inputs.size())) {
guard.set_index(inputs[i].device().index());
events[i].block(at::cuda::getCurrentCUDAStream());
}
@ -1210,7 +1211,7 @@ class AsyncSparseAllreduceWork : public ProcessGroupGloo::AsyncWork {
std::vector<size_t> counts(context->size);
int64_t totalSize = 0;
for (size_t i = 0; i < metadata.size(); i++) {
for(const auto i : c10::irange(metadata.size())) {
counts[i] = metadata[i].nnz() * sparseDim;
totalSize += counts[i];
}
@ -1255,7 +1256,7 @@ class AsyncSparseAllreduceWork : public ProcessGroupGloo::AsyncWork {
std::vector<size_t> counts(context->size);
int64_t totalSize = 0;
for (size_t i = 0; i < metadata.size(); i++) {
for(const auto i : c10::irange(metadata.size())) {
counts[i] = metadata[i].nnz() * denseNumel;
totalSize += counts[i];
}
@ -1308,7 +1309,7 @@ class AsyncAllreduceCUDAWork : public AsyncAllreduceWork {
// Kick off copy from CUDA tensors to pinned CPU tensors.
tmp.reserve(inputs.size());
at::cuda::OptionalCUDAStreamGuard guard;
for (size_t i = 0; i < inputs.size(); i++) {
for(const auto i : c10::irange(inputs.size())) {
guard.reset_stream(streams[i]);
tmp.push_back(pinnedLike(inputs[i]).copy_(inputs[i], true));
}
@ -1317,7 +1318,7 @@ class AsyncAllreduceCUDAWork : public AsyncAllreduceWork {
void run() override {
// Synchronize with copy operations.
at::cuda::OptionalCUDAGuard device_guard;
for (size_t i = 0; i < inputs.size(); i++) {
for(const auto i : c10::irange(inputs.size())) {
device_guard.set_index(inputs[i].device().index());
AT_CUDA_CHECK(cudaStreamSynchronize(streams[i]));
}
@ -1326,7 +1327,7 @@ class AsyncAllreduceCUDAWork : public AsyncAllreduceWork {
allreduce(tmp);
at::cuda::OptionalCUDAStreamGuard stream_guard;
for (size_t i = 0; i < inputs.size(); i++) {
for(const auto i : c10::irange(inputs.size())) {
stream_guard.reset_stream(streams[i]);
inputs[i].copy_(tmp[i], /* non_blocking */ true);
events[i].record(streams[i]);
@ -1336,7 +1337,7 @@ class AsyncAllreduceCUDAWork : public AsyncAllreduceWork {
void synchronize() override {
// Synchronize with the copy back to CUDA tensors.
at::cuda::OptionalCUDAGuard guard;
for (size_t i = 0; i < inputs.size(); i++) {
for(const auto i : c10::irange(inputs.size())) {
guard.set_index(inputs[i].device().index());
events[i].block(at::cuda::getCurrentCUDAStream());
}
@ -1361,7 +1362,7 @@ class AsyncSparseAllreduceCUDAWork : public AsyncSparseAllreduceWork {
// memory must be performed asynchronously, or we block the caller.
tmp.reserve(inputs.size());
at::cuda::OptionalCUDAStreamGuard guard;
for (size_t i = 0; i < inputs.size(); i++) {
for(const auto i : c10::irange(inputs.size())) {
guard.reset_stream(streams[i]);
tmp.push_back(
inputs[i].coalesce().to(at::DeviceType::CPU, /*non_blocking=*/true));
@ -1371,7 +1372,7 @@ class AsyncSparseAllreduceCUDAWork : public AsyncSparseAllreduceWork {
void run() override {
// Synchronize with copy operations.
at::cuda::OptionalCUDAGuard device_guard;
for (size_t i = 0; i < inputs.size(); i++) {
for(const auto i : c10::irange(inputs.size())) {
device_guard.set_index(inputs[i].device().index());
AT_CUDA_CHECK(cudaStreamSynchronize(streams[i]));
}
@ -1381,7 +1382,7 @@ class AsyncSparseAllreduceCUDAWork : public AsyncSparseAllreduceWork {
// Kick off copy back to the CUDA tensors.
at::cuda::OptionalCUDAStreamGuard stream_guard;
for (size_t i = 0; i < inputs.size(); i++) {
for(const auto i : c10::irange(inputs.size())) {
stream_guard.reset_stream(streams[i]);
inputs[i].copy_(output, /*non_blocking=*/true);
events[i].record(streams[i]);
@ -1391,7 +1392,7 @@ class AsyncSparseAllreduceCUDAWork : public AsyncSparseAllreduceWork {
void synchronize() override {
// Synchronize with the copy back to CUDA tensors.
at::cuda::OptionalCUDAGuard guard;
for (size_t i = 0; i < inputs.size(); i++) {
for(const auto i : c10::irange(inputs.size())) {
guard.set_index(inputs[i].device().index());
events[i].block(at::cuda::getCurrentCUDAStream());
}
@ -1600,7 +1601,7 @@ class AsyncReduceCUDAWork : public AsyncReduceWork {
// Kick off copy from CUDA tensors to pinned CPU tensors.
tmp.reserve(inputs.size());
at::cuda::OptionalCUDAStreamGuard guard;
for (size_t i = 0; i < inputs.size(); i++) {
for(const auto i : c10::irange(inputs.size())) {
guard.reset_stream(streams[i]);
tmp.push_back(pinnedLike(inputs[i]).copy_(inputs[i], true));
}
@ -1609,7 +1610,7 @@ class AsyncReduceCUDAWork : public AsyncReduceWork {
void run() override {
// Synchronize with copy operations.
at::cuda::OptionalCUDAGuard device_guard;
for (size_t i = 0; i < inputs.size(); i++) {
for(const auto i : c10::irange(inputs.size())) {
device_guard.set_index(inputs[i].device().index());
AT_CUDA_CHECK(cudaStreamSynchronize(streams[i]));
}
@ -1619,7 +1620,7 @@ class AsyncReduceCUDAWork : public AsyncReduceWork {
// Kick off copy back to the CUDA tensors.
at::cuda::OptionalCUDAStreamGuard stream_guard;
for (size_t i = 0; i < inputs.size(); i++) {
for(const auto i : c10::irange(inputs.size())) {
stream_guard.reset_stream(streams[i]);
inputs[i].copy_(tmp[i], /* non_blocking */ true);
events[i].record(streams[i]);
@ -1629,7 +1630,7 @@ class AsyncReduceCUDAWork : public AsyncReduceWork {
void synchronize() override {
// Synchronize with the copy back to CUDA tensors.
at::cuda::OptionalCUDAGuard guard;
for (size_t i = 0; i < inputs.size(); i++) {
for(const auto i : c10::irange(inputs.size())) {
guard.set_index(inputs[i].device().index());
events[i].block(at::cuda::getCurrentCUDAStream());
}
@ -1764,15 +1765,15 @@ class AsyncAllgatherCUDAWork : public AsyncAllgatherWork {
// Kick off copy from CUDA tensors to pinned CPU tensors.
tmpInputs.reserve(inputs.size());
at::cuda::OptionalCUDAStreamGuard guard;
for (size_t i = 0; i < inputs.size(); i++) {
for(const auto i : c10::irange(inputs.size())) {
guard.reset_stream(inputStreams[i]);
tmpInputs.push_back(pinnedLike(inputs[i]).copy_(inputs[i], true));
}
tmpOutputs.resize(outputs.size());
for (size_t i = 0; i < outputs.size(); i++) {
for(const auto i : c10::irange(outputs.size())) {
tmpOutputs[i].reserve(outputs[i].size());
for (size_t j = 0; j < outputs[i].size(); j++) {
for(const auto j : c10::irange(outputs[i].size())) {
tmpOutputs[i].push_back(pinnedLike(outputs[i][j]));
}
}
@ -1781,12 +1782,12 @@ class AsyncAllgatherCUDAWork : public AsyncAllgatherWork {
void run() override {
// Synchronize with copy operations.
at::cuda::OptionalCUDAGuard device_guard;
for (size_t i = 0; i < inputs.size(); i++) {
for(const auto i : c10::irange(inputs.size())) {
device_guard.set_index(inputs[i].device().index());
AT_CUDA_CHECK(cudaStreamSynchronize(inputStreams[i]));
}
for (size_t i = 0; i < outputs.size(); i++) {
for(const auto i : c10::irange(outputs.size())) {
device_guard.set_index(outputs[i][0].device().index());
AT_CUDA_CHECK(cudaStreamSynchronize(outputStreams[i]));
}
@ -1796,9 +1797,9 @@ class AsyncAllgatherCUDAWork : public AsyncAllgatherWork {
// Kick off copy back to the CUDA tensors.
at::cuda::OptionalCUDAStreamGuard stream_guard;
for (size_t i = 0; i < outputs.size(); i++) {
for(const auto i : c10::irange(outputs.size())) {
stream_guard.reset_stream(outputStreams[i]);
for (size_t j = 0; j < outputs[i].size(); j++) {
for(const auto j : c10::irange(outputs[i].size())) {
outputs[i][j].copy_(tmpOutputs[i][j], /* non_blocking */ true);
}
outputEvents[i].record(outputStreams[i]);
@ -1808,7 +1809,7 @@ class AsyncAllgatherCUDAWork : public AsyncAllgatherWork {
void synchronize() override {
// Synchronize with the copy back to CUDA tensors.
at::cuda::OptionalCUDAGuard guard;
for (size_t i = 0; i < outputs.size(); i++) {
for(const auto i : c10::irange(outputs.size())) {
guard.set_index(outputs[i][0].device().index());
outputEvents[i].block(at::cuda::getCurrentCUDAStream());
}
@ -1846,7 +1847,7 @@ c10::intrusive_ptr<ProcessGroup::Work> ProcessGroupGloo::allgather(
"requires input/output tensor lists to have the same length");
}
for (size_t i = 0; i < outputs.size(); i++) {
for(const auto i : c10::irange(outputs.size())) {
const auto expected = inputs.size() * getSize();
const auto actual = outputs[i].size();
if (actual != expected) {
@ -2073,7 +2074,7 @@ class AsyncGatherWork : public ProcessGroupGloo::AsyncWork {
// Unflatten into output tensors on root process.
if (context->rank == root) {
for (size_t i = 0; i < outputs[0].size(); i++) {
for(const auto i : c10::irange(outputs[0].size())) {
outputs[0][i].copy_(flatOutputTensor[i]);
}
}
@ -2106,15 +2107,15 @@ class AsyncGatherCUDAWork : public AsyncGatherWork {
// Kick off copy from CUDA tensors to pinned CPU tensors.
tmpInputs.reserve(inputs.size());
at::cuda::OptionalCUDAStreamGuard guard;
for (size_t i = 0; i < inputs.size(); i++) {
for(const auto i : c10::irange(inputs.size())) {
guard.reset_stream(inputStreams[i]);
tmpInputs.push_back(pinnedLike(inputs[i]).copy_(inputs[i], true));
}
tmpOutputs.resize(outputs.size());
for (size_t i = 0; i < outputs.size(); i++) {
for(const auto i : c10::irange(outputs.size())) {
tmpOutputs[i].reserve(outputs[i].size());
for (size_t j = 0; j < outputs[i].size(); j++) {
for(const auto j : c10::irange(outputs[i].size())) {
tmpOutputs[i].push_back(pinnedLike(outputs[i][j]));
}
}
@ -2123,12 +2124,12 @@ class AsyncGatherCUDAWork : public AsyncGatherWork {
void run() override {
// Synchronize with copy operations.
at::cuda::OptionalCUDAGuard device_guard;
for (size_t i = 0; i < inputs.size(); i++) {
for(const auto i : c10::irange(inputs.size())) {
device_guard.set_index(inputs[i].get_device());
AT_CUDA_CHECK(cudaStreamSynchronize(inputStreams[i]));
}
for (size_t i = 0; i < outputs.size(); i++) {
for(const auto i : c10::irange(outputs.size())) {
device_guard.set_index(outputs[i][0].get_device());
AT_CUDA_CHECK(cudaStreamSynchronize(outputStreams[i]));
}
@ -2138,9 +2139,9 @@ class AsyncGatherCUDAWork : public AsyncGatherWork {
// Kick off copy back to the CUDA tensors.
at::cuda::OptionalCUDAStreamGuard stream_guard;
for (size_t i = 0; i < outputs.size(); i++) {
for(const auto i : c10::irange(outputs.size())) {
stream_guard.reset_stream(outputStreams[i]);
for (size_t j = 0; j < outputs[i].size(); j++) {
for(const auto j : c10::irange(outputs[i].size())) {
outputs[i][j].copy_(tmpOutputs[i][j], /* non_blocking */ true);
}
outputEvents[i].record(outputStreams[i]);
@ -2150,7 +2151,7 @@ class AsyncGatherCUDAWork : public AsyncGatherWork {
void synchronize() override {
// Synchronize with the copy back to CUDA tensors.
at::cuda::OptionalCUDAGuard guard;
for (size_t i = 0; i < outputs.size(); i++) {
for(const auto i : c10::irange(outputs.size())) {
guard.set_index(static_cast<at::DeviceIndex>(outputs[i][0].get_device()));
outputEvents[i].block(at::cuda::getCurrentCUDAStream());
}
@ -2301,10 +2302,10 @@ class AsyncScatterCUDAWork : public AsyncScatterWork {
// Kick off copy from CUDA tensors to pinned CPU tensors.
tmpInputs.resize(inputs.size());
at::cuda::OptionalCUDAStreamGuard guard;
for (size_t i = 0; i < inputs.size(); i++) {
for(const auto i : c10::irange(inputs.size())) {
guard.reset_stream(inputStreams[i]);
tmpInputs[i].reserve(inputs[i].size());
for (size_t j = 0; j < inputs[i].size(); j++) {
for(const auto j : c10::irange(inputs[i].size())) {
tmpInputs[i].push_back(
pinnedLike(inputs[i][j]).copy_(inputs[i][j], true));
}
@ -2319,11 +2320,11 @@ class AsyncScatterCUDAWork : public AsyncScatterWork {
void run() override {
// Synchronize with copy operations.
at::cuda::OptionalCUDAGuard device_guard;
for (size_t i = 0; i < inputs.size(); i++) {
for(const auto i : c10::irange(inputs.size())) {
device_guard.set_index(inputs[i][0].get_device());
AT_CUDA_CHECK(cudaStreamSynchronize(inputStreams[i]));
}
for (size_t i = 0; i < outputs.size(); i++) {
for(const auto i : c10::irange(outputs.size())) {
device_guard.set_index(outputs[i].get_device());
AT_CUDA_CHECK(cudaStreamSynchronize(outputStreams[i]));
}
@ -2333,7 +2334,7 @@ class AsyncScatterCUDAWork : public AsyncScatterWork {
// Kick off copy back to the CUDA tensors.
at::cuda::OptionalCUDAStreamGuard stream_guard;
for (size_t i = 0; i < outputs.size(); i++) {
for(const auto i : c10::irange(outputs.size())) {
stream_guard.reset_stream(outputStreams[i]);
outputs[i].copy_(tmpOutputs[i], /* non_blocking */ true);
outputEvents[i].record(outputStreams[i]);
@ -2343,7 +2344,7 @@ class AsyncScatterCUDAWork : public AsyncScatterWork {
void synchronize() override {
// Synchronize with the copy back to CUDA tensors.
at::cuda::OptionalCUDAGuard guard;
for (size_t i = 0; i < outputs.size(); i++) {
for(const auto i : c10::irange(outputs.size())) {
guard.set_index(static_cast<at::DeviceIndex>(outputs[i].get_device()));
outputEvents[i].block(at::cuda::getCurrentCUDAStream());
}

View File

@ -1,3 +1,4 @@
#include <c10/util/irange.h>
#include <c10/util/Optional.h>
#include <c10d/ProcessGroupNCCL.hpp>
@ -162,7 +163,7 @@ void syncStreams(
std::string buildNcclUniqueIdStr(const ncclUniqueId& ncclID) {
const uint8_t* bytes = reinterpret_cast<const uint8_t*>(&ncclID);
std::ostringstream oss;
for (size_t i = 0; i < NCCL_UNIQUE_ID_BYTES; i++) {
for(const auto i : c10::irange(NCCL_UNIQUE_ID_BYTES)) {
oss << std::hex << static_cast<int>(bytes[i]);
}
return oss.str();
@ -1696,7 +1697,7 @@ c10::intrusive_ptr<ProcessGroup::Work> ProcessGroupNCCL::alltoall(
std::vector<at::Tensor>& inputTensors,
const AllToAllOptions& /* unused */) {
auto device = outputTensors[0].device();
for (size_t r = 0; r < outputTensors.size(); r++) {
for(const auto r : c10::irange(outputTensors.size())) {
check_gpu_single_tensor(outputTensors[r]);
check_gpu_single_tensor(inputTensors[r]);
TORCH_CHECK(

Some files were not shown because too many files have changed in this diff.