Mirror of https://github.com/pytorch/pytorch.git, synced 2025-10-21 05:34:18 +08:00
Revert D31705359: use irange for loops 8

Test Plan: revert-hammer

Differential Revision: D31705359 (17e5200441)

Original commit changeset: c9ea2fbc0f9c

fbshipit-source-id: 08fff2d12beca953ad30dd0baabf86e39ac84f14

committed by: Facebook GitHub Bot
parent: 97750e03a4
commit: f587267dc7
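The diff below is one mechanical change repeated across many files: each hunk removes a range-based `c10::irange` loop and restores the explicit index loop it had replaced. For context, here is a minimal sketch (not part of the commit) of the two equivalent styles, assuming only `<c10/util/irange.h>` from the PyTorch tree:

```cpp
#include <c10/util/irange.h>

#include <vector>

// Both loops visit i = 0, 1, ..., v.size() - 1 and scale each element.
void scale(std::vector<float>& v, float s) {
  // The style this commit removes: c10::irange(n) is a lazy half-open
  // range [0, n) whose index type is deduced from n (std::size_t here),
  // so no signed/unsigned comparison ever happens.
  for (const auto i : c10::irange(v.size())) {
    v[i] *= s;
  }

  // The style this commit restores: a plain index loop. Comparing a
  // signed int against the unsigned v.size() triggers -Wsign-compare,
  // which is why many of the restored loops in this diff sit under a
  // NOLINTNEXTLINE(clang-diagnostic-sign-compare) comment.
  // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
  for (int i = 0; i < v.size(); ++i) {
    v[i] *= s;
  }
}
```

`c10::irange` also has a two-argument form covering `[start, end)`, which is why hunks such as the one in SumOp revert `c10::irange(1, InputSize())` back to `for (int i = 1; i < InputSize(); ++i)`.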
@@ -97,7 +97,7 @@ class ReshapeOp : public Operator<Context> {
     }

     int unknown_idx = -1;
-    for (const auto i : c10::irange(actual_new_shape.size())) {
+    for (int i = 0; i < actual_new_shape.size(); ++i) {
       const auto dim = actual_new_shape[i];
       if (dim == -1) {
         CAFFE_ENFORCE(
@@ -153,7 +153,7 @@ class ReshapeOp : public Operator<Context> {
     old_shape->Resize(input.sizes().size());
     T* old_shape_data = old_shape->template mutable_data<T>();
     std::vector<T> old_shape_vector(input.sizes().begin(), input.sizes().end());
-    for (const auto i : c10::irange(old_shape_vector.size())) {
+    for (int i = 0; i < old_shape_vector.size(); ++i) {
       old_shape_data[i] = old_shape_vector[i];
     }
@@ -62,7 +62,7 @@ class ReversePackedSegsOp final : public Operator<Context> {
     context_.FinishDeviceComputation();

     T* rev_data_ptr = output->template mutable_data<T>();
-    for (const auto i : c10::irange(batch_size)) {
+    for (int64_t i = 0; i < batch_size; i++) {
       const auto& seg_length = lengths_host[i];
       CAFFE_ENFORCE_LE(seg_length, max_length);
       int64_t j = 0;
@@ -32,7 +32,7 @@ class RecurrentNetworkBlobFetcherOp final : public Operator<Context> {
     std::vector<std::string> blob_names_vector = {};

     // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
-    for (const auto i : c10::irange(stepWorkspaces.size())) {
+    for (int64_t i = 0; i < stepWorkspaces.size(); i++) {
       Workspace* currentStepWorkspace = stepWorkspaces[i].get();
       std::vector<std::string> blob_names = currentStepWorkspace->LocalBlobs();

@@ -38,7 +38,7 @@ class RecurrentNetworkExecutorBase {
         recurrent_input_map_(recurrent_input_map),
         timestep_blob_(timestep_blob) {
     const bool net_def_has_device_option = step_net_def_.has_device_option();
-    for (const auto i : c10::irange(step_net_def_.op_size())) {
+    for (int i = 0; i < step_net_def_.op_size(); i++) {
       if (net_def_has_device_option) {
         // In the case when net def specifies device option, final device option
         // will be equal to merge of operator and net def device options, with
@@ -86,7 +86,7 @@ class RecurrentNetworkExecutorBase {
     for (auto& rnn_op : timestep_ops_template_) {
       rnn_op.has_timestep_blob = false;
       const OperatorDef& op = step_net_def_.op(rnn_op.order);
-      for (const auto i : c10::irange(op.input_size())) {
+      for (int i = 0; i < op.input_size(); i++) {
        if (op.input(i) == timestep_blob_) {
          rnn_op.has_timestep_blob = true;
          break;
@@ -137,7 +137,7 @@ class RecurrentNetworkExecutorBase {
       if (rnn_op.has_timestep_blob) {
         OperatorDef op_copy = step_net_def_.op(rnn_op.order);

-        for (const auto i : c10::irange(op_copy.input_size())) {
+        for (int i = 0; i < op_copy.input_size(); i++) {
           if (op_copy.input(i) == timestep_blob_) {
             op_copy.set_input(i, this_timestep_blob);
           }
@@ -283,7 +283,7 @@ class RecurrentNetworkExecutorBase {
       int opidx,
       std::vector<RNNNetOperator>& rnn_ops,
       std::unordered_set<int>* dep_ops) {
-    for (const auto i : c10::irange(rnn_ops.size())) {
+    for (int i = 0; i < rnn_ops.size(); i++) {
       if (i == opidx) {
         continue;
       }
@@ -315,7 +315,7 @@ class RecurrentNetworkExecutorBase {
    * for each timestep.
    */
   void CalculateInternalDependencies() {
-    for (const auto i : c10::irange(step_net_def_.op_size())) {
+    for (int i = 0; i < step_net_def_.op_size(); i++) {
       timestep_ops_template_.push_back(RNNNetOperator(step_net_def_.op(i), i));
     }
     // Then see which outputs appear as inputs, and those are
@@ -103,7 +103,7 @@ void repeatCopy(
     T* dst,
     Context* context) {
   // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
-  for (const auto i : c10::irange(repeat_n)) {
+  for (int i = 0; i < repeat_n; ++i) {
     context->template CopySameDevice<T>(n, src, dst + i * n);
   }
 }
@@ -228,7 +228,7 @@ class RecurrentNetworkOp final : public Operator<Context> {
     CAFFE_ENFORCE_EQ(states.size(), inputs.size(), "states/inputs mismatch");
     std::vector<detail::RecurrentInput> ris;
     // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
-    for (const auto i : c10::irange(states.size())) {
+    for (auto i = 0; i < states.size(); ++i) {
       // States need to be "global" (since they are shared between
       // forward and backward).
       sharedWs->CreateBlob(states[i]);
@@ -254,7 +254,7 @@ class RecurrentNetworkOp final : public Operator<Context> {
         dst.size() == offset.size(), "alias_dst/alias_offset mismatch");
     std::vector<detail::OffsetAlias> aliases;
     // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
-    for (const auto i : c10::irange(src.size())) {
+    for (auto i = 0; i < src.size(); ++i) {
       detail::OffsetAlias oc;
       oc.src = src[i];
       oc.dst = dst[i];
@@ -343,7 +343,7 @@ class RecurrentNetworkOp final : public Operator<Context> {
       stepWorkspaces.resize(num_workspaces_on_fwd_only);
     }

-    for (const auto t : c10::irange(seqLen)) {
+    for (auto t = 0; t < seqLen; ++t) {
       auto& currentStepWorkspace =
           (has_backward_pass ? stepWorkspaces[t] :
               stepWorkspaces[t % num_workspaces_on_fwd_only]);
@@ -472,7 +472,7 @@ class RecurrentNetworkGradientOp final : public Operator<Context> {
   }

   void renameOpInputOutput(std::string from_name, std::string to_name) {
-    for (const auto j : c10::irange(stepNetDef_.op_size())) {
+    for (int j = 0; j < stepNetDef_.op_size(); j++) {
       auto* op = stepNetDef_.mutable_op(j);
       for (int i = 0; i < op->input_size(); i++) {
         if (op->input(i) == from_name) {
@@ -498,7 +498,7 @@ class RecurrentNetworkGradientOp final : public Operator<Context> {
         " != ",
         param_grads.size());
     // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
-    for (const auto i : c10::irange(param.size())) {
+    for (int i = 0; i < param.size(); ++i) {
       detail::Param p;
       // Forward inputs come after [outputs_with_grads] gradient inputs
       p.param = operator_def.input(param[i] + gradInputs_.size());
@@ -526,17 +526,17 @@ class RecurrentNetworkGradientOp final : public Operator<Context> {
         this->template GetRepeatedArgument<int32_t>("alias_offset");

     // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
-    for (const auto i : c10::irange(recurrent.size())) {
+    for (auto i = 0; i < recurrent.size(); ++i) {
       detail::RecurrentGradient rg;
       rg.param = recurrent[i];
       rg.grad = remappedName(recurrent[i] + "_grad");

-      for (const auto j : c10::irange(alias_src.size())) {
+      for (int j = 0; j < alias_src.size(); ++j) {
         if (alias_src[j] != recurrent[i]) {
           continue;
         }
         int idx = -1;
-        for (const auto k : c10::irange(gradInputs_.size())) {
+        for (int k = 0; k < gradInputs_.size(); ++k) {
           if (gradInputs_[k] == j) {
             idx = k;
           }
@@ -575,7 +575,7 @@ class RecurrentNetworkGradientOp final : public Operator<Context> {
         "",
         &links);
     // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
-    for (const auto i : c10::irange(links.size())) {
+    for (int i = 0; i < links.size(); i++) {
       links[i] = remappedLink(links[i]);
     }
     return links;
@@ -715,7 +715,7 @@ class RecurrentNetworkGradientOp final : public Operator<Context> {
     // This code assumes that there are several inputs
     // sequences. Actually it is not supported by the rest of the code,
     // and numSequences_ is a constant, equal to 1.
-    for (const auto i : c10::irange(numSequences_)) {
+    for (int i = 0; i < numSequences_; ++i) {
       // Offseting as the first gradInputs_.size() inputs of the op
       // are from GO. Then all I(0..N).
       const int gradientInputIndex = i + gradInputs_.size();
@@ -790,7 +790,7 @@ class RecurrentNetworkGradientOp final : public Operator<Context> {

     CAFFE_ENFORCE_EQ(recurrentInputIds_.size(), recurrentGradients_.size());
     // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
-    for (const auto i : c10::irange(recurrentInputIds_.size())) {
+    for (int i = 0; i < recurrentInputIds_.size(); ++i) {
       // See GetRecurrentNetworkGradient to understand offseting here
       // Outputs of the gradient are inputs of the forward pass.
       // So we need to offset on all inputs that go before recurrent
@@ -32,9 +32,9 @@ class RowMulOp : public Operator<Context> {
         "Length of w should be equal to the first dim of mat");

     auto block_size = mat.size_from_dim(1);
-    for (const auto i : c10::irange(w.numel())) {
+    for (int i = 0; i < w.numel(); i++) {
       size_t offset = i * block_size;
-      for (const auto j : c10::irange(block_size)) {
+      for (int j = 0; j < block_size; j++) {
         output_data[offset + j] = mat_data[offset + j] * w_data[i];
       }
     }
@@ -60,10 +60,10 @@ class ReduceTailSumOp : public Operator<Context> {
     T* output_data = output->template mutable_data<T>();
     const T* mat_data = mat.template data<T>();

-    for (const auto i : c10::irange(N)) {
+    for (int i = 0; i < N; i++) {
       output_data[i] = 0;
       size_t offset = i * block_size;
-      for (const auto j : c10::irange(block_size)) {
+      for (int j = 0; j < block_size; j++) {
         output_data[i] += mat_data[offset + j];
       }
     }
@@ -20,7 +20,7 @@ class ScaleBlobsOp final : public Operator<Context> {
   bool DoRunWithType() {
     int batchSize = InputSize();

-    for (const auto i : c10::irange(batchSize)) {
+    for (int i = 0; i < batchSize; ++i) {
       const auto& X = Input(i);
       auto* Y = Output(i, X.sizes(), at::dtype<T>());
       math::Scale<float, T, Context>(
@@ -34,7 +34,7 @@ class ScaleBlobsOp final : public Operator<Context> {
   }

   bool RunOnDevice() override {
-    for (const auto i : c10::irange(InputSize())) {
+    for (int i = 0; i < InputSize(); ++i) {
       auto& input = this->template Input<Tensor>(i, CPU);
       auto* output = this->template Output<Tensor>(i, CPU);
       output->ResizeLike(input);
@@ -2,7 +2,6 @@
 #define CAFFE2_OPERATORS_SEGMENT_REDUCTION_OP_H_

 #include "caffe2/core/export_caffe2_op_to_c10.h"
-#include <c10/util/irange.h>
 #include "caffe2/core/context.h"
 #include "caffe2/core/logging.h"
 #include "caffe2/core/operator.h"
@@ -336,7 +335,7 @@ class AbstractReduceFrontOrBackOp : public Operator<Context> {
     const int num_blocks = block_size > 0 ? data.numel() / block_size : 0;

     Reducer r(ctx, out, &context_);
-    for (const auto i : c10::irange(num_blocks)) {
+    for (int64_t i = 0; i < num_blocks; ++i) {
       r.template process<FixedSize>(
           ctx, inputAccessor_.getBlockPtr(block_size, i), i, &context_);
     }
@@ -407,7 +406,7 @@ class AbstractReduceFrontOrBackGradientOp : public Operator<Context> {
     T* out = data_grads->template mutable_data<T>();

     ReducerGradient r(ctx, r_grad, &context_);
-    for (const auto i : c10::irange(block_num)) {
+    for (int64_t i = 0; i < block_num; ++i) {
       r.template fillGrad<FixedSize>(
           ctx,
           out + block_size * i,
@@ -1071,7 +1070,7 @@ class AbstractUnsortedSegmentOp : public Operator<Context> {
       K = num_segments_;
     } else {
       K = 0;
-      for (const auto i : c10::irange(N)) {
+      for (int64_t i = 0; i < N; ++i) {
         K = std::max(K, s_ids[i] + 1);
       }
     }
@@ -1087,11 +1086,11 @@ class AbstractUnsortedSegmentOp : public Operator<Context> {

     reducers_.clear();
     reducers_.reserve(K);
-    for (const auto i : c10::irange(K)) {
+    for (int64_t i = 0; i < K; ++i) {
       reducers_.emplace_back(ctx, out + out_block_size * i, &context_);
     }

-    for (const auto i : c10::irange(N)) {
+    for (int64_t i = 0; i < N; ++i) {
       auto s_id = s_ids[i];
       CAFFE_ENFORCE(
           0 <= s_id && s_id < K,
@@ -1115,7 +1114,7 @@ class AbstractUnsortedSegmentOp : public Operator<Context> {
           ctx, inputAccessor_.getBlockPtr(in_block_size, idx), i, &context_);
     }

-    for (const auto i : c10::irange(K)) {
+    for (int64_t i = 0; i < K; ++i) {
       reducers_[i].template finish<FixedSize>(ctx, &context_);
     }
     // call reducers destructors (if there is any)
@@ -1189,7 +1188,7 @@ class AbstractUnsortedSegmentGradientOp : public Operator<Context> {

     if (ReducerGradient::computeLength()) {
       segment_length_.resize(K, 0);
-      for (const auto i : c10::irange(N)) {
+      for (int i = 0; i < N; ++i) {
         auto s_id = s_ids[i];
         CAFFE_ENFORCE(
             0 <= s_id && s_id < K,
@@ -1207,7 +1206,7 @@ class AbstractUnsortedSegmentGradientOp : public Operator<Context> {
       reducers_.emplace_back(ctx, s_grads + s_block_size * i, &context_);
     }

-    for (const auto i : c10::irange(N)) {
+    for (int64_t i = 0; i < N; ++i) {
       auto s_id = s_ids[i];
       if (ReducerGradient::computeLength()) {
         reducers_[s_id].template fillGrad<FixedSize>(
@@ -1463,7 +1462,7 @@ class AbstractLengthsOp : public Operator<Context> {
     TData* out = output->template mutable_data<TData>();

     int64_t dataIndex = 0;
-    for (const auto rangeIndex : c10::irange(outputSize)) {
+    for (int64_t rangeIndex = 0; rangeIndex < outputSize; ++rangeIndex) {
       Reducer reducer(ctx, out + out_block_size * rangeIndex, &context_);
       for (int64_t start = dataIndex; dataIndex < start + lengths[rangeIndex];
            ++dataIndex) {
@@ -1552,7 +1551,7 @@ class AbstractLengthsGradientOp : public Operator<Context> {
     CAFFE_ENFORCE(segmentGradsInput.dim() > 0);
     CAFFE_ENFORCE(numSegments == segmentGradsInput.size(0));
     const TLengths* lengths = lengthsInput.template data<TLengths>();
-    for (const auto i : c10::irange(numSegments)) {
+    for (int64_t i = 0; i < numSegments; ++i) {
       reducedDataSize += lengths[i];
     }

@@ -1581,7 +1580,7 @@ class AbstractLengthsGradientOp : public Operator<Context> {
     T* dataGrads = dataGradsOutput->template mutable_data<T>();

     int64_t dataIndex = 0;
-    for (const auto rangeIndex : c10::irange(numSegments)) {
+    for (int64_t rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) {
       ReducerGradient reducer(
           ctx, segmentGrads + segmentBlockSize * rangeIndex, &context_);
       for (int64_t start = dataIndex; dataIndex < start + lengths[rangeIndex];
@@ -1691,7 +1690,7 @@ class AbstractLengthsWithMainInputGradientOp : public Operator<Context> {

     const Tembedding* data = dataInput.template data<Tembedding>();
     int64_t dataIndex = 0;
-    for (const auto rangeIndex : c10::irange(numSegments)) {
+    for (int64_t rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) {
       ReducerGradient reducer(
           ctx, segmentGrads + segmentBlockSize * rangeIndex, &context_);
       for (int64_t start = dataIndex; dataIndex < start + lengths[rangeIndex];
@@ -1789,7 +1788,7 @@ class AbstractLengthsWithMainInputAndForwardOutputGradientOp
     const T* data = dataInput.template data<T>();

     int64_t dataIndex = 0;
-    for (const auto rangeIndex : c10::irange(numSegments)) {
+    for (int64_t rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) {
       ReducerGradient reducer(
           ctx, segmentGrads + segmentBlockSize * rangeIndex, &context_);
       for (int64_t start = dataIndex; dataIndex < start + lengths[rangeIndex];
@@ -59,12 +59,12 @@ class SelfBinningHistogramOp final : public Operator<Context> {
     T max = 0;
     T min = 0;
     int64_t total_count = 0;
-    for (const auto input_idx : c10::irange(InputSize())) {
+    for (int input_idx = 0; input_idx < InputSize(); input_idx++) {
       const auto& x = Input(input_idx);
       const int64_t N = x.numel();
       total_count += N;
       const auto* x_data = x.template data<T>();
-      for (const auto data_idx : c10::irange(N)) {
+      for (int64_t data_idx = 0; data_idx < N; data_idx++) {
         const T val = this->abs_ ? abs(x_data[data_idx]) : x_data[data_idx];
         if (!first_seen) {
           max = val;
@@ -91,7 +91,7 @@ class SelfBinningHistogramOp final : public Operator<Context> {
       scaled_max = min + (max - min) * RANGE_SCALING;
       T scaled_range = (scaled_max - min);
       // Avoid underflow by calculating advancement through multiplication.
-      for (const auto i : c10::irange(num_edges_)) {
+      for (int i = 0; i < num_edges_; i++) {
         T advancement_ratio = T(i) / num_bins_;
         histogram_values_data[i] = min + advancement_ratio * scaled_range;
       }
@@ -112,7 +112,7 @@ class SelfBinningHistogramOp final : public Operator<Context> {
       T log_multiplier_numerator = log(scaled_max) - log(min);
       // Avoid underflow by:
       // - Calculating each advancement separately for each i.
-      for (const auto i : c10::irange(num_edges_)) {
+      for (int i = 0; i < num_edges_; i++) {
         T advancement_ratio = T(i)/num_bins_;
         histogram_values_data[i] = min * exp(log_multiplier_numerator * advancement_ratio);
       }
@@ -127,11 +127,11 @@ class SelfBinningHistogramOp final : public Operator<Context> {
       histogram_counts_data[0] = total_count;
     }
     else {
-      for (const auto input_idx : c10::irange(InputSize())) {
+      for (int input_idx = 0; input_idx < InputSize(); input_idx++) {
         const auto& x = Input(input_idx);
         const int64_t N = x.numel();
         const auto* x_data = x.template data<T>();
-        for (const auto data_idx : c10::irange(N)) {
+        for (int64_t data_idx = 0; data_idx < N; data_idx++) {
           const T val = this->abs_ ? abs(x_data[data_idx]) : x_data[data_idx];
           const auto bisection_it = std::upper_bound(
               histogram_values_data,
@@ -163,7 +163,7 @@ class SelfBinningHistogramOp final : public Operator<Context> {

   void CheckInputs() {
     const auto& input_zero = Input(0);
-    for (const auto i : c10::irange(1, InputSize())) {
+    for (int i = 1; i < InputSize(); i++) {
       CAFFE_ENFORCE_EQ(
           Input(i).dtype(),
           input_zero.dtype(),
@@ -34,7 +34,7 @@ class ShapeOp : public Operator<Context> {
     auto* output = Output(0, {numAxes}, at::dtype<int64_t>());
     auto src = reinterpret_cast<const char*>(data.sizes().data());
     auto out = reinterpret_cast<char*>(output->template mutable_data<int64_t>());
-    for (const auto i : c10::irange(numAxes)) {
+    for (int i = 0; i < numAxes; i++) {
       auto axis = axes_[i];
       CAFFE_ENFORCE_LT(axis, numDims, "Axis out of range");
       CAFFE_ENFORCE_GE(axis, 0, "Each axis should be non-negative");
@@ -51,7 +51,7 @@ class SinusoidPositionEncodingOp : public Operator<Context> {
     float max_alpha_pow =
         ((float)embedding_size_ - 1.0f) / (float)embedding_size_;

-    for (const auto i : c10::irange(M)) {
+    for (int i = 0; i < M; ++i) {
       float pos = (float)idxs[i * K];

       // Compute the embedding for position i, example 0 first
@@ -72,7 +72,7 @@ class SinusoidPositionEncodingOp : public Operator<Context> {
       row_array = amplitude_ * row_array.sin().eval();

       // Copy the embedding to position i in the other examples
-      for (const auto j : c10::irange(1, K)) {
+      for (int j = 1; j < K; ++j) {
         int base = i * K * embedding_size_;
         std::copy(
             &out[base],
@@ -30,7 +30,7 @@ bool SliceImpl(
   std::vector<SIndex> ends_idx(data.dim());
   std::vector<SIndex> dst_sizes(data.dim());

-  for (const auto i : c10::irange(data.dim())) {
+  for (int i = 0; i < data.dim(); ++i) {
     if (i >= starts.numel()) {
       starts_idx[i] = 0;
       ends_idx[i] = data.size(i);
@@ -78,7 +78,7 @@ bool SliceImpl(
   }
   // for now only supports slicing in 1 dimension
   int dim = -1;
-  for (const auto i : c10::irange(data.dim())) {
+  for (int i = 0; i < data.dim(); ++i) {
     if (starts_idx[i] > 0 || ends_idx[i] < data.size(i)) {
       CAFFE_ENFORCE_EQ(
           dim, -1, "Currently only possible to slice in 1 dimension.");
@@ -131,7 +131,7 @@ bool SliceImpl(

     char* src_offset_bytes = src_bytes + itemsize * src_offset;
     char* dst_offset_bytes = dst_bytes;
-    for (const auto i : c10::irange(num_blocks)) {
+    for (size_t i = 0; i < num_blocks; ++i) {
       char* local_src_offset_bytes =
           src_offset_bytes + i * src_block_size_bytes;
       char* local_dst_offset_bytes =
@@ -177,7 +177,7 @@ bool SliceImpl(
       return true;
     }

-    for (const auto i : c10::irange(num_blocks)) {
+    for (size_t i = 0; i < num_blocks; ++i) {
       char* local_src_offset_bytes =
           src_offset_bytes + i * src_block_size_bytes;
       char* local_dst_offset_bytes =
@@ -29,14 +29,14 @@ void spaceToBatch(
   const int input_height = input.dim32(2);
   const int input_width = input.dim32(3);

-  for (const auto out_b : c10::irange(output_batch)) {
+  for (int out_b = 0; out_b < output_batch; ++out_b) {
     const int in_b = out_b % input_batch;
     const int offset_w = (out_b / input_batch) % block_size;
     const int offset_h = (out_b / input_batch) / block_size;
-    for (const auto d : c10::irange(input_depth)) {
-      for (const auto out_h : c10::irange(output_height)) {
+    for (int d = 0; d < input_depth; ++d) {
+      for (int out_h = 0; out_h < output_height; ++out_h) {
         const int in_h = out_h * block_size + offset_h - pad_t;
-        for (const auto out_w : c10::irange(output_width)) {
+        for (int out_w = 0; out_w < output_width; ++out_w) {
           const int in_w = out_w * block_size + offset_w - pad_l;
           const auto output_offset =
               ((out_b * output_depth + d) * output_height + out_h) *
@@ -80,14 +80,14 @@ void batchToSpace(
   const int input_width = input.dim32(3);

   CAFFE_ENFORCE(input_depth == output_depth);
-  for (const auto in_b : c10::irange(input_batch)) {
+  for (int in_b = 0; in_b < input_batch; ++in_b) {
     const int out_b = in_b % output_batch;
     const int offset_w = (in_b / output_batch) % block_size;
     const int offset_h = (in_b / output_batch) / block_size;
-    for (const auto d : c10::irange(input_depth)) {
-      for (const auto in_h : c10::irange(input_height)) {
+    for (int d = 0; d < input_depth; ++d) {
+      for (int in_h = 0; in_h < input_height; ++in_h) {
         const int out_h = in_h * block_size + offset_h - pad_t;
-        for (const auto in_w : c10::irange(input_width)) {
+        for (int in_w = 0; in_w < input_width; ++in_w) {
           const int out_w = in_w * block_size + offset_w - pad_l;
           if (out_h >= 0 && out_w >= 0 && out_h < output_height &&
               out_w < output_width) {
@@ -6,7 +6,6 @@
 #include <vector>
 #include "caffe2/core/context.h"
 #include "caffe2/core/export_caffe2_op_to_c10.h"
-#include <c10/util/irange.h>
 #include "caffe2/core/operator.h"
 #include "caffe2/core/tensor.h"
 #include "caffe2/utils/math.h"
@@ -30,7 +29,7 @@ class SparseToDenseMaskBase : public Operator<Context> {
     auto biggest = *std::max_element(mask.begin(), mask.end());
     dense_.assign(std::min(kMaxDenseSize, biggest + 1), -1);
     // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
-    for (const auto i : c10::irange(mask.size())) {
+    for (int i = 0; i < mask.size(); i++) {
       int64_t id = mask[i];
       CAFFE_ENFORCE_GE(id, 0, "Only positive IDs are allowed.");
       if (id >= kMaxDenseSize) {
@@ -156,7 +155,7 @@ class SparseToDenseMaskOp : public SparseToDenseMaskBase<Context> {
     }

     int64_t offset = 0;
-    for (const auto r : c10::irange(rows)) {
+    for (int r = 0; r < rows; r++) {
       bool skippedSparseIndex = false;
       for (int c = 0; c < lengths_vec[r]; c++) {
         const auto sparse_index = sparse_indices_vec[offset + c];
@@ -273,7 +272,7 @@ class SparseToDenseMaskGradientOp : public SparseToDenseMaskBase<Context> {
     // SparseToDenseMask is not injective; gradient_used records
     // if the gradient is used for other input value from the same row
     vector<bool> gradient_used(cols, false);
-    for (const auto r : c10::irange(rows)) {
+    for (int r = 0; r < rows; r++) {
       std::fill(gradient_used.begin(), gradient_used.end(), false);
       for (int c = lengths_vec[r] - 1; c >= 0; c--) {
         int idx = this->getFeatureIdx(sparse_indices_vec[offset + c]);
@@ -89,7 +89,7 @@ class SparseToDenseOp final : public Operator<Context> {
     const auto block_nitems = sparse_values.size_from_dim(1);
     const TData* sparse_values_vec = sparse_values.template data<TData>();

-    for (const auto i : c10::irange(sparse_indices_len)) {
+    for (int32_t i = 0; i < sparse_indices_len; i++) {
       const TInd idx = sparse_indices_vec[i];
       CAFFE_ENFORCE_GE(idx, 0);
       CAFFE_ENFORCE_LT(idx, output_first_dim);
@@ -41,7 +41,7 @@ class SquareRootDivideOp final : public Operator<Context> {
     auto* scalePtr = scale.template data<TScale>();
     auto* dataPtr = data.template data<TData>();
     auto* yPtr = Y->template mutable_data<TData>();
-    for (const auto i : c10::irange(0U, batchSize)) {
+    for (auto i = 0U; i < batchSize; ++i) {
       auto scale = scalePtr[i];
       CAFFE_ENFORCE(scale >= 0, scale, " < 0");
       auto multiplier = scale == 0 ? 1.0 : 1 / std::sqrt(scale);
@@ -20,7 +20,7 @@ struct ForEach {

   template <typename In, typename Out, typename Context>
   bool operator()(int n, const In* in, Out* out, Context* /*c*/) {
-    for (const auto i : c10::irange(n)) {
+    for (int i = 0; i < n; ++i) {
       out[i] = functor(in[i]);
     }
     return true;
@@ -51,7 +51,7 @@ bool TensorProtosDBInput<Context>::Prefetch() {
     TensorProtos protos;
     CAFFE_ENFORCE(protos.ParseFromString(value_));
     CAFFE_ENFORCE(protos.protos_size() == OutputSize());
-    for (const auto i : c10::irange(protos.protos_size())) {
+    for (int i = 0; i < protos.protos_size(); ++i) {
       if (protos.protos(i).has_device_detail()) {
         protos.mutable_protos(i)->clear_device_detail();
       }
@@ -62,14 +62,14 @@ bool TensorProtosDBInput<Context>::Prefetch() {
       // CPU));
     }
   } else {
-    for (const auto item_id : c10::irange(batch_size_)) {
+    for (int item_id = 0; item_id < batch_size_; ++item_id) {
       reader.Read(&key_, &value_);
       TensorProtos protos;
       CAFFE_ENFORCE(protos.ParseFromString(value_));
       CAFFE_ENFORCE(protos.protos_size() == OutputSize());
       // Note: shape_inferred_ is ignored, we'll always get dimensions from
       // proto
-      for (const auto i : c10::irange(protos.protos_size())) {
+      for (int i = 0; i < protos.protos_size(); ++i) {
        vector<int64_t> dims(
            protos.protos(i).dims().begin(), protos.protos(i).dims().end());
        dims.insert(dims.begin(), batch_size_);
@@ -94,7 +94,7 @@ bool TensorProtosDBInput<Context>::Prefetch() {

 template <class Context>
 bool TensorProtosDBInput<Context>::CopyPrefetched() {
-  for (const auto i : c10::irange(OutputSize())) {
+  for (int i = 0; i < OutputSize(); ++i) {
     OperatorBase::template Output<Tensor>(i, Context::GetDeviceType())
         ->CopyFrom(
             prefetched_blobs_[i].template Get<TensorCPU>(), /* async */ true);
@@ -113,12 +113,12 @@ class TileOp final : public Operator<Context> {
   bool DoTile(const int outer_size, const int inner_size, const T* X, T* Y) {
     if (inner_size == 1) {
       EigenArrayMap<T> Y_arr(Y, tiles_, outer_size);
-      for (const auto i : c10::irange(outer_size)) {
+      for (int i = 0; i < outer_size; ++i) {
         Y_arr.col(i) = X[i];
       }
     } else {
       ConstEigenArrayMap<T> X_arr(X, inner_size, outer_size);
-      for (const auto i : c10::irange(outer_size)) {
+      for (int i = 0; i < outer_size; ++i) {
         EigenArrayMap<T>(Y + i * tiles_ * inner_size, inner_size, tiles_)
             .colwise() = X_arr.col(i);
       }
@@ -245,10 +245,10 @@ class TileGradientOp final : public Operator<Context> {
         dX,
         inner_size,
         &context_);
-    for (const auto i : c10::irange(outer_size)) {
+    for (int i = 0; i < outer_size; ++i) {
       const T* dY_ptr = dY + i * tiles_ * inner_size;
       T* dX_ptr = dX + i * inner_size;
-      for (const auto j : c10::irange(1, tiles_)) {
+      for (int j = 1; j < tiles_; ++j) {
         math::Add<T, Context>(
             inner_size, dX_ptr, dY_ptr + j * inner_size, dX_ptr, &context_);
       }
@@ -49,7 +49,7 @@ class TransposeOp : public Operator<Context> {
     }
     const at::IntArrayRef X_dims = X.sizes();
     std::vector<std::int64_t> Y_dims(ndim);
-    for (const auto i : c10::irange(ndim)) {
+    for (int i = 0; i < ndim; ++i) {
       Y_dims[i] = X_dims[axes_[i]];
     }
     Y->Resize(Y_dims);
@@ -127,7 +127,7 @@ class TTLinearOp final : public Operator<Context> {
     // Check that output size of Y is the element-wise product of out_sizes
     int prod_out_sizes = 1;
     // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
-    for (const auto i : c10::irange(out_sizes_.size())) {
+    for (int i = 0; i < out_sizes_.size(); i++) {
       prod_out_sizes *= out_sizes_[i];
     }
     CAFFE_ENFORCE(
@@ -3,7 +3,6 @@

 #include "caffe2/core/context.h"
 #include "caffe2/core/export_caffe2_op_to_c10.h"
-#include <c10/util/irange.h>
 #include "caffe2/core/operator.h"


@@ -17,7 +16,7 @@ class UnsafeCoalesceOp final : public Operator<Context> {

   bool RunOnDevice() override {
     size_t coalesced_size = 0;
-    for (const auto i : c10::irange(InputSize())) {
+    for (int i = 0; i < InputSize(); ++i) {
       // For now only float type is supported
       CAFFE_ENFORCE(
           Input(i).dtype().template Match<float>(),
@@ -25,14 +24,14 @@ class UnsafeCoalesceOp final : public Operator<Context> {
           i);
     }

-    for (const auto i : c10::irange(InputSize())) {
+    for (int i = 0; i < InputSize(); ++i) {
       coalesced_size += Input(i).numel();
     }
     auto* coalesced = Output(OutputSize() - 1, coalesced_size, at::dtype<float>());
     auto coalesced_data = coalesced->template mutable_data<float>();

     size_t coalesced_offset = 0;
-    for (const auto i : c10::irange(InputSize())) {
+    for (auto i = 0; i < InputSize(); ++i) {
       const auto num_elems = Input(i).numel();
       auto input_sizes = Input(i).sizes().vec();
       // Don't do anything if both tensors are already pointing on the same data
@@ -8,7 +8,6 @@
 #include "caffe2/core/common_omp.h"
 #include "caffe2/core/context.h"
 #include "caffe2/core/export_caffe2_op_to_c10.h"
-#include <c10/util/irange.h>
 #include "caffe2/core/logging.h"
 #include "caffe2/core/operator.h"
 #include "caffe2/core/types.h"
@@ -65,7 +64,7 @@ class IsNanOp final : public Operator<Context> {
     const auto* X_data = X.template data<T>();
     uint8_t* Y_data = Y->template mutable_data<uint8_t>();
     // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
-    for (const auto i : c10::irange(X.numel())) {
+    for (size_t i = 0; i < X.numel(); i++) {
       Y_data[i] = (uint8_t)(std::isnan(X_data[i]));
     }
     return true;
@@ -300,7 +299,7 @@ class SumOp : public Operator<Context> {
     auto* output = Output(0, input0.sizes(), at::dtype<T>());
     T* output_data = output->template mutable_data<T>();
     // Dimension checking
-    for (const auto i : c10::irange(1, InputSize())) {
+    for (int i = 1; i < InputSize(); ++i) {
       if (output->sizes() != Input(i).sizes()) {
         CAFFE_THROW(
             "Check failed: output->sizes() == Input(i).sizes().",
@@ -321,7 +320,7 @@ class SumOp : public Operator<Context> {
         output_data,
         &context_);
     // Add remaining.
-    for (const auto i : c10::irange(2, InputSize())) {
+    for (int i = 2; i < InputSize(); ++i) {
       math::Add(
           output->numel(),
           output_data,
@@ -578,7 +577,7 @@ class ScatterWeightedSumOp : public Operator<Context> {
     float w0 = *weight0.template data<float>();
     // It's most likely a constant so exact comparison is fine
     if (w0 != 1.0) {
-      for (const auto i : c10::irange(K)) {
+      for (int i = 0; i < K; ++i) {
        Index idx = idxs[i];
        CAFFE_ENFORCE(
            0 <= idx && idx < N,
@@ -601,7 +600,7 @@ class ScatterWeightedSumOp : public Operator<Context> {
       CAFFE_ENFORCE_EQ(weight.numel(), 1);
       const T* x_data = X.template data<T>();
       float w = *weight.template data<float>();
-      for (const auto i : c10::irange(K)) {
+      for (int i = 0; i < K; ++i) {
        Index idx = idxs[i];
        // double-checking the indices, but it's fine as it's DCHECK only
        DCHECK(0 <= idx && idx < N)
@@ -747,7 +746,7 @@ class ScatterAssignOp : public Operator<Context> {
       int64_t N,
       int64_t K,
       int64_t block_size) {
-    for (const auto i : c10::irange(K)) {
+    for (int i = 0; i < K; ++i) {
       Index idx = idxs[i];
       // double-checking the indices, but it's fine as it's DCHECK only
       DCHECK(0 <= idx && idx < N)
@@ -839,9 +838,11 @@ class ScatterOp : public Operator<CPUContext> {
     // dst should have the same rank as idxs and src, but the dimension of dim
     // axis can be different. That is why in the above equation, there is the
     // difference of J_src and J_dst.
-    for (const auto outer_batch : c10::irange(outer_dims_product)) {
-      for (const auto i : c10::irange(N)) {
-        for (const auto inner_batch : c10::irange(idxs_block_size)) {
+    for (int64_t outer_batch = 0; outer_batch < outer_dims_product;
+         ++outer_batch) {
+      for (int64_t i = 0; i < N; ++i) {
+        for (int64_t inner_batch = 0; inner_batch < idxs_block_size;
+             ++inner_batch) {
           auto idxs_elem_idx =
               outer_batch * idxs_batch_size + i * idxs_block_size + inner_batch;
           auto src_elem_idx =
@@ -866,7 +867,7 @@ class ScatterOp : public Operator<CPUContext> {
       const IndexType* indices,
       int64_t n,
       IndexType indexing_axis_dim) {
-    for (const auto i : c10::irange(n)) {
+    for (auto i = 0; i < n; ++i) {
       auto idx = indices[i];
       CAFFE_ENFORCE(
           0 <= idx && idx < indexing_axis_dim,
@@ -899,7 +900,7 @@ class LengthsToSegmentIdsOp : public Operator<Context> {
     output->Resize(total_length);
     auto* output_data = output->template mutable_data<int32_t>();

-    for (const auto i : c10::irange(input.numel())) {
+    for (int i = 0; i < input.numel(); ++i) {
       auto len = input_data[i];
       std::fill(output_data, output_data + len, i);
       output_data += len;
@@ -926,7 +927,7 @@ class LengthsToRangesOp : public Operator<Context> {
     auto* output_data = output->template mutable_data<int32_t>();

     int32_t offset = 0;
-    for (const auto i : c10::irange(size)) {
+    for (int i = 0; i < size; ++i) {
       auto len = input_data[i];
       output_data[i * 2] = offset;
       output_data[i * 2 + 1] = len;
@@ -960,7 +961,7 @@ class LengthsToOffsetsOp : public Operator<Context> {
     auto* output_data = output->template mutable_data<int32_t>();

     int32_t offset = 0;
-    for (const auto i : c10::irange(size)) {
+    for (int i = 0; i < size; ++i) {
       auto len = input_data[i];
       output_data[i] = offset;
       offset += len;
@@ -1017,7 +1018,7 @@ class SegmentIdsToLengthsOp : public Operator<Context> {
     }
     std::fill(output_data, output_data + num_segments, 0);
     Index prev = 0; // Assume that segment_id >= 0.
-    for (const auto i : c10::irange(input_size)) {
+    for (int64_t i = 0; i < input_size; i++) {
       CAFFE_ENFORCE(
           prev <= input_data[i],
           "Segment ids must be sorted: ",
@@ -1068,7 +1069,7 @@ class SegmentIdsToRangesOp : public Operator<Context> {
     }
     std::fill(output_data, output_data + num_segments * 2, 0);
     Index prev = input_data[0];
-    for (const auto i : c10::irange(input_size)) {
+    for (int64_t i = 0; i < input_size; i++) {
       CAFFE_ENFORCE(
           prev <= input_data[i],
           "Segment ids must be sorted: ",
@@ -1108,7 +1109,7 @@ class LengthsToWeightsOp : public Operator<Context> {
     auto* output = Output(0);

     int64_t output_size = 0;
-    for (const auto i : c10::irange(input_size)) {
+    for (auto i = 0; i < input_size; i++) {
       CAFFE_ENFORCE_GE(input_data[i], 0, "unexpected negative length value");
       output_size += input_data[i];
     }
@@ -1131,7 +1132,7 @@ class LengthsToWeightsOp : public Operator<Context> {
     output->Resize(output_size);
     auto* output_data = output->template mutable_data<float>();
     int64_t cnt = 0;
-    for (const auto i : c10::irange(input_size)) {
+    for (auto i = 0; i < input_size; i++) {
       auto len = input_data[i];
       if (len == 0) {
         continue;
@@ -1158,7 +1159,7 @@ class HasElementsOp : public Operator<Context> {

   bool RunOnDevice() override {
     bool res = false;
-    for (const auto i : c10::irange(InputSize())) {
+    for (auto i = 0; i < InputSize(); ++i) {
       const auto& input = Input(i);
       res = res || input.numel() > 0;
     }
@@ -1207,7 +1208,7 @@ class LengthsToShapeOp : public Operator<Context> {
     auto size = input.numel();
     auto first = input_data[0];

-    for (const auto i : c10::irange(1, size)) {
+    for (int i = 1; i < size; i++) {
       CAFFE_ENFORCE(
           input_data[i] == first, "All elements of input must be same ");
     }
@@ -1254,7 +1255,7 @@ class GatherRangesOp : public Operator<Context> {
     size_t start = 0;
     size_t blockSize = ranges.size_from_dim(1);
     // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
-    for (const auto i : c10::irange(batchSize)) {
+    for (size_t i = 0; i < batchSize; ++i) {
       auto end = start + blockSize;
       outputLengthsPtr[i] = accumulate(rangesData, start, end);
       start = end;
@@ -1328,7 +1329,7 @@ class LengthsGatherOp : public Operator<Context> {

     int64_t total_length = 0;
     // NOLINTNEXTLINE(clang-diagnostic-sign-compare)
-    for (const auto i : c10::irange(indices.numel())) {
+    for (size_t i = 0; i < indices.numel(); ++i) {
       auto idx = indices_data[i];
       CAFFE_ENFORCE_LT(idx, lengths.numel());
       total_length += lengths_data[idx];
@@ -1340,7 +1341,7 @@ class LengthsGatherOp : public Operator<Context> {
     offsets_.clear();
     int64_t running_offset = 0;
     offsets_.reserve(lengths.numel());
-    for (const auto i : c10::irange(lengths.numel())) {
+    for (size_t i = 0; i < lengths.numel(); ++i) {
       offsets_.push_back(running_offset);
       running_offset += lengths_data[i];
     }
@@ -1354,7 +1355,7 @@ class LengthsGatherOp : public Operator<Context> {
     auto block_bytesize = block_size * items.itemsize();
     auto out = static_cast<char*>(output->raw_mutable_data(items.dtype()));

-    for (const auto i : c10::irange(indices.numel())) {
+    for (size_t i = 0; i < indices.numel(); ++i) {
       auto idx = indices_data[i];
       auto length = lengths_data[idx];
       context_.CopyItemsSameDevice(
@@ -1405,7 +1406,7 @@ class AccumulateHistogramOp : public Operator<Context> {
     math::Set<int64_t, Context>(
         num_output_buckets_, 0, cur_hist_data, &context_);

-    for (const auto i : c10::irange(N)) {
+    for (int i = 0; i < N; i++) {
       int bucket_index = -1;
       if (X_data[i] < lower_bound_) {
         bucket_index = 0;
@@ -1418,7 +1419,7 @@ class AccumulateHistogramOp : public Operator<Context> {
       accumulate_hist_[bucket_index] += 1;
     }

-    for (const auto i : c10::irange(num_output_buckets_)) {
+    for (int i = 0; i < num_output_buckets_; i++) {
       acc_hist_data[i] = accumulate_hist_[i];
     }

@@ -1463,7 +1464,7 @@ class RangeOp : public Operator<Context> {
     T start = 0;
     T step = 1;

-    for (const auto i : c10::irange(InputSize())) {
+    for (int i = 0; i < InputSize(); ++i) {
       CAFFE_ENFORCE_EQ(
           Input(i).numel(), 1, "All inputs must be scalar/1D tensor.");
     }
@@ -17,7 +17,7 @@ void VariableLengthSequencePadding(
     const int32_t* seqLengths,
     const T padValue,
     Context* /*context*/) {
-  for (const auto j : c10::irange(B)) {
+  for (int j = 0; j < B; j++) {
     for (int i = seqLengths[j]; i < N; i++) {
       EigenVectorArrayMap<T>(X + B * M * i + M * j, M).setConstant(padValue);
     }
@@ -54,7 +54,7 @@ class ConcatAddMulReplaceNaNClipOp final : public Operator<Context> {
     }
     int before = 1, after = 1;
     vector<int64_t> output_dims(concat_input_0.sizes().vec());
-    for (const auto i : c10::irange(concat_input_0.dim())) {
+    for (int i = 0; i < concat_input_0.dim(); ++i) {
       if (i == canonical_axis) {
         continue;
       }
@@ -65,7 +65,7 @@ class ConcatAddMulReplaceNaNClipOp final : public Operator<Context> {
         after *= dim;
       }
       // check the input dims are compatible.
-      for (const auto j : c10::irange(concat_input_start, InputSize())) {
+      for (int j = concat_input_start; j < InputSize(); ++j) {
         int dim_j = Input(j).dim32(i);
         CAFFE_ENFORCE(
             dim == dim_j,
@@ -93,7 +93,7 @@ class ConcatAddMulReplaceNaNClipOp final : public Operator<Context> {
         "Cannot handle fused concat with dim > 2, please update your fusion logic");

     int output_channels = 0;
-    for (const auto i : c10::irange(concat_input_start, InputSize())) {
+    for (int i = concat_input_start; i < InputSize(); ++i) {
       axis_data[i - concat_input_start] = Input(i).dim32(canonical_axis);
       output_channels += Input(i).dim32(canonical_axis);
     }
@@ -101,7 +101,7 @@ class ConcatAddMulReplaceNaNClipOp final : public Operator<Context> {
     auto* output = Output(0, output_dims, at::dtype<float>());

     size_t output_offset = 0;
-    for (const auto i : c10::irange(concat_input_start, InputSize())) {
+    for (int i = concat_input_start; i < InputSize(); ++i) {
       auto& input = Input(i);
       auto axis_dim = input.dim32(canonical_axis);
       math::CopyMatrix<Context>(
@@ -127,7 +127,7 @@ class ConcatAddMulReplaceNaNClipOp final : public Operator<Context> {
     const auto _zeros = _mm256_set1_ps(0.f);

     output_offset = 0;
-    for (const auto outer : c10::irange(before)) {
+    for (auto outer = 0; outer < before; ++outer) {
       auto axis_dim = output->dim32(canonical_axis);
       size_t inner_size = axis_dim * after;
       auto inner = 0;
@@ -148,7 +148,7 @@ class ConcatAddMulReplaceNaNClipOp final : public Operator<Context> {
         _mm256_storeu_ps(&output_data[output_offset + inner], out_val);
       }

-      for (const auto inner_omp : c10::irange(inner, inner_size)) {
+      for (auto inner_omp = inner; inner_omp < inner_size; ++inner_omp) {
        float elem = output_data[output_offset + inner_omp];
        float add_elem = add_input_data[inner_omp];
        float mul_elem = mul_input_data[inner_omp];
@@ -1,5 +1,4 @@
 #pragma once
-#include "c10/util/irange.h"
 #include <iostream>
 #include <string>
 #include <vector>
@@ -21,7 +20,8 @@ struct ASTExpr {
     return starInputsFlag;
   }
   void dump(int level = 0) const {
-    for (const auto i : c10::irange(level))std::cout << " ";
+    for (int i = 0; i < level; i++)
+      std::cout << " ";
     if (!isCall())
       std::cout << "Var: " << name << std::endl;
     else {
@@ -41,7 +41,8 @@ struct ASTStmt {
     delete rhs;
   }
   void dump(int level = 0) const {
-    for (const auto i : c10::irange(level))std::cout << " ";
+    for (int i = 0; i < level; i++)
+      std::cout << " ";
     std::cout << "LHS:" << std::endl;
     for (auto s : lhs) {
       for (int i = 0; i < level + 1; i++)
@@ -6,7 +6,6 @@

 #include <c10/util/Exception.h>
 #include <c10/util/SmallVector.h>
-#include <c10/util/irange.h>
 #include "caffe2/core/context.h"
 #include "caffe2/core/logging.h"
 #include "caffe2/core/operator.h"
@@ -139,7 +138,7 @@ class OnnxifiOp final : public Operator<Context> {

     if (use_passed_output_shapes_) {
       // Populate output_shapes_per_bs_
-      for (const auto bs : c10::irange(1, max_batch_size_)) {
+      for (int bs = 1; bs < max_batch_size_; ++bs) {
         auto output_shapes_tp = helper.GetRepeatedArgument<TensorProto>("output_shapes_bs_" + caffe2::to_string(bs));
         auto output_qshapes_tp = helper.GetRepeatedArgument<TensorProto>("output_qshapes_bs_" + caffe2::to_string(bs));
         CAFFE_ENFORCE_EQ(output_names_.size(), output_shapes_tp.size() + output_qshapes_tp.size());
@@ -268,7 +267,7 @@ class OnnxifiOp final : public Operator<Context> {
         ONNXIFI_STATUS_SUCCESS);

     // Release unused backend ids.
-    for (const auto i : c10::irange(num_backends)) {
+    for (size_t i = 0; i < num_backends; ++i) {
       if (i == static_cast<size_t>(backend_index)) {
         continue;
       }
@@ -288,7 +287,7 @@ class OnnxifiOp final : public Operator<Context> {

     // Extra weight shapes
     std::unordered_map<std::string, ShapeInfo> weight_shape_info;
-    for (const auto i : c10::irange(weight_names.size())) {
+    for (size_t i = 0; i < weight_names.size(); ++i) {
       TensorShape shape;
       const auto& shape0 = weight_shapes[i];
       for (const auto d : shape0) {
@@ -6,7 +6,6 @@
 #include <immintrin.h>
 #endif
 #include <c10/util/Half.h>
-#include <c10/util/irange.h>

 namespace caffe2 {

@@ -27,7 +26,7 @@ static inline void adagrad_update_base_inlined(
     float epsilon,
     float lr,
     float weight_decay = 0.f) {
-  for (const auto i : c10::irange(N)) {
+  for (auto i = 0; i < N; ++i) {
     float gi = std::fma(weight_decay, w[i], g[i]);
     float hi = decay * h[i] + gi * gi;
     nh[i] = hi;
@@ -2,7 +2,6 @@
 #include <string.h>
 #include <cmath>
 #include <cstdint>
-#include "c10/util/irange.h"
 #include "caffe2/utils/conversions.h"

 #if (ENABLE_VECTORIZATION > 0) && !defined(_DEBUG) && !defined(DEBUG)
@@ -54,7 +53,7 @@ inline void LstmUnitImpl(
     T* H,
     const float forget_bias) {
   const T forgetBias = convert::To<float, T>(forget_bias);
-  for (const auto n : c10::irange(N)) {
+  for (int n = 0; n < N; ++n) {
     const bool valid = seqLengths == nullptr || t < seqLengths[n];
     if (!valid) {
       if (drop_states) {
@@ -68,7 +67,7 @@ inline void LstmUnitImpl(
       const T* X_D = &X[D];
       const T* X_2D = &X[2 * D];
       const T* X_3D = &X[3 * D];
-      VECTOR_LOOP for (const auto d : c10::irange(D)) {
+      VECTOR_LOOP for (int d = 0; d < D; ++d) {
        const T i = sigmoid(X[d]);
        const T f = sigmoid(X_D[d] + forgetBias);
        const T o = sigmoid(X_2D[d]);
@@ -106,7 +105,7 @@ inline void LstmUnitGradientImpl(
     T* X_diff,
     const float forget_bias) {
   const T localForgetBias = convert::To<float, T>(forget_bias);
-  for (const auto n : c10::irange(N)) {
+  for (int n = 0; n < N; ++n) {
     const bool valid = seqLengths == nullptr || t < seqLengths[n];

     if (!valid) {
@@ -119,7 +118,7 @@ inline void LstmUnitGradientImpl(
       }
       memset(X_diff, 0, 4 * sizeof(T) * D);
     } else {
-      VECTOR_LOOP for (const auto d : c10::irange(D)) {
+      VECTOR_LOOP for (int d = 0; d < D; ++d) {
        T* c_prev_diff = C_prev_diff + d;
        T* h_prev_diff = H_prev_diff + d;
        T* i_diff = X_diff + d;
@@ -59,12 +59,12 @@ class DataNetFiller : public Filler {
       : init_net_(init_net), data_net_(data_net) {
     // The output of the data_net_ will be served as the input
     int op_size = data_net_.op_size();
-    for (const auto i : c10::irange(op_size)) {
+    for (int i = 0; i < op_size; ++i) {
       OperatorDef op_def = data_net_.op(i);
       // We rely on Fill op to generate inputs
       CAFFE_ENFORCE(op_def.type().find("Fill") != std::string::npos);
       int output_size = op_def.output_size();
-      for (const auto j : c10::irange(output_size)) {
+      for (int j = 0; j < output_size; ++j) {
         input_names_.push_back(op_def.output(j));
       }
     }
@@ -105,7 +105,7 @@ class DataRandomFiller : public Filler {
       int input_index,
       const std::vector<std::vector<int64_t>>& input_dims) {
     Workspace ws;
-    for (const auto i : c10::irange(op_def.input_size())) {
+    for (int i = 0; i < op_def.input_size(); ++i) {
       // CreateOperator requires all input blobs present
       ws.CreateBlob(op_def.input(i));
     }
@@ -153,12 +153,12 @@ class TensorFetcher : public BlobFetcherBase {
     if (numpy_type == NPY_OBJECT) {
       PyObject** outObj = reinterpret_cast<PyObject**>(outPtr);
       auto* str = tensor.template data<std::string>();
-      for (const auto i : c10::irange(tensor.numel())) {
+      for (int i = 0; i < tensor.numel(); ++i) {
         outObj[i] = PyBytes_FromStringAndSize(str->data(), str->size());
         str++;
         // cleanup on failure
         if (outObj[i] == nullptr) {
-          for (const auto j : c10::irange(i)) {
+          for (int j = 0; j < i; ++j) {
             Py_DECREF(outObj[j]);
           }
           CAFFE_THROW("Failed to allocate string for ndarray of strings.");
@@ -212,7 +212,7 @@ class TensorFeeder : public BlobFeederBase {
     int ndim = PyArray_NDIM(array);
     npy_intp* npy_dims = PyArray_DIMS(array);
     std::vector<int64_t> dims;
-    for (const auto i : c10::irange(ndim)) {
+    for (int i = 0; i < ndim; ++i) {
      dims.push_back(npy_dims[i]);
    }

@@ -229,7 +229,7 @@ class TensorFeeder : public BlobFeederBase {
             dims, at::dtype<std::string>().device(Context::GetDeviceType()));
       }
       auto* outPtr = tensor.template mutable_data<std::string>();
-      for (const auto i : c10::irange(tensor.numel())) {
+      for (int i = 0; i < tensor.numel(); ++i) {
         char* str;
         Py_ssize_t strSize;
         if (PyBytes_Check(input[i])) {
@@ -375,7 +375,7 @@ class PythonOpBase : public Operator<Context> {

     std::vector<py::object> inputs;
     inputs.reserve(InputSize());
-    for (const auto i : c10::irange(InputSize())) {
+    for (auto i = 0; i < InputSize(); ++i) {
       const auto* blob = &InputBlob(i);
       // Allow CPU tensors in addition to operator context's tensors
       py::object py_obj;
@@ -395,7 +395,7 @@ class PythonOpBase : public Operator<Context> {
     }
     std::vector<py::object> outputs;
     outputs.reserve(OutputSize());
-    for (const auto i : c10::irange(OutputSize())) {
+    for (auto i = 0; i < OutputSize(); ++i) {
       auto* blob = OutputBlob(i);

       // Python op is always used with CPUContext only and treats inputs and
@@ -127,7 +127,7 @@ class BinaryElementwiseDNNLowPOp : public DNNLowPOp<T, FP32_OP> {
       size_t n, \
       size_t post, \
       CPUContext*) { \
-    for (const auto i : c10::irange(pre)) { \
+    for (int i = 0; i < pre; ++i) { \
      EigenArrayMap<R>(out + i * n * post, post, n) = eigen_op( \
          (ConstEigenArrayMap<T>(a + i * n * post, post, n).rowwise()), \
          (Eigen::Map<const Eigen::Array<T, 1, Eigen::Dynamic>>(b, n))); \
@@ -50,7 +50,7 @@ static void Im2ColNCHW(
       auto* dst = data_col + nip * (kernel_h * kernel_w * output_h * output_w) +
           kh * (kernel_w * output_h * output_w) + kw * (output_h * output_w);
       const auto* src = data_im + nip * (height * width);
-      for (const auto y : c10::irange(output_h)) {
+      for (auto y = 0; y < output_h; y++) {
         const auto iy = y * stride_h + kh;
         const auto ix = kw;
         if (stride_w == 1) {
@@ -59,7 +59,7 @@ static void Im2ColNCHW(
               src + (iy * width + ix),
               sizeof(T) * output_w);
         } else {
-          for (const auto x : c10::irange(output_w)) {
+          for (auto x = 0; x < output_w; x++) {
             memcpy(
                 dst + (y * output_w + x),
                 src + (iy * width + ix + x * stride_w),
@@ -78,8 +78,8 @@ static void Im2ColNCHW(
   const int pad_w = pad_l;
   const int channel_size = height * width;
   for (int channel = channels; channel--; data_im += channel_size) {
-    for (const auto kernel_row : c10::irange(kernel_h)) {
-      for (const auto kernel_col : c10::irange(kernel_w)) {
+    for (int kernel_row = 0; kernel_row < kernel_h; kernel_row++) {
+      for (int kernel_col = 0; kernel_col < kernel_w; kernel_col++) {
         int input_row = -pad_h + kernel_row * dilation_h;
         for (int output_rows = output_h; output_rows; output_rows--) {
           if (!utils::IsAGeZeroAndALtB(input_row, height)) {
@@ -113,12 +113,12 @@ static void Im2ColNCHW(
   int width_col = (width + pad_l + pad_r - dkernel_w) / stride_w + 1;

   int channels_col = channels * kernel_h * kernel_w;
-  for (const auto c : c10::irange(channels_col)) {
+  for (int c = 0; c < channels_col; ++c) {
     int w_offset = c % kernel_w;
     int h_offset = (c / kernel_w) % kernel_h;
     int c_im = c / kernel_h / kernel_w;
-    for (const auto h : c10::irange(height_col)) {
-      for (const auto w : c10::irange(width_col)) {
+    for (int h = 0; h < height_col; ++h) {
+      for (int w = 0; w < width_col; ++w) {
         int h_pad = h * stride_h - pad_t + h_offset * dilation_h;
         int w_pad = w * stride_w - pad_l + w_offset * dilation_w;
         if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
@@ -152,20 +152,20 @@ static void Im2ColNdNCHW(
       kernel_shape, kernel_shape + N, 1, std::multiplies<int>());
   std::vector<int> d_offset(N, 0);
   std::vector<int> d_iter(N, 0);
-  for (const auto i : c10::irange(outer_size)) {
+  for (int i = 0; i < outer_size; ++i) {
     // Loop over spatial axes in reverse order to compute a per-axis offset.
     int offset = i;
     for (int d_i = N - 1; d_i >= 0; --d_i) {
       d_offset[d_i] = offset % kernel_shape[d_i];
       offset /= kernel_shape[d_i];
     }
-    for (const auto j : c10::irange(inner_size)) {
+    for (int j = 0; j < inner_size; ++j) {
       // Loop over spatial axes in forward order to compute the indices in the
       // image and column, and whether the index lies in the padding.
       const int col_index = i * inner_size + j;
       int img_index = i / kernel_size;
       bool is_padding = false;
-      for (const auto d_i : c10::irange(N)) {
+      for (int d_i = 0; d_i < N; ++d_i) {
        const int d_img = d_iter[d_i] * stride[d_i] - pad[d_i] +
            d_offset[d_i] * dilation[d_i];
        is_padding |= d_img < 0 || d_img >= img_shape[d_i + 1];
@@ -216,13 +216,13 @@ static void Im2ColNHWC(
     T* data_col_temp =
         data_col + h * width_col * kernel_h * kernel_w * channels;
     int w_pad = -pad_l;
-    for (const auto w : c10::irange(width_col)) {
+    for (int w = 0; w < width_col; ++w) {
       int r = 0;
       for (int ih = h_pad; ih < h_pad + dkernel_h; ih += dilation_h, ++r) {
         int s = 0;
         for (int iw = w_pad; iw < w_pad + dkernel_w; iw += dilation_w, ++s) {
           if (ih >= 0 && ih < height && iw >= 0 && iw < width) {
-            for (const auto g : c10::irange(groups)) {
+            for (int g = 0; g < groups; ++g) {
               memcpy(
                   data_col_temp +
                       ((g * kernel_h + r) * kernel_w + s) * (channels / groups),
@@ -232,7 +232,7 @@ static void Im2ColNHWC(
             }
           } else {
             // This should be simply padded with zero.
-            for (const auto g : c10::irange(groups)) {
+            for (int g = 0; g < groups; ++g) {
               for (int i = 0; i < channels / groups; ++i) {
                 data_col_temp
                     [(((g * kernel_h + r) * kernel_w) + s) *
@@ -293,12 +293,12 @@ static void Im2Col3DNHWC(
 #endif
   for (int t = 0; t < frame_col; ++t) {
     int t_pad = -pad_p + t * stride_t;
-    for (const auto h : c10::irange(height_col)) {
+    for (int h = 0; h < height_col; ++h) {
       int h_pad = -pad_t + h * stride_h;
       T* data_col_temp = data_col +
           (t * height_col + h) * width_col * kernel_t * kernel_h * kernel_w *
              channels;
-      for (const auto w : c10::irange(width_col)) {
+      for (int w = 0; w < width_col; ++w) {
        int w_pad = -pad_l + w * stride_w;
        int q = 0;
        for (int it = t_pad; it < t_pad + dkernel_t; it += dilation_t, ++q) {
@@ -309,7 +309,7 @@ static void Im2Col3DNHWC(
                iw += dilation_w, ++s) {
             if (it >= 0 && it < num_frames && ih >= 0 && ih < height &&
                 iw >= 0 && iw < width) {
-              for (const auto g : c10::irange(groups)) {
+              for (int g = 0; g < groups; ++g) {
                 memcpy(
                     data_col_temp +
                         (((g * kernel_t + q) * kernel_h + r) * kernel_w + s) *
@@ -320,7 +320,7 @@ static void Im2Col3DNHWC(
               }
             } else {
               // This should be simply padded with zero.
-              for (const auto g : c10::irange(groups)) {
+              for (int g = 0; g < groups; ++g) {
                for (int i = 0; i < channels / groups; ++i) {
                  data_col_temp
                      [((((g * kernel_t + q) * kernel_h + r) * kernel_w) +
@ -36,8 +36,8 @@ void StoreMatrixInMatrixMarketFormat(
}
fprintf(fp, "%d %d\n", m, n);
// matrix market array format uses column-major order
for (const auto j : c10::irange(n)) {
for (const auto i : c10::irange(m)) {
for (int j = 0; j < n; ++j) {
for (int i = 0; i < m; ++i) {
if (is_integral<T>::value) {
// NOLINTNEXTLINE(clang-analyzer-core.NullDereference)
fprintf(fp, "%d\n", static_cast<int>(a[j * m + i]));

@ -54,7 +54,7 @@ class GatherDNNLowPOp final : public GatherOp<CPUContext> {
const Index* idxs = indices.template data<Index>();
auto out = static_cast<char*>(output->raw_mutable_data(data.dtype()));

for (const auto i : c10::irange(N)) {
for (int i = 0; i < N; ++i) {
auto idx = idxs[i];
CAFFE_ENFORCE(
0 <= idx && idx < data.size(0),

@ -149,7 +149,7 @@ class SafeDequeueBlobsOp final : public Operator<Context> {
}

const int kTensorGrowthPct = 40;
for (const auto i : c10::irange(numRecords_)) {
for (int i = 0; i < numRecords_; ++i) {
if (!queue->blockingRead(blobPtrs_)) {
// if we read at least one record, status is still true
return i > 0;

@ -32,7 +32,7 @@ class EnqueueRebatchingQueueOp : public Operator<CPUContext> {
CAFFE_ENFORCE_EQ(InputSize(), queue->numBlobs() + 1);
std::vector<const Tensor*> inputTensors;
inputTensors.reserve(InputSize() - 1);
for (const auto i : c10::irange(1, InputSize())) {
for (int i = 1; i < InputSize(); ++i) {
inputTensors.push_back(&Input(i));
}

@ -56,7 +56,7 @@ class DequeueRebatchingQueueOp : public Operator<CPUContext> {

std::vector<Tensor*> outputTensors;
outputTensors.reserve(OutputSize());
for (const auto i : c10::irange(OutputSize())) {
for (int i = 0; i < OutputSize(); ++i) {
outputTensors.push_back(Output(i));
}

@ -18,7 +18,7 @@ void AdadeltaUpdate(
float* nh,
float* nd,
Context* /*context*/) {
for (const auto i : c10::irange(N)) {
for (int i = 0; i < N; ++i) {
float gi = g[i];
float di = d[i];
float hi = nh[i] = decay * h[i] + (1.0f - decay) * gi * gi;
@ -120,7 +120,7 @@ class SparseAdadeltaOp final : public Operator<Context> {
}

auto block_size = Input(GRAD).numel() / n;
for (const auto i : c10::irange(n)) {
for (int i = 0; i < n; ++i) {
auto idx = indices[i];
if (block_size == 1) {
float gi = gradIn[i];

@ -82,8 +82,8 @@ class SparseAdagradFusedWithSparseLengthsSumGradientOp final
auto* grad_buffer_data =
is_mean ? grad_buffer_.template mutable_data<T>() : NULL;
if (is_mean) {
for (const auto rangeIndex : c10::irange(numSegments)) {
for (const auto tmpIndex : c10::irange(block_size)) {
for (auto rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) {
for (auto tmpIndex = 0; tmpIndex < block_size; ++tmpIndex) {
auto offsetI = rangeIndex * block_size;
grad_buffer_data[offsetI + tmpIndex] = lengths[rangeIndex] > 0
? gradIn[offsetI + tmpIndex] / lengths[rangeIndex]
@ -92,7 +92,7 @@ class SparseAdagradFusedWithSparseLengthsSumGradientOp final
}
}

for (const auto rangeIndex : c10::irange(numSegments)) {
for (auto rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) {
for (auto start = dataIndex; dataIndex < start + lengths[rangeIndex];
++dataIndex) {
std::size_t idx = indices[dataIndex];
@ -243,7 +243,7 @@ class SparseAdagradFusedWithSparseLengthsWeightedSumGradientOp final
// ignores this dependency and fuses these two loops.
std::vector<T> temp_grad(block_size);
int dataIndex = 0;
for (const auto rangeIndex : c10::irange(numSegments)) {
for (auto rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) {
for (auto start = dataIndex; dataIndex < start + lengths[rangeIndex];
++dataIndex) {
std::size_t idx = indices[dataIndex];
@ -277,7 +277,7 @@ class SparseAdagradFusedWithSparseLengthsWeightedSumGradientOp final
CAFFE_ENFORCE_EQ(dataIndex, n);

dataIndex = 0;
for (const auto rangeIndex : c10::irange(numSegments)) {
for (auto rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) {
for (auto start = dataIndex; dataIndex < start + lengths[rangeIndex];
++dataIndex) {
std::size_t idx = indices[dataIndex];
@ -285,7 +285,7 @@ class SparseAdagradFusedWithSparseLengthsWeightedSumGradientOp final
auto offsetIdx = idx * block_size;
auto localOffset = dataIndex - start;

for (const auto i : c10::irange(block_size)) {
for (int i = 0; i < block_size; ++i) {
temp_grad[i] = auxParamIn[localOffset] * gradIn[offsetI + i];
}

@ -409,7 +409,7 @@ class SparseAdagradFusedWithSparseLengthsWeightedSumGradientApproxOp final

std::vector<T> temp_grad(block_size);
int dataIndex = 0;
for (const auto rangeIndex : c10::irange(numSegments)) {
for (auto rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) {
for (auto start = dataIndex; dataIndex < start + lengths[rangeIndex];
++dataIndex) {
std::size_t idx = indices[dataIndex];
@ -440,7 +440,7 @@ class SparseAdagradFusedWithSparseLengthsWeightedSumGradientApproxOp final
auxGrad + dataIndex,
&context_);

for (const auto i : c10::irange(block_size)) {
for (int i = 0; i < block_size; ++i) {
temp_grad[i] = auxParamIn[localOffset] * gradIn[offsetI + i];
}

@ -39,7 +39,7 @@ void adagrad_update_output_effective_lr(
const float* lr,
Context* /*context*/,
float weight_decay = 0.f) {
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
float grad = std::fma(weight_decay, paramIn[i], gradIn[i]);
float moment = momentOut[i] = decay * momentIn[i] + grad * grad;
float effective_lr = effectiveLROut[i] =
@ -63,7 +63,7 @@ void adagrad_update_output_effective_lr_and_update(
const float* lr,
Context* /*context*/,
float weight_decay = 0.f) {
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
float grad = std::fma(weight_decay, paramIn[i], gradIn[i]);
float moment = momentOut[i] = decay * momentIn[i] + grad * grad;
float effective_lr = effectiveLROut[i] =
@ -300,7 +300,7 @@ class SparseAdagradOp final : public Operator<CPUContext> {
const auto* momentIn = Input(MOMENT_1).template data<float>();

std::vector<float> grad(block_size);
for (const auto i : c10::irange(n)) {
for (auto i = 0; i < n; ++i) {
auto idx = indices[i];
auto offsetI = i * block_size;
auto offsetIdx = idx * block_size;
@ -504,7 +504,7 @@ class RowWiseSparseAdagradOp final : public Operator<Context> {
#else
VLOG(1) << "using plain adagrad updates in RowWiseSparseAdagradOp";

for (const auto i : c10::irange(n)) {
for (auto i = 0; i < n; ++i) {
auto idx = indices[i];
float freq = (counter_halflife_ > 0 && count[idx] > 0)
? counter_halflife_ / count[idx]
@ -542,13 +542,13 @@ class RowWiseSparseAdagradOp final : public Operator<Context> {
const float* g = gradIn + offsetI;
float* h = moment + idx;
float hs = 0.;
for (const auto j : c10::irange(block_size)) {
for (auto j = 0; j < block_size; ++j) {
float gj = std::fma(weight_decay_ * freq, w[j], g[j]);
hs += gj * gj;
}
float hi = h[0] = h[0] + hs / block_size;
float step = lr[0] / (std::sqrt(hi) + epsilon_);
for (const auto j : c10::irange(block_size)) {
for (auto j = 0; j < block_size; ++j) {
float gj = std::fma(weight_decay_ * freq, w[j], g[j]);
w[j] = w[j] + gj * step;
}

@ -21,7 +21,7 @@ void adam_update(
float correction,
const float* lr,
Context* /*context*/) {
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
float gi = g[i];
float mi = nm[i] = m[i] * beta1 + gi * (1 - beta1);
float vi = nv[i] = v[i] * beta2 + gi * gi * (1 - beta2);
@ -45,7 +45,7 @@ void adam_compute(
float correction,
const float* lr,
Context* /*context*/) {
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
float gi = g[i];
float mi = nm[i] = m[i] * beta1 + gi * (1 - beta1);
float vi = nv[i] = v[i] * beta2 + gi * gi * (1 - beta2);
@ -74,7 +74,7 @@ void adam_compute_smart_decay(
Context* /*context*/) {
float k = (float)(t - lastSeenIn[0]);
lastSeenOut[0] = t;
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
float gi = g[i];
// The number of steps since this param was last seen.
// We don't need integer precision for k. Float is fine and it's faster to convert here.
@ -107,7 +107,7 @@ void adam_compute_output_grad(
float correction,
const float* lr,
Context* /*context*/) {
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
float gi = g[i];
float mi = nm[i] = m[i] * beta1 + gi * (1 - beta1);
float vi = nv[i] = v[i] * beta2 + gi * gi * (1 - beta2);
@ -135,7 +135,7 @@ void radam_update(
float r_correction,
const float* lr,
Context* /*context*/) {
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
float gi = g[i];
float mi = nm[i] = m[i] * beta1 + gi * (1 - beta1);
float vi = nv[i] = v[i] * beta2 + gi * gi * (1 - beta2);
@ -169,7 +169,7 @@ void radam_compute(
float r_correction,
const float* lr,
Context* /*context*/) {
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
float gi = g[i];
float mi = nm[i] = m[i] * beta1 + gi * (1 - beta1);
float vi = nv[i] = v[i] * beta2 + gi * gi * (1 - beta2);
@ -204,7 +204,7 @@ void radam_compute_output_grad(
float r_correction,
const float* lr,
Context* /*context*/) {
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
float gi = g[i];
float mi = nm[i] = m[i] * beta1 + gi * (1 - beta1);
float vi = nv[i] = v[i] * beta2 + gi * gi * (1 - beta2);
@ -350,7 +350,7 @@ class SparseAdamOp final : public Operator<Context> {
auto* moment2Out = Output(OUTPUT_MOMENT_2)->template mutable_data<T>();

if (OutputSize() == 3) {
for (const auto i : c10::irange(n)) {
for (auto i = 0; i < n; ++i) {
auto idx = indices[i];

if (block_size == 1) {
@ -444,7 +444,7 @@ class SparseAdamOp final : public Operator<Context> {
} else {
Output(OUTPUT_GRAD)->ResizeLike(Input(GRAD));
auto* gradOut = Output(OUTPUT_GRAD)->template mutable_data<T>();
for (const auto i : c10::irange(n)) {
for (auto i = 0; i < n; ++i) {
auto idx = indices[i];

if (block_size == 1) {
@ -593,7 +593,7 @@ class SmartDecaySparseAdamOp final : public Operator<Context> {
auto* moment2Out = Output(OUTPUT_MOMENT_2)->template mutable_data<T>();
int64_t* lastSeenOut = Output(OUTPUT_LAST_SEEN)->template mutable_data<int64_t>();

for (const auto i : c10::irange(n)) {
for (auto i = 0; i < n; ++i) {
auto idx = indices[i];
auto offsetI = i * block_size;
auto offsetIdx = idx * block_size;
@ -673,7 +673,7 @@ class RowWiseSparseAdamOp final : public Operator<Context> {
auto* moment2Out = Output(OUTPUT_MOMENT_2)->template mutable_data<T>();

if (OutputSize() == 3) {
for (const auto i : c10::irange(n)) {
for (auto i = 0; i < n; ++i) {
auto idx = indices[i];

if (block_size == 1) {
@ -719,13 +719,13 @@ class RowWiseSparseAdamOp final : public Operator<Context> {
float* nm2 = moment2Out + idx;

float m2_sum = 0.;
for (const auto j : c10::irange(block_size)) {
for (auto j = 0; j < block_size; ++j) {
float gj = g[j];
m2_sum += gj * gj;
}
float vi = nm2[0] =
m2[0] * beta2_ + (m2_sum / block_size) * (1 - beta2_);
for (const auto j : c10::irange(block_size)) {
for (auto j = 0; j < block_size; ++j) {
float mi = nm1[j] = m1[j] * beta1_ + g[j] * (1 - beta1_);
nw[j] = w[j] + lr[0] * correction * mi / (std::sqrt(vi) + epsilon_);
}
@ -734,7 +734,7 @@ class RowWiseSparseAdamOp final : public Operator<Context> {
} else {
Output(OUTPUT_GRAD)->ResizeLike(Input(GRAD));
auto* gradOut = Output(OUTPUT_GRAD)->template mutable_data<T>();
for (const auto i : c10::irange(n)) {
for (auto i = 0; i < n; ++i) {
auto idx = indices[i];

if (block_size == 1) {
@ -781,13 +781,13 @@ class RowWiseSparseAdamOp final : public Operator<Context> {
float* ng = gradOut + offsetI;

float m2_sum = 0.;
for (const auto j : c10::irange(block_size)) {
for (auto j = 0; j < block_size; ++j) {
float gj = g[j];
m2_sum += gj * gj;
}
float vi = nm2[0] =
m2[0] * beta2_ + (m2_sum / block_size) * (1 - beta2_);
for (const auto j : c10::irange(block_size)) {
for (auto j = 0; j < block_size; ++j) {
float mi = nm1[j] = m1[j] * beta1_ + g[j] * (1 - beta1_);
float ngi = ng[j] = correction * mi / (std::sqrt(vi) + epsilon_);
nw[j] = w[j] + lr[0] * ngi;

@ -21,7 +21,7 @@ void lr_update(
float x = 0;
float y = 0, z = 0;
const float kEps = 1e-12f;
for (const auto i : c10::irange(n)) {
for (auto i = 0; i < n; i++) {
x += grad[i] * effgrad[i];
if (normalized_lr_adaption) {
y += grad[i] * grad[i];

@ -5,7 +5,6 @@
#include <cmath>
#include "caffe2/core/context.h"
#include "caffe2/core/export_caffe2_op_to_c10.h"
#include <c10/util/irange.h>
#include "caffe2/core/operator.h"
#include "caffe2/sgd/learning_rate_functors.h"

@ -163,7 +162,7 @@ class LearningRateOp final : public Operator<Context> {
sub_policy_num_iters.size(),
0,
"Must specify at least one sub learning rate policy.");
for (const auto i : c10::irange(sub_policy_num_iters.size())) {
for (size_t i = 0; i < sub_policy_num_iters.size(); ++i) {
CAFFE_ENFORCE_GT(
sub_policy_num_iters[i],
0,

@ -17,7 +17,7 @@ void momentum_sgd_update(
float* param,
Context* /*context*/) {
const float LR = lr[0];
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
if (!nesterov) {
const float adjusted_gradient = LR * g[i] + momentum * m[i];
nm[i] = adjusted_gradient;
@ -154,7 +154,7 @@ class SparseMomentumSGDUpdateOp final : public Operator<Context> {
auto* momentumOut = Output(OUTPUT_MOMENTUM)->template mutable_data<T>();
auto* paramOut = Output(OUTPUT_PARAM)->template mutable_data<T>();

for (const auto i : c10::irange(n)) {
for (auto i = 0; i < n; ++i) {
auto idx = indices[i];
auto offsetI = i * block_size;
auto offsetIdx = idx * block_size;

@ -217,8 +217,8 @@ class RowWiseSparseAdagradFusedWithSparseLengthsSumGradientOp final
auto* grad_buffer_data =
is_mean ? grad_buffer_.template mutable_data<T>() : NULL;
if (is_mean) {
for (const auto rangeIndex : c10::irange(numSegments)) {
for (const auto tmpIndex : c10::irange(block_size)) {
for (auto rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) {
for (auto tmpIndex = 0; tmpIndex < block_size; ++tmpIndex) {
auto offsetI = rangeIndex * block_size;
grad_buffer_data[offsetI + tmpIndex] = lengths[rangeIndex] > 0
? gradIn[offsetI + tmpIndex] / lengths[rangeIndex]
@ -269,7 +269,7 @@ class RowWiseSparseAdagradFusedWithSparseLengthsSumGradientOp final
T counter_halflife,
rowWiseAdagradT& kernel) {
int dataIndex = 0;
for (const auto rangeIndex : c10::irange(numSegments)) {
for (auto rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) {
auto offsetI = rangeIndex * block_size;
const float* g = gradIn + offsetI;

@ -557,7 +557,7 @@ class RowWiseSparseAdagradFusedWithSparseLengthsWeightedSumGradientOp final
// ignores this dependency and fuses these two loops.
std::vector<T> temp_grad(block_size);
int dataIndex = 0;
for (const auto rangeIndex : c10::irange(numSegments)) {
for (auto rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) {
for (auto start = dataIndex; dataIndex < start + lengths[rangeIndex];
++dataIndex) {
std::size_t idx = indices[dataIndex];
@ -591,7 +591,7 @@ class RowWiseSparseAdagradFusedWithSparseLengthsWeightedSumGradientOp final
CAFFE_ENFORCE_EQ(dataIndex, n);

dataIndex = 0;
for (const auto rangeIndex : c10::irange(numSegments)) {
for (auto rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) {
auto offsetI = rangeIndex * block_size;
const float* g = gradIn + offsetI;

@ -606,7 +606,7 @@ class RowWiseSparseAdagradFusedWithSparseLengthsWeightedSumGradientOp final
auto offsetIdx = idx * block_size;
auto localOffset = dataIndex - start;

for (const auto i : c10::irange(block_size)) {
for (int i = 0; i < block_size; ++i) {
temp_grad[i] = auxParamIn[localOffset] * g[i];
}

@ -839,7 +839,7 @@ class RowWiseSparseAdagradFusedWithSparseLengthsWeightedSumGradientApproxOp

std::vector<T> temp_grad(block_size);
int dataIndex = 0;
for (const auto rangeIndex : c10::irange(numSegments)) {
for (auto rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) {
auto offsetI = rangeIndex * block_size;
const float* g = gradIn + offsetI;

@ -902,7 +902,7 @@ class RowWiseSparseAdagradFusedWithSparseLengthsWeightedSumGradientApproxOp

alignas(64) float temp[VLEN];
_mm256_store_ps(temp, acc_v);
for (const auto j : c10::irange(VLEN)) {
for (int j = 0; j < VLEN; ++j) {
acc += temp[j];
}
#endif

@ -40,7 +40,7 @@ class RowWiseCounterOp final : public Operator<CPUContext> {
return true;
}

for (const auto i : c10::irange(n)) {
for (auto i = 0; i < n; ++i) {
const std::size_t idx = indices[i];
CAFFE_ENFORCE_GE(
Input(COUNTER).numel(),

@ -19,7 +19,7 @@ void storm_update(
const float beta,
Context* /*context*/) {
float gradSqSumTmp = 0.0;
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
const float gi = gradIn[i];
gradSqSumTmp += gi * gi;
}
@ -27,7 +27,7 @@ void storm_update(

const float nlr = lr[0] * std::pow(beta + gradSqSumOut[0], -1.0 / 3.0);
const float alpha = momentum * nlr * nlr;
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
const float gi = gradIn[i];
const float mi = momentIn[i];
float new_mi = momentOut[i] = gi + (1.0 - alpha) * (mi - gi);
@ -120,7 +120,7 @@ class SparseStormOp final : public Operator<Context> {
}

float gradSqSumTmp = 0.0;
for (const auto i : c10::irange(Input(GRAD).numel())) {
for (auto i = 0; i < Input(GRAD).numel(); ++i) {
const float gi = gradIn[i];
gradSqSumTmp += gi * gi;
}
@ -130,7 +130,7 @@ class SparseStormOp final : public Operator<Context> {
const float alpha = momentum_ * nlr * nlr;
const auto block_size = Input(GRAD).numel() / n;

for (const auto i : c10::irange(n)) {
for (auto i = 0; i < n; ++i) {
auto idx = indices[i];
if (block_size == 1) {
const float gi = gradIn[i];
@ -162,7 +162,7 @@ class SparseStormOp final : public Operator<Context> {
i);
#endif

for (const auto j : c10::irange(block_size)) {
for (auto j = 0; j < block_size; ++j) {
const float gi = gradIn[offsetI + j];
const float mi = momentIn[offsetIdx + j];
float new_mi = momentOut[offsetIdx + j] =

@ -15,12 +15,12 @@ void wngrad_update(
float epsilon,
const float* lr,
Context* /*context*/) {
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
float gi = g[i];
nw[i] = w[i] + lr[0] * gi / (h[0] + epsilon);
}
float nhTmp = 0.0;
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
float gi = g[i];
nhTmp += gi * gi;
}
@ -42,13 +42,13 @@ void wngrad_update_output_effective_lr(
Context* /*context*/) {
effectiveLROut[0] = lr[0] / (seqBIn[0] + epsilon);
float seqBTmp = 0.0;
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
float gi = gradIn[i];
seqBTmp += gi * gi;
}
seqBTmp /= (seqBIn[0] + epsilon);
seqBOut[0] = seqBIn[0] + seqBTmp;
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
float grad = gradIn[i];
paramOut[i] = paramIn[i] + effectiveLROut[0] * grad;
}
@ -69,14 +69,14 @@ void wngrad_update_output_effective_lr_and_update(
Context* /*context*/) {
effectiveLROut[0] = lr[0] / (seqBIn[0] + epsilon);
float seqBTmp = 0.0;
for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
float gi = gradIn[i];
seqBTmp += gi * gi;
}
seqBTmp /= (seqBIn[0] + epsilon);
seqBOut[0] = seqBIn[0] + seqBTmp;

for (const auto i : c10::irange(N)) {
for (auto i = 0; i < N; ++i) {
float grad = gradIn[i];
float update = updateOut[i] = effectiveLROut[0] * grad;
paramOut[i] = paramIn[i] + update;
@ -193,7 +193,7 @@ class SparseWngradOp final : public Operator<Context> {

auto block_size = Input(GRAD).numel() / n;

for (const auto i : c10::irange(n)) {
for (auto i = 0; i < n; ++i) {
auto idx = indices[i];
if (block_size == 1) {
float gi = gradIn[i];
@ -222,7 +222,7 @@ class SparseWngradOp final : public Operator<Context> {
" for input i:",
i);
#endif
for (const auto j : c10::irange(block_size)) {
for (auto j = 0; j < block_size; ++j) {
float gi = gradIn[offsetI + j];
paramOut[offsetIdx + j] =
paramIn[offsetIdx + j] + lr[0] * gi / (seqBIn[0] + epsilon_);
@ -230,7 +230,7 @@ class SparseWngradOp final : public Operator<Context> {
}
}
float seqBTmp = 0.0;
for (const auto i : c10::irange(Input(GRAD).numel())) {
for (auto i = 0; i < Input(GRAD).numel(); ++i) {
float gi = gradIn[i];
seqBTmp += gi * gi;
}

@ -133,7 +133,7 @@ CAFFE_ENFORCE_EQ(param_tensor.dim(), moment_tensor.dim());
CAFFE_ENFORCE_EQ(param_tensor.dim(), g_avg_tensor.dim());
CAFFE_ENFORCE_EQ(param_tensor.dim(), g2_avg_tensor.dim());
CAFFE_ENFORCE_EQ(param_tensor.dim(), grad_tensor.dim());
for (const auto i : c10::irange(param_tensor.dim())) {
for (int i = 0; i < param_tensor.dim(); ++i) {
CAFFE_ENFORCE_EQ(param_tensor.dim32(i), moment_tensor.dim32(i));
CAFFE_ENFORCE_EQ(param_tensor.dim32(i), g_avg_tensor.dim32(i));
CAFFE_ENFORCE_EQ(param_tensor.dim32(i), g2_avg_tensor.dim32(i));

@ -28,7 +28,7 @@ class TORCH_API PatternNetTransform : public Transform {
"External outputs do not match!");
ordered_ops_ = GetPatternTraversalOrder(p_);
inverse_ops_.resize(ordered_ops_.size());
for (const auto i : c10::irange(ordered_ops_.size())) {
for (size_t i = 0; i < ordered_ops_.size(); i++) {
inverse_ops_[ordered_ops_[i]] = i;
}
}

@ -9,7 +9,6 @@

#include <c10/util/Logging.h>
#include <c10/util/string_view.h>
#include <c10/util/irange.h>

#include "caffe2/utils/proto_wrap.h"
#include "caffe2/proto/caffe2_pb.h"

@ -4,7 +4,6 @@
#include <condition_variable>
#include <thread>
#include "c10/util/thread_name.h"
#include <c10/util/irange.h>
#include "caffe2/core/common.h"
#include "caffe2/core/logging.h"

@ -340,7 +339,7 @@ class WorkersPool {
CreateWorkers(workers_count);
DCHECK_LE(workers_count, (int)workers_.size());
counter_to_decrement_when_ready_.Reset(workers_count);
for (const auto task : c10::irange(1, tasks.size())) {
for (size_t task = 1; task < tasks.size(); ++task) {
workers_[task - 1]->StartWork(tasks[task].get());
}
// Execute the remaining workload immediately on the current thread.

@ -8,7 +8,6 @@
#include <string>

#include <c10/core/thread_pool.h>
#include <c10/util/irange.h>
#include <caffe2/core/db.h>
#include <caffe2/core/logging.h>
#include <caffe2/operators/prefetch_op.h>
@ -226,7 +225,7 @@ void VideoInputOp<Context>::CheckParamsAndPrint() {
if (random_sampling_rate_) {
LOG(INFO) << "random sampling with max:" << random_sampling_rate_;
}
for (const auto i : c10::irange(channels_rgb_)) {
for (int i = 0; i < channels_rgb_; i++) {
LOG(INFO) << " RGB " << i << "-th channel mean: " << mean_rgb_[i]
<< " std: " << 1.f / inv_std_rgb_[i];
}
@ -238,7 +237,7 @@ void VideoInputOp<Context>::CheckParamsAndPrint() {
<< "and a sampling rate of 1:" << sampling_rate_of_
<< " flow_data_type_: " << flow_data_type_
<< " flow_alg_type_: " << flow_alg_type_;
for (const auto i : c10::irange(channels_of_)) {
for (int i = 0; i < channels_of_; i++) {
LOG(INFO) << " Optical flow" << i
<< "-th channel mean: " << mean_of_[i]
<< " std: " << 1.f / inv_std_of_[i];
@ -258,7 +257,7 @@ void VideoInputOp<Context>::CheckParamsAndPrint() {
if (video_res_type_ == VideoResType::USE_SHORT_EDGE) {
if (jitter_scales_.size() > 0) {
LOG(INFO) << "Using scale jittering:";
for (const auto idx : c10::irange(jitter_scales_.size())) {
for (int idx = 0; idx < jitter_scales_.size(); idx++) {
LOG(INFO) << "scale " << idx << ": " << jitter_scales_[idx];
}
} else {
@ -391,7 +390,7 @@ VideoInputOp<Context>::VideoInputOp(
}

channels_rgb_ = 3;
for (const auto i : c10::irange(4, 7)) {
for (int i = 4; i < 7; i++) {
mean_rgb_.push_back(InputDataMean[i]);
inv_std_rgb_.push_back(1.f / InputDataStd[i]);
}
@ -404,7 +403,7 @@ VideoInputOp<Context>::VideoInputOp(
get_optical_flow_ = false;
get_rgb_ = true;
sampling_rate_rgb_ = 1;
for (const auto i : c10::irange(4, 7)) {
for (int i = 4; i < 7; i++) {
mean_rgb_.push_back(InputDataMean[i]);
inv_std_rgb_.push_back(1.f / InputDataStd[i]);
}
@ -421,7 +420,7 @@ VideoInputOp<Context>::VideoInputOp(
switch (flow_data_type_) {
case FlowDataType::Flow2C:
channels_of_ = 2;
for (const auto i : c10::irange(channels_of_)) {
for (int i = 0; i < channels_of_; i++) {
mean_of_.push_back(InputDataMean[i]);
inv_std_of_.push_back(1.f / InputDataStd[i]);
}
@ -429,7 +428,7 @@ VideoInputOp<Context>::VideoInputOp(

case FlowDataType::Flow3C:
channels_of_ = 3;
for (const auto i : c10::irange(channels_of_)) {
for (int i = 0; i < channels_of_; i++) {
mean_of_.push_back(InputDataMean[i]);
inv_std_of_.push_back(1.f / InputDataStd[i]);
}
@ -438,7 +437,7 @@ VideoInputOp<Context>::VideoInputOp(
// early fusion with gray
case FlowDataType::FlowWithGray:
channels_of_ = 3;
for (const auto i : c10::irange(2)) {
for (int i = 0; i < 2; i++) {
mean_of_.push_back(InputDataMean[i]);
inv_std_of_.push_back(1.f / InputDataStd[i]);
}
@ -449,11 +448,11 @@ VideoInputOp<Context>::VideoInputOp(
// early fusion with RGB
case FlowDataType::FlowWithRGB:
channels_of_ = 5;
for (const auto i : c10::irange(2)) {
for (int i = 0; i < 2; i++) {
mean_of_.push_back(InputDataMean[i]);
inv_std_of_.push_back(1.f / InputDataStd[i]);
}
for (const auto i : c10::irange(4, 7)) {
for (int i = 4; i < 7; i++) {
mean_of_.push_back(InputDataMean[i]);
inv_std_of_.push_back(1.f / InputDataStd[i]);
}
@ -528,15 +527,15 @@ void VideoInputOp<Context>::GetLabelsFromProto(
int* label_data) {
int num_clips = clip_per_video_ * crop_per_clip_;
if (!do_multi_label_) {
for (const auto i : c10::irange(num_clips)) {
for (int i = 0; i < num_clips; i++) {
label_data[i] = label_proto.int32_data(0);
}
} else {
// For multiple label case, output label is a binary vector
// where presented concepts are marked 1
memset(label_data, 0, sizeof(int) * num_of_class_ * num_clips);
for (const auto i : c10::irange(num_clips)) {
for (const auto j : c10::irange(label_proto.int32_data_size())) {
for (int i = 0; i < num_clips; i++) {
for (int j = 0; j < label_proto.int32_data_size(); j++) {
CAFFE_ENFORCE_LT(
label_proto.int32_data(j),
num_of_class_,
@ -660,7 +659,7 @@ bool VideoInputOp<Context>::GetClipsAndLabelsFromDBValue(
const TensorProto& start_frm_proto = protos.protos(curr_proto_idx++);
start_frm = start_frm_proto.int32_data(0);
if (get_start_frame_) {
for (const auto i : c10::irange(num_clips)) {
for (int i = 0; i < num_clips; i++) {
start_frame_data[i] = start_frm;
}
}
@ -670,7 +669,7 @@ bool VideoInputOp<Context>::GetClipsAndLabelsFromDBValue(
CAFFE_ENFORCE_GE(
protos.protos_size(), curr_proto_idx + 1, "Video Id not provided");
const TensorProto& video_id_proto = protos.protos(curr_proto_idx);
for (const auto i : c10::irange(num_clips)) {
for (int i = 0; i < num_clips; i++) {
video_id_data[i] = video_id_proto.int64_data(0);
}
}
@ -775,7 +774,7 @@ void VideoInputOp<Context>::DecodeAndTransform(
int clip_offset_of = channels_of_ * length_of_ * crop_size_ * crop_size_;
for (int i = 0; i < std::min(clip_per_video_, int(buffer_rgb.size()));
i++) {
for (const auto j : c10::irange(crop_per_clip_)) {
for (int j = 0; j < crop_per_clip_; j++) {
// get the rectangle for cropping
int h_off = 0;
int w_off = 0;
@ -858,7 +857,7 @@ void VideoInputOp<Context>::DecodeAndTransform(
}
}
if (buffer_rgb.size() > 0) {
for (const auto i : c10::irange(buffer_rgb.size())) {
for (int i = 0; i < buffer_rgb.size(); i++) {
unsigned char* buff = buffer_rgb[i];
delete[] buff;
}
@ -887,12 +886,12 @@ bool VideoInputOp<Context>::Prefetch() {
// Prefetching handled with a thread pool of "decode_threads" threads.
std::mt19937 meta_randgen(time(nullptr));
std::vector<std::mt19937> randgen_per_thread;
for (const auto i : c10::irange(num_decode_threads_)) {
for (int i = 0; i < num_decode_threads_; ++i) {
randgen_per_thread.emplace_back(meta_randgen());
}

std::bernoulli_distribution mirror_this_clip(0.5);
for (const auto item_id : c10::irange(batch_size_)) {
for (int item_id = 0; item_id < batch_size_; ++item_id) {
std::mt19937* randgen =
&randgen_per_thread[item_id % num_decode_threads_];

@ -5,7 +5,6 @@
#include <test/cpp/api/support.h>

#include <c10/util/ArrayRef.h>
#include <c10/util/irange.h>
#include <c10/util/tempfile.h>

#include <algorithm>
@ -174,7 +173,7 @@ TEST(DataTest, InfiniteStreamDataset) {
for (auto& batch : *data_loader) {
ASSERT_LT(batch_index, 3);
ASSERT_EQ(batch.size(), kBatchSize);
for (const auto j : c10::irange(kBatchSize)) {
for (size_t j = 0; j < kBatchSize; ++j) {
ASSERT_EQ(batch.at(j), 1 + (batch_index * kBatchSize) + j);
}
batch_index += 1;
@ -838,7 +837,7 @@ TEST(DataTest, CanUseCustomTypeAsIndexType) {

size_t i = 0;
for (auto batch : *data_loader) {
for (const auto j : c10::irange(kBatchSize)) {
for (int j = 0; j < kBatchSize; ++j) {
ASSERT_EQ(batch.at(j), 10 + j);
}
i += 1;
@ -858,7 +857,7 @@ TEST(DataTest, DistributedRandomSamplerSingleReplicaProduceCorrectSamples) {
ASSERT_EQ(res.size(), sample_count);

std::sort(res.begin(), res.end());
for (const auto i : c10::irange(res.size())) {
for (size_t i = 0; i < res.size(); ++i) {
ASSERT_EQ(res[i], i);
}
}
@ -873,14 +872,14 @@ TEST(DataTest, DistributedRandomSamplerMultiReplicaProduceCorrectSamples) {
size_t batch_size) {
std::vector<std::unique_ptr<samplers::DistributedRandomSampler>> samplers;

for (const auto i : c10::irange(num_replicas)) {
for (size_t i = 0; i < num_replicas; ++i) {
samplers.emplace_back(
torch::make_unique<samplers::DistributedRandomSampler>(
sample_count, num_replicas, i, allow_duplicates));
}

std::vector<size_t> res;
for (const auto i : c10::irange(num_replicas)) {
for (size_t i = 0; i < num_replicas; ++i) {
(*samplers[i]).reset();
torch::optional<std::vector<size_t>> idx;
while ((idx = (*samplers[i]).next(batch_size)).has_value()) {
@ -954,7 +953,7 @@ TEST(DataTest, DistributedSequentialSamplerSingleReplicaProduceCorrectSamples) {
ASSERT_EQ(res.size(), sample_count);

std::sort(res.begin(), res.end());
for (const auto i : c10::irange(res.size())) {
for (size_t i = 0; i < res.size(); ++i) {
ASSERT_EQ(res[i], i);
}
}
@ -970,14 +969,14 @@ TEST(DataTest, DistributedSequentialSamplerMultiReplicaProduceCorrectSamples) {
std::vector<std::unique_ptr<samplers::DistributedSequentialSampler>>
samplers;

for (const auto i : c10::irange(num_replicas)) {
for (size_t i = 0; i < num_replicas; ++i) {
samplers.emplace_back(
torch::make_unique<samplers::DistributedSequentialSampler>(
sample_count, num_replicas, i, allow_duplicates));
}

std::vector<size_t> res;
for (const auto i : c10::irange(num_replicas)) {
for (size_t i = 0; i < num_replicas; ++i) {
(*samplers[i]).reset();
torch::optional<std::vector<size_t>> idx;
while ((idx = (*samplers[i]).next(batch_size)).has_value()) {
@ -1491,7 +1490,7 @@ TEST(DataLoaderTest, StatefulDatasetWithNoWorkers) {

auto data_loader = torch::data::make_data_loader(D{});

for (const auto i : c10::irange(10)) {
for (size_t i = 0; i < 10; ++i) {
const auto number_of_iterations =
std::distance(data_loader->begin(), data_loader->end());
ASSERT_EQ(
@ -1532,7 +1531,7 @@ TEST(DataLoaderTest, StatefulDatasetWithManyWorkers) {
torch::data::datasets::make_shared_dataset<D>(),
DataLoaderOptions().workers(kNumberOfWorkers));

for (const auto i : c10::irange(10)) {
for (size_t i = 0; i < 10; ++i) {
const auto number_of_iterations =
std::distance(data_loader->begin(), data_loader->end());
ASSERT_EQ(
@ -1575,7 +1574,7 @@ TEST(DataLoaderTest, StatefulDatasetWithMap) {
})),
DataLoaderOptions{});

for (const auto i : c10::irange(10)) {
for (size_t i = 0; i < 10; ++i) {
const auto number_of_iterations =
std::distance(data_loader->begin(), data_loader->end());
ASSERT_EQ(
@ -1676,8 +1675,7 @@ TEST(DataLoaderTest, ChunkDataSetGetBatch) {
dataset,
DataLoaderOptions(batch_size).workers(dataloader_worker_count));

for (const auto epoch_index : c10::irange(epoch_count)) {
(void)epoch_index; // Suppress unused variable warning
for (int epoch_index = 0; epoch_index < epoch_count; ++epoch_index) {
std::vector<bool> result(total_example_count, false);
int iteration_count = 0;
for (auto iterator = data_loader->begin();
@ -1689,11 +1687,11 @@ TEST(DataLoaderTest, ChunkDataSetGetBatch) {
// When prefetch_count is equal to 1 and no worker thread, the batch
// order is deterministic. So we can verify elements in each batch.
if (prefetch_count == 1 && dataloader_worker_count == 0) {
for (const auto j : c10::irange(batch_size)) {
for (size_t j = 0; j < batch_size; ++j) {
ASSERT_EQ(batch[j], iteration_count * batch_size + j);
}
}
for (const auto j : c10::irange(batch_size)) {
for (size_t j = 0; j < batch_size; ++j) {
result[batch[j]] = true;
}
}
@ -1980,8 +1978,7 @@ TEST(DataLoaderTest, ChunkDatasetSave) {
dataset,
DataLoaderOptions(batch_size).workers(dataloader_worker_count));

for (const auto epoch_index : c10::irange(epoch_count)) {
(void)epoch_index; // Suppress unused variable warning
for (int epoch_index = 0; epoch_index < epoch_count; ++epoch_index) {
int iteration_count = 0;
for (auto iterator = data_loader->begin(); iterator != data_loader->end();
++iterator, ++iteration_count) {
@ -2082,7 +2079,7 @@ TEST(DataLoaderTest, ChunkDatasetLoad) {
auto data_loader = torch::data::make_data_loader(
dataset, DataLoaderOptions(batch_size).workers(dataloader_worker_count));

for (const auto epoch_index : c10::irange(epoch_count)) {
for (int epoch_index = 0; epoch_index < epoch_count; ++epoch_index) {
int iteration_count = 0;

// For the first epoch, the returned batch should be returned from the
@ -2131,7 +2128,7 @@ TEST(DataLoaderTest, ChunkDatasetCrossChunkShuffle) {
size_t index = 0;

// Repeatly sample every 5 indices.
for (const auto i : c10::irange(batch_size)) {
for (size_t i = 0; i < batch_size; ++i) {
for (size_t j = 0; j < size_ / batch_size; ++j) {
indices_[index++] = i + batch_size * j;
}
@ -2225,11 +2222,11 @@ TEST(DataLoaderTest, ChunkDatasetCrossChunkShuffle) {
// construct expected result
int offset = 0;

for (const auto i : c10::irange((chunk_count + cross_chunk_shuffle_count - 1) /
cross_chunk_shuffle_count)) {
for (const auto j : c10::irange(chunk_size)) {
(void)j; // Suppress unused variable warning
for (const auto k : c10::irange(cross_chunk_shuffle_count)) {
for (int i = 0; i < (chunk_count + cross_chunk_shuffle_count - 1) /
cross_chunk_shuffle_count;
i++) {
for (int j = 0; j < chunk_size; ++j) {
for (int k = 0; k < cross_chunk_shuffle_count; ++k) {
if (i * cross_chunk_shuffle_count + k < chunk_count) {
expected_result.push_back(i * cross_chunk_shuffle_count + k);
}

@ -2,7 +2,6 @@

#include <torch/torch.h>
#include <ATen/native/Pow.h>
#include <c10/util/irange.h>
#include <torch/types.h>
#include <torch/utils.h>
#include <test/cpp/api/support.h>
@ -25,7 +24,7 @@ TEST_F(DispatchTest, TestAVX2) {
setenv("ATEN_CPU_CAPABILITY", "avx2", 1);
#endif
const auto actual_pow_avx2 = vals_tensor.pow(pows_tensor);
for (const auto i : c10::irange(4)) {
for (int i = 0; i < 4; i++) {
ASSERT_EQ(result[i], actual_pow_avx2[i].item<int>());
}
}
@ -41,7 +40,7 @@ TEST_F(DispatchTest, TestAVX512) {
setenv("ATEN_CPU_CAPABILITY", "avx512", 1);
#endif
const auto actual_pow_avx512 = vals_tensor.pow(pows_tensor);
for (const auto i : c10::irange(4)) {
for (int i = 0; i < 4; i++) {
ASSERT_EQ(result[i], actual_pow_avx512[i].item<int>());
}
}
@ -57,7 +56,7 @@ TEST_F(DispatchTest, TestDefault) {
setenv("ATEN_CPU_CAPABILITY", "default", 1);
#endif
const auto actual_pow_default = vals_tensor.pow(pows_tensor);
for (const auto i : c10::irange(4)) {
for (int i = 0; i < 4; i++) {
ASSERT_EQ(result[i], actual_pow_default[i].item<int>());
}
}

@ -1,6 +1,5 @@
#include <gtest/gtest.h>

#include <c10/util/irange.h>
#include <torch/torch.h>

#include <test/cpp/api/support.h>
@ -14,7 +13,7 @@ struct ExpandingArrayTest : torch::test::SeedingFixture {};
TEST_F(ExpandingArrayTest, CanConstructFromInitializerList) {
torch::ExpandingArray<5> e({1, 2, 3, 4, 5});
ASSERT_EQ(e.size(), 5);
for (const auto i : c10::irange(e.size())) {
for (size_t i = 0; i < e.size(); ++i) {
ASSERT_EQ((*e)[i], i + 1);
}
}
@ -22,7 +21,7 @@ TEST_F(ExpandingArrayTest, CanConstructFromInitializerList) {
TEST_F(ExpandingArrayTest, CanConstructFromVector) {
torch::ExpandingArray<5> e(std::vector<int64_t>{1, 2, 3, 4, 5});
ASSERT_EQ(e.size(), 5);
for (const auto i : c10::irange(e.size())) {
for (size_t i = 0; i < e.size(); ++i) {
ASSERT_EQ((*e)[i], i + 1);
}
}
@ -30,7 +29,7 @@ TEST_F(ExpandingArrayTest, CanConstructFromVector) {
TEST_F(ExpandingArrayTest, CanConstructFromArray) {
torch::ExpandingArray<5> e(std::array<int64_t, 5>({1, 2, 3, 4, 5}));
ASSERT_EQ(e.size(), 5);
for (const auto i : c10::irange(e.size())) {
for (size_t i = 0; i < e.size(); ++i) {
ASSERT_EQ((*e)[i], i + 1);
}
}
@ -38,7 +37,7 @@ TEST_F(ExpandingArrayTest, CanConstructFromArray) {
TEST_F(ExpandingArrayTest, CanConstructFromSingleValue) {
torch::ExpandingArray<5> e(5);
ASSERT_EQ(e.size(), 5);
for (const auto i : c10::irange(e.size())) {
for (size_t i = 0; i < e.size(); ++i) {
ASSERT_EQ((*e)[i], 5);
}
}

@ -1,6 +1,5 @@
#include <gtest/gtest.h>

#include <c10/util/irange.h>
#include <torch/torch.h>
#include <test/cpp/api/support.h>

@ -15,15 +14,15 @@ torch::Tensor naive_dft(torch::Tensor x, bool forward=true) {
// Roots of unity, exp(-2*pi*j*n/N) for n in [0, N), reversed for inverse transform
std::vector<c10::complex<double>> roots(len);
const auto angle_base = (forward ? -2.0 : 2.0) * M_PI / len;
for (const auto i : c10::irange(len)) {
for (int64_t i = 0; i < len; ++i) {
auto angle = i * angle_base;
roots[i] = c10::complex<double>(std::cos(angle), std::sin(angle));
}

const auto in = x.data_ptr<c10::complex<double>>();
const auto out = out_tensor.data_ptr<c10::complex<double>>();
for (const auto i : c10::irange(len)) {
for (const auto j : c10::irange(len)) {
for (int64_t i = 0; i < len; ++i) {
for (int64_t j = 0; j < len; ++j) {
out[i] += roots[(j * i) % len] * in[j];
}
}

@ -1,6 +1,5 @@
#include <gtest/gtest.h>

#include <c10/util/irange.h>
#include <torch/torch.h>

#include <test/cpp/api/support.h>
@ -1128,7 +1127,7 @@ TEST_F(FunctionalTest, GumbelSoftmax) {
int dims[] = {1, -1};
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays,cppcoreguidelines-avoid-magic-numbers)
int expected[] = {5*3, 5*4};
for (const auto i : c10::irange(2)) {
for(auto i=0; i<2; i++) {
auto logits = torch::randn({5, 4, 3});
int expected_count = expected[i];
auto y_draw = F::gumbel_softmax(logits, F::GumbelSoftmaxFuncOptions().hard(true).dim(dims[i]));
@ -1150,8 +1149,7 @@ TEST_F(FunctionalTest, GumbelSoftmax) {

auto counts = torch::zeros_like(logits);
torch::Tensor y_draw;
for (const auto i : c10::irange(num_draws)) {
(void)i; // Suppress unused variable warning
for (auto i=0; i<num_draws; i++) {
y_draw = F::gumbel_softmax(logits, F::GumbelSoftmaxFuncOptions().hard(true));
counts += y_draw;
}
@ -1177,7 +1175,7 @@ TEST_F(FunctionalTest, Softmax) {
auto output = F::softmax(input, /*dim=*/1);
auto sum = torch::sum(torch::exp(input), 1);

for (const auto i : c10::irange(2)) {
for (int i = 0; i < 2; i++) {
auto expected = torch::exp(input[i]) / sum[i];
ASSERT_TRUE(torch::allclose(output[i], expected));
}
@ -1189,7 +1187,7 @@ TEST_F(FunctionalTest, Softmin) {
auto output = F::softmin(input, /*dim=*/1);
auto sum = torch::sum(torch::exp(-input), 1);

for (const auto i : c10::irange(2)) {
for (int i = 0; i < 2; i++) {
auto expected = torch::exp(-input[i]) / sum[i];
ASSERT_TRUE(torch::allclose(output[i], expected));
}
@ -1201,7 +1199,7 @@ TEST_F(FunctionalTest, LogSoftmax) {
auto output = F::log_softmax(input, /*dim=*/1);
auto sum = torch::sum(torch::exp(input), 1);

for (const auto i : c10::irange(2)) {
for (int i = 0; i < 2; i++) {
auto expected = torch::log(torch::exp(input[i]) / sum[i]);
ASSERT_TRUE(torch::allclose(output[i], expected));
}

@ -1,6 +1,5 @@
#include <gtest/gtest.h>

#include <c10/util/irange.h>
#include <torch/torch.h>

#include <test/cpp/api/init_baseline.h>
@ -15,7 +14,7 @@ void check_exact_values(
const std::vector<std::vector<torch::Tensor>>& expected_parameters) {
ASSERT_EQ(parameters.size(), expected_parameters.size());

for (const auto i : c10::irange(parameters.size())) {
for (size_t i = 0; i < parameters.size(); i++) {
auto layerParameters = parameters[i];
auto expectedLayerParameters = expected_parameters[i];

@ -28,7 +27,7 @@ void check_exact_values(
ASSERT_TRUE(false);
}

for (const auto p : c10::irange(layerParameters.size(0))) {
for (size_t p = 0; p < layerParameters.size(0); p++) {
// Always compare using double dtype, regardless of the original dtype of the tensors
auto tensor = layerParameters[p].to(torch::kFloat64);
auto expectedTensor = expectedLayerParameters[p].to(torch::kFloat64);

@ -1,6 +1,5 @@
#include <gtest/gtest.h>

#include <c10/util/irange.h>
#include <torch/torch.h>

#include <test/cpp/api/support.h>
@ -123,12 +122,10 @@ bool test_mnist(
torch::Device device(with_cuda ? torch::kCUDA : torch::kCPU);
model->to(device);

for (const auto epoch : c10::irange(number_of_epochs)) {
(void)epoch; // Suppress unused variable warning
for (size_t epoch = 0; epoch < number_of_epochs; epoch++) {
// NOLINTNEXTLINE(performance-for-range-copy)
for (torch::data::Example<> batch : *data_loader) {
auto data = batch.data.to(device);
auto targets = batch.target.to(device);
auto data = batch.data.to(device), targets = batch.target.to(device);
torch::Tensor prediction = forward_op(std::move(data));
// NOLINTNEXTLINE(performance-move-const-arg)
torch::Tensor loss = torch::nll_loss(prediction, std::move(targets));
@ -199,7 +196,7 @@ TEST_F(IntegrationTest, CartPole) {

std::vector<torch::Tensor> policy_loss;
std::vector<torch::Tensor> value_loss;
for (const auto i : c10::irange(0U, saved_log_probs.size())) {
for (auto i = 0U; i < saved_log_probs.size(); i++) {
auto advantage = r_t[i] - saved_values[i].item<float>();
policy_loss.push_back(-advantage * saved_log_probs[i]);
value_loss.push_back(

@ -1,6 +1,5 @@
#include <gtest/gtest.h>

#include <c10/util/irange.h>
#include <torch/torch.h>

#include <test/cpp/api/support.h>
@ -705,7 +704,7 @@ TEST_F(ModuleTest, ModulesReturnsExpectedSubmodulesForFlatModel) {
std::vector<std::shared_ptr<torch::nn::Module>> expected = {
model.ptr(), model[0], model[1], model[2]};
ASSERT_EQ(modules.size(), expected.size());
for (const auto i : c10::irange(expected.size())) {
for (size_t i = 0; i < expected.size(); ++i) {
// Assert pointer equality.
ASSERT_EQ(modules[i].get(), expected[i].get());
}
@ -718,7 +717,7 @@ TEST_F(ModuleTest, ModulesExcludesSelfWhenIncludeSelfSetToFalse) {
std::vector<std::shared_ptr<torch::nn::Module>> expected = {
model[0], model[1], model[2]};
ASSERT_EQ(modules.size(), expected.size());
for (const auto i : c10::irange(expected.size())) {
for (size_t i = 0; i < expected.size(); ++i) {
// Assert pointer equality.
ASSERT_EQ(modules[i].get(), expected[i].get());
}
@ -731,7 +730,7 @@ TEST_F(ModuleTest, NamedModulesReturnsExpectedNamedSubmodulesForFlatModel) {
std::vector<std::shared_ptr<torch::nn::Module>> expected = {
model.ptr(), model[0], model[1], model[2]};
ASSERT_EQ(modules.size(), expected.size());
for (const auto i : c10::irange(expected.size())) {
for (size_t i = 0; i < expected.size(); ++i) {
// Assert pointer equality.
ASSERT_EQ(modules[i].key(), i ? std::to_string(i - 1) : std::string());
ASSERT_EQ(modules[i].value().get(), expected[i].get());
@ -746,7 +745,7 @@ TEST_F(ModuleTest, NamedModulesExcludesSelfWhenIncludeSelfSetToFalse) {
std::vector<std::shared_ptr<torch::nn::Module>> expected = {
model[0], model[1], model[2]};
ASSERT_EQ(modules.size(), expected.size());
for (const auto i : c10::irange(expected.size())) {
for (size_t i = 0; i < expected.size(); ++i) {
// Assert pointer equality.
ASSERT_EQ(modules[i].key(), std::to_string(i));
ASSERT_EQ(modules[i].value().get(), expected[i].get());
@ -759,7 +758,7 @@ TEST_F(ModuleTest, ChildrenReturnsExpectedSubmodulesForFlatModel) {
std::vector<std::shared_ptr<torch::nn::Module>> expected = {
model[0], model[1], model[2]};
ASSERT_EQ(modules.size(), expected.size());
for (const auto i : c10::irange(expected.size())) {
for (size_t i = 0; i < expected.size(); ++i) {
// Assert pointer equality.
ASSERT_EQ(modules[i].get(), expected[i].get());
}
@ -775,7 +774,7 @@ TEST_F(ModuleTest, NamedChildrenReturnsExpectedNamedSubmodulesForFlatModel) {
std::vector<std::shared_ptr<torch::nn::Module>> expected = {
model[0], model[1], model[2]};
ASSERT_EQ(modules.size(), expected.size());
for (const auto i : c10::irange(expected.size())) {
for (size_t i = 0; i < expected.size(); ++i) {
// Assert pointer equality.
ASSERT_EQ(modules[i].key(), std::to_string(i));
ASSERT_EQ(modules[i].value().get(), expected[i].get());
@ -823,7 +822,7 @@ TEST_F(ModuleTest, NamedBuffersReturnsExpectedTensorsForFlatModel) {
struct TestContainer : torch::nn::Module {
TestContainer(int64_t number, std::vector<TestContainer> modules = {})
: tensor(torch::tensor(number)) {
for (const auto i : c10::irange(modules.size())) {
for (size_t i = 0; i < modules.size(); ++i) {
register_module(
std::to_string(i),
std::make_shared<TestContainer>(std::move(modules[i])));
@ -867,7 +866,7 @@ TEST_F(ModuleTest, ModulesReturnsExpectedSubmodulesForDeepModel) {
std::vector<std::shared_ptr<torch::nn::Module>> modules = model->modules();

ASSERT_EQ(modules.size(), 10);
for (const auto i : c10::irange(modules.size())) {
for (size_t i = 0; i < modules.size(); ++i) {
ASSERT_EQ(get_test_container_item(modules[i]), i);
}
}
@ -880,7 +879,7 @@ TEST_F(ModuleTest, NamedModulesReturnsExpectedNamedSubmodulesForDeepModel) {

ASSERT_EQ(modules.size(), expected.size());

for (const auto i : c10::irange(expected.size())) {
for (size_t i = 0; i < expected.size(); ++i) {
ASSERT_EQ(modules[i].key(), expected[i].first);
ASSERT_EQ(get_test_container_item(modules[i].value()), expected[i].second);
}

@ -1,6 +1,5 @@
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <c10/util/irange.h>
|
||||
#include <torch/torch.h>
|
||||
|
||||
#include <algorithm>
|
||||
@ -119,7 +118,7 @@ TEST_F(ModuleListTest, AccessWithAt) {
|
||||
ASSERT_EQ(list->size(), 3);
|
||||
|
||||
// returns the correct module for a given index
|
||||
for (const auto i : c10::irange(modules.size())) {
|
||||
for (size_t i = 0; i < modules.size(); ++i) {
|
||||
ASSERT_EQ(&list->at<M>(i), modules[i].get());
|
||||
}
|
||||
|
||||
@ -144,7 +143,7 @@ TEST_F(ModuleListTest, AccessWithPtr) {
|
||||
ASSERT_EQ(list->size(), 3);
|
||||
|
||||
// returns the correct module for a given index
|
||||
for (const auto i : c10::irange(modules.size())) {
|
||||
for (size_t i = 0; i < modules.size(); ++i) {
|
||||
ASSERT_EQ(list->ptr(i).get(), modules[i].get());
|
||||
ASSERT_EQ(list[i].get(), modules[i].get());
|
||||
ASSERT_EQ(list->ptr<M>(i).get(), modules[i].get());
|
||||
|
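A sketch of the accessors exercised above, assuming concrete torch::nn::Linear entries (the test's M is its own local module type):

#include <torch/torch.h>
#include <memory>

void modulelist_access() {
  torch::nn::ModuleList list(torch::nn::Linear(3, 4), torch::nn::Linear(4, 5));
  // at<T>(i) returns a T& to the concrete module implementation.
  auto& first = list->at<torch::nn::LinearImpl>(0);
  // ptr(i) returns the type-erased shared_ptr; ptr<T>(i) downcasts it.
  std::shared_ptr<torch::nn::Module> erased = list->ptr(0);
  std::shared_ptr<torch::nn::LinearImpl> typed =
      list->ptr<torch::nn::LinearImpl>(0);
  (void)first;
  (void)erased;
  (void)typed;
}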
@ -1,6 +1,5 @@
#include <gtest/gtest.h>

#include <c10/util/irange.h>
#include <torch/torch.h>

#include <test/cpp/api/support.h>
@ -1149,7 +1148,7 @@ TEST_F(ModulesTest, LayerNorm) {
s.backward();
ASSERT_EQ(y.ndimension(), 2);
ASSERT_EQ(s.ndimension(), 0);
for (const auto i : c10::irange(2)) {
for (auto i = 0; i < 2; i++) {
ASSERT_EQ(y.size(i), 2);
}

@ -1167,7 +1166,7 @@ TEST_F(ModulesTest, GroupNorm) {
s.backward();
ASSERT_EQ(y.ndimension(), 2);
ASSERT_EQ(s.ndimension(), 0);
for (const auto i : c10::irange(2)) {
for (auto i = 0; i < 2; i++) {
ASSERT_EQ(y.size(i), 2);
}

@ -2596,7 +2595,7 @@ TEST_F(ModulesTest, Softmax) {
auto output = m(input);
auto sum = torch::sum(torch::exp(input), 1);

for (const auto i : c10::irange(2)) {
for (int i = 0; i < 2; i++) {
auto expected = torch::exp(input[i]) / sum[i];
ASSERT_TRUE(torch::allclose(output[i], expected));
}
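What the loop above checks, as a self-contained sketch (shapes are illustrative; torch::softmax is the functional equivalent of the Softmax module with dim=1):

#include <torch/torch.h>

void softmax_rowwise_check() {
  auto input = torch::randn({2, 5});
  auto output = torch::softmax(input, /*dim=*/1);
  auto sum = torch::sum(torch::exp(input), 1);
  for (int i = 0; i < 2; i++) {
    // Row-wise definition: softmax(x)_ij = exp(x_ij) / sum_j exp(x_ij).
    auto expected = torch::exp(input[i]) / sum[i];
    TORCH_CHECK(torch::allclose(output[i], expected));
  }
}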
@ -2608,7 +2607,7 @@ TEST_F(ModulesTest, Softmin) {
auto output = m(input);
auto sum = torch::sum(torch::exp(-input), 1);

for (const auto i : c10::irange(2)) {
for (int i = 0; i < 2; i++) {
auto expected = torch::exp(-input[i]) / sum[i];
ASSERT_TRUE(torch::allclose(output[i], expected));
}
@ -2620,7 +2619,7 @@ TEST_F(ModulesTest, LogSoftmax) {
auto output = m(input);
auto sum = torch::sum(torch::exp(input), 1);

for (const auto i : c10::irange(2)) {
for (int i = 0; i < 2; i++) {
auto expected = torch::log(torch::exp(input[i]) / sum[i]);
ASSERT_TRUE(torch::allclose(output[i], expected));
}
@ -2657,7 +2656,7 @@ TEST_F(ModulesTest, AdaptiveLogSoftmaxWithLoss) {
auto logprob_out = asfm->log_prob(x);
NLLLoss nll_loss;

for (const auto v : c10::irange(4)) {
for (int64_t v = 0; v < 4; ++v) {
auto y = torch::full({4}, v, torch::kLong);
auto asm_out = asfm(x, y);
auto out = asm_out.output;
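A hedged usage sketch for the module under test (sizes and the cutoff are illustrative: with 4 classes and cutoffs {2}, classes 0-1 sit in the head and 2-3 in a tail cluster):

#include <torch/torch.h>

void adaptive_softmax_usage() {
  torch::nn::AdaptiveLogSoftmaxWithLoss asfm(
      torch::nn::AdaptiveLogSoftmaxWithLossOptions(8, 4, {2}));
  auto x = torch::randn({4, 8});
  auto y = torch::zeros({4}, torch::kLong);
  auto result = asfm(x, y);          // result.output: per-sample target log-probs
  auto loss = result.loss;           // mean negative log-likelihood
  auto logprob = asfm->log_prob(x);  // full [batch, n_classes] log-prob table
  (void)loss;
  (void)logprob;
}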
@ -2676,10 +2675,10 @@ TEST_F(ModulesTest, Softmax2d) {
auto output = m(input);
auto sum = torch::sum(torch::exp(input), 1);

for (const auto i : c10::irange(1)) {
for (const auto j : c10::irange(2)) {
for (const auto k : c10::irange(3)) {
for (const auto l : c10::irange(4)) {
for (int i = 0; i < 1; i++) {
for (int j = 0; j < 2; j++) {
for (int k = 0; k < 3; k++) {
for (int l = 0; l < 4; l++) {
auto expected = torch::exp(input[i][j][k][l]) / sum[i][k][l];
ASSERT_TRUE(torch::allclose(output[i][j][k][l], expected));
}
@ -3390,8 +3389,8 @@ namespace detail {
TORCH_INTERNAL_ASSERT(a.size(0) == b.size(0));
TORCH_INTERNAL_ASSERT(a.size(1) == b.size(1));
auto retval = torch::zeros({a.size(0), a.size(1), a.size(2), b.size(3)}, torch::kFloat32);
for (const auto i : c10::irange(a.size(0))) {
for (const auto j : c10::irange(a.size(1))) {
for (int i = 0; i < a.size(0); i++) {
for (int j = 0; j < a.size(1); j++) {
retval[i][j] = torch::matmul(a[i][j], b[i][j]);
}
}
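The helper above is the loop form of a batched matrix multiply; torch::matmul already treats the leading dimensions as batch dimensions, so a direct call agrees with the loop (a sketch with illustrative shapes):

#include <torch/torch.h>

void batched_matmul_check() {
  auto a = torch::randn({2, 3, 4, 5});
  auto b = torch::randn({2, 3, 5, 6});
  auto direct = torch::matmul(a, b);  // shape [2, 3, 4, 6]
  auto loop = torch::zeros({2, 3, 4, 6});
  for (int i = 0; i < 2; i++) {
    for (int j = 0; j < 3; j++) {
      loop[i][j] = torch::matmul(a[i][j], b[i][j]);
    }
  }
  TORCH_CHECK(torch::allclose(direct, loop));
}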
@ -3400,9 +3399,9 @@ namespace detail {

torch::Tensor _softmax(const torch::Tensor& x) {
auto output = torch::zeros(x.sizes());
for (const auto i : c10::irange(x.size(0))) {
for (const auto j : c10::irange(x.size(1))) {
for (const auto k : c10::irange(x.size(2))) {
for (int i = 0; i < x.size(0); i++) {
for (int j = 0; j < x.size(1); j++) {
for (int k = 0; k < x.size(2); k++) {
const auto& x_curr = x[i][j][k];
const auto e_x = torch::exp(x_curr - torch::max(x_curr));
output[i][j][k] = e_x / torch::sum(e_x);
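The max subtraction in _softmax above is the standard overflow guard: softmax is invariant to shifting its input, so subtracting max(x) leaves the result unchanged while capping the largest exponent at zero. The same idea in plain C++:

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

std::vector<double> stable_softmax(const std::vector<double>& x) {
  if (x.empty()) return {};
  double mx = x[0];
  for (double v : x) mx = std::max(mx, v);  // shift so the peak maps to exp(0)
  std::vector<double> out(x.size());
  double denom = 0.0;
  for (size_t i = 0; i < x.size(); ++i) {
    out[i] = std::exp(x[i] - mx);
    denom += out[i];
  }
  for (double& v : out) v /= denom;
  return out;
}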
@ -3425,10 +3424,10 @@ namespace detail {
const auto s1 = QKT.size(2);
const auto s2 = QKT.size(3);
if (unseen_mask.defined() || key_padding_mask.defined()) {
for (const auto i : c10::irange(b1)) {
for (const auto j : c10::irange(b2)) {
for (const auto m : c10::irange(s1)) {
for (const auto n : c10::irange(s2)) {
for (int i = 0; i < b1; i++) {
for (int j = 0; j < b2; j++) {
for (int m = 0; m < s1; m++) {
for (int n = 0; n < s2; n++) {
if (unseen_mask.defined() && unseen_mask[m][n].item<double>() == 0) {
QKT[i][j][m][n] = -std::numeric_limits<double>::infinity();
}
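Writing -infinity into masked logits is what zeroes their attention weight, since exp(-inf) == 0 under the subsequent softmax; a minimal sketch (1-D for brevity):

#include <torch/torch.h>
#include <limits>

void mask_then_softmax() {
  auto scores = torch::randn({4});
  auto mask = torch::tensor({1, 1, 0, 0});  // 0 marks a masked position
  scores.masked_fill_(mask == 0, -std::numeric_limits<float>::infinity());
  auto weights = torch::softmax(scores, /*dim=*/0);
  TORCH_CHECK(weights[2].item<double>() == 0.0);  // masked => exactly zero
}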
@ -3476,8 +3475,7 @@ namespace detail {
std::uniform_int_distribution<int> d_2_10(2, 10);
std::uniform_int_distribution<int> d_3_10(3, 10);
bool registration_checked = false;
for (const auto i : c10::irange(100)) {
(void)i; // Suppress unused variable warning
for (int i = 0; i < 100; i++) {
const auto batch_sz = d_2_10(generator);
const auto seq_len = d_2_10(generator);
const auto d_head = d_3_10(generator);

@ -1,6 +1,5 @@
#include <gtest/gtest.h>

#include <c10/util/irange.h>
#include <torch/torch.h>

#include <test/cpp/api/support.h>
@ -41,7 +40,7 @@ TEST_F(NNUtilsTest, ClipGradNorm) {
auto compare_scaling =
[&](const std::vector<torch::Tensor>& grads) -> torch::Tensor {
std::vector<torch::Tensor> p_scale;
for (const auto i : c10::irange(grads.size())) {
for (int i = 0; i < grads.size(); i++) {
auto param = l->parameters()[i];
auto grad = grads[i];
p_scale.push_back(param.grad().data().div(grad).view(-1));
@ -62,7 +61,7 @@ TEST_F(NNUtilsTest, ClipGradNorm) {
std::numeric_limits<float>::infinity(),
};
for (auto norm_type : norm_types) {
for (const auto i : c10::irange(grads.size())) {
for (int i = 0; i < grads.size(); i++) {
l->parameters()[i].mutable_grad() =
grads[i].clone().view_as(l->parameters()[i].data());
}
@ -81,7 +80,7 @@ TEST_F(NNUtilsTest, ClipGradNorm) {
torch::ones(10).div(500),
};
for (auto norm_type : norm_types) {
for (const auto i : c10::irange(grads.size())) {
for (int i = 0; i < grads.size(); i++) {
l->parameters()[i].grad().data().copy_(grads[i]);
}
auto norm_before = compute_norm(norm_type);
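A hedged sketch of the call these hunks exercise: utils::clip_grad_norm_ rescales every gradient in place when their combined norm exceeds max_norm (values here are illustrative):

#include <torch/torch.h>

void clip_grad_norm_example() {
  torch::nn::Linear l(10, 10);
  l(torch::randn({4, 10})).sum().backward();
  // In-place rescaling of each parameter's .grad so the total 2-norm is at
  // most 1.0; the return value is the pre-clip total norm.
  auto total_norm = torch::nn::utils::clip_grad_norm_(l->parameters(), 1.0);
  (void)total_norm;
}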
@ -228,7 +227,7 @@ TEST_F(NNUtilsTest, ClipGradNormErrorIfNonfinite) {
// NOLINTNEXTLINE(cppcoreguidelines-avoid-goto,hicpp-avoid-goto)
EXPECT_THROW(utils::clip_grad_norm_(parameters, 1., norm_type, true), std::exception) << msg;
// Grads should not change if error is thrown
for (const auto p_idx : c10::irange(parameters.size())) {
for (int64_t p_idx = 0; p_idx < parameters.size(); p_idx++) {
ASSERT_TRUE(torch::allclose(parameters[p_idx].grad(), grads_before[p_idx], 1.0, 0.0, /*equal_nan*/ true)) << msg;
}
} else {
@ -286,7 +285,7 @@ TEST_F(NNUtilsTest, ClipGradValue) {
std::vector<std::vector<torch::Tensor>> grad_lists = {
{grad_w, grad_b}, {grad_w, torch::Tensor()}};
for (auto grad_list : grad_lists) {
for (const auto i : c10::irange(grad_list.size())) {
for (int i = 0; i < grad_list.size(); i++) {
auto p = l->parameters()[i];
auto g = grad_list[i];
p.mutable_grad() = g.defined() ? g.clone().view_as(p.data()) : g;
@ -336,7 +335,7 @@ TEST_F(NNUtilsTest, ConvertParameters) {
};

utils::vector_to_parameters(vector, zero_parameters);
for (const auto i : c10::irange(zero_parameters.size())) {
for (int i = 0; i < zero_parameters.size(); ++i) {
ASSERT_TRUE(zero_parameters[i].allclose(parameters[i]));
}

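The ConvertParameters test round-trips through the flattening pair; a sketch of that pair (assuming torch::nn::utils::parameters_to_vector as the inverse of the vector_to_parameters call seen above):

#include <torch/torch.h>

void convert_parameters_roundtrip() {
  torch::nn::Linear l(3, 2);
  // Flatten all parameters into one 1-D tensor...
  auto flat = torch::nn::utils::parameters_to_vector(l->parameters());
  // ...then scatter the values back into the parameter tensors in place.
  torch::nn::utils::vector_to_parameters(flat, l->parameters());
}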
@ -369,8 +368,7 @@ int64_t PackedSequenceTest_max_length = 6;
std::vector<torch::Tensor> PackedSequenceTest_ordered_sequence(torch::ScalarType tensor_type) {
std::vector<torch::Tensor> seqs;
seqs.reserve(PackedSequenceTest_batch_size);
for (const auto i : c10::irange(PackedSequenceTest_batch_size)) {
(void)i; // Suppress unused variable warning
for (int64_t i = 0; i < PackedSequenceTest_batch_size; i++) {
seqs.emplace_back(torch::empty({
torch::randint(1, PackedSequenceTest_max_length, {1}).item<int64_t>()
}, tensor_type));
@ -392,7 +390,7 @@ std::tuple<torch::Tensor, torch::Tensor> PackedSequenceTest_padded_sequence(torc
// Create Tensor of random padded sequences
auto ordered = PackedSequenceTest_ordered_sequence(tensor_type);
auto lengths = torch::empty({(int64_t)ordered.size()}, torch::kInt64);
for (const auto i : c10::irange(ordered.size())) {
for (int64_t i = 0; i < ordered.size(); i++) {
lengths[i] = ordered[i].size(0);
}
auto padded_tensor = rnn_utils::pad_sequence(ordered);
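A hedged sketch of the padding/packing pipeline these helpers feed (rnn_utils refers to torch::nn::utils::rnn, as used above; lengths must be in decreasing order unless enforce_sorted is disabled):

#include <torch/torch.h>
#include <vector>

namespace rnn_utils = torch::nn::utils::rnn;

void pad_then_pack() {
  std::vector<torch::Tensor> seqs = {
      torch::randn({5}), torch::randn({3}), torch::randn({2})};
  auto lengths = torch::tensor({5, 3, 2}, torch::kInt64);
  auto padded = rnn_utils::pad_sequence(seqs);  // [max_len, batch] = [5, 3]
  auto packed = rnn_utils::pack_padded_sequence(padded, lengths);
  auto unpacked = rnn_utils::pad_packed_sequence(packed);  // (tensor, lengths)
  (void)unpacked;
}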
@ -621,9 +619,9 @@ TEST_F(NNUtilsTest, PackPaddedSequence) {
}
auto padded = torch::cat(tensors_to_be_cat, 1);
std::vector<torch::Tensor> expected_data_vec;
for (const auto n : c10::irange(batch_sizes.size(0))) {
for (int64_t n = 0; n < batch_sizes.size(0); n++) {
int64_t batch_size = batch_sizes[n].item<int64_t>();
for (const auto i : c10::irange(batch_size)) {
for (int64_t i = 0; i < batch_size; i++) {
expected_data_vec.emplace_back(torch::arange(1., 6) + (i + 1) * 100 + 5 * n);
}
}
@ -633,7 +631,7 @@ TEST_F(NNUtilsTest, PackPaddedSequence) {
if (should_shuffle) {
// Shuffle the padded sequence to create an unsorted sequence
std::vector<int64_t> permutation;
for (const auto i : c10::irange(sorted_lengths.size())) {
for (int64_t i = 0; i < sorted_lengths.size(); i++) {
permutation.emplace_back(i);
}
std::shuffle(
@ -704,7 +702,7 @@ TEST_F(NNUtilsTest, PackPaddedSequence) {
if (batch_first) {
grad_output.transpose_(0, 1);
}
for (const auto i : c10::irange(lengths.size(0))) {
for (int64_t i = 0; i < lengths.size(0); i++) {
int64_t l = lengths[i].item<int64_t>();
ASSERT_TRUE(torch::allclose(
padded.grad().narrow(0, 0, l).select(1, i),