Tensor reinitialization codemod - 3/5 (#15912)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/15912

Codemod generated with clangr shard mode, 25 files per diff.
To eliminate partially initialized Tensors, we split the initialization of local Tensor variables into two steps: first declare an uninitialized Tensor, then call `ReinitializeTensor` to initialize it.
Motivation: https://github.com/pytorch/pytorch/pull/12407
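
In concrete terms (a condensed sketch of the pattern, drawn from the CosineSimilarityOp hunks below; not a verbatim excerpt):

```cpp
// Before: the member tensor is bound to a device at construction
// and allocated with Resize() at first use.
Tensor aux_{Context::GetDeviceType()};
// ... later, in RunOnDevice():
aux_.Resize(2 * N);
float* aux_data = aux_.mutable_data<float>();

// After: the member is declared uninitialized, and ReinitializeTensor()
// sets shape, dtype, and device in one explicit step at first use.
Tensor aux_;
// ... later, in RunOnDevice():
ReinitializeTensor(&aux_, {2 * N}, at::dtype<float>().device(CUDA));
float* aux_data = aux_.mutable_data<float>();
```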

Reviewed By: dzhulgakov

Differential Revision: D13586734

fbshipit-source-id: 8485d2c51225343961351c7a2e8f95055534f9a9
Jerry Zhang authored on 2019-01-16 19:46:19 -08:00; committed by Facebook Github Bot
parent 57d29ffa9c
commit d277f77da2
26 changed files with 172 additions and 125 deletions


@@ -316,7 +316,7 @@ bool CosineSimilarityOp<float, CUDAContext>::RunOnDevice() {
const float* X_data = X.data<float>();
const float* Y_data = Y.data<float>();
// Auxiliary arrays, one allocation of memory
aux_.Resize(2 * N);
ReinitializeTensor(&aux_, {2 * N}, at::dtype<float>().device(CUDA));
float* aux_data = aux_.mutable_data<float>();
float* x2 = aux_data;
float* y2 = aux_data + N;
@@ -371,7 +371,7 @@ bool CosineSimilarityGradientOp<float, CUDAContext>::RunOnDevice() {
auto* dY_data = dY->template mutable_data<float>();
// one memory allocation, a few arrays
aux_.Resize(6 * N);
ReinitializeTensor(&aux_, {6 * N}, at::dtype<float>().device(CUDA));
float* aux_data = aux_.mutable_data<float>();
float* xn = aux_data;
float* yn = aux_data + N;


@@ -156,7 +156,7 @@ class CosineSimilarityOp : public Operator<Context> {
OUTPUT_TAGS(COS_OUT);
private:
Tensor aux_{Context::GetDeviceType()};
Tensor aux_;
};
template <typename T, class Context>
@@ -173,7 +173,7 @@ class CosineSimilarityGradientOp final : public Operator<Context> {
OUTPUT_TAGS(DER_X_OUT, DER_Y_OUT);
private:
Tensor aux_{Context::GetDeviceType()};
Tensor aux_;
};
template <typename T, class Context>


@@ -98,15 +98,12 @@ void fc_op_cpu_impl(
math_type);
// Add bias term
Tensor bias_multiplier(cache->bias_multiplier_);
if (bias_multiplier.numel() != M) {
// If the helper bias multiplier is not M, reshape and fill it with one.
bias_multiplier.Resize(M);
caffe2::math::Set<DataType, Context>(
M,
caffe2::convert::To<float, DataType>(1),
bias_multiplier.template mutable_data<DataType>(),
static_cast<Context*>(&context));
}
ReinitializeTensor(&bias_multiplier, {M}, at::dtype<DataType>().device(CPU));
caffe2::math::Set<DataType, Context>(
M,
caffe2::convert::To<float, DataType>(1),
bias_multiplier.template mutable_data<DataType>(),
static_cast<Context*>(&context));
caffe2::math::Gemm<DataType, Context, caffe2::DefaultEngine>(
CblasNoTrans,
CblasNoTrans,


@@ -12,7 +12,7 @@ struct FullyConnected final {
struct Cache final {
vector<int64_t> Y_shape_cache_;
C10Tensor bias_multiplier_ = C10Tensor(Tensor{CPU});
C10Tensor bias_multiplier_ = C10Tensor(Tensor());
};
using Signature = void(


@@ -1,6 +1,7 @@
#ifndef CAFFE2_OPERATORS_FULLY_CONNECTED_OP_H_
#define CAFFE2_OPERATORS_FULLY_CONNECTED_OP_H_
#include <c10/util/Optional.h>
#include "caffe2/core/context.h"
#include "caffe2/core/operator.h"
#include "caffe2/utils/conversions.h"
@@ -104,15 +105,22 @@ class FullyConnectedOp final : public Operator<Context> {
&context_,
math_type);
// Add bias term
if (bias_multiplier_.numel() != M) {
// If the helper bias multiplier is not M, reshape and fill it with one.
bias_multiplier_.Resize(M);
if (!bias_multiplier_.has_value()) {
bias_multiplier_ = caffe2::empty({M}, at::dtype<T_B>().device(Context::GetDeviceType()));
math::Set<T_B, Context>(
M,
convert::To<float, T_B>(1),
bias_multiplier_.template mutable_data<T_B>(),
bias_multiplier_->template mutable_data<T_B>(),
&context_);
} else if (bias_multiplier_->numel() != M) {
bias_multiplier_->Resize(M);
math::Set<T_B, Context>(
M,
convert::To<float, T_B>(1),
bias_multiplier_->template mutable_data<T_B>(),
&context_);
}
math::Gemm<T_B, Context, Engine>(
CblasNoTrans,
CblasNoTrans,
@@ -120,7 +128,7 @@ class FullyConnectedOp final : public Operator<Context> {
N,
1,
1,
bias_multiplier_.template data<T_B>(),
bias_multiplier_->template data<T_B>(),
b.template data<T_B>(),
1,
Y->template mutable_data<T_Y>(),
@@ -144,7 +152,7 @@ class FullyConnectedOp final : public Operator<Context> {
// A local vector to cache the output shape so we don't need to recreate
// a vector object every time we run Run().
vector<int64_t> Y_shape_cache_;
Tensor bias_multiplier_{Context::GetDeviceType()};
c10::optional<Tensor> bias_multiplier_;
bool float16_compute_;
};
@@ -250,14 +258,19 @@ class FullyConnectedGradientOp : public Operator<Context> {
dW->template mutable_data<T_DW>(),
&context_,
math_type);
if (bias_multiplier_.numel() != M) {
// If the helper bias multiplier is not M, reshape and fill it
// with one.
bias_multiplier_.Resize(M);
if (!bias_multiplier_.has_value()) {
bias_multiplier_ = caffe2::empty({M}, at::dtype<T_B>().device(Context::GetDeviceType()));
math::Set<T_B, Context>(
M,
convert::To<float, T_B>(1),
bias_multiplier_.template mutable_data<T_B>(),
bias_multiplier_->template mutable_data<T_B>(),
&context_);
} else if (bias_multiplier_->numel() != M) {
bias_multiplier_->Resize(M);
math::Set<T_B, Context>(
M,
convert::To<float, T_B>(1),
bias_multiplier_->template mutable_data<T_B>(),
&context_);
}
// Compute dB
@@ -267,7 +280,7 @@ class FullyConnectedGradientOp : public Operator<Context> {
N,
1,
dY.template data<T_DY>(),
bias_multiplier_.template data<T_B>(),
bias_multiplier_->template data<T_B>(),
0,
db->template mutable_data<T_DB>(),
&context_);
@@ -307,7 +320,7 @@ class FullyConnectedGradientOp : public Operator<Context> {
protected:
size_t axis_{1};
size_t axis_w_{1};
Tensor bias_multiplier_{Context::GetDeviceType()};
c10::optional<Tensor> bias_multiplier_;
bool float16_compute_;
};
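
For members that must also be resized on later runs, the hunks above use a second variant of the pattern: the member becomes a `c10::optional<Tensor>`, allocated on first use with `caffe2::empty` and only `Resize`d and refilled when the batch size changes (the same pattern appears again in the HSoftmax changes further down). A condensed sketch, with `EnsureBiasMultiplier` as a hypothetical helper wrapping the inline code above:

```cpp
c10::optional<Tensor> bias_multiplier_;  // empty until first use; no device yet

void EnsureBiasMultiplier(int M) {
  if (!bias_multiplier_.has_value()) {
    // First use: allocate on the op's device and fill with ones.
    bias_multiplier_ = caffe2::empty(
        {M}, at::dtype<T_B>().device(Context::GetDeviceType()));
    math::Set<T_B, Context>(
        M,
        convert::To<float, T_B>(1),
        bias_multiplier_->template mutable_data<T_B>(),
        &context_);
  } else if (bias_multiplier_->numel() != M) {
    // M changed since the last run: resize in place and refill.
    bias_multiplier_->Resize(M);
    math::Set<T_B, Context>(
        M,
        convert::To<float, T_B>(1),
        bias_multiplier_->template mutable_data<T_B>(),
        &context_);
  }
}
```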


@@ -55,13 +55,13 @@ class GivenTensorByteStringToUInt8FillOp final : public FillerOp<Context> {
<< " given size: " << source_values.size();
auto str = source_values[0];
values_.Resize(str.size());
ReinitializeTensor(&values_, {static_cast<int64_t>(str.size())}, at::dtype<uint8_t>().device(CPU));
uint8_t* values_data = values_.template mutable_data<uint8_t>();
for (int i = 0; i < str.size(); i++) {
values_data[i] = static_cast<uint8_t>(str[i]);
}
}
}
Tensor values_{CPU};
Tensor values_;
};
} // namespace caffe2


@@ -60,7 +60,7 @@ class GivenTensorFillOp final : public FillerOp<Context> {
void ExtractValues() {
auto source_values =
this->template GetRepeatedArgument<Type>("values");
values_.Resize(source_values.size());
ReinitializeTensor(&values_, {static_cast<int64_t>(source_values.size())}, at::dtype<Type>().device(CPU));
Type* values_data = values_.template mutable_data<Type>();
for (int i = 0; i < source_values.size(); i++) {
values_data[i] = static_cast<Type>(source_values[i]);
@@ -83,6 +83,6 @@ class GivenTensorFillOp final : public FillerOp<Context> {
}
bool (GivenTensorFillOp::*body_)(Tensor* output);
Tensor values_{CPU};
Tensor values_;
};
} // namespace caffe2


@@ -104,8 +104,10 @@ bool GroupNormGradientOp<T, Context>::RunOnDeviceImpl(
// dL/ds = Sum(dL/dY * gamma * X)
// dL/db = Sum(dL/dY * gamma)
const int C = G * D;
ds_.Resize(N, G);
db_.Resize(N, G);
ReinitializeTensor(
&ds_, {N, G}, at::dtype<T>().device(Context::GetDeviceType()));
ReinitializeTensor(
&db_, {N, G}, at::dtype<T>().device(Context::GetDeviceType()));
T* ds_data = ds_.template mutable_data<T>();
T* db_data = db_.template mutable_data<T>();
math::Set<T, Context>(N * G, T(0), ds_data, &context_);


@@ -326,8 +326,10 @@ bool GroupNormGradientOp<float, CUDAContext>::RunOnDeviceImpl(
float* dbeta_data) {
const int size = N * G * D * HxW;
const int C = G * D;
ds_.Resize(N, G);
db_.Resize(N, G);
ReinitializeTensor(
&ds_, {N, G}, at::dtype<float>().device(CUDA));
ReinitializeTensor(
&db_, {N, G}, at::dtype<float>().device(CUDA));
float* ds_data = ds_.mutable_data<float>();
float* db_data = db_.mutable_data<float>();
if (order_ == StorageOrder::NCHW) {


@@ -57,8 +57,8 @@ class GroupNormOp final : public Operator<Context> {
mu_data = mu->template mutable_data<T>();
rsig_data = rsig->template mutable_data<T>();
} else {
mu_.Resize(N, G);
rsig_.Resize(N, G);
ReinitializeTensor(&mu_, {N, G}, at::dtype<T>().device(Context::GetDeviceType()));
ReinitializeTensor(&rsig_, {N, G}, at::dtype<T>().device(Context::GetDeviceType()));
mu_data = mu_.template mutable_data<T>();
rsig_data = rsig_.template mutable_data<T>();
}
@@ -88,8 +88,8 @@ class GroupNormOp final : public Operator<Context> {
T* mu,
T* rsig) {
const int C = G * D;
scale_.Resize(N, C);
bias_.Resize(N, C);
ReinitializeTensor(&scale_, {N, C}, at::dtype<T>().device(Context::GetDeviceType()));
ReinitializeTensor(&bias_, {N, C}, at::dtype<T>().device(Context::GetDeviceType()));
T* scale_data = scale_.template mutable_data<T>();
T* bias_data = bias_.template mutable_data<T>();
if (order_ == StorageOrder::NCHW) {
@@ -175,10 +175,10 @@ class GroupNormOp final : public Operator<Context> {
const StorageOrder order_;
const bool is_test_;
Tensor mu_{Context::GetDeviceType()};
Tensor rsig_{Context::GetDeviceType()};
Tensor scale_{Context::GetDeviceType()};
Tensor bias_{Context::GetDeviceType()};
Tensor mu_;
Tensor rsig_;
Tensor scale_;
Tensor bias_;
// Input: X, gamma, beta
// Output: Y, mu, inv_sig
@@ -255,8 +255,8 @@ class GroupNormGradientOp final : public Operator<Context> {
const int group_;
const StorageOrder order_;
Tensor ds_{Context::GetDeviceType()};
Tensor db_{Context::GetDeviceType()};
Tensor ds_;
Tensor db_;
// Input: dY, X, gamma, beta, mu, inv_sig
// Output: dX, dgamma, dbeta


@@ -24,34 +24,39 @@ float HSoftmaxOp<float, CPUContext>::RunForwardSingle(const float* X,
//Softmax
float* softmax_output_data = int_output + int_output_offset;
if (scale_.numel() != 1) {
scale_.Resize(1);
if (!scale_.has_value()) {
scale_ = caffe2::empty({1}, at::dtype<float>().device(CPU));
}
if (sum_multiplier_.numel() != dim_out) {
sum_multiplier_.Resize(dim_out);
if (!sum_multiplier_.has_value()) {
sum_multiplier_ = caffe2::empty({dim_out}, at::dtype<float>().device(CPU));
math::Set<float, CPUContext>(dim_out, 1.f,
sum_multiplier_.mutable_data<float>(), &context_);
sum_multiplier_->mutable_data<float>(), &context_);
} else if (sum_multiplier_->numel() != dim_out) {
sum_multiplier_->Resize(dim_out);
math::Set<float, CPUContext>(dim_out, 1.f,
sum_multiplier_->mutable_data<float>(), &context_);
}
math::RowwiseMax<float, CPUContext>(1, dim_out, fc_output_data,
scale_.mutable_data<float>(), &context_);
scale_->mutable_data<float>(), &context_);
// Put the intermediate result X - max(X) into Y
context_.template CopyFromCPU<float>(
dim_out, fc_output_data, softmax_output_data);
// Subtract the scale
math::Gemv<float, CPUContext>(CblasNoTrans, dim_out, 1, -1,
sum_multiplier_.data<float>(), scale_.data<float>(), 1, softmax_output_data,
sum_multiplier_->data<float>(), scale_->data<float>(), 1, softmax_output_data,
&context_);
// Exponentiation
math::Exp<float, CPUContext>(dim_out, softmax_output_data,
softmax_output_data, &context_);
math::Gemv<float, CPUContext>(CblasNoTrans, 1, dim_out, 1,
softmax_output_data, sum_multiplier_.data<float>(), 0,
scale_.mutable_data<float>(), &context_);
softmax_output_data, sum_multiplier_->data<float>(), 0,
scale_->mutable_data<float>(), &context_);
// Do division
const float scale = *scale_.data<float>();
const float scale = *(scale_->data<float>());
for (int j = 0; j < dim_out; ++j) {
softmax_output_data[j] /= scale;
}
@@ -94,10 +99,14 @@ bool HSoftmaxOp<float, CPUContext>::RunOnDevice() {
float* int_output_data = intermediate_output->template mutable_data<float>();
int int_output_offset = 0;
if (bias_multiplier_.numel() != M) {
bias_multiplier_.Resize(M);
if (!bias_multiplier_.has_value()) {
bias_multiplier_ = caffe2::empty({M}, at::dtype<float>().device(CPU));
math::Set<float, CPUContext>(M, static_cast<float>(1),
bias_multiplier_.mutable_data<float>(), &context_);
bias_multiplier_->mutable_data<float>(), &context_);
} else if (bias_multiplier_->numel() != M) {
bias_multiplier_->Resize(M);
math::Set<float, CPUContext>(M, static_cast<float>(1),
bias_multiplier_->mutable_data<float>(), &context_);
}
for (int sample = 0; sample < M; ++sample) {
@@ -112,7 +121,7 @@ bool HSoftmaxOp<float, CPUContext>::RunOnDevice() {
//Adding log probabilities
Ydata[sample] += RunForwardSingle(X.data<float>() + sample*K,
W.data<float>() + w_offset*K, b.data<float>() + w_offset, target,
int_output_data, bias_multiplier_.data<float>()+sample, w_length, K,
int_output_data, bias_multiplier_->data<float>()+sample, w_length, K,
int_output_offset);
}
}
@@ -137,15 +146,19 @@ void HSoftmaxGradientOp<float, CPUContext>::RunBackwardSingle(const float* X,
int_output_offset -= dim_out;
//Softmax
if (scale_.numel() != 1) {
scale_.Resize(1);
if (!scale_.has_value()) {
scale_ = caffe2::empty({1}, at::dtype<float>().device(CPU));
}
float* scaledata = scale_.mutable_data<float>();
float* scaledata = scale_->mutable_data<float>();
if (sum_multiplier_.numel() != dim_out) {
sum_multiplier_.Resize(dim_out);
if (!sum_multiplier_.has_value()) {
sum_multiplier_ = caffe2::empty({dim_out}, at::dtype<float>().device(CPU));
math::Set<float, CPUContext>(dim_out, 1.f,
sum_multiplier_.mutable_data<float>(), &context_);
sum_multiplier_->mutable_data<float>(), &context_);
} else if (sum_multiplier_->numel() != dim_out) {
sum_multiplier_->Resize(dim_out);
math::Set<float, CPUContext>(dim_out, 1.f,
sum_multiplier_->mutable_data<float>(), &context_);
}
float* dX_softmax = dint_output + int_output_offset - dim_out;
@@ -154,19 +167,19 @@ void HSoftmaxGradientOp<float, CPUContext>::RunBackwardSingle(const float* X,
math::Dot<float, CPUContext>(dim_out, X_entropy, dX_entropy, scaledata,
&context_);
math::Gemv<float, CPUContext>(CblasTrans, 1, dim_out, -1,
sum_multiplier_.data<float>(), scaledata , 1, dX_softmax, &context_);
sum_multiplier_->data<float>(), scaledata , 1, dX_softmax, &context_);
math::Mul<float, CPUContext>(dim_out, dX_softmax, X_entropy, dX_softmax,
&context_);
int_output_offset -= dim_out;
//FC
if (bias_multiplier_.numel() != 1) {
if (!bias_multiplier_.has_value()) {
// If the helper bias multiplier has not been created, create and fill
// it with 1
bias_multiplier_.Resize(1);
bias_multiplier_ = caffe2::empty({1}, at::dtype<float>().device(CPU));
math::Set<float, CPUContext>(1, static_cast<float>(1),
bias_multiplier_.template mutable_data<float>(), &context_);
bias_multiplier_->template mutable_data<float>(), &context_);
}
// Compute dW and add incrementally
@@ -177,7 +190,7 @@ void HSoftmaxGradientOp<float, CPUContext>::RunBackwardSingle(const float* X,
// Compute dB and add incrementally
// db = db + dX_softmax*bias_multiplier_
math::Gemv<float, CPUContext>(CblasTrans, 1, dim_out, 1, dX_softmax,
bias_multiplier_.template data<float>(), 1, db, &context_);
bias_multiplier_->template data<float>(), 1, db, &context_);
// Compute dX and add incrementally
// dX = dX + W'dX_softmax
@@ -265,7 +278,7 @@ bool HSoftmaxSearchOp<float, CPUContext>::pruning(
b + w_offset,
-1,
int_output_data,
bias_multiplier_.template data<float>() + sample,
bias_multiplier_->template data<float>() + sample,
w_length,
K,
int_output_offset);
@@ -351,13 +364,14 @@ bool HSoftmaxSearchOp<float, CPUContext>::RunOnDevice() {
auto* Y_names = Output(0, {M, top_n_}, at::dtype<string>());
auto* Y_scores = Output(1, {M, top_n_}, at::dtype<float>());
if (bias_multiplier_.numel() != M) {
bias_multiplier_.Resize(M);
math::Set<float, CPUContext>(
M,
static_cast<float>(1),
bias_multiplier_.mutable_data<float>(),
&context_);
if (!bias_multiplier_.has_value()) {
bias_multiplier_ = caffe2::empty({M}, at::dtype<float>().device(CPU));
math::Set<float, CPUContext>(M, static_cast<float>(1),
bias_multiplier_->mutable_data<float>(), &context_);
} else if (bias_multiplier_->numel() != M) {
bias_multiplier_->Resize(M);
math::Set<float, CPUContext>(M, static_cast<float>(1),
bias_multiplier_->mutable_data<float>(), &context_);
}
for (int sample = 0; sample < M; ++sample) {


@@ -1,6 +1,7 @@
#ifndef CAFFE2_OPERATORS_H_SOFTMAX_OP_H_
#define CAFFE2_OPERATORS_H_SOFTMAX_OP_H_
#include <c10/util/Optional.h>
#include "caffe2/core/context.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"
@@ -25,9 +26,9 @@ class HSoftmaxOpBase : public Operator<Context> {
protected:
std::unordered_map<int, PathProto> hierarchy_all_map_;
Tensor scale_{Context::GetDeviceType()};
Tensor sum_multiplier_{Context::GetDeviceType()};
Tensor bias_multiplier_{Context::GetDeviceType()};
c10::optional<Tensor> scale_;
c10::optional<Tensor> sum_multiplier_;
c10::optional<Tensor> bias_multiplier_;
static constexpr T kLOG_THRESHOLD() {
return 1e-20f;
}


@@ -39,10 +39,12 @@ bool InstanceNormGradientOp<T, Context>::RunOnDeviceWithOrderNHWC() {
// Resize before we get into the per-instance loop
if (InputSize() < 5) {
mean_.Resize(N, C);
ReinitializeTensor(
&mean_, {N, C}, at::dtype<T>().device(Context::GetDeviceType()));
}
if (InputSize() < 6) {
inv_stdev_.Resize(N, C);
ReinitializeTensor(
&inv_stdev_, {N, C}, at::dtype<T>().device(Context::GetDeviceType()));
}
// looping over per-instance and using Eigen blocks to extract out
@@ -174,7 +176,8 @@ bool InstanceNormGradientOp<T, Context>::RunOnDeviceWithOrderNCHW() {
// Compute mean if it wasn't passed in
if (InputSize() < 5) {
mean_.Resize(N, C);
ReinitializeTensor(
&mean_, {N, C}, at::dtype<T>().device(Context::GetDeviceType()));
EigenVectorArrayMap<T> mean_mutable_arr(
mean_.template mutable_data<T>(), N * C);
mean_mutable_arr = input_mat.colwise().mean();
@@ -189,7 +192,8 @@ bool InstanceNormGradientOp<T, Context>::RunOnDeviceWithOrderNCHW() {
// compute 1 / stdev if not passed in
if (InputSize() < 6) {
inv_stdev_.Resize(N, C);
ReinitializeTensor(
&inv_stdev_, {N, C}, at::dtype<T>().device(Context::GetDeviceType()));
EigenVectorArrayMap<T> inv_stdev_mutable_arr(
inv_stdev_.template mutable_data<T>(), N * C);


@@ -378,7 +378,7 @@ bool InstanceNormGradientOp<float, CUDAContext>::RunOnDeviceWithOrderNHWC() {
const auto dim_stride = C;
if (InputSize() < 5) {
mean_.Resize(N, C);
ReinitializeTensor(&mean_, {N, C}, at::dtype<float>().device(CUDA));
auto mean_mutable_data = mean_.mutable_data<float>();
InstanceNormMeanKernel<<<
CAFFE_GET_BLOCKS(N * C),
@@ -401,7 +401,7 @@ bool InstanceNormGradientOp<float, CUDAContext>::RunOnDeviceWithOrderNHWC() {
const auto mean_data = mean.data<float>();
if (InputSize() < 6) {
inv_stdev_.Resize(N, C);
ReinitializeTensor(&inv_stdev_, {N, C}, at::dtype<float>().device(CUDA));
auto inv_stdev_mutable_data = inv_stdev_.mutable_data<float>();
InstanceNormInvStdevKernel<<<
CAFFE_GET_BLOCKS(N * C),


@@ -81,8 +81,8 @@ class InstanceNormGradientOp : public Operator<Context> {
// temp results that could get passed through to this gradient, but if not,
// are stored here
Tensor mean_{Context::GetDeviceType()};
Tensor inv_stdev_{Context::GetDeviceType()};
Tensor mean_;
Tensor inv_stdev_;
INPUT_TAGS(INPUT, SCALE, BIAS, OUTPUT_GRAD, MEAN, INV_STDEV);
OUTPUT_TAGS(INPUT_GRAD, SCALE_GRAD, BIAS_GRAD);


@@ -175,7 +175,7 @@ bool IntegralImageGradientOp<float, CUDAContext>::RunOnDevice() {
// Col pass reduces shape to (N, C, H, W)
vector<int64_t> row_pass_shape(dY.sizes().vec());
row_pass_shape[3] -= 1;
row_pass_buffer_.Resize(row_pass_shape);
ReinitializeTensor(&row_pass_buffer_, row_pass_shape, at::dtype<float>().device(CUDA));
const int chans = row_pass_buffer_.dim32(1);
const int rows_out = row_pass_buffer_.dim32(2);
const int cols_out = row_pass_buffer_.dim32(3);


@@ -28,7 +28,7 @@ class IntegralImageGradientOp final : public Operator<Context> {
bool RunOnDevice() override;
protected:
Tensor row_pass_buffer_{Context::GetDeviceType()};
Tensor row_pass_buffer_;
};
} // namespace caffe2


@@ -132,11 +132,11 @@ class LayerNormGradientOp final : public Operator<Context> {
const int N = X.size_from_dim(canonical_axis);
auto* dX = Output(0, X.sizes(), at::dtype<T>());
ds_.Resize(M);
db_.Resize(M);
dY_scale_.Resize(M);
X_scale_.Resize(M);
bias_.Resize(M);
ReinitializeTensor(&ds_, {M}, at::dtype<T>().device(Context::GetDeviceType()));
ReinitializeTensor(&db_, {M}, at::dtype<T>().device(Context::GetDeviceType()));
ReinitializeTensor(&dY_scale_, {M}, at::dtype<T>().device(Context::GetDeviceType()));
ReinitializeTensor(&X_scale_, {M}, at::dtype<T>().device(Context::GetDeviceType()));
ReinitializeTensor(&bias_, {M}, at::dtype<T>().device(Context::GetDeviceType()));
const T* dY_data = dY.template data<T>();
const T* X_data = X.template data<T>();
const T* mean_data = mean.template data<T>();
@@ -200,11 +200,11 @@ class LayerNormGradientOp final : public Operator<Context> {
const int axis_;
Tensor ds_{Context::GetDeviceType()};
Tensor db_{Context::GetDeviceType()};
Tensor dY_scale_{Context::GetDeviceType()};
Tensor X_scale_{Context::GetDeviceType()};
Tensor bias_{Context::GetDeviceType()};
Tensor ds_;
Tensor db_;
Tensor dY_scale_;
Tensor X_scale_;
Tensor bias_;
};
} // namespace caffe2


@@ -22,7 +22,7 @@ template <>
bool LengthsTileOp<CUDAContext>::RunOnDevice() {
auto& data = Input(DATA);
auto& lengths = Input(LENGTHS);
CAFFE_ENFORCE_EQ(lengths.ndim(), 1, "LENGTHS must be 1-D");
CAFFE_ENFORCE_GE(data.ndim(), 1, "DATA should be at least 1-D");
@@ -45,8 +45,8 @@ bool LengthsTileOp<CUDAContext>::RunOnDevice() {
auto numElements = total_length * numElementsPerRow;
auto numBlocks = CAFFE_GET_BLOCKS(numElements);
rowMappingHost_.Resize(total_length);
rowMappingDevice_.Resize(total_length);
ReinitializeTensor(&rowMappingHost_, {total_length}, at::dtype<int32_t>().device(CPU));
ReinitializeTensor(&rowMappingDevice_, {total_length}, at::dtype<int32_t>().device(CPU));
auto* rowOffsets = rowMappingHost_.mutable_data<int32_t>();
int32_t outputRow = 0;
for (int64_t i = 0; i < lengths_size; i++) {


@@ -20,8 +20,8 @@ class LengthsTileOp : public Operator<Context> {
private:
Tensor lengths_host_{CPU};
Tensor rowMappingHost_{CPU};
Tensor rowMappingDevice_{Context::GetDeviceType()};
Tensor rowMappingHost_;
Tensor rowMappingDevice_;
};
} // namespace caffe2


@@ -52,7 +52,10 @@ void LambdaRankNdcgOp<float, CPUContext>::ResizeInvLogITensor(int size) {
new_size <<= 1;
}
if (new_size != old_size) {
inv_log_i_.Resize(new_size);
ReinitializeTensor(
&inv_log_i_,
{new_size},
at::dtype<float>().device(CPU));
auto* data = inv_log_i_.template mutable_data<float>();
EigenVectorArrayMap<float> vec(data, inv_log_i_.numel());
const float log2f_ = std::log(2.f);
@@ -64,7 +67,8 @@ void LambdaRankNdcgOp<float, CPUContext>::ResizeInvLogITensor(int size) {
template <>
void LambdaRankNdcgOp<float, CPUContext>::ComputeDiscounts(int* idx, int N) {
discount_.Resize(N);
ReinitializeTensor(
&discount_, {N}, at::dtype<float>().device(CPU));
auto* discount_data = discount_.template mutable_data<float>();
auto* inv_log_i_data = inv_log_i_.template mutable_data<float>();
for (int i = 0; i < N; i++) {
@@ -94,8 +98,10 @@ float LambdaRankNdcgOp<float, CPUContext>::LambdaRankNdcgSession(
return 0;
}
ideal_idx_.Resize(N);
rank_idx_.Resize(N);
ReinitializeTensor(
&ideal_idx_, {N}, at::dtype<int>().device(CPU));
ReinitializeTensor(
&rank_idx_, {N}, at::dtype<int>().device(CPU));
auto* rank_idx_data = rank_idx_.template mutable_data<int>();
auto* ideal_idx_data = ideal_idx_.template mutable_data<int>();
@@ -114,7 +120,8 @@ float LambdaRankNdcgOp<float, CPUContext>::LambdaRankNdcgSession(
}
const double log2f_ = std::log(2.f);
gain_.Resize(N);
ReinitializeTensor(
&gain_, {N}, at::dtype<float>().device(CPU));
auto* gain_data = gain_.template mutable_data<float>();
EigenVectorArrayMap<float> gain_vec(gain_data, gain_.numel());
@@ -141,7 +148,8 @@ float LambdaRankNdcgOp<float, CPUContext>::LambdaRankNdcgSession(
// similar to ideal but replace with actual discounts
double dcg = (gain_vec * discount_vec).sum();
lambda_.Resize(N * N);
ReinitializeTensor(
&lambda_, {N * N}, at::dtype<float>().device(CPU));
auto* lambda_data = lambda_.template mutable_data<float>();
EigenArrayMap<float> lambda_mat(lambda_data, N, N);
// computes lambda weight (i, j) = abs(gain_dff * discount_diff)


@@ -35,12 +35,12 @@ class LambdaRankNdcgOp final : public Operator<Context> {
Tensor** dy);
bool use_ndcg_as_loss_;
bool use_exp_gain_;
Tensor gain_{Context::GetDeviceType()};
Tensor discount_{Context::GetDeviceType()};
Tensor rank_idx_{Context::GetDeviceType()};
Tensor ideal_idx_{Context::GetDeviceType()};
Tensor lambda_{Context::GetDeviceType()};
Tensor inv_log_i_{Context::GetDeviceType()};
Tensor gain_;
Tensor discount_;
Tensor rank_idx_;
Tensor ideal_idx_;
Tensor lambda_;
Tensor inv_log_i_;
};
template <typename T, class Context>


@@ -25,8 +25,14 @@ bool PercentileOp<CPUContext>::RunOnDevice() {
num_values,
"Sum of lengths should be equal to the total number of samples");
values_tensor.Resize(num_values);
percentiles_tensor.Resize(num_values);
ReinitializeTensor(
&values_tensor,
{num_values},
at::dtype<float>().device(CPU));
ReinitializeTensor(
&percentiles_tensor,
{num_values},
at::dtype<float>().device(CPU));
float* values_tensor_data = values_tensor.template mutable_data<float>();
float* percentiles_tensor_data =
percentiles_tensor.template mutable_data<float>();


@@ -25,8 +25,8 @@ class PercentileOp final : public Operator<Context> {
protected:
INPUT_TAGS(X, VAL_PCT_PAIRS, LENS);
OUTPUT_TAGS(PCT);
Tensor values_tensor{Context::GetDeviceType()};
Tensor percentiles_tensor{Context::GetDeviceType()};
Tensor values_tensor;
Tensor percentiles_tensor;
};
} // namespace caffe2


@@ -55,7 +55,7 @@ class Int8ConcatOp final : public Operator<CPUContext> {
}
Y_dims[axis_] += Xi.t.size(axis_);
}
Y->t.Resize(Y_dims);
ReinitializeTensor(&Y->t, Y_dims, at::dtype<uint8_t>().device(CPU));
int before = X0.t.size_to_dim(axis_);
int after = X0.t.size_from_dim(axis_ + 1);
const auto C_total = Y_dims[axis_];


@@ -43,7 +43,7 @@ class Int8FCOp final : public Operator<CPUContext> {
CHECK_EQ(K, W.t.size(1));
CHECK_EQ(N, B.t.numel());
const auto M = X.t.numel() / K;
Y->t.Resize(M, N);
ReinitializeTensor(&Y->t, {M, N}, at::dtype<uint8_t>().device(CPU));
runWithSharedBuffer<CPUContext>(ws_, [&](Tensor* buffer) {
initQNNPACK();