#include "caffe2/operators/softmax_op.h"
|
|
#include "caffe2/operators/softmax_shared.h"
|
|
|
|
namespace caffe2 {
|
|
|
|
// Implementation for the CPU context.
|
|
template <>
|
|
bool SoftmaxOp<float, CPUContext>::RunOnDevice() {
|
|
auto& X = Input(0);
|
|
auto* Y = Output(0);
|
|
const auto canonical_axis = X.canonical_axis_index(axis_);
|
|
const int N = X.size_to_dim(canonical_axis);
|
|
const int D = X.size_from_dim(canonical_axis);
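  // The input is treated as a 2-D matrix with N rows (all dimensions before
  // axis_ collapsed together) and D columns (axis_ and everything after it);
  // softmax is applied independently to each of the N rows.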
  Y->ResizeLike(X);
  float* Ydata = Y->mutable_data<float>();
  // Make sure the scratch buffers (per-row scales and the all-ones sum
  // multiplier) are correctly sized.
  if (scale_.size() != N) {
    scale_.Resize(N);
  }
  if (sum_multiplier_.size() != D) {
    sum_multiplier_.Resize(D);
    math::Set<float, CPUContext>(D, 1.f, sum_multiplier_.mutable_data<float>(),
                                 &context_);
  }
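
  // SoftmaxCPU (declared in softmax_shared.h) fills Ydata with the row-wise
  // softmax of X, i.e. Y[i][j] = exp(X[i][j]) / sum_k exp(X[i][k]) for each of
  // the N rows; scale_ provides per-row scratch space and sum_multiplier_ (a
  // vector of ones) lets the row reductions be written as BLAS calls.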
  SoftmaxCPU(context_, N, D, X, Ydata, scale_, sum_multiplier_);
  return true;
}

// Implementation of the gradient for the CPU context.
template <>
bool SoftmaxGradientOp<float, CPUContext>::RunOnDevice() {
  auto& Y = Input(0);
  auto& dY = Input(1);
  auto* dX = Output(0);
  const auto canonical_axis = Y.canonical_axis_index(axis_);
  const int N = Y.size_to_dim(canonical_axis);
  const int D = Y.size_from_dim(canonical_axis);
  // Make sure the scratch buffers (per-row scales and the all-ones sum
  // multiplier) are correctly sized.
  if (scale_.size() != N) {
    scale_.Resize(N);
  }
  if (sum_multiplier_.size() != D) {
    sum_multiplier_.Resize(D);
    math::Set<float, CPUContext>(D, 1.f, sum_multiplier_.mutable_data<float>(),
                                 &context_);
  }
  dX->ResizeLike(Y);
  const float* Ydata = Y.data<float>();
  const float* dYdata = dY.data<float>();
  float* dXdata = dX->mutable_data<float>();
  context_.Copy<float, CPUContext, CPUContext>(Y.size(), dYdata, dXdata);
  float* scaledata = scale_.mutable_data<float>();
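  // The next three calls compute the softmax gradient
  //   dX[i][j] = Y[i][j] * (dY[i][j] - sum_k Y[i][k] * dY[i][k])
  // for each row i: the Dot call produces the per-row inner product
  // s_i = <Y_i, dY_i>, the rank-1 Gemm subtracts s_i from every element of
  // row i of dXdata (which currently holds a copy of dY), and the final
  // elementwise Mul multiplies by Y.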
  for (int i = 0; i < N; ++i) {
    math::Dot<float, CPUContext>(D, Ydata + i * D, dYdata + i * D,
                                 scaledata + i, &context_);
  }
  math::Gemm<float, CPUContext>(CblasNoTrans, CblasNoTrans, N, D, 1, -1,
                                scaledata, sum_multiplier_.data<float>(), 1,
                                dXdata, &context_);
  math::Mul<float, CPUContext>(Y.size(), dXdata, Ydata, dXdata,
                               &context_);
  return true;
}

namespace {
REGISTER_CPU_OPERATOR(Softmax, SoftmaxOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(SoftmaxGradient, SoftmaxGradientOp<float, CPUContext>);

OPERATOR_SCHEMA(Softmax)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc(R"DOC(
The operator computes the softmax normalized values for each row of the batch
in the given input. The input is a 2-D tensor (Tensor<float>) of size
(batch_size x input_feature_dimensions). The output tensor has the same shape
and contains the softmax normalized values of the corresponding input.
)DOC")
    .Input(0, "input", "The input data as 2-D Tensor<float>.")
    .Output(0, "output", "The softmax normalized output values with the same "
                         "shape as the input tensor.");

// Input: Y, dY. Output: dX
OPERATOR_SCHEMA(SoftmaxGradient).NumInputs(2).NumOutputs(1);

class GetSoftmaxGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        def_.type() + "Gradient", "",
        vector<string>{O(0), GO(0)},
        vector<string>{GI(0)});
  }
};
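
// The gradient maker wires up the backward pass: the generated SoftmaxGradient
// (or SoftmaxFp16Gradient) op reads the forward output O(0) and the gradient
// flowing into it GO(0), and writes the gradient of the forward input GI(0),
// matching the (Y, dY) -> dX signature implemented above.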
REGISTER_GRADIENT(Softmax, GetSoftmaxGradient);
REGISTER_GRADIENT(SoftmaxFp16, GetSoftmaxGradient);

} // namespace
} // namespace caffe2