#include <ATen/functorch/DynamicLayer.h>
#include <torch/library.h>
#include <ATen/ATen.h>
#include <ATen/WrapDimUtils.h>
#include <ATen/functorch/TensorWrapper.h>
#include <ATen/functorch/BatchedTensorImpl.h>
#include <ATen/Dispatch.h>
#include <c10/util/irange.h>
#include <c10/util/Exception.h>
#include <ATen/NamedTensorUtils.h>
#include <ATen/native/LinearAlgebraUtils.h>
#include <ATen/native/xnnpack/Engine.h>

namespace at::functorch {

// NOTE: [functorch's PyTorch Operator Hacks]
//
// This file contains hacks for composite PyTorch operators that are
// problematic for functorch. For example, the composite op might contain
// in-place operations or call data_ptr. We have some idea of how to fix
// these things in the long term, e.g., by upstreaming the changes to PyTorch.
//
// TODO: all of these should be fixed in a more blessed way. In particular,
// it is bad if any of these go out-of-sync with the implementations in
// pytorch/pytorch.

// TODO: upstream into core

namespace {

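// index_select's backward scatters `grad` back into a zeros tensor of the
// original `self` shape. An out-of-place index_add is used here, presumably
// because an in-place index_add_ would run afoul of functorch's transforms
// (see the NOTE above).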
Tensor index_select_backward_hack(const Tensor& grad, IntArrayRef self_sizes, int64_t dim, const Tensor& index) {
  return at::zeros(self_sizes, grad.options()).index_add(dim, index, grad);
}

// TODO: linear is pretty important for performance, but I'm not sure how to work
// around the in-place bias add.
Tensor linear_hack(const Tensor& input, const Tensor& weight, const std::optional<Tensor>& bias_opt) {
  // See [Note: hacky wrapper removal for optional tensor]
  auto bias = bias_opt.has_value()
    ? c10::MaybeOwned<Tensor>::borrowed(*bias_opt)
    : c10::MaybeOwned<Tensor>::owned(std::in_place);

  if (input.is_mkldnn()) {
    return at::mkldnn_linear(input, weight, *bias);
  }
#if defined(C10_MOBILE)
  if (at::native::xnnpack::use_linear(input, weight, *bias)) {
    return at::native::xnnpack::linear(input, weight, *bias);
  }
#endif
  if (input.dim() == 2 && bias->defined()) {
    // Fused op is marginally faster.
    return at::addmm(*bias, input, weight.t());
  }
  if (input.dim() == 3 && bias->defined() && input.is_contiguous()) {
    // Also hit the fused path for contiguous 3D input.
    const auto input_sizes = input.sizes();
    const auto result = at::addmm(*bias, input.view({input_sizes[0] * input_sizes[1], input_sizes[2]}), weight.t());
    return result.view({input_sizes[0], input_sizes[1], result.size(1)});
  }
  auto output = at::matmul(input, weight.t());
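  // The stock linear does output.add_(*bias) unconditionally. If any Vmap
  // layer is active, `output` may be a BatchedTensor, and mutating it in
  // place is unsafe, so fall back to an out-of-place add in that case.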
  if (bias->defined()) {
    const auto& stack = getDynamicLayerStack();
    bool any_vmap_layers = std::any_of(
        stack.begin(), stack.end(),
        [](const DynamicLayer& dl){ return dl.key() == TransformType::Vmap; });
    if (any_vmap_layers) {
      return output.add(*bias);
    }
    return output.add_(*bias);
  }
  return output;
}

inline at::Tensor apply_loss_reduction(const at::Tensor& unreduced, int64_t reduction) {
  if (reduction == at::Reduction::Mean) {
    return unreduced.mean();
  } else if (reduction == at::Reduction::Sum) {
    return unreduced.sum();
  }
  return unreduced;
}

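// Numerically stable BCE-with-logits via the log-sum-exp trick:
//   loss = (1 - t) * x + log(1 + exp(-x))
// is computed as
//   loss = (1 - t) * x + m + log(exp(-m) + exp(-x - m)),  where m = max(-x, 0),
// which avoids overflow in exp for large |x|. With pos_weight, the log term
// is scaled by log_weight = 1 + (pos_weight - 1) * t.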
Tensor binary_cross_entropy_with_logits_hack(
    const Tensor& input,
    const Tensor& target,
    const std::optional<Tensor>& weight_opt,
    const std::optional<Tensor>& pos_weight_opt,
    int64_t reduction) {
  // See [Note: hacky wrapper removal for optional tensor]
  c10::MaybeOwned<Tensor> weight_maybe_owned = at::borrow_from_optional_tensor(weight_opt);
  const Tensor& weight = *weight_maybe_owned;
  const Tensor& pos_weight = pos_weight_opt.value_or(Tensor());

  Tensor loss;
  auto max_val = (-input).clamp_min(0);
  if (pos_weight.defined()) {
    // pos_weight needs to be broadcast, so mul(target) is not in-place.
    auto log_weight = (pos_weight - 1).mul(target).add_(1);
    loss = (1 - target).mul(input).add(log_weight.mul(((-max_val).exp_().add((-input - max_val).exp_())).log_().add_(max_val)));
  } else {
    loss = (1 - target).mul(input).add_(max_val).add_((-max_val).exp_().add((-input - max_val).exp_()).log_());
  }

  if (weight.defined()) {
    loss = loss * weight;
  }

  return apply_loss_reduction(loss, reduction);
}

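// trace sums the main diagonal, so its backward writes grad into the
// diagonal entries of a zeros matrix. In a flattened (n, m) matrix the
// diagonal occupies every (m + 1)-th element, hence the arange stride below.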
Tensor trace_backward_decomp(const Tensor& grad, IntArrayRef sizes) {
  TORCH_CHECK(sizes.size() == 2, "expected matrix input");
  auto grad_input = at::zeros(sizes[0] * sizes[1], grad.options());
  auto indices = at::arange(0, grad_input.numel(), sizes[1] + 1, grad.options().dtype(at::kLong));
  // Workaround: use index_put instead of index_fill_, which is not yet supported.
  grad_input = grad_input.index_put({indices}, grad);
  return grad_input.view(sizes);
}

} // namespace

// dropout hack
// TODO: make the following changes in pytorch/pytorch
namespace dropout_hack {

namespace {

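// Ctype<true> is Tensor& (in-place variants return the mutated input);
// Ctype<false> is Tensor (out-of-place variants return a fresh tensor).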
template<bool inplace>
using Ctype = std::conditional_t<inplace, Tensor&, Tensor>;

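// Feature dropout draws one Bernoulli sample per (batch, channel) pair and
// broadcasts it across the remaining dims, so the noise shape is
// (N, C, 1, 1, ...).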
Tensor make_feature_noise(const Tensor& input) {
  auto input_sizes = input.sizes();
  TORCH_CHECK(input.dim() >= 2, "Feature dropout requires at least 2 dimensions in the input");
  std::vector<int64_t> sizes;
  sizes.reserve(input.dim());
  sizes.push_back(input_sizes[0]);
  sizes.push_back(input_sizes[1]);
  for ([[maybe_unused]] const auto i : c10::irange(2, input.dim())) {
    sizes.push_back(1);
  }
  // NB: THIS WAS CHANGED FROM THE ORIGINAL
  return at::empty(sizes, input.options());
}

bool is_fused_kernel_acceptable(const Tensor& input, double p) {
  return (input.is_cuda() || input.is_xpu() || input.is_lazy() || input.is_privateuseone()) && p > 0 && p < 1 && input.numel() > 0;
}

// NB: sure, we could have used different overloads here, but I would feel insecure
// knowing that this dispatch depends only on the constness of the references
template<bool inplace>
Tensor& multiply(Tensor& input, const Tensor& noise) {
  static_assert(inplace, "Wrong multiply overload triggered in Dropout.cpp");
  return input.mul_(noise);
}

template<bool inplace>
Tensor multiply(const Tensor& input, const Tensor& noise) {
  static_assert(!inplace, "Wrong multiply overload triggered in Dropout.cpp");
  return input.mul(noise);
}

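// Shared implementation behind all eight dropout variants. The template
// flags select feature vs. element dropout, alpha vs. standard dropout, and
// in-place vs. out-of-place. For standard dropout, kept activations are
// scaled by 1 / (1 - p) so the output matches the input in expectation.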
template<bool feature_dropout, bool alpha_dropout, bool inplace, typename T>
Ctype<inplace> _dropout_impl(T& input, double p, bool train) {
  TORCH_CHECK(p >= 0 && p <= 1, "dropout probability has to be between 0 and 1, but got ", p);
  if (p == 0 || !train || input.numel() == 0) {
    return input;
  }

  if (p == 1) {
    return multiply<inplace>(input, at::zeros({}, input.options()));
  }

  at::Tensor b; // used for alpha_dropout only

  // NB: THIS WAS CHANGED FROM THE ORIGINAL
  Tensor noise;
  if (feature_dropout) {
    auto empty = make_feature_noise(input);
    noise = at::bernoulli(empty, 1 - p);
  } else {
    // NB: it is important that this is at::empty and not at::empty_like
    auto empty = at::empty({}, input.options()).expand(input.sizes());
    noise = at::bernoulli(empty, 1 - p);
  }

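  // Alpha dropout (Klambauer et al., "Self-Normalizing Neural Networks"):
  // dropped units are pulled to the SELU negative saturation value -alpha
  // (alpha = lambda_selu * alpha_selu ~= 1.758), and the affine correction
  // (scale a, shift b = alpha * a * p) restores zero mean and unit variance.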
  if (alpha_dropout) {
    constexpr double alpha = 1.7580993408473766;
    double a = 1. / std::sqrt((alpha * alpha * p + 1) * (1 - p));
    b = noise.add(-1).mul_(alpha * a).add_(alpha * a * p);
    noise.mul_(a);
  } else {
    noise.div_(1 - p);
  }

  if (!alpha_dropout) {
    return multiply<inplace>(input, noise);
  } else {
    return multiply<inplace>(input, noise).add_(b);
  }
}

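// Stamp out named aliases that bind the feature/alpha flags; e.g.
// _feature_alpha_dropout<inplace>(input, p, train) forwards to
// _dropout_impl<true, true, inplace>(input, p, train).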
#define ALIAS_SPECIALIZATION(ALIAS_NAME, IS_FEATURE, IS_ALPHA)                      \
template <bool inplace, typename... Args>                                           \
Ctype<inplace> ALIAS_NAME(Args&&... args) {                                         \
  return _dropout_impl<IS_FEATURE, IS_ALPHA, inplace>(std::forward<Args>(args)...); \
}

ALIAS_SPECIALIZATION(_dropout,               false, false)
ALIAS_SPECIALIZATION(_feature_dropout,       true,  false)
ALIAS_SPECIALIZATION(_alpha_dropout,         false, true )
ALIAS_SPECIALIZATION(_feature_alpha_dropout, true,  true )

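// In training mode, accelerator backends go through the fused native_dropout
// kernel; everything else takes the composite path. NoNamesGuard suppresses
// named-tensor propagation inside the lambda, and propagate_names restores
// the input's names on the result.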
Tensor dropout(const Tensor& input, double p, bool train) {
  auto result = [&]() {
    NoNamesGuard guard;
    if (train && is_fused_kernel_acceptable(input, p)) {
      return std::get<0>(at::native_dropout(input, p, train));
    }
    return _dropout<false>(input, p, train);
  }();
  namedinference::propagate_names(result, input);
  return result;
}

Tensor& dropout_(Tensor& input, double p, bool train) {
  return _dropout<true>(input, p, train);
}

Tensor feature_dropout(const Tensor& input, double p, bool train) {
  return _feature_dropout<false>(input, p, train);
}

Tensor& feature_dropout_(Tensor& input, double p, bool train) {
  return _feature_dropout<true>(input, p, train);
}

Tensor alpha_dropout(const Tensor& input, double p, bool train) {
  return _alpha_dropout<false>(input, p, train);
}

Tensor& alpha_dropout_(Tensor& input, double p, bool train) {
  return _alpha_dropout<true>(input, p, train);
}

Tensor feature_alpha_dropout(const Tensor& input, double p, bool train) {
  return _feature_alpha_dropout<false>(input, p, train);
}

Tensor& feature_alpha_dropout_(Tensor& input, double p, bool train) {
  return _feature_alpha_dropout<true>(input, p, train);
}

} // namespace
} // namespace dropout_hack

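// Register the hacks on functorch's FuncTorchDynamicLayerFrontMode key so
// they intercept these ops ahead of the stock composite implementations
// whenever a functorch transform is active.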
TORCH_LIBRARY_IMPL(aten, FuncTorchDynamicLayerFrontMode, m) {
  m.impl("index_select_backward", index_select_backward_hack);
  m.impl("linear", linear_hack);
  m.impl("binary_cross_entropy_with_logits", binary_cross_entropy_with_logits_hack);
  m.impl("trace_backward", trace_backward_decomp);

  m.impl("dropout", dropout_hack::dropout);
  m.impl("feature_dropout", dropout_hack::feature_dropout);
  m.impl("alpha_dropout", dropout_hack::alpha_dropout);
  m.impl("feature_alpha_dropout", dropout_hack::feature_alpha_dropout);

  m.impl("dropout_", dropout_hack::dropout_);
  m.impl("feature_dropout_", dropout_hack::feature_dropout_);
  m.impl("alpha_dropout_", dropout_hack::alpha_dropout_);
  m.impl("feature_alpha_dropout_", dropout_hack::feature_alpha_dropout_);
}

} // namespace at::functorch