Compare commits

6 Commits

SHA1        Message                                    Date
6fda83926a  Update [ghstack-poisoned]                  2025-11-04 12:18:37 -06:00
8e526b7753  Update (base update) [ghstack-poisoned]    2025-11-04 12:18:37 -06:00
5471cd4ddb  Update [ghstack-poisoned]                  2025-11-03 23:31:37 -06:00
cb88b96453  Update (base update) [ghstack-poisoned]    2025-11-03 23:31:37 -06:00
9b0f7cfbed  Update [ghstack-poisoned]                  2025-11-03 16:35:10 -06:00
a351071544  Update (base update) [ghstack-poisoned]    2025-11-03 16:35:10 -06:00
12 changed files with 299 additions and 288 deletions

View File

@ -1,11 +1,15 @@
sphinx==7.2.6
sphinx==5.3.0
#Description: This is used to generate PyTorch docs
#Pinned versions: 7.2.6
#Pinned versions: 5.3.0
pytorch_sphinx_theme2==0.2.0
#Description: This is needed to generate PyTorch docs
#Pinned versions: 0.2.0
standard-imghdr==3.13.0; python_version >= "3.13"
#Description: This is needed by Sphinx, so it needs to be added here.
# The reasons are as follows:
# 1) This module was removed from the Python standard library in Python 3.13 (https://peps.python.org/pep-0594/#imghdr);
# 2) The current version of Sphinx (5.3.0) is not compatible with Python 3.13.
# Once Sphinx is upgraded to a version compatible with Python 3.13 or later, we can remove this dependency.
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git@71e55749be14ceb56e7f8211a9fb649866b87ad4#egg=pytorch_sphinx_theme2
# TODO: sphinxcontrib.katex 0.9.0 adds a local KaTeX server to speed up pre-rendering
# but it doesn't seem to work and hangs around idly. The initial thought that it is probably
# something related to Docker setup. We can investigate this later.
@ -32,17 +36,17 @@ tensorboard==2.18.0 ; python_version >= "3.13"
#Description: This is used to generate PyTorch docs
#Pinned versions: 2.13.0
breathe==4.36.0
breathe==4.34.0
#Description: This is used to generate PyTorch C++ docs
#Pinned versions: 4.36.0
#Pinned versions: 4.34.0
exhale==0.3.7
exhale==0.2.3
#Description: This is used to generate PyTorch C++ docs
#Pinned versions: 0.3.7
#Pinned versions: 0.2.3
docutils==0.20
docutils==0.16
#Description: This is used to generate PyTorch C++ docs
#Pinned versions: 0.20
#Pinned versions: 0.16
bs4==0.0.1
#Description: This is used to generate PyTorch C++ docs
@ -52,13 +56,13 @@ IPython==8.12.0
#Description: This is used to generate PyTorch functorch docs
#Pinned versions: 8.12.0
myst-nb==1.3.0
myst-nb==0.17.2
#Description: This is used to generate PyTorch functorch and torch.compile docs.
#Pinned versions: 1.3.0
#Pinned versions: 0.17.2
# The following are required to build torch.distributed.elastic.rendezvous.etcd* docs
python-etcd==0.4.5
sphinx-copybutton==0.5.0
sphinx-design==0.6.1
sphinx-design==0.4.0
sphinxcontrib-mermaid==1.0.0
myst-parser==4.0.1
myst-parser==0.18.1

View File

@ -89,41 +89,23 @@ if [ "$is_main_doc" = true ]; then
make coverage
# Now we have the coverage report, we need to make sure it is empty.
# Sphinx 7.2.6+ format: python.txt contains a statistics table with a TOTAL row
# showing the undocumented count in the third column.
# Example: | TOTAL | 99.83% | 2 |
# Count the number of lines in the file and turn that number into a variable
# $lines. The `cut -f1 ...` is to only parse the number, not the filename
# Skip the report header by subtracting 2: the header will be output even if
# there are no undocumented items.
#
# Also: see docs/source/conf.py for "coverage_ignore*" items, which should
# be documented then removed from there.
# Extract undocumented count from TOTAL row in Sphinx 7.2.6 statistics table
# The table format is: | Module | Coverage | Undocumented |
# Extract the third column (undocumented count) from the TOTAL row
undocumented=$(grep "| TOTAL" build/coverage/python.txt | awk -F'|' '{print $4}' | tr -d ' ')
if [ -z "$undocumented" ] || ! [[ "$undocumented" =~ ^[0-9]+$ ]]; then
lines=$(wc -l build/coverage/python.txt 2>/dev/null |cut -f1 -d' ')
undocumented=$((lines - 2))
if [ $undocumented -lt 0 ]; then
echo coverage output not found
exit 1
elif [ "$undocumented" -gt 0 ]; then
set +x # Disable command echoing for cleaner output
echo ""
echo "====================="
echo "UNDOCUMENTED OBJECTS:"
echo "====================="
echo ""
# Find the line number of the TOTAL row and print only what comes after it
total_line=$(grep -n "| TOTAL" build/coverage/python.txt | cut -d: -f1)
if [ -n "$total_line" ]; then
# Print only the detailed list (skip the statistics table)
tail -n +$((total_line + 2)) build/coverage/python.txt
else
# Fallback to showing entire file if TOTAL line not found
cat build/coverage/python.txt
fi
echo ""
elif [ $undocumented -gt 0 ]; then
echo undocumented objects found:
cat build/coverage/python.txt
echo "Make sure you've updated relevant .rsts in docs/source!"
echo "You can reproduce locally by running 'cd docs && make coverage && tail -n +\$((\$(grep -n \"| TOTAL\" build/coverage/python.txt | cut -d: -f1) + 2)) build/coverage/python.txt'"
set -x # Re-enable command echoing
echo "You can reproduce locally by running 'cd docs && make coverage && cat build/coverage/python.txt'"
exit 1
fi
else

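For illustration only (not part of this PR), a small Python sketch of the same TOTAL-row parsing that the grep/awk pipeline above performs on the Sphinx 7.2.6 coverage table; the report path and the row layout (`| TOTAL | 99.83% | 2 |`) are taken from the comments in the script:

```python
# Hypothetical helper mirroring `grep "| TOTAL" ... | awk -F'|' '{print $4}'`:
# read the Sphinx coverage report and return the undocumented count.
from pathlib import Path


def undocumented_count(report: Path = Path("build/coverage/python.txt")) -> int:
    for line in report.read_text().splitlines():
        if line.lstrip().startswith("| TOTAL"):
            # Splitting on '|' yields ['', ' TOTAL ', ' 99.83% ', ' 2 ', ''],
            # so index 3 corresponds to awk's $4 (the undocumented column).
            cells = [c.strip() for c in line.split("|")]
            return int(cells[3])
    raise RuntimeError("coverage output not found")


if __name__ == "__main__":
    print(undocumented_count())
```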
View File

@ -1,7 +1,7 @@
# Security Policy
- [**Reporting a Vulnerability**](#reporting-a-vulnerability)
- [**Using PyTorch Securely**](#using-pytorch-securely)
- [**Using Pytorch Securely**](#using-pytorch-securely)
- [Untrusted models](#untrusted-models)
- [TorchScript models](#torchscript-models)
- [Untrusted inputs](#untrusted-inputs)
@ -10,28 +10,28 @@
- [**CI/CD security principles**](#cicd-security-principles)
## Reporting Security Issues
Beware that none of the topics under [Using PyTorch Securely](#using-pytorch-securely) are considered vulnerabilities of PyTorch.
Beware that none of the topics under [Using Pytorch Securely](#using-pytorch-securely) are considered vulnerabilities of Pytorch.
However, if you believe you have found a security vulnerability in PyTorch, we encourage you to let us know right away. We will investigate all legitimate reports and do our best to quickly fix the problem.
Please report security issues using https://github.com/pytorch/pytorch/security/advisories/new
All reports submitted through the security advisories mechanism will **either be made public or dismissed by the team within 90 days of the submission**. If an advisory has been closed on the grounds that it is not a security issue, please do not hesitate to create a [new issue](https://github.com/pytorch/pytorch/issues/new?template=bug-report.yml) as it is still likely a valid issue within the framework.
All reports submitted thru the security advisories mechanism will **either be made public or dismissed by the team within 90 days of the submission**. If an advisory has been closed on the grounds that it is not a security issue, please do not hesitate to create a [new issue](https://github.com/pytorch/pytorch/issues/new?template=bug-report.yml) as it is still likely a valid issue within the framework.
Please refer to the following page for our responsible disclosure policy, reward guidelines, and those things that should not be reported:
https://www.facebook.com/whitehat
## Using PyTorch Securely
**PyTorch models are programs**, so treat their security seriously -- running untrusted models is equivalent to running untrusted code. In general we recommend that model weights and the python code for the model are distributed independently. That said, be careful about where you get the python code from and who wrote it (preferably check for provenance or checksums, and do not run arbitrary pip-installed packages).
## Using Pytorch Securely
**Pytorch models are programs**, so treat their security seriously -- running untrusted models is equivalent to running untrusted code. In general we recommend that model weights and the python code for the model are distributed independently. That said, be careful about where you get the python code from and who wrote it (preferably check for provenance or checksums, and do not run arbitrary pip-installed packages).
### Untrusted models
Be careful when running untrusted models. This classification includes models created by unknown developers or utilizing data obtained from unknown sources[^data-poisoning-sources].
**Prefer to execute untrusted models within a secure, isolated environment such as a sandbox** (e.g., containers, virtual machines). This helps protect your system from potentially malicious code. You can find further details and instructions on [this page](https://developers.google.com/code-sandboxing).
**Be mindful of risky model formats**. Prefer to share and load weights using the appropriate format for your use case. [Safetensors](https://huggingface.co/docs/safetensors/en/index) gives the most safety but is the most restricted in what it supports. [`torch.load`](https://pytorch.org/docs/stable/generated/torch.load.html#torch.load) has a significantly larger attack surface but is more flexible in what it can serialize. See the documentation for more details.
**Be mindful of risky model formats**. Prefer to share and load weights using the appropriate format for your use case. [safetensors](https://huggingface.co/docs/safetensors/en/index) gives the most safety but is the most restricted in what it supports. [`torch.load`](https://pytorch.org/docs/stable/generated/torch.load.html#torch.load) has a significantly larger attack surface but is more flexible in what it can serialize. See the documentation for more details.
Even for more secure serialization formats, unexpected inputs to the downstream system can cause diverse security threats (e.g. denial of service, out-of-bounds reads/writes), so we recommend extensive validation of any untrusted inputs.
@ -43,7 +43,7 @@ Important Note: The trustworthiness of a model is not binary. You must always de
### TorchScript models
TorchScript models should be treated the same way as locally executable code from an unknown source. Only run TorchScript models if you trust the provider. Please note, that tools for introspecting TorchScript models (such as `torch.utils.model_dump`) may also execute partial or full code stored in those models, therefore they should be used only if you trust the provider of the binary you are about to load.
TorchScript models should treated the same way as locally executable code from an unknown source. Only run TorchScript models if you trust the provider. Please note, that tools for introspecting TorchScript models (such as `torch.utils.model_dump`) may also execute partial or full code stored in those models, therefore they should be used only if you trust the provider of the binary you are about to load.
### Untrusted inputs during training and prediction
@ -59,9 +59,9 @@ If applicable, prepare your model against bad inputs and prompt injections. Some
### Data privacy
**Take special security measures if you train your models with sensitive data**. Prioritize [sandboxing](https://developers.google.com/code-sandboxing) your models and:
- Do not feed sensitive data to an untrusted model (even if runs in a sandboxed environment)
- If you consider publishing a model that was partially trained with sensitive data, be aware that data can potentially be recovered from the trained weights (especially if the model overfits).
**Take special security measures if your model if you train models with sensitive data**. Prioritize [sandboxing](https://developers.google.com/code-sandboxing) your models and:
- Do not feed sensitive data to untrusted model (even if runs in a sandboxed environment)
- If you consider publishing a model that was partially trained with sensitive data, be aware that data can potentially be recovered from the trained weights (especially if model overfits).
### Using distributed features

View File

@ -147,6 +147,19 @@ class MetalShaderLibrary {
const std::optional<c10::Scalar> alpha = std::nullopt,
const std::optional<c10::ScalarType> scalar_arg_type = std::nullopt);
template <typename T>
void exec_unary_kernel_with_params(
TensorIteratorBase& iter,
const std::string& name,
T params,
const std::string& params_type_name);
template <typename T>
void exec_binary_kernel_with_params(
TensorIteratorBase& iter,
const std::string& name,
T params,
const std::string& params_type_name);
protected:
virtual MTLLibrary_t getLibrary();
virtual MTLLibrary_t getLibrary(

View File

@ -7,10 +7,12 @@
#include <ATen/Tensor.h>
#include <ATen/TensorIterator.h>
#include <ATen/Utils.h>
#include <ATen/mps/MPSProfiler.h>
#include <ATen/mps/MPSStream.h>
#include <ATen/native/mps/MetalShaderLibrary.h>
#include <ATen/native/mps/TensorFactory.h>
#include <c10/core/ScalarType.h>
#include <fmt/format.h>
#include <torch/library.h>
#include <unordered_map>
@ -631,4 +633,147 @@ inline bool needsGather(const TensorBase& t) {
return !is_macOS_15_0_or_newer && (!t.is_contiguous() || t.storage_offset());
}
template <typename T>
void MetalShaderLibrary::exec_unary_kernel_with_params(TensorIteratorBase& iter,
const std::string& name,
T params,
const std::string& params_type_name) {
using namespace at::mps;
// Decompose 64-bit tensor into 32-bit ones
if (!iter.can_use_32bit_indexing()) {
for (auto&& sub_iter : iter.with_32bit_indexing()) {
exec_unary_kernel_with_params(sub_iter, name, params, params_type_name);
}
return;
}
auto inputTensor = iter.input(0);
auto outputTensor = iter.output(0);
uint32_t length = iter.numel();
if (length == 0) {
return;
}
auto kernel_name = fmt::format("{}_{}_{}_{}{}",
name,
iter.is_contiguous() ? "dense" : "strided",
scalarToMetalTypeString(outputTensor),
scalarToMetalTypeString(inputTensor),
fmt::format("_{}", params_type_name));
@autoreleasepool {
auto cplState = getPipelineStateForFunc(kernel_name);
MPSStream* mpsStream = getCurrentMPSStream();
dispatch_sync(mpsStream->queue(), ^() {
auto computeEncoder = mpsStream->commandEncoder();
getMPSProfiler().beginProfileKernel(cplState, name, {inputTensor});
[computeEncoder setComputePipelineState:cplState];
bind_iter_tensors(computeEncoder, iter);
if (!iter.is_contiguous()) {
mtl_setArgs<2>(computeEncoder,
outputTensor.sizes(),
inputTensor.strides(),
outputTensor.strides(),
inputTensor.ndimension());
}
detail::mtl_setArg(computeEncoder, params, iter.is_contiguous() ? 2 : 6);
mtl_dispatch1DJob(computeEncoder, cplState, length);
getMPSProfiler().endProfileKernel(cplState);
});
}
}
template <typename T>
void MetalShaderLibrary::exec_binary_kernel_with_params(TensorIteratorBase& iter,
const std::string& name,
T params,
const std::string& params_type_name) {
using namespace mps;
// TODO: Figure a better place to downcast double scalars (probably in tensor iterator itself?)
// Right now running something like 1.0-torch.rand(5, device='mps') will create iterator with
// double as common dtype (because Python floating point are always 64-bit values)
TORCH_CHECK(iter.output().scalar_type() != at::kDouble, "float64 is not supported on MPS");
// Skip for empty iterators
if (iter.numel() == 0) {
return;
}
// Decompose 64-bit tensor into 32-bit ones
if (!iter.can_use_32bit_indexing()) {
for (auto&& sub_iter : iter.with_32bit_indexing()) {
exec_binary_kernel_with_params(sub_iter, name, params, params_type_name);
}
return;
}
auto convert_double_scalar = [](Tensor& t) {
if (t.dim() != 0) {
return;
}
if (t.scalar_type() == kDouble) {
t = t.to(kFloat);
} else if (t.scalar_type() == kComplexDouble) {
t = t.to(kComplexFloat);
}
};
Tensor input = iter.input(0);
Tensor other = iter.input(1);
Tensor out = iter.output();
convert_double_scalar(input);
convert_double_scalar(other);
MPSStream* mpsStream = getCurrentMPSStream();
const auto cast_needed = input.scalar_type() != other.scalar_type();
const auto suffix = iter.is_contiguous() ? "dense" : "strided";
// TODO: Implicitly pass both input and output types to non-cast kernels
const auto kernel_name = cast_needed
? fmt::format("{}_{}_cast_{}_{}", name, suffix, scalarToMetalTypeString(out), params_type_name)
: fmt::format("{}_{}_{}_{}_{}",
name,
suffix,
scalarToMetalTypeString(out),
scalarToMetalTypeString(input),
params_type_name);
dispatch_sync_with_rethrow(mpsStream->queue(), ^() {
@autoreleasepool {
auto computeEncoder = mpsStream->commandEncoder();
auto binaryPSO = getPipelineStateForFunc(kernel_name);
// this function call is a no-op if MPS Profiler is not enabled
getMPSProfiler().beginProfileKernel(binaryPSO, kernel_name, {input, other});
[computeEncoder setComputePipelineState:binaryPSO];
// Set input and output tensors
bind_iter_tensors(computeEncoder, iter);
// Iterator is contiguous if all of its elements are dense in storage,
// i.e. it's true for both row-first and column-first tensors
if (iter.is_contiguous()) {
detail::mtl_setArg(computeEncoder, params, 3);
if (cast_needed) {
std::array<int, 4> size_and_types = {static_cast<int>(c10::elementSize(input.scalar_type())),
static_cast<int>(c10::elementSize(other.scalar_type())),
static_cast<int>(input.scalar_type()),
static_cast<int>(other.scalar_type())};
mtl_setBytes(computeEncoder, size_and_types, 4);
}
} else {
// Please note that shapes and strides of the iterator might be
// different than that of its operands, for example binary op
// between 4x4 tensor and scalar will result in 1D 16 element iterator
std::array<int, 4> ndim_and_types = {iter.ndim(),
static_cast<int>(input.scalar_type()),
static_cast<int>(other.scalar_type()),
static_cast<int>(out.scalar_type())};
mtl_setArgs<3>(
computeEncoder, params, iter.shape(), iter.strides(0), iter.strides(1), iter.strides(2), ndim_and_types);
}
mtl_dispatch1DJob(computeEncoder, binaryPSO, iter.numel());
getMPSProfiler().endProfileKernel(binaryPSO);
}
});
}
} // namespace at::native::mps
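A rough illustration (not part of the PR) of the kernel-name scheme the two `exec_*_kernel_with_params` helpers above assemble with `fmt::format`; the per-op registrations in the `.metal` files have to produce matching function names:

```python
# Sketch of the Metal kernel names requested by exec_unary_kernel_with_params
# and exec_binary_kernel_with_params, mirroring the fmt::format calls above.
def unary_kernel_name(op, contiguous, out_dtype, in_dtype, params_type):
    layout = "dense" if contiguous else "strided"
    return f"{op}_{layout}_{out_dtype}_{in_dtype}_{params_type}"


def binary_kernel_name(op, contiguous, out_dtype, in_dtype, params_type, cast_needed):
    layout = "dense" if contiguous else "strided"
    if cast_needed:
        # Casting kernels only encode the output dtype.
        return f"{op}_{layout}_cast_{out_dtype}_{params_type}"
    return f"{op}_{layout}_{out_dtype}_{in_dtype}_{params_type}"


# A contiguous float ELU forward, for example, resolves to:
#   elu_dense_float_float_ELUParams_float
print(unary_kernel_name("elu", True, "float", "float", "ELUParams_float"))
```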

View File

@ -0,0 +1,16 @@
#pragma once
template <typename T>
struct ELUParams {
T alpha;
T scale;
T input_scale;
};
template <typename T>
struct ELUBackwardParams {
T alpha;
T scale;
T input_scale;
bool is_result;
};

View File

@ -1,3 +1,4 @@
#include <ATen/native/mps/kernels/Activation.h>
#include <c10/metal/indexing.h>
#include <c10/metal/special_math.h>
#include <metal_stdlib>
@ -99,6 +100,59 @@ REGISTER_BINARY_OP(hardswish_backward, float, float);
REGISTER_BINARY_OP(hardswish_backward, half, half);
REGISTER_BINARY_OP(hardswish_backward, bfloat, bfloat);
struct elu_functor {
template <typename T>
inline T operator()(const T self_, const ELUParams<T> params) {
using op_T = opmath_t<T>;
auto alpha = static_cast<op_T>(params.alpha);
auto scale = static_cast<op_T>(params.scale);
auto input_scale = static_cast<op_T>(params.input_scale);
auto self = static_cast<op_T>(self_);
auto neg_res = alpha * (::metal::precise::exp(self * input_scale) - 1);
return static_cast<T>(scale * (self < 0 ? neg_res : self));
}
};
struct elu_backward_functor {
template <typename T>
inline T operator()(
const T grad_output_,
const T self_,
ELUBackwardParams<T> params) {
using op_T = opmath_t<T>;
auto alpha = static_cast<op_T>(params.alpha);
auto scale = static_cast<op_T>(params.scale);
auto input_scale = static_cast<op_T>(params.input_scale);
auto grad_output = static_cast<op_T>(grad_output_);
auto self = static_cast<op_T>(self_);
if (params.is_result) {
auto neg_coef = input_scale * (self + alpha * scale);
return static_cast<T>(grad_output * (self <= 0 ? neg_coef : scale));
} else {
auto neg_coef = input_scale * alpha * scale *
::metal::precise::exp(self * input_scale);
return static_cast<T>(grad_output * (self <= 0 ? neg_coef : scale));
}
}
};
#define REGISTER_ELU_OP(T) \
typedef ELUParams<T> ELUParams_##T; \
REGISTER_UNARY_ALPHA_OP(elu, T, ELUParams_##T, T);
REGISTER_ELU_OP(float);
REGISTER_ELU_OP(half);
REGISTER_ELU_OP(bfloat);
#define REGISTER_ELU_BACKWARD_OP(T) \
typedef ELUBackwardParams<T> ELUBackwardParams_##T; \
REGISTER_BINARY_ALPHA_OP(elu_backward, T, ELUBackwardParams_##T, T);
REGISTER_ELU_BACKWARD_OP(float);
REGISTER_ELU_BACKWARD_OP(half);
REGISTER_ELU_BACKWARD_OP(bfloat);
struct leaky_relu_functor {
template <typename T>
inline T operator()(const T x, const T negative_slope) {

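For reference, the closed forms implemented by the `elu_functor` and `elu_backward_functor` added above, writing $\alpha$ for `alpha`, $s$ for `scale`, and $k$ for `input_scale`:

$$
\mathrm{ELU}(x) =
\begin{cases}
s\,x, & x > 0 \\
s\,\alpha\,\bigl(e^{k x} - 1\bigr), & x \le 0
\end{cases}
\qquad
\frac{\partial\,\mathrm{ELU}}{\partial x} =
\begin{cases}
s, & x > 0 \\
s\,\alpha\,k\,e^{k x}, & x \le 0
\end{cases}
$$

When `is_result` is true, only the forward output $y = \mathrm{ELU}(x)$ is available; on the negative branch $s\,\alpha\,e^{k x} = y + s\,\alpha$, so the gradient factor becomes $k\,(y + \alpha s)$, matching `neg_coef = input_scale * (self + alpha * scale)` in the backward functor.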
View File

@ -11,8 +11,6 @@
#include <ATen/ops/_log_softmax_native.h>
#include <ATen/ops/_prelu_kernel_backward_native.h>
#include <ATen/ops/_prelu_kernel_native.h>
#include <ATen/ops/elu_backward_native.h>
#include <ATen/ops/elu_native.h>
#include <ATen/ops/gelu_backward_native.h>
#include <ATen/ops/gelu_native.h>
#include <ATen/ops/glu_backward_native.h>
@ -698,194 +696,6 @@ TORCH_IMPL_FUNC(gelu_backward_out_mps)
}
}
static void elu_variants_out_mps(const Tensor& self,
const Scalar& alpha,
const Scalar& scale,
const Scalar& input_scale,
const Tensor& result,
std::string func_name) {
using namespace mps;
using CachedGraph = MPSUnaryCachedGraph;
auto resultMemFormat = result.suggest_memory_format();
bool executeGatherOp = !(self.is_contiguous(resultMemFormat) && result.is_contiguous(resultMemFormat));
Tensor out;
if (executeGatherOp) {
out = at::empty_like(result, MemoryFormat::Contiguous);
}
// Empty output
if (result.numel() == 0) {
return;
}
MPSStream* stream = getCurrentMPSStream();
@autoreleasepool {
std::string key = func_name + ":" + getTensorsStringKey({self}) + ":" + std::to_string(alpha.to<double>()) + ":" +
std::to_string(scale.to<double>()) + ":" + std::to_string(input_scale.to<double>());
auto cachedGraph = LookUpOrCreateCachedGraph<CachedGraph>(key, [&](auto mpsGraph, auto newCachedGraph) {
MPSGraphTensor* inputTensor = mpsGraphRankedPlaceHolder(mpsGraph, self);
// scale * (max(0, x) + min(0, alpha * (exp(input_scale * x) - 1) ))
MPSGraphTensor* alphaTensor = [mpsGraph constantWithScalar:alpha.to<double>()
shape:@[ @1 ]
dataType:getMPSDataType(self)];
MPSGraphTensor* inputScaleTensor = [mpsGraph constantWithScalar:input_scale.to<double>()
shape:@[ @1 ]
dataType:getMPSDataType(self)];
MPSGraphTensor* scaleTensor = [mpsGraph constantWithScalar:scale.to<double>()
shape:@[ @1 ]
dataType:getMPSDataType(self)];
MPSGraphTensor* unitTensor = [mpsGraph constantWithScalar:1.0f shape:@[ @1 ] dataType:getMPSDataType(self)];
MPSGraphTensor* zeroTensor = [mpsGraph constantWithScalar:0.0f shape:@[ @1 ] dataType:getMPSDataType(self)];
MPSGraphTensor* scaledInputTensor = [mpsGraph multiplicationWithPrimaryTensor:inputTensor
secondaryTensor:inputScaleTensor
name:nil];
MPSGraphTensor* exponentTensor = [mpsGraph exponentWithTensor:scaledInputTensor name:nil];
MPSGraphTensor* exponentMinusOneTensor = [mpsGraph subtractionWithPrimaryTensor:exponentTensor
secondaryTensor:unitTensor
name:nil];
MPSGraphTensor* alphaTimesTensor = [mpsGraph multiplicationWithPrimaryTensor:exponentMinusOneTensor
secondaryTensor:alphaTensor
name:nil];
MPSGraphTensor* predicateTensor = [mpsGraph greaterThanWithPrimaryTensor:inputTensor
secondaryTensor:zeroTensor
name:nil];
MPSGraphTensor* fusedOutput = [mpsGraph selectWithPredicateTensor:predicateTensor
truePredicateTensor:inputTensor
falsePredicateTensor:alphaTimesTensor
name:nil];
MPSGraphTensor* outputTensor = [mpsGraph multiplicationWithPrimaryTensor:fusedOutput
secondaryTensor:scaleTensor
name:nil];
newCachedGraph->inputTensor_ = inputTensor;
newCachedGraph->outputTensor_ = outputTensor;
});
auto selfPlaceholder = Placeholder(cachedGraph->inputTensor_, self, nil, executeGatherOp);
auto outputPlaceholder = Placeholder(cachedGraph->outputTensor_, out.has_storage() ? out : result, nil, false);
auto feeds = dictionaryFromPlaceholders(selfPlaceholder);
runMPSGraph(stream, cachedGraph->graph(), feeds, outputPlaceholder);
if (out.has_storage()) {
result.copy_(out);
}
}
}
// scale * (max(0, x) + min(0, alpha * (exp(input_scale * x) - 1) ))
TORCH_IMPL_FUNC(elu_out_mps)
(const Tensor& self, const Scalar& alpha, const Scalar& scale, const Scalar& input_scale, const Tensor& result) {
elu_variants_out_mps(self, alpha, scale, input_scale, result, "elu_out_mps");
}
TORCH_IMPL_FUNC(elu_backward_out_mps)
(const Tensor& grad_output,
const Scalar& alpha,
const Scalar& scale,
const Scalar& input_scale,
bool is_result,
const Tensor& self_or_result,
const Tensor& grad_input) {
using namespace mps;
using CachedGraph = MPSUnaryGradCachedGraph;
auto gradMemFormat = grad_input.suggest_memory_format();
bool executeGatherOp = !(grad_output.is_contiguous(gradMemFormat) && self_or_result.is_contiguous(gradMemFormat) &&
grad_input.is_contiguous(gradMemFormat));
Tensor out;
if (executeGatherOp && gradMemFormat == MemoryFormat::ChannelsLast) {
out = at::empty_like(grad_input, MemoryFormat::Contiguous);
}
// Empty output
if (grad_input.numel() == 0) {
return;
}
MPSStream* stream = getCurrentMPSStream();
@autoreleasepool {
std::string key = "elu_backward_out_mps:" + getTensorsStringKey({grad_output, self_or_result}) + ":" +
std::to_string(alpha.to<double>()) + ":" + std::to_string(scale.to<double>()) + ":" +
std::to_string(input_scale.to<double>()) + ":" + std::to_string(is_result);
auto cachedGraph = LookUpOrCreateCachedGraph<CachedGraph>(key, [&](auto mpsGraph, auto newCachedGraph) {
MPSGraphTensor* gradOutputTensor = mpsGraphRankedPlaceHolder(mpsGraph, grad_output);
MPSGraphTensor* selfOrResultTensor = mpsGraphRankedPlaceHolder(mpsGraph, self_or_result);
MPSGraphTensor* lessThanZeroGradTensor = nil;
if (is_result) {
MPSGraphTensor* alphaTensor = [mpsGraph constantWithScalar:alpha.to<double>()
shape:@[ @1 ]
dataType:getMPSDataType(grad_output)];
MPSGraphTensor* resultPlusAlphaTensor = [mpsGraph additionWithPrimaryTensor:selfOrResultTensor
secondaryTensor:alphaTensor
name:nil];
auto constMul = scale.to<double>() * input_scale.to<double>();
MPSGraphTensor* constMulTensor = [mpsGraph constantWithScalar:constMul
shape:@[ @1 ]
dataType:getMPSDataType(grad_output)];
lessThanZeroGradTensor = [mpsGraph multiplicationWithPrimaryTensor:resultPlusAlphaTensor
secondaryTensor:constMulTensor
name:nil];
} else {
MPSGraphTensor* inputScaleTensor = [mpsGraph constantWithScalar:input_scale.to<double>()
shape:@[ @1 ]
dataType:getMPSDataType(grad_output)];
MPSGraphTensor* scaledInputTensor = [mpsGraph multiplicationWithPrimaryTensor:selfOrResultTensor
secondaryTensor:inputScaleTensor
name:nil];
MPSGraphTensor* expTensor = [mpsGraph exponentWithTensor:scaledInputTensor name:nil];
auto constMul = scale.to<double>() * input_scale.to<double>() * alpha.to<double>();
MPSGraphTensor* constMulTensor = [mpsGraph constantWithScalar:constMul
shape:@[ @1 ]
dataType:getMPSDataType(grad_output)];
lessThanZeroGradTensor = [mpsGraph multiplicationWithPrimaryTensor:expTensor
secondaryTensor:constMulTensor
name:nil];
}
MPSGraphTensor* scaleTensor = [mpsGraph constantWithScalar:scale.to<double>()
shape:@[ @1 ]
dataType:getMPSDataType(grad_output)];
MPSGraphTensor* zeroTensor = [mpsGraph constantWithScalar:0.0f
shape:@[ @1 ]
dataType:getMPSDataType(grad_output)];
MPSGraphTensor* predicateTensor = [mpsGraph greaterThanWithPrimaryTensor:selfOrResultTensor
secondaryTensor:zeroTensor
name:nil];
MPSGraphTensor* gradTensor = [mpsGraph selectWithPredicateTensor:predicateTensor
truePredicateTensor:scaleTensor
falsePredicateTensor:lessThanZeroGradTensor
name:nil];
MPSGraphTensor* gradInputTensor = [mpsGraph multiplicationWithPrimaryTensor:gradTensor
secondaryTensor:gradOutputTensor
name:nil];
newCachedGraph->gradOutputTensor_ = gradOutputTensor;
newCachedGraph->inputTensor_ = selfOrResultTensor;
newCachedGraph->gradInputTensor_ = gradInputTensor;
});
Placeholder gradOutputPlaceholder = Placeholder(cachedGraph->gradOutputTensor_, grad_output, nil, executeGatherOp);
Placeholder selfOrResultPlaceholder = Placeholder(cachedGraph->inputTensor_, self_or_result, nil, executeGatherOp);
Placeholder gradInputPlaceholder =
Placeholder(cachedGraph->gradInputTensor_, out.has_storage() ? out : grad_input, nil, false);
auto feeds = dictionaryFromPlaceholders(gradOutputPlaceholder, selfOrResultPlaceholder);
runMPSGraph(stream, cachedGraph->graph(), feeds, gradInputPlaceholder);
if (out.has_storage()) {
grad_input.copy_(out);
}
}
}
TORCH_IMPL_FUNC(glu_out_mps)(const Tensor& self, const int64_t dim, const Tensor& output) {
using namespace mps;
using CachedGraph = MPSUnaryCachedGraph;

View File

@ -1,8 +1,10 @@
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <ATen/Dispatch.h>
#include <ATen/TensorIterator.h>
#include <ATen/mps/MPSProfiler.h>
#include <ATen/native/Activation.h>
#include <ATen/native/mps/OperationUtils.h>
#include <ATen/native/mps/kernels/Activation.h>
#include <fmt/format.h>
namespace at::native {
@ -41,6 +43,30 @@ static void hardswish_backward_kernel(at::TensorIterator& iter) {
lib.exec_binary_kernel(iter, "hardswish_backward");
}
static void elu_kernel(TensorIteratorBase& iter, const Scalar& alpha, const Scalar& scale, const Scalar& input_scale) {
AT_DISPATCH_FLOATING_TYPES_AND2(c10::kHalf, c10::kBFloat16, iter.common_dtype(), "elu_mps", [&]() {
ELUParams<scalar_t> params{alpha.to<scalar_t>(), scale.to<scalar_t>(), input_scale.to<scalar_t>()};
lib.exec_unary_kernel_with_params(
iter, "elu", params, fmt::format("ELUParams_{}", mps::scalarToMetalTypeString(iter.common_dtype())));
});
}
static void elu_backward_kernel(TensorIteratorBase& iter,
const Scalar& alpha,
const Scalar& scale,
const Scalar& input_scale,
bool is_result) {
AT_DISPATCH_FLOATING_TYPES_AND2(c10::kHalf, c10::kBFloat16, iter.common_dtype(), "elu_backward_mps", [&]() {
ELUBackwardParams<scalar_t> params{
alpha.to<scalar_t>(), scale.to<scalar_t>(), input_scale.to<scalar_t>(), is_result};
lib.exec_binary_kernel_with_params(
iter,
"elu_backward",
params,
fmt::format("ELUBackwardParams_{}", mps::scalarToMetalTypeString(iter.common_dtype())));
});
}
static void leaky_relu_kernel(TensorIteratorBase& iter, const Scalar& negative_slope) {
lib.exec_unary_kernel(iter, "leaky_relu", negative_slope);
}
@ -56,6 +82,8 @@ REGISTER_DISPATCH(hardsigmoid_stub, hardsigmoid_kernel);
REGISTER_DISPATCH(hardsigmoid_backward_stub, hardsigmoid_backward_kernel);
REGISTER_DISPATCH(hardswish_stub, hardswish_kernel);
REGISTER_DISPATCH(hardswish_backward_stub, hardswish_backward_kernel);
REGISTER_DISPATCH(elu_stub, elu_kernel);
REGISTER_DISPATCH(elu_backward_stub, elu_backward_kernel);
REGISTER_DISPATCH(leaky_relu_stub, leaky_relu_kernel);
REGISTER_DISPATCH(leaky_relu_backward_stub, leaky_relu_backward_kernel);
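A minimal local smoke test for the newly registered `elu_stub`/`elu_backward_stub` MPS dispatch (a hedged sketch, assuming a macOS build where `torch.backends.mps.is_available()` is true; not part of this PR):

```python
# Compare the Metal ELU forward/backward against the CPU reference.
import torch

assert torch.backends.mps.is_available(), "requires an MPS-enabled build"

x_cpu = torch.randn(1024, dtype=torch.float32, requires_grad=True)
x_mps = x_cpu.detach().to("mps").requires_grad_(True)

y_cpu = torch.nn.functional.elu(x_cpu, alpha=0.5)
y_mps = torch.nn.functional.elu(x_mps, alpha=0.5)
# Loose tolerances to allow for float32 exp() differences between backends.
torch.testing.assert_close(y_mps.cpu(), y_cpu, rtol=1e-5, atol=1e-5)

# Backward exercises elu_backward_stub registered above.
y_cpu.sum().backward()
y_mps.sum().backward()
torch.testing.assert_close(x_mps.grad.cpu(), x_cpu.grad, rtol=1e-5, atol=1e-5)
```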

View File

@ -12058,8 +12058,7 @@
device_check: NoCheck # TensorIterator
python_module: nn
dispatch:
CPU, CUDA: elu_out
MPS: elu_out_mps
CPU, CUDA, MPS: elu_out
- func: elu(Tensor self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1) -> Tensor
structured_delegate: elu.out
@ -12072,8 +12071,7 @@
structured_inherits: TensorIteratorBase
python_module: nn
dispatch:
CPU, CUDA: elu_backward_out
MPS: elu_backward_out_mps
CPU, CUDA, MPS: elu_backward_out
- func: elu_backward(Tensor grad_output, Scalar alpha, Scalar scale, Scalar input_scale, bool is_result, Tensor self_or_result) -> Tensor
structured_delegate: elu_backward.grad_input

View File

@ -206,41 +206,6 @@ templates_path = [
os.path.join(os.path.dirname(pytorch_sphinx_theme2.__file__), "templates"),
]
# TODO: document these and remove them from here.
# Fixes duplicated autosummary file names that differ only in case
# (e.g. torch.nn.utils.prune.identity vs torch.nn.utils.prune.Identity).
autosummary_filename_map = {
"torch.nn.utils.prune.identity": "torch.nn.utils.prune.identity_function",
"torch.nn.utils.prune.Identity": "torch.nn.utils.prune.Identity_class",
"torch.optim.adamw.adamw": "torch.optim.adamw.adamw_function",
"torch.optim.adamw.AdamW": "torch.optim.adamw.AdamW_class",
"torch.optim.asgd.asgd": "torch.optim.asgd.asgd_function",
"torch.optim.asgd.ASGD": "torch.optim.asgd.ASGD_class",
"torch.optim.nadam.nadam": "torch.optim.nadam.nadam_function",
"torch.optim.nadam.NAdam": "torch.optim.nadam.NAdam_class",
"torch.optim.radam.radam": "torch.optim.radam.radam_function",
"torch.optim.radam.RAdam": "torch.optim.radam.RAdam_class",
"torch.optim.rmsprop.rmsprop": "torch.optim.rmsprop.rmsprop_function",
"torch.optim.rmsprop.RMSprop": "torch.optim.rmsprop.RMSprop_class",
"torch.optim.rprop.rprop": "torch.optim.rprop.rprop_function",
"torch.optim.rprop.Rprop": "torch.optim.rprop.Rprop_class",
"torch.optim.sgd.sgd": "torch.optim.sgd.sgd_function",
"torch.optim.sgd.SGD": "torch.optim.sgd.SGD_class",
"torch.optim.adadelta.adadelta": "torch.optim.adadelta.adadelta_function",
"torch.optim.adadelta.Adadelta": "torch.optim.adadelta.Adadelta_class",
"torch.optim.adagrad.adagrad": "torch.optim.adagrad.adagrad_function",
"torch.optim.adagrad.Adagrad": "torch.optim.adagrad.Adagrad_class",
"torch.optim.adam.adam": "torch.optim.adam.adam_function",
"torch.optim.adam.Adam": "torch.optim.adam.Adam_class",
"torch.optim.adamax.adamax": "torch.optim.adamax.adamax_function",
"torch.optim.adamax.Adamax": "torch.optim.adamax.Adamax_class",
"torch.mtia.stream": "torch.mtia.stream_function",
"torch.mtia.Stream": "torch.mtia.Stream_class",
"torch.cpu.stream": "torch.cpu.stream_function",
"torch.cpu.Stream": "torch.cpu.Stream_class",
"torch.cuda.stream": "torch.cuda.stream_function",
"torch.cuda.Stream": "torch.cuda.Stream_class",
"torch.xpu.stream": "torch.xpu.stream_function",
"torch.xpu.Stream": "torch.xpu.Stream_class",
}
coverage_ignore_functions = [
# torch
@ -3230,11 +3195,6 @@ autodoc_type_aliases = {
# Enable overriding of function signatures in the first line of the docstring.
autodoc_docstring_signature = True
# Exclude inherited IntEnum methods that have RST formatting issues in their docstrings
autodoc_default_options = {
"exclude-members": "from_bytes, to_bytes",
}
# -- katex javascript in header
#
# def setup(app):

View File

@ -253,6 +253,7 @@ regular full-precision tensor.
.. autosummary::
:toctree: generated
:nosignatures:
:template: classtemplate.rst
view
as_strided