Files
pytorch/torch/testing/_internal/common_methods_invocations.py
Natalia Gimelshein 34e3930401 fix numpy compatibility for 2d small list indices (#154806)
Will fix #119548 and linked issues once we switch from warning to the new behavior,
but for now, given how much this syntax was used in our test suite, we suspect a silent change will be disruptive.
We will change the behavior after 2.8 branch is cut.
Numpy behavior was changed at least in numpy 1.24 (more than 2 years ago)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/154806
Approved by: https://github.com/cyyever, https://github.com/Skylion007, https://github.com/albanD
2025-06-04 01:58:52 +00:00

24890 lines
1.1 MiB

# mypy: ignore-errors
from functools import wraps, partial
from itertools import product, chain, islice
import itertools
import functools
import copy
import operator
import random
import unittest
import math
import enum
import torch
import numpy as np
import numpy.typing as npt
from torch import inf, nan
from typing import Any, Union
from collections.abc import Sequence
from torch.testing import make_tensor
from torch.testing._internal.common_dtype import (
_dispatch_dtypes, floating_types, floating_types_and, complex_types, floating_and_complex_types,
floating_and_complex_types_and, all_types_and_complex_and, all_types_and, all_types_and_complex, integral_types_and,
empty_types, complex_types_and, integral_types, custom_types, all_types_complex_float8_and, float8_types,
)
from torch.testing._internal.common_device_type import \
(onlyCPU, onlyCUDA, onlyNativeDeviceTypes, disablecuDNN, skipCUDAIfNoMagma, skipCUDAIfNoMagmaAndNoCusolver,
skipCUDAIfNoCusolver, skipCPUIfNoLapack, skipCPUIfNoFFT, skipCUDAIf, precisionOverride,
skipCPUIfNoMklSparse,
toleranceOverride, tol)
from torch.testing._internal.common_cuda import (
PLATFORM_SUPPORTS_FLASH_ATTENTION, PLATFORM_SUPPORTS_MEM_EFF_ATTENTION,
SM53OrLater, SM80OrLater, SM89OrLater, with_tf32_off, TEST_CUDNN, _get_torch_cuda_version,
_get_torch_rocm_version,
)
from torch.testing._internal.common_utils import (
make_fullrank_matrices_with_distinct_singular_values,
TEST_WITH_ROCM, IS_FBCODE, IS_WINDOWS, IS_MACOS, IS_S390X, TEST_SCIPY,
torch_to_numpy_dtype_dict, numpy_to_torch_dtype, TEST_WITH_ASAN,
GRADCHECK_NONDET_TOL, slowTest, TEST_WITH_SLOW,
TEST_WITH_TORCHINDUCTOR, MACOS_VERSION
)
from torch.testing._utils import wrapper_set_seed
import torch._refs as refs # noqa: F401
import torch._refs.nn.functional
import torch._refs.special
import torch._refs.linalg
import torch._prims as prims # noqa: F401
from torch.utils import _pytree as pytree
from torch._vendor.packaging import version
from torch.testing._internal.opinfo.core import ( # noqa: F401
L,
M,
S,
XS,
_NOTHING,
_getattr_qual,
DecorateInfo,
SampleInput,
ErrorInput,
AliasInfo,
NumericsFilter,
OpInfo,
_generate_reduction_inputs,
_generate_reduction_kwargs,
sample_inputs_reduction,
ReductionOpInfo,
reference_inputs_elementwise_binary,
make_error_inputs_elementwise_binary,
generate_elementwise_binary_tensors,
generate_elementwise_binary_arbitrarily_strided_tensors,
generate_elementwise_binary_small_value_tensors,
generate_elementwise_binary_large_value_tensors,
generate_elementwise_binary_extremal_value_tensors,
generate_elementwise_binary_broadcasting_tensors,
generate_elementwise_binary_with_scalar_samples,
generate_elementwise_binary_with_scalar_and_type_promotion_samples,
generate_elementwise_binary_noncontiguous_tensors,
sample_inputs_elementwise_binary,
BinaryUfuncInfo,
sample_inputs_elementwise_unary,
generate_elementwise_unary_tensors,
generate_elementwise_unary_small_value_tensors,
generate_elementwise_unary_large_value_tensors,
generate_elementwise_unary_extremal_value_tensors,
reference_inputs_elementwise_unary,
UnaryUfuncInfo,
sample_inputs_spectral_ops,
SpectralFuncType,
SpectralFuncInfo,
ShapeFuncInfo,
sample_inputs_foreach,
ForeachFuncInfo,
gradcheck_wrapper_hermitian_input,
gradcheck_wrapper_triangular_input,
gradcheck_wrapper_triangular_input_real_positive_diagonal,
gradcheck_wrapper_masked_operation,
gradcheck_wrapper_masked_pointwise_operation,
clone_sample,
)
from torch.testing._internal.opinfo.refs import ( # NOQA: F401
_find_referenced_opinfo,
_inherit_constructor_args,
PythonRefInfo,
ReductionPythonRefInfo,
ElementwiseUnaryPythonRefInfo,
ElementwiseBinaryPythonRefInfo,
)
from torch.testing._internal.opinfo.utils import (
np_unary_ufunc_integer_promotion_wrapper,
reference_reduction_numpy,
prod_numpy
)
from torch.testing._internal import opinfo
from torch.testing._internal.opinfo.definitions.linalg import (
sample_inputs_linalg_cholesky,
sample_inputs_linalg_cholesky_inverse,
sample_inputs_cross,
sample_inputs_linalg_qr_geqrf,
sample_inputs_linalg_invertible,
sample_inputs_lu_solve,
sample_inputs_legacy_solve,
sample_inputs_svd,
sample_inputs_linalg_det_logdet_slogdet,
sample_inputs_linalg_lu,
sample_inputs_diagonal_diag_embed,
error_inputs_diagonal_diag_embed,
)
from torch.testing._internal.opinfo.definitions.special import (
sample_inputs_i0_i1,
sample_inputs_polygamma,
reference_polygamma,
)
from torch.testing._internal.opinfo.definitions._masked import (
sample_inputs_softmax_variant,
)
from torch.testing._internal.opinfo.definitions.sparse import (
error_inputs_sparse_like_fns,
sample_inputs_sparse_like_fns,
error_inputs_sparse_mul,
sample_inputs_sparse_mul,
error_inputs_sparse_reduction_sum,
sample_inputs_sparse_reduction_sum
)
if TEST_SCIPY:
from scipy import stats
import scipy.spatial
import scipy.special
def close_to_int(x, eps=0.1):
    """Return a boolean mask of the elements of ``x`` that are close to an integer.

    An element is "close" when the magnitude of its fractional part is below
    ``eps`` or above ``1 - eps``.  For complex tensors the fractional part is
    taken on the real and imaginary components separately, and the magnitude
    of the resulting complex value is what gets compared.
    """
    if not x.is_complex():
        frac_magnitude = torch.abs(torch.frac(x))
    else:
        componentwise_frac = torch.frac(torch.view_as_real(x))
        frac_magnitude = torch.abs(torch.view_as_complex(componentwise_frac))
    just_above_integer = frac_magnitude < eps
    just_below_integer = frac_magnitude > (1 - eps)
    return just_above_integer | just_below_integer
def sample_inputs_slice(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs pairing a fresh tensor with slicing arguments.

    The first sample passes a single positional ``0``; the others pass
    ``dim``/``start``/``end`` (and optionally ``step``) as keywords,
    including negative bounds.
    """
    def fresh(*shape):
        return make_tensor(*shape, device=device, dtype=dtype,
                           low=None, high=None, requires_grad=requires_grad)

    cube = (20, 30, 40)
    # (tensor shape, positional op args, keyword op args)
    specs = (
        ((3,), (0,), {}),
        (cube, (), dict(dim=1, start=1, end=-2)),
        (cube, (), dict(dim=1, start=1, end=-2, step=3)),
        (cube, (), dict(dim=0, start=-10, end=-2, step=2)),
    )
    for shape, positional, keywords in specs:
        yield SampleInput(fresh(*shape), *positional, **keywords)
def sample_inputs_tensor_split(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs on an (S, S, S) tensor for each supported split spec.

    The split spec is given as an index tensor (including a 0-d tensor and a
    strided view), as a tuple of indices, or as an integer section count,
    each with and without an explicit dim.
    """
    new_input = partial(make_tensor, device=device, dtype=dtype,
                        low=None, high=None, requires_grad=requires_grad)

    # Cases with tensor indices.
    tensor_index_cases = (
        (torch.tensor([1, 2, 3]),),
        (torch.tensor(1),),
        (torch.tensor([1, 2, 3]), 1),
        (torch.tensor([1, 4, 2, 5, 3, 6])[::2], 1),
    )
    # Cases with list of indices.
    index_list_cases = (
        ((2, 4),),
        ((2, 4), 1),
        ((2, 4), -1),
    )
    # Cases with integer section.
    section_cases = (
        (3,),
        (3, 1),
        (3, -1),
    )

    for split_args in tensor_index_cases + index_list_cases + section_cases:
        yield SampleInput(new_input((S, S, S)), args=split_args)
def sample_inputs_hsplit(op_info, device, dtype, requires_grad, **kwargs):
    """Yield hsplit samples: a 1-D tensor split into 2 sections and a 3-D tensor split at indices."""
    tensor_of = partial(make_tensor, dtype=dtype, device=device,
                        low=None, high=None, requires_grad=requires_grad)
    # (tensor shape, indices_or_sections)
    specs = (
        ((6,), 2),
        ((S, S, S), [1, 2, 3]),
    )
    for shape, indices_or_sections in specs:
        yield SampleInput(tensor_of(*shape), indices_or_sections)
def sample_inputs_vsplit(op_info, device, dtype, requires_grad, **kwargs):
    """Yield vsplit samples: a (6, S) tensor split into 2 sections and a 3-D tensor split at indices."""
    tensor_of = partial(make_tensor, dtype=dtype, device=device,
                        low=None, high=None, requires_grad=requires_grad)
    # (tensor shape, indices_or_sections)
    specs = (
        ((6, S), 2),
        ((S, S, S), [1, 2, 3]),
    )
    for shape, indices_or_sections in specs:
        yield SampleInput(tensor_of(*shape), indices_or_sections)
def sample_inputs_dsplit(op_info, device, dtype, requires_grad, **kwargs):
    """Yield dsplit samples: a 3-D tensor split at indices and an (S, S, 6) tensor split into 2 sections."""
    tensor_of = partial(make_tensor, dtype=dtype, device=device,
                        low=None, high=None, requires_grad=requires_grad)
    # (tensor shape, indices_or_sections)
    specs = (
        ((S, S, S), [1, 2, 3]),
        ((S, S, 6), 2),
    )
    for shape, indices_or_sections in specs:
        yield SampleInput(tensor_of(*shape), indices_or_sections)
def error_inputs_hsplit(op_info, device, **kwargs):
    """Yield ErrorInputs for hsplit: 0-d input, a split size of 0, and a wrongly typed split spec."""
    float_tensor = partial(make_tensor, dtype=torch.float32, device=device)

    # 0-d input
    too_few_dims = ("torch.hsplit requires a tensor with at least 1 dimension, "
                    "but got a tensor with 0 dimensions!")
    yield ErrorInput(SampleInput(float_tensor(()), 0), error_regex=too_few_dims)

    # split size of 0
    zero_split = ("torch.hsplit attempted to split along dimension 1, "
                  f"but the size of the dimension {S} "
                  "is not divisible by the split_size 0!")
    yield ErrorInput(SampleInput(float_tensor((S, S, S)), 0), error_regex=zero_split)

    # Incorrect type for indices_or_section argument
    bad_type = "received an invalid combination of arguments."
    yield ErrorInput(
        SampleInput(float_tensor((S, S, S)), "abc"),
        error_type=TypeError,
        error_regex=bad_type,
    )
def error_inputs_vsplit(op_info, device, **kwargs):
    """Yield ErrorInputs for vsplit: 1-D input, a split size of 0, and a wrongly typed split spec."""
    float_tensor = partial(make_tensor, dtype=torch.float32, device=device)

    # 1-D input
    too_few_dims = ("torch.vsplit requires a tensor with at least 2 dimension, "
                    "but got a tensor with 1 dimensions!")
    yield ErrorInput(SampleInput(float_tensor(S), 0), error_regex=too_few_dims)

    # split size of 0
    zero_split = ("torch.vsplit attempted to split along dimension 0, "
                  f"but the size of the dimension {S} "
                  "is not divisible by the split_size 0!")
    yield ErrorInput(SampleInput(float_tensor(S, S, S), 0), error_regex=zero_split)

    # Incorrect type for indices_or_section argument
    bad_type = "received an invalid combination of arguments."
    yield ErrorInput(
        SampleInput(float_tensor(S, S, S), "abc"),
        error_type=TypeError,
        error_regex=bad_type,
    )
def error_inputs_dsplit(op_info, device, **kwargs):
    """Yield ErrorInputs for dsplit: 1-D input and a split size of 0."""
    float_tensor = partial(make_tensor, dtype=torch.float32, device=device)

    # 1-D input
    too_few_dims = ("torch.dsplit requires a tensor with at least 3 dimension, "
                    "but got a tensor with 1 dimensions!")
    yield ErrorInput(SampleInput(float_tensor(S), 0), error_regex=too_few_dims)

    # split size of 0
    zero_split = ("torch.dsplit attempted to split along dimension 2, "
                  f"but the size of the dimension {S} "
                  "is not divisible by the split_size 0!")
    yield ErrorInput(SampleInput(float_tensor(S, S, S), 0), error_regex=zero_split)
def sample_inputs_as_strided(op_info, device, dtype, requires_grad, **kwargs):
    """Yield as_strided samples: (size, stride) positionally, storage_offset as a keyword."""
    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # input shape, output shape, output stride, output storage offset
    specs = (
        ((1,), (1,), (1,), 0),
        ((3, 3), (2, 2), (1, 2), 0),
        ((3, 3), (2, 2), (1, 2), 1),
        ((16,), (2, 2, 2, 2), (1, 1, 1, 1), 0),
        ((16,), (2, 1, 1, 2), (1, 7, 7, 1), 0),
    )

    for in_shape, out_shape, out_stride, offset in specs:
        yield SampleInput(
            new_tensor(in_shape),
            args=(out_shape, out_stride),
            kwargs={"storage_offset": offset},
        )
def sample_inputs_as_strided_partial_views(op_info, device, dtype, requires_grad, **kwargs):
    """Yield as_strided samples whose input is the slice ``[5:15]`` of a 20-element tensor.

    Every sample uses size (2, 2) and stride (1, 2); the storage offset is
    left out, set to 0, or set to 10.
    """
    def partial_view():
        backing = make_tensor((20,), device=device, dtype=dtype)
        view = backing[5:15]
        return view.requires_grad_(requires_grad)

    # as_strided on offset, partial views
    for extra in ({}, {"storage_offset": 0}, {"storage_offset": 10}):
        yield SampleInput(partial_view(), (2, 2), (1, 2), **extra)
def sample_inputs_as_strided_scatter(op_info, device, dtype, requires_grad, **kwargs):
    """Yield as_strided_scatter samples: (input, src, size, stride) plus a storage_offset keyword.

    ``src`` is created with the same shape as the requested output size.
    """
    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # input shape, output shape, output stride, output storage offset
    specs = [
        ((1,), (), (), 0),
        ((1,), (1,), (1,), 0),
        ((3, 3), (2, 2), (1, 2), 0),
        ((3, 3), (2, 2), (1, 2), 1),
        ((3, 3), (2, 2), (2, 1), 0),
        # Scatter to larger dimensions
        ((16,), (2, 2, 2, 2), (8, 4, 2, 1), 0),
        # Scatter to larger dimensions with strides inverted
        ((16,), (2, 1, 1, 2), (1, 2, 4, 8), 0),
    ]

    for in_shape, out_shape, out_stride, offset in specs:
        destination = new_tensor(in_shape)
        source = new_tensor(out_shape)
        yield SampleInput(destination, source, out_shape, out_stride, storage_offset=offset)
def error_inputs_as_strided_scatter(op_info, device, **kwargs):
    """Yield an ErrorInput that scatters a 2x2 source into a 4x4 tensor with out-of-bounds strides."""
    float_tensor = partial(make_tensor, device=device, dtype=torch.float32, requires_grad=False)

    # Create a small tensor and try to scatter it out of bounds
    destination = float_tensor([4, 4])
    source = float_tensor([2, 2])
    out_of_bounds = SampleInput(destination, source, [2, 2], [200, 200], storage_offset=0)
    yield ErrorInput(
        out_of_bounds,
        error_regex="itemsize 4 requiring a storage size of 1604 are out of bounds for storage of size 64",
    )
def sample_inputs_combinations(op_info, device, dtype, requires_grad, **kwargs):
    """Yield a sample for every (1-D input, r, with_replacement) combination.

    Inputs have 1, 2 and 4 elements; ``r`` ranges over 1, 2 and 4.
    """
    element_sets = (
        (0,),
        (0, 1),
        (0, 1, 2, 3),
    )
    r_values = [1, 2, 4]

    # Nested loops enumerate in the same order as itertools.product.
    for elements in element_sets:
        for r in r_values:
            for with_replacement in (False, True):
                values = torch.tensor(elements, device=device, dtype=dtype, requires_grad=requires_grad)
                yield SampleInput(values, r=r, with_replacement=with_replacement)
def sample_inputs_cartesian_prod(op_info, device, dtype, requires_grad, **kwargs):
    """Yield samples passing one, two and then three 1-D tensors of different lengths."""
    # constructs 1-D tensors with varying number of elements
    operands = [
        torch.tensor(values, device=device, dtype=dtype, requires_grad=requires_grad)
        for values in ((0,), (0, 1), (0, 1, 2, 3))
    ]

    # Each sample takes a growing prefix: 1 tensor, then 2, then 3.
    for count in range(1, len(operands) + 1):
        yield SampleInput(*operands[:count])
def sample_inputs_cosine_similarity(op_info, device, dtype, requires_grad, **kwargs):
    """Yield samples with two same-shaped tensors, then samples with broadcastable shapes."""
    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # Ordered as input_shape, dict of dim and eps
    same_shape_specs = (
        ((S, S), {'dim': 1}),
        ((S, 2), {'dim': -1}),
        ((S,), {'dim': 0, 'eps': 0.5}),
        ((), {'dim': 0}),
        ((S, S, M), {'dim': 2}),
        ((S, S), {}),
    )
    for shape, op_kwargs in same_shape_specs:
        first = new_tensor(shape)
        second = new_tensor(shape)
        yield SampleInput(first, args=(second,), kwargs=op_kwargs)

    # Test for Broadcasting
    broadcast_specs = (
        ((1, 2, 3), (2, 1, 3), -1),
        ((1, 2, 3), (2, 1, 3), -2),
        ((2, 3), (2, 1, 3), -1),
    )
    for first_shape, second_shape, dim in broadcast_specs:
        first = new_tensor(first_shape)
        second = new_tensor(second_shape)
        yield SampleInput(first, args=(second,), kwargs={'dim': dim})
def sample_inputs_item(op_info, device, dtype, requires_grad, **kwargs):
    """Yield samples for shapes ``()``, ``()``, ``1`` and ``(1,)``; none of them require grad."""
    new_tensor = partial(make_tensor, dtype=dtype, device=device, requires_grad=False)

    # The original literals `(())` and `(1)` are just `()` and the int `1`.
    shapes = ((), (), 1, (1,))
    yield from (SampleInput(new_tensor(shape)) for shape in shapes)
def error_inputs_item(op, device, **kwargs):
    """Yield ErrorInputs built from float32 tensors of shape M, (S,), (S, S) and (S, M, L)."""
    float_tensor = partial(make_tensor, dtype=torch.float32, device=device, requires_grad=False)

    # The original literals `(M)` and `((S,))` are the int `M` and the tuple `(S,)`.
    shapes = (M, (S,), (S, S), (S, M, L))
    for shape in shapes:
        offending = SampleInput(float_tensor(shape))
        yield ErrorInput(
            offending,
            error_type=RuntimeError,
            error_regex="elements cannot be converted to Scalar",
        )
def sample_inputs_batch_norm(op_info, device, dtype, requires_grad, **kwargs):
    """Yield batch-norm SampleInputs as (input, running_mean, running_var, weight, bias).

    Three groups are produced, in this order:

    1. One sample per entry of ``cases`` with that entry's kwargs.  ``weight``
       and ``bias`` are ``None`` when the input has no channel dimension.
    2. Three samples that reuse the shape and running statistics of the LAST
       entry of ``cases`` and exercise weight-only, bias-only and
       neither-weight-nor-bias configurations.
    3. One training-mode sample where all four optional tensors are ``None``.

    Fix over the previous revision: group 2 unpacked ``weight``/``bias`` from
    its configuration lists but then always passed freshly made tensors for
    both, so the ``None`` permutations named in its comment were never
    generated.  The optional tensors are now omitted when the configuration
    says so.
    """
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
    make_arg_without_requires_grad = partial(make_tensor, device=device, dtype=dtype, requires_grad=False)

    # Ordered as: input shape, kwargs for training, momentum, eps
    cases = (
        ((S, S, S), {'training': True, 'momentum': 0.5, 'eps': 0.6}),
        ((3, 2, 4), {'training': False, 'momentum': -1.2}),
        ((3, 1), {'training': True, 'momentum': 0.0}),
        ((0,), {'training': True}),
        ((0,), {'training': False}),
        ((3, 2, 3, 4), {'training': True, 'momentum': -1.0, 'eps': 0.5}),
        ((3, 2, 3, 4), {'training': False, 'momentum': -1.0, 'eps': 0.5}),
        ((2, 1), {}),
    )

    for input_shape, sample_kwargs in cases:
        # args: running mean, running var, weight and bias should necessarily be of shape: (channels,)
        channels = input_shape[1] if len(input_shape) > 1 else 0
        weight = make_arg(channels) if channels > 0 else None
        bias = make_arg(channels) if channels > 0 else None
        running_mean = make_arg_without_requires_grad(channels, low=0)
        running_var = make_arg_without_requires_grad(channels, low=0)

        yield SampleInput(
            make_arg(input_shape),
            args=(
                running_mean,
                running_var,
                weight,
                bias,
            ),
            kwargs=sample_kwargs,
        )

    # Checking for permutations of weights and biases as `None`.
    # `input_shape`, `channels`, `running_mean` and `running_var` intentionally
    # carry over from the final iteration of the loop above.
    weight_sizes = [channels, None, None]
    bias_sizes = [None, channels, None]
    is_training = [True, False, False]

    for weight_size, bias_size, training in zip(weight_sizes, bias_sizes, is_training):
        sample_input = make_arg(input_shape)
        # A size of `None` means "omit this tensor" rather than "make one".
        weight = None if weight_size is None else make_arg(weight_size)
        bias = None if bias_size is None else make_arg(bias_size)
        yield SampleInput(
            sample_input,
            args=(
                running_mean,
                running_var,
                weight,
                bias,
            ),
            kwargs={'training': training},
        )

    # Test case for no optional kwargs
    # running_mean and running_var are required in evaluation mode (training: False) but not in training mode
    yield SampleInput(make_arg((1, 2, 3)), args=(None, None, None, None), kwargs={'training': True})
def sample_inputs_softmax_backward_data(op_info, device, dtype, requires_grad, **kwargs):
    """Yield (tensor, softmax_output, dim, input_dtype) samples.

    ``softmax_output`` is computed with ``torch.nn.functional.softmax`` from
    a separate random tensor of the same shape as the sample's input.
    """
    new_tensor = partial(
        make_tensor, device=device, dtype=dtype, requires_grad=requires_grad
    )
    shape_dim_pairs = [
        ((S,), 0),
        ((S, S), 0),
        ((S, M, S), -1),
    ]
    softmax_dtypes = [dtype]
    # NOTE(review): this compares `device` with the bare string 'cuda'; a value
    # such as 'cuda:0' would not take this branch -- confirm that is intended.
    if dtype == torch.float and device == 'cuda':
        softmax_dtypes.append(torch.float16)

    # Nested loops enumerate in the same order as itertools.product.
    for shape, dim in shape_dim_pairs:
        for softmax_dtype in softmax_dtypes:
            logits = new_tensor(shape)
            softmax_out = torch.nn.functional.softmax(logits, dim=dim, dtype=softmax_dtype)
            yield SampleInput(new_tensor(shape), softmax_out, dim, softmax_dtype)
def sample_inputs_native_batch_norm(op_info, device, dtype, requires_grad, **kwargs):
    """Re-pack the ``sample_inputs_batch_norm`` samples into a purely positional layout.

    The yielded args are (weight, bias, running_mean, running_var, training,
    momentum, eps); kwargs missing from the source sample default to
    ``True``, ``0.5`` and ``1e-5``.
    """
    for sample in sample_inputs_batch_norm(op_info, device, dtype, requires_grad, **kwargs):
        # torch.native_batch_norm does not support 0 numel tensors
        # IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)
        if sample.input.numel() != 0:
            running_mean, running_var, weight, bias = sample.args[:4]
            options = sample.kwargs
            yield SampleInput(
                sample.input,
                args=(
                    weight,
                    bias,
                    running_mean,
                    running_var,
                    options.get('training', True),
                    options.get('momentum', 0.5),
                    options.get('eps', 1e-5),
                ),
            )
def sample_inputs__native_batch_norm_legit(op_info, device, dtype, requires_grad, **kwargs):
    """Re-pack the ``sample_inputs_batch_norm`` samples, dropping missing running statistics.

    With both running statistics present the args are (weight, bias,
    running_mean, running_var, training, momentum, eps); if either is
    ``None`` both are left out of the args.
    """
    for sample in sample_inputs_batch_norm(op_info, device, dtype, requires_grad, **kwargs):
        # torch.native_batch_norm does not support 0 numel tensors
        # IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)
        if sample.input.numel() == 0:
            continue

        running_mean, running_var, weight, bias = sample.args[:4]
        options = sample.kwargs
        tail = (
            options.get('training', True),
            options.get('momentum', 0.5),
            options.get('eps', 1e-5),
        )
        if running_mean is None or running_var is None:
            yield SampleInput(sample.input, args=(weight, bias) + tail)
        else:
            yield SampleInput(sample.input, args=(weight, bias, running_mean, running_var) + tail)
def sample_inputs__batch_norm_with_update(op_info, device, dtype, requires_grad, **kwargs):
    """Re-pack the ``sample_inputs_batch_norm`` samples that have all four optional tensors.

    The yielded args are (weight, bias, running_mean, running_var, momentum,
    eps).  Samples with an empty input, or with any of the four tensors set
    to ``None``, are skipped.
    """
    for sample in sample_inputs_batch_norm(op_info, device, dtype, requires_grad, **kwargs):
        # torch.native_batch_norm does not support 0 numel tensors
        # IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)
        if sample.input.numel() == 0:
            continue

        optional_tensors = sample.args[:4]
        if any(tensor is None for tensor in optional_tensors):
            continue

        running_mean, running_var, weight, bias = optional_tensors
        momentum = sample.kwargs.get('momentum', 0.5)
        eps = sample.kwargs.get('eps', 1e-5)
        yield SampleInput(sample.input, args=(weight, bias, running_mean, running_var, momentum, eps))
def sample_inputs_nn_activation_relu(op_info, device, dtype, requires_grad, **kwargs):
    """Yield one sample per input rank, from 0-d up to 3-d."""
    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    shapes = (
        (),
        (S,),
        (S, S),
        (S, M, S),
    )
    yield from (SampleInput(new_tensor(shape)) for shape in shapes)
def sample_inputs_prelu(op_info, device, dtype, requires_grad, **kwargs):
    """Yield prelu samples.

    First come the generic elementwise-unary samples, using the op's own
    sample kwargs.  Then, for inputs of rank 0 to 3, come samples with
    several scalar weights and one 1-D weight, all passed positionally.
    The final two samples pass the weight as a keyword.
    """
    unary_kwargs = op_info.sample_kwargs(device, dtype, None)[0]
    yield from sample_inputs_elementwise_unary(op_info, device, dtype, requires_grad,
                                               op_kwargs=unary_kwargs)

    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    def scalar_weight(value):
        return torch.tensor(value, device=device, dtype=dtype, requires_grad=requires_grad)

    input_shapes = (
        (),
        (S,),
        (S, S),
        (S, M, S),
    )
    for shape in input_shapes:
        for value in (-1., 0., 0.8, 1.):
            weight = scalar_weight(value)
            yield SampleInput(new_tensor(shape), args=(weight,))

        # The 1-D weight has shape[1] entries, or a single entry when the input has fewer than two dims.
        num_channels = 1 if len(shape) < 2 else shape[1]
        yield SampleInput(new_tensor(shape), args=(new_tensor((num_channels,)),))

    unit_weight = scalar_weight(1.)
    yield SampleInput(new_tensor((S, S)), kwargs={'weight': unit_weight})
    yield SampleInput(new_tensor((S, S)), kwargs={'weight': new_tensor((S,))})
def reference_inputs_prelu(op, device, dtype, requires_grad, **kwargs):
    """Yield the prelu sample inputs followed by the elementwise-unary reference inputs."""
    # Each producer is called only when its turn comes, so the order of calls is unchanged.
    for produce in (sample_inputs_prelu, reference_inputs_elementwise_unary):
        yield from produce(op, device, dtype, requires_grad, **kwargs)
def sample_kwargs_prelu_scalar_weight(device, dtype, input):
    """Return ``(torch_kwargs, numpy_kwargs)`` holding one random 0-d ``weight``.

    The first dict holds the torch tensor; the second holds the same value
    as a NumPy array on CPU.  ``input`` is not used.
    """
    weight = torch.rand((), device=device, dtype=dtype)
    # NumPy does not support bfloat16, so we default to float32 (only for NumPy) in that case
    host_weight = (
        weight.to(dtype=torch.float32, device="cpu")
        if dtype == torch.bfloat16
        else weight.cpu()
    )
    torch_kwargs = {'weight': weight}
    numpy_kwargs = {'weight': host_weight.numpy()}
    return (torch_kwargs, numpy_kwargs)
def error_inputs_prelu(op, device):
    """Yield ErrorInputs for prelu weights that do not fit the input."""
    # (input shape, weight shape, expected error regex)
    scenarios = (
        # Weight has numel != 1, but self.ndim is zero-dim tensor
        ((), (2,), "Not allow zero-dim input tensor."),
        # Weight has numel != 1, but numel does not match channel size
        ((2, 8, 3), (9,), "Mismatch of parameter numbers and input channel size."),
        # Weight is neither a scalar nor 1-D tensor
        ((2, 8, 3), (2, 4), "prelu: Expected `weight` to be a scalar or 1D tensor, but got: ndim = 2"),
    )
    for input_shape, weight_shape, expected in scenarios:
        inp = make_tensor(input_shape, device=device, dtype=torch.float32)
        weight = make_tensor(weight_shape, device=device, dtype=torch.float32)
        yield ErrorInput(SampleInput(inp, kwargs={'weight': weight}),
                         error_regex=expected)
# src and index tensors must have the same # of dimensions
def sample_inputs_norm(op_info, device, dtype, requires_grad, **kwargs):
    """Yield named norm samples for finite orders.

    The samples cover order only, order with a positive dim, and the same
    with the dim negated.  A final group with order 1.5 uses inputs that
    contain no zeros.
    """
    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # ord = inf is tested in inputs_norm_inf as it fails on some tests
    order_only = [
        ((S, S), (2,), '2'),
        ((S, S), (0,), '0'),
        ((S, S), (0.5,), '0_5'),
        ((S, S), (1,), '1'),
        ((S, S), (3,), '3'),
        ((S, S), (-1,), 'neg_1'),
        ((S, S), (-2,), 'neg_2'),
        ((S, S), (-0.5,), 'neg_0_5'),
        ((S, S), (-1.5,), 'neg_1_5'),
    ]

    positive_dim = (
        ((S, S), (-2, 1,), 'neg_2_dim'),
        ((S, S), (-1, 1,), 'neg_1_dim'),
        ((S, S), (0, 1,), '0_dim'),
        ((S, S), (1, 1,), '1_dim'),
        ((S, S), (2, 1,), '2_dim'),
        ((S, S), (3, 1,), '3_dim'),
        ((S, S, S), (2, 1), '2_dim'),
        ((S, S, S), (3, 1), '3_dim'),
        ((S, S, S), (2, 1, True), 'keepdim_2_dim'),
        ((S, S, S), (3, 1, True), 'keepdim_3_dim'),
        ((), (2, 0), '2_dim_scalar'),
        ((), (3, 0), '3_dim_scalar'),
        ((), (2, 0, True), 'keepdim_2_dim_scalar'),
        ((), (3, 0, True), 'keepdim_3_dim_scalar'),
    )

    # Same as `positive_dim`, with the dim (second arg) negated and the name adjusted.
    negative_dim = [
        (shape, (op_args[0], -op_args[1]) + op_args[2:], label.replace("_dim", "_neg_dim"))
        for shape, op_args, label in positive_dim
    ]

    for group in (order_only, positive_dim, negative_dim):
        for shape, op_args, label in group:
            yield SampleInput(new_tensor(shape), args=op_args, name=label)

    nonzero_input = (
        ((S, S, S), (1.5,), '1_5_default'),
        ((S, S, S), (1.5, 1), '1_5_dim'),
        ((S, S, S), (1.5, -1), '1_5_neg_dim'),
        ((S, S, S), (1.5, 1, True), 'keepdim_1_5_dim'),
        ((S, S, S), (1.5, -1, True), 'keepdim_1_5_neg_dim'),
    )
    for shape, op_args, label in nonzero_input:
        yield SampleInput(new_tensor(shape, exclude_zero=True), args=op_args, name=label)
def sample_inputs_norm_fro(op_info, device, dtype, requires_grad, **kwargs):
    """Yield named samples on an (S, S) tensor: no args, ``'fro'``, and ``'fro'`` with dims [0, 1]."""
    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # (op args, sample name)
    variants = (
        ((), 'default'),
        (('fro',), 'fro_default'),
        (('fro', [0, 1],), 'fro'),
    )
    for op_args, label in variants:
        yield SampleInput(new_tensor((S, S)), args=op_args, name=label)
def sample_inputs_norm_nuc(op_info, device, dtype, requires_grad, **kwargs):
    """Yield named ``'nuc'`` samples: a 2-D input, and a 3-D input with dims [1, 2]."""
    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # (input shape, op args, sample name)
    variants = (
        ((S, S), ('nuc',), 'nuc'),
        ((S, S, S), ('nuc', [1, 2]), 'nuc_batched'),
    )
    yield from (
        SampleInput(new_tensor(shape), args=op_args, name=label)
        for shape, op_args, label in variants
    )
def sample_inputs_norm_inf(op_info, device, dtype, requires_grad, **kwargs):
    """Yield named samples on an (S, S) tensor for orders ``-inf`` and ``inf``, the latter also with a dim."""
    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # (op args, sample name)
    variants = (
        ((-inf,), '-inf'),
        ((inf,), 'inf'),
        ((inf, 1,), 'inf_2_dim'),
        ((inf, -1,), 'inf_2_neg_dim'),
    )
    for op_args, label in variants:
        yield SampleInput(new_tensor((S, S)), args=op_args, name=label)
def sample_inputs_equal(op, device, dtype, requires_grad, **kwargs):
    """Yield (lhs, rhs) samples over several shape pairs.

    ``broadcasts_input`` is set when the broadcast of both shapes differs
    from the lhs shape.  For pairs with identical shapes, an extra sample
    compares lhs with a detached clone of itself.
    """
    new_tensor = partial(
        make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    shape_pairs = (
        ((), ()),
        ((S,), ()),
        ((), (S,)),
        ((S, 1), (S,)),
        ((M, S), ()),
        ((S, S), (S, S)),
    )

    for lhs_shape, rhs_shape in shape_pairs:
        lhs = new_tensor(lhs_shape)
        rhs = new_tensor(rhs_shape)
        lhs_is_broadcast = lhs_shape != torch.broadcast_shapes(lhs_shape, rhs_shape)
        yield SampleInput(lhs, args=(rhs,), broadcasts_input=lhs_is_broadcast)

        if lhs_shape != rhs_shape:
            continue
        identical = lhs.clone().detach_()
        yield SampleInput(lhs, args=(identical,))
def sample_inputs_jiterator(op, device, dtype, requires_grad, **kwargs):
    """Yield samples with one lhs tensor and ``num_inputs - 1`` extra tensors of the rhs shape.

    ``num_inputs`` is read from ``kwargs``; ``sample_kwargs`` (default
    ``{}``) is forwarded as each sample's kwargs.
    """
    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    shape_pairs = (
        ((), ()),
        ((S,), ()),
        ((S, 1), (S,)),
        ((M, S), ()),
        ((S, M, S), (M, S)),
        ((S, M, S), (S, M, S)),
        ((M, 1, S), (M, S)),
        ((M, 1, S), (1, M, S)),
        ((0, 1, 3), (0, 10, 3)),
    )

    num_inputs = kwargs.get('num_inputs')
    op_kwargs = kwargs.get('sample_kwargs', {})

    for lhs_shape, rhs_shape in shape_pairs:
        lhs = new_tensor(lhs_shape)
        others = tuple(new_tensor(rhs_shape) for _ in range(num_inputs - 1))
        lhs_is_broadcast = lhs_shape != torch.broadcast_shapes(lhs_shape, rhs_shape)
        yield SampleInput(lhs, args=others, kwargs=op_kwargs, broadcasts_input=lhs_is_broadcast)
def sample_inputs_broadcast_shapes(op, device, dtype, requires_grad, **kwargs):
    """Yield samples whose input and args are shapes, not tensors.

    For each pair, the first shape is the sample input and the second is
    the only positional arg.
    """
    shape_pairs = (
        ((), ()),
        ((S,), ()),
        ((S, 1), (S,)),
        ((S, 1), S),
        ((M, S), ()),
        ((S, M, S), (M, S)),
        ((S, M, S), (S, M, S)),
        ((M, 1, S), (M, S)),
        ((M, 1, S), (1, M, S)),
        ((0, 1, 3), (0, 10, 3)),
    )

    for pair in shape_pairs:
        first_shape = pair[0]
        remaining = tuple(pair[1:])
        yield SampleInput(first_shape, args=remaining)
def sample_inputs_add_sub(op, device, dtype, requires_grad, **kwargs):
    """Yield the generic elementwise-binary samples, then two samples with an ``alpha`` kwarg.

    The alphas are ``True`` and ``False`` for bool, ``2`` and ``-3.125`` for
    floating-point and complex dtypes, and ``2`` and ``-3`` otherwise.
    """
    yield from sample_inputs_elementwise_binary(op, device, dtype, requires_grad, **kwargs)

    # Adds alpha kwarg cases
    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    if dtype is torch.bool:
        alphas = (True, False)
    else:
        fractional_ok = dtype.is_floating_point or dtype.is_complex
        alphas = (2, -3.125 if fractional_ok else -3)

    for alpha in alphas:
        lhs = new_tensor((S, S), **op.lhs_make_tensor_kwargs)
        rhs = new_tensor((S, S), **op.rhs_make_tensor_kwargs)
        yield SampleInput(lhs, args=(rhs,), kwargs={'alpha': alpha})
def error_inputs_arange(op, device, **kwargs):
    """Yield ErrorInputs for arange, one per (start, end, step) triple with its RuntimeError regex."""
    # (start, end, step, expected error regex)
    scenarios = (
        (0, 3, 0, 'step must be nonzer'),
        (0, -3, 2, 'bound inconsistent with step sign'),
        (0, 3, -2, 'bound inconsistent with step sign'),
        (0, float('inf'), 2, 'unsupported range'),
        (float('-inf'), 1, 2, 'unsupported range'),
    )
    for start, end, step, expected in scenarios:
        yield ErrorInput(SampleInput(start, args=(end, step)), error_type=RuntimeError, error_regex=expected)
def sample_inputs_arange(op, device, dtype, requires_grad, **kwargs):
    """Yield arange samples from (start, end, step) triples.

    Integer triples are listed explicitly.  Float triples are four explicit
    ones plus a float version of every integer triple.  A long range is
    added unless the dtype is int8 or uint8.  ``None`` for start or step
    means the argument is left out of the call.  Two samples with no dtype
    or device kwargs come last.
    """
    integer_triples = (
        # positive direction
        (-1, 2, 2),
        # negative direction
        (2, -3, -1),
        # start == end
        (1, 1, 1),
        (1, 1, -1),
        # divides evenly
        (0, -8, -4),
        (1, 5, 2),
        # bool
        (False, True, True),
        # default step
        (0, 1, None),
        # default start
        (None, 3, None),
    )

    def as_float_triple(triple):
        # Shift both bounds by 0.1 and turn the step into a float; None is kept as None.
        first, last, stride = triple
        return (
            None if first is None else first + 0.1,
            last + 0.1,
            None if stride is None else float(stride),
        )

    float_triples = (
        # includes endpoint
        (0., -8. - 1e-6, -4.),
        (1., 5. + 1e-6, 2.),
        (0., -8., -4.),
        (1., 5., 2.),
        *map(as_float_triple, integer_triples),
    )

    triples = integer_triples + float_triples
    if dtype not in (torch.int8, torch.uint8):
        triples = triples + ((0, 10000, None),)

    def factory_kwargs(**leading):
        # A new dict per sample, so samples never share a kwargs object.
        return {**leading, "dtype": dtype, "device": device}

    for start, end, step in triples:
        if start is not None and step is not None:
            yield SampleInput(start, args=(end, step), kwargs=factory_kwargs())
        elif start is not None:
            yield SampleInput(start, args=(end,), kwargs=factory_kwargs())
        else:
            assert step is None
            # Pass end as positional arg
            yield SampleInput(end, kwargs=factory_kwargs())
            # (Similar to) calling torch.arange(end=3)
            yield SampleInput(0, kwargs=factory_kwargs(end=end))

    yield SampleInput(2)
    yield SampleInput(1, args=(3, 1))
def sample_inputs_randn(op, device, dtype, requires_grad, **kwargs):
    """Yield samples whose input is a shape; dtype, device and requires_grad go in the kwargs."""
    for shape in ((M,), (S, S)):
        factory_kwargs = {
            "dtype": dtype,
            "device": device,
            "requires_grad": requires_grad,
        }
        yield SampleInput(input=shape, kwargs=factory_kwargs)
def sample_inputs_normal(op, device, dtype, requires_grad, **kwargs):
    """Yield samples of a non-grad tensor with positional (mean, std) args."""
    new_tensor = partial(make_tensor, dtype=dtype, device=device, requires_grad=False)
    # (tensor shape, mean, std)
    specs = (
        ((S, S), 0, 5),
        ((S, S, S), -2, 0.5),
    )
    yield from (
        SampleInput(new_tensor(shape), args=(mean, std))
        for shape, mean, std in specs
    )
def error_inputs_normal(op, device, **kwargs):
    """Yield an ErrorInput that passes a negative std (-1) with mean 0."""
    target = torch.zeros([10], device=device)
    bad_std = -1
    expected = fr"normal expects std >= 0.0, but found std {bad_std}"
    yield ErrorInput(
        SampleInput(target, args=(0, bad_std)),
        error_type=RuntimeError,
        error_regex=expected,
    )
def sample_inputs_cauchy(op, device, dtype, requires_grad, **kwargs):
    """Yield samples of a non-grad tensor with positional (median, gamma) args."""
    new_tensor = partial(make_tensor, dtype=dtype, device=device, requires_grad=False)
    # (tensor shape, median, gamma)
    specs = (
        ((M,), 0, 0.5),
        ((S, S), 0, 1),
        ((S, S, S), -2, 1),
    )
    for shape, *distribution_params in specs:
        yield SampleInput(new_tensor(shape), args=tuple(distribution_params))
def error_inputs_cauchy(op, device, **kwargs):
    """Yield an ErrorInput that passes a scale of 0 with median 0."""
    target = torch.zeros([10], device=device)
    bad_scale = 0
    expected = fr"cauchy_ expects sigma > 0.0, but found sigma={bad_scale}"
    yield ErrorInput(
        SampleInput(target, args=(0, bad_scale)),
        error_type=RuntimeError,
        error_regex=expected,
    )
def sample_inputs_exponential(op, device, dtype, requires_grad, **kwargs):
    """Yield samples of a non-grad tensor with a single positional rate arg."""
    new_tensor = partial(make_tensor, dtype=dtype, device=device, requires_grad=False)
    rate_by_shape = (
        ((M,), 0.5),
        ((S, S), 1),
        ((S, S, S), 1.5),
    )
    yield from (
        SampleInput(new_tensor(shape), args=(rate,))
        for shape, rate in rate_by_shape
    )
def error_inputs_exponential(op, device, **kwargs):
    """Yield an ErrorInput that passes a rate of 0."""
    target = torch.zeros([10], device=device)
    bad_rate = 0
    expected = fr"exponential_ expects lambda > 0.0, but found lambda={bad_rate}"
    yield ErrorInput(
        SampleInput(target, args=(bad_rate,)),
        error_type=RuntimeError,
        error_regex=expected,
    )
def sample_inputs_geometric(op, device, dtype, requires_grad, **kwargs):
    """Yield samples of a non-grad tensor with a single positional probability-like arg."""
    new_tensor = partial(make_tensor, dtype=dtype, device=device, requires_grad=False)
    value_by_shape = (
        ((M,), 0.2),
        ((S, S), 0.5),
        ((S, S, S), 0.8),
    )
    for shape, p in value_by_shape:
        target = new_tensor(shape)
        yield SampleInput(target, args=(p,))
def error_inputs_geometric(op, device, **kwargs):
    """Yield an ErrorInput that passes p = -1."""
    target = torch.zeros([10], device=device)
    bad_p = -1
    expected = fr"geometric_ expects p to be in \(0, 1\), but got p={bad_p}"
    yield ErrorInput(
        SampleInput(target, args=(bad_p,)),
        error_type=RuntimeError,
        error_regex=expected,
    )
def sample_inputs_log_normal(op, device, dtype, requires_grad, **kwargs):
    """Yield samples of a non-grad tensor with positional (mean, std) args."""
    new_tensor = partial(make_tensor, dtype=dtype, device=device, requires_grad=False)
    # (tensor shape, mean, std)
    specs = (
        ((M,), 0, 0.25),
        ((S, S), 0.5, 1),
        ((S, S, S), 0, 0.5),
    )
    for shape, *distribution_params in specs:
        yield SampleInput(new_tensor(shape), args=tuple(distribution_params))
def error_inputs_log_normal(op, device, **kwargs):
    """Yield an ErrorInput that passes a std of 0 with mean 0."""
    target = torch.zeros([10], device=device)
    bad_std = 0
    expected = fr"log_normal_ expects std > 0.0, but found std={bad_std}"
    yield ErrorInput(
        SampleInput(target, args=(0, bad_std)),
        error_type=RuntimeError,
        error_regex=expected,
    )
def sample_inputs_uniform(op, device, dtype, requires_grad, **kwargs):
    """Yield samples of a non-grad tensor with two positional bounds, smaller value first."""
    new_tensor = partial(make_tensor, dtype=dtype, device=device, requires_grad=False)
    # (tensor shape, lower bound, upper bound)
    specs = (
        ((M,), -100, 100),
        ((S, S), 0, 1),
        ((S, S, S), 1, 2),
    )
    for shape, lower, upper in specs:
        yield SampleInput(new_tensor(shape), args=(lower, upper))
def sample_inputs_ones_zeros(op, device, dtype, requires_grad, **kwargs):
    """Yield samples whose input is a size tuple, with dtype/device as kwargs."""
    # This is a bit messy: the args are expected to be tuples, so passing
    # the size as a tuple gives a tuple containing a tuple.
    options = {'dtype': dtype, 'device': device}
    for size in ((M,), (S, S)):
        # Each sample gets its own kwargs dict, as in a literal per iteration.
        yield SampleInput(size, kwargs=dict(options))
def sample_inputs_full(op, device, dtype, requires_grad, **kwargs):
    """Yield (size, fill_value) samples with dtype/device passed as keyword args."""

    def random_scalar(scalar_dtype):
        # Draw a random 0-dim CPU tensor and unwrap it to a Python scalar.
        return make_tensor([], dtype=scalar_dtype, device="cpu").item()

    # One fill value of the requested dtype and one drawn as torch.int;
    # both are drawn before the first sample is yielded.
    fill_values = [random_scalar(dtype), random_scalar(torch.int)]
    for size in ((M,), (S, S)):
        for fill_value in fill_values:
            yield SampleInput(size, fill_value, dtype=dtype, device=device)
def error_inputs_uniform(op, device, **kwargs):
    """Yield the error case where the first bound (3) exceeds the second (-1)."""
    sample = SampleInput(torch.zeros([10], device=device), args=(3, -1))
    yield ErrorInput(
        sample,
        error_type=RuntimeError,
        error_regex=r"uniform_ expects to return a \[from, to\) range, but found from=3 > to=-1",
    )
def error_inputs_linspace(op, device, **kwargs):
    """Yield invalid argument combinations with the error each is expected to raise."""
    # Negative step count.
    negative_steps = SampleInput(0, args=(3, -1))
    yield ErrorInput(
        negative_steps,
        error_type=RuntimeError,
        error_regex='number of steps must be non-negative',
    )

    # Float passed as the third value.
    float_steps = SampleInput(0, args=(3, 1.))
    yield ErrorInput(
        float_steps,
        error_type=TypeError,
        error_regex="received an invalid combination of arguments - got \\(int, int, float",
    )

    # Start/end tensors that are 1-dimensional rather than 0-dimensional.
    start = torch.tensor([1, 1], device=device)
    end = torch.tensor([3, 3], device=device)
    yield ErrorInput(
        SampleInput(start, args=(end, 1)),
        error_type=RuntimeError,
        error_regex="only supports 0-dimensional start and end tensors"
    )
def sample_inputs_linspace(op, device, dtype, requires_grad, **kwargs):
    """Yield (start, end, nstep) samples over a grid of scalar values.

    Combinations with a negative start or end are skipped for ``torch.uint8``.
    A final sample is yielded without dtype/device kwargs.
    """
    starts = (-2., 0, 4.3, 50)
    ends = (-3, 0, 1, 4, 50)
    nsteps = (0, 1, 50)
    # Extra case to replicate off-by-one issue on CUDA
    extra_cases = [(0, 7, 50)]
    is_uint8 = dtype == torch.uint8
    for start, end, nstep in chain(product(starts, ends, nsteps), extra_cases):
        has_negative_bound = end < 0 or start < 0
        if is_uint8 and has_negative_bound:
            continue
        yield SampleInput(start, args=(end, nstep), kwargs={"dtype": dtype, "device": device})

    yield SampleInput(1, args=(3, 1))
def sample_inputs_linspace_tensor_overload(op, device, dtype, requires_grad, **kwargs):
    """Yield (start, end, nstep) samples where start and/or end is a tensor.

    Combinations with a negative start or end are skipped for ``torch.uint8``.
    A final all-scalar sample is yielded without dtype/device kwargs.
    """
    starts = (-2., 0, 4.3, 50)
    ends = (-3, 0, 1, 4, 50)
    nsteps = (0, 1, 50)
    # (wrap start in a tensor?, wrap end in a tensor?)
    tensor_flags = ((True, True), (True, False), (False, True))
    to_tensor = partial(torch.tensor, device=device, requires_grad=False)

    def wrap(value):
        # Python floats become float32 tensors; anything else becomes int64.
        value_dtype = torch.float32 if isinstance(value, float) else torch.int64
        return to_tensor(value, dtype=value_dtype)

    # Extra case to replicate off-by-one issue on CUDA
    cases = list(product(starts, ends, nsteps, tensor_flags)) + [(0, 7, 50, (True, True))]
    for start, end, nstep, (start_is_tensor, end_is_tensor) in cases:
        if dtype == torch.uint8 and (end < 0 or start < 0):
            continue

        start_arg = wrap(start) if start_is_tensor else start
        end_arg = wrap(end) if end_is_tensor else end
        yield SampleInput(start_arg, args=(end_arg, nstep), kwargs={"dtype": dtype, "device": device})

    yield SampleInput(1, args=(3, 1))
def sample_inputs_logspace(op, device, dtype, requires_grad, **kwargs):
    """Yield (start, end, nstep[, base]) samples over a grid of scalar values.

    A ``base`` of ``None`` means the base argument is omitted from the sample.
    A final sample is yielded without dtype/device kwargs.
    """
    starts = (-2., 0, 1, 2, 4.3)
    ends = (-3, 0, 1.2, 2, 4)
    nsteps = (0, 1, 2, 4)
    bases = (2., 1.1) if dtype in (torch.int8, torch.uint8) else (None, 2., 3., 1.1, 5.)
    is_inexact = dtype.is_complex or dtype.is_floating_point
    for start, end, nstep, base in product(starts, ends, nsteps, bases):
        # NOTE(review): `and` binds tighter than `or`, so every negative start
        # is skipped for ALL dtypes, not only uint8. The parentheses only make
        # the existing behaviour explicit -- confirm whether
        # `dtype == torch.uint8 and (end < 0 or start < 0)` was intended.
        if (dtype == torch.uint8 and end < 0) or start < 0:
            continue
        if nstep == 1 and isinstance(start, float) and not is_inexact:
            # https://github.com/pytorch/pytorch/issues/82242
            continue
        trailing = () if base is None else (base,)
        yield SampleInput(start, args=(end, nstep) + trailing, kwargs={"dtype": dtype, "device": device})

    yield SampleInput(1, args=(3, 1, 2.))
def sample_inputs_logspace_tensor_overload(op, device, dtype, requires_grad, **kwargs):
    """Yield (start, end, nstep[, base]) samples where start and/or end is a tensor.

    A ``base`` of ``None`` means the base argument is omitted from the sample.
    A final all-scalar sample is yielded without dtype/device kwargs.
    """
    starts = (-2., 0, 1, 2, 4.3)
    ends = (-3, 0, 1.2, 2, 4)
    nsteps = (0, 1, 2, 4)
    bases = (2., 1.1) if dtype in (torch.int8, torch.uint8) else (None, 2., 3., 1.1, 5.)
    # (wrap start in a tensor?, wrap end in a tensor?)
    tensor_flags = ((True, True), (True, False), (False, True))
    to_tensor = partial(torch.tensor, device=device)

    def wrap(value):
        # Python floats become float32 tensors; anything else becomes int64.
        value_dtype = torch.float32 if isinstance(value, float) else torch.int64
        return to_tensor(value, dtype=value_dtype)

    is_inexact = dtype.is_complex or dtype.is_floating_point
    grid = product(starts, ends, nsteps, bases, tensor_flags)
    for start, end, nstep, base, (start_is_tensor, end_is_tensor) in grid:
        # NOTE(review): `and` binds tighter than `or`, so every negative start
        # is skipped for ALL dtypes, not only uint8. The parentheses only make
        # the existing behaviour explicit -- confirm whether
        # `dtype == torch.uint8 and (end < 0 or start < 0)` was intended.
        if (dtype == torch.uint8 and end < 0) or start < 0:
            continue
        if nstep == 1 and isinstance(start, float) and not is_inexact:
            # https://github.com/pytorch/pytorch/issues/82242
            continue

        start_arg = wrap(start) if start_is_tensor else start
        end_arg = wrap(end) if end_is_tensor else end
        trailing = () if base is None else (base,)
        yield SampleInput(
            start_arg,
            args=(end_arg, nstep) + trailing,
            kwargs={"dtype": dtype, "device": device},
        )

    yield SampleInput(1, args=(3, 1, 2.))
def sample_inputs_isclose(op, device, dtype, requires_grad, **kwargs):
    """Yield the elementwise-binary samples, then samples exercising rtol/atol/equal_nan."""
    yield from sample_inputs_elementwise_binary(op, device, dtype, requires_grad, **kwargs)

    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # Creates additional inputs to test the rtol, atol, and equal_nan params
    tolerances = (0., 1e-7)
    for rtol in tolerances:
        for atol in tolerances:
            for equal_nan in (False, True):
                lhs = new_tensor((S, S), **op.lhs_make_tensor_kwargs)
                rhs = new_tensor((S, S), **op.rhs_make_tensor_kwargs)
                yield SampleInput(
                    lhs,
                    args=(rhs,),
                    kwargs=dict(rtol=rtol, atol=atol, equal_nan=equal_nan),
                )
def error_inputs_isclose(op, device, **kwargs):
    """Yield error cases for a negative ``rtol`` and a negative ``atol``."""
    scalar = partial(make_tensor, device=device, dtype=torch.float, requires_grad=False)

    negative_rtol = SampleInput(scalar(()), args=(scalar(()),), kwargs={'rtol': -0.4})
    yield ErrorInput(
        negative_rtol,
        error_type=RuntimeError,
        error_regex='rtol must be greater than or equal to zero',
    )

    negative_atol = SampleInput(scalar(()), args=(scalar(()),), kwargs={'atol': -0.4})
    yield ErrorInput(
        negative_atol,
        error_type=RuntimeError,
        error_regex='atol must be greater than or equal to zero',
    )
def sample_inputs_t(op_info, device, dtype, requires_grad, **kwargs):
    """Yield a 2-D, a 1-D and a 0-D random tensor, in that order."""
    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
    for shape in ((1, 2), (2,), ()):
        yield SampleInput(new_tensor(shape))
def sample_inputs_mm(op_info, device, dtype, requires_grad, **kwargs):
    """Yield matrix pairs: a regular case, a conjugated case (complex only), and empty cases."""
    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
    lhs_shape = (S, M)
    rhs_shape = (M, S)

    yield SampleInput(new_tensor(lhs_shape), args=(new_tensor(rhs_shape),))

    if dtype.is_complex:
        # Second operand is a conjugated tensor with requires_grad re-applied.
        conj_rhs = new_tensor(rhs_shape).conj().requires_grad_(requires_grad)
        yield SampleInput(new_tensor(lhs_shape), args=(conj_rhs,))

    # Matmul of empty matrices
    yield SampleInput(new_tensor((0, S)), args=(new_tensor(S, M),))
    yield SampleInput(new_tensor((S, 0)), args=(new_tensor(0, M),))
def sample_inputs_addmm(op_info, device, dtype, requires_grad, **kwargs):
    """Yield three-tensor samples with ``alpha``/``beta`` keyword arguments.

    ``alpha`` and ``beta`` may be overridden through ``kwargs``; otherwise
    dtype-dependent defaults are used.
    """
    is_complex = dtype.is_complex
    alpha_val = kwargs.get('alpha', 2 + 3j if is_complex else 0.6)
    beta_val = kwargs.get('beta', 1 + 2j if is_complex else 0.2)
    scale_kwargs = dict(alpha=alpha_val, beta=beta_val)
    new_tensor = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    # (first shape, second shape, third shape, broadcasts_input)
    shape_cases = [
        ((2, 3), (2, 2), (2, 3), False),
        ((3, 3), (3, 3), (3, 3), False),
        # the first tensor is broadcast
        ((1,), (2, 2), (2, 3), True),
        ((), (2, 2), (2, 3), True),
    ]
    for first, second, third, broadcasts_input in shape_cases:
        sample = SampleInput(
            new_tensor(first),
            new_tensor(second),
            new_tensor(third),
            **scale_kwargs,
        )
        yield sample.with_metadata(broadcasts_input=broadcasts_input)

    if is_complex:
        square = (3, 3)
        # second tensor is a `.mH` view
        yield SampleInput(
            new_tensor(square),
            new_tensor(square, requires_grad=False).mH.requires_grad_(requires_grad),
            new_tensor(square),
            **scale_kwargs,
        )
        # third tensor is a `.mH` view
        yield SampleInput(
            new_tensor(square),
            new_tensor(square),
            new_tensor(square, requires_grad=False).mH.requires_grad_(requires_grad),
            **scale_kwargs,
        )

    # addmm of empty matrices
    if dtype.is_floating_point:
        yield SampleInput(new_tensor(S, M), new_tensor(S, 0), new_tensor(0, M), **scale_kwargs)
        # empty matmul with broadcastable input
        broadcast_sample = SampleInput(new_tensor(M), new_tensor(S, 0), new_tensor(0, M), **scale_kwargs)
        yield broadcast_sample.with_metadata(broadcasts_input=True)
def sample_inputs_sparse_sampled_addmm(op_info, device, dtype, requires_grad, **kwargs):
    """Yield samples of a CSR identity-pattern tensor plus two dense matrices.

    Every combination of ``m, n, k`` drawn from ``{0, 5}`` is produced.
    """
    if dtype.is_complex:
        alpha, beta = 2 + 3j, 1 + 2j
    else:
        alpha, beta = 0.6, 0.2
    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # sparse.sampled_addmm performs: alpha * (A @ B) * sparse_ones_like(C) + beta * C
    for m, n, k in product([0, 5], repeat=3):
        sparse_input = torch.eye(m, n, device=device, dtype=dtype).to_sparse_csr()
        sparse_input = sparse_input.requires_grad_(requires_grad)
        yield SampleInput(
            sparse_input,
            new_tensor((m, k)),
            new_tensor((k, n)),
            alpha=alpha,
            beta=beta,
        )
def sample_inputs_sparse_mm_reduce(op_info, device, dtype, requires_grad, **kwargs):
    """Yield (CSR identity matrix, dense matrix, reduction name) samples."""
    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
    reduction_names = ["sum", "mean", "amax", "amin"]

    for m in (5, 7):
        for k in (3, 11):
            for reduce in reduction_names:
                identity = torch.eye(m, m).to(device=device, dtype=dtype)
                sparse_input = identity.to_sparse_csr().requires_grad_(requires_grad)
                yield SampleInput(sparse_input, new_tensor((m, k)), reduce)
def sample_inputs_mv(self, device, dtype, requires_grad, **kwargs):
    """Yield a single (S, M) matrix with an (M,) vector."""
    new_tensor = partial(
        make_tensor, dtype=dtype, device=device, low=None, high=None, requires_grad=requires_grad
    )
    matrix = new_tensor(S, M)
    vector = new_tensor(M)
    yield SampleInput(matrix, vector)
def sample_inputs_bmm(self, device, dtype, requires_grad, **kwargs):
    """Yield a single pair of 3-D tensors shaped (M, S, M) and (M, M, S)."""
    new_tensor = partial(
        make_tensor, dtype=dtype, device=device, low=None, high=None, requires_grad=requires_grad
    )
    batch1 = new_tensor(M, S, M)
    batch2 = new_tensor(M, M, S)
    yield SampleInput(batch1, batch2)
def sample_inputs_dot_vdot(self, device, dtype, requires_grad, **kwargs):
    """Yield a pair of (S,) vectors, plus a pair with a conjugated second vector for complex dtypes."""
    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
    vec_shape = (S, )

    yield SampleInput(new_tensor(vec_shape), new_tensor(vec_shape))

    if not dtype.is_complex:
        return

    # dot/vdot for (conj(input), conj(arg_tensor)) and (conj(input), arg_tensor)
    # is tested in test_conj_view (which tests operations with only conjugated input tensor
    # -- not conjugated arg tensors)
    lhs = new_tensor(vec_shape)
    conj_rhs = new_tensor(vec_shape).conj().requires_grad_(requires_grad)
    yield SampleInput(lhs, conj_rhs)
def error_inputs_dot_vdot(op_info, device, is_ref=False, **kwargs):
    """Yield error cases: dtype mismatch, non-1D input, size mismatch, device mismatch.

    The device-mismatch case is only produced when ``device`` is not ``"cpu"``
    and ``is_ref`` is false.
    """
    new_tensor = partial(make_tensor, device=device, dtype=torch.float32)

    dtype_mismatch = SampleInput(new_tensor(1), args=(new_tensor(3, dtype=torch.float16),))
    yield ErrorInput(dtype_mismatch, error_regex='dot : expected both vectors to have same dtype')

    not_one_dim = SampleInput(new_tensor(1, 1), args=(new_tensor(3),))
    yield ErrorInput(not_one_dim, error_regex='1D tensors expected')

    size_mismatch = SampleInput(new_tensor(9), args=(new_tensor(3),))
    yield ErrorInput(size_mismatch, error_regex='inconsistent tensor size')

    if device != "cpu" and not is_ref:
        device_mismatch = SampleInput(new_tensor(3), args=(new_tensor(3, device="cpu"),))
        yield ErrorInput(device_mismatch, error_regex='Expected all tensors to be on the same device')
def sample_inputs_addmv(op_info, device, dtype, requires_grad, **kwargs):
    """Yield (input, matrix, vector) samples with ``beta``/``alpha`` kwargs."""
    new_tensor = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    # (input shape, matrix shape, vector shape, beta, alpha, broadcasts_input)
    cases = (
        ((S,), (S, M), (M,), 1, 1, False),
        ((S,), (S, M), (M,), 0.2, 0.6, False),
        # the input is broadcast
        ((1,), (S, M), (M,), 1, 1, True),
        ((1,), (S, M), (M,), 0.2, 0.6, True),
        ((), (S, M), (M,), 1, 1, True),
        ((), (S, M), (M,), 0.2, 0.6, True),
    )

    # addmv performs: beta * M + alpha * (mat @ vec)
    for input_shape, mat_shape, vec_shape, beta, alpha, broadcasts_input in cases:
        input_tensor = new_tensor(input_shape)
        operands = (new_tensor(mat_shape), new_tensor(vec_shape))
        yield SampleInput(
            input_tensor,
            args=operands,
            kwargs=dict(beta=beta, alpha=alpha),
            broadcasts_input=broadcasts_input,
        )
def sample_inputs_addbmm(op_info, device, dtype, requires_grad, **kwargs):
    """Yield (input, batch1, batch2) samples with ``beta``/``alpha`` kwargs.

    For complex dtypes each case additionally yields, first, a sample whose
    ``beta``/``alpha`` are scaled by complex constants.
    """
    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # input_shape, batch1_shape, batch2_shape, beta_val, alpha_val, is_broadcasting
    cases = [
        ((S, M), (S, S, S), (S, S, M), 1, 1, False),
        ((1,), (S, S, S), (S, S, M), 1, 1, True),
        ((S, M), (S, S, S), (S, S, M), 0.6, 0.2, False),
        ((1,), (S, S, S), (S, S, M), 0.6, 0.2, True),
        ((), (S, S, S), (S, S, M), 1, 1, True),
        ((), (S, S, S), (S, S, M), 0.6, 0.2, True),
    ]

    for input_shape, batch1_shape, batch2_shape, beta, alpha, is_broadcasting in cases:
        scalings = []
        if dtype.is_complex:
            scalings.append(dict(beta=beta * (1 + 2j), alpha=alpha * (2 + 3j)))
        scalings.append(dict(beta=beta, alpha=alpha))

        for scaling in scalings:
            # Fresh tensors are drawn for every sample.
            yield SampleInput(
                new_tensor(input_shape),
                args=(new_tensor(batch1_shape), new_tensor(batch2_shape)),
                kwargs=scaling,
                broadcasts_input=is_broadcasting,
            )
def sample_inputs_addcmul_addcdiv(op_info, device, dtype, requires_grad, **kwargs):
    """Yield three-tensor samples, each case once without and once with ``value``."""
    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # ((first shape, second shape, third shape), broadcasts_input)
    cases = [
        (((S, S), (S, S), (S, S)), False),
        (((S, S), (S, 1), (1, S)), False),
        (((1,), (S, S, 1), (1, S)), True),
        (((), (), ()), False),
        (((S, S), (), ()), True),
        (((), (S, S, 1), (1, S)), True),
    ]
    value = 3.14 if dtype.is_floating_point or dtype.is_complex else 3

    def build_args(entries):
        # addcdiv should accept inputs with zero value
        # Currently, it throws ZeroDivisionError when the denominator is zero
        # TODO: exclude_zeros can be removed after https://github.com/pytorch/pytorch/issues/73638 is fixed
        return tuple(
            new_tensor(entry, exclude_zero=True) if isinstance(entry, tuple) else entry
            for entry in entries
        )

    for shapes, broadcasts_input in cases:
        # Each of the two samples gets freshly drawn tensors.
        plain = SampleInput(*build_args(shapes))
        yield plain.with_metadata(broadcasts_input=broadcasts_input)

        with_value = SampleInput(*build_args(shapes), value=value)
        yield with_value.with_metadata(broadcasts_input=broadcasts_input)
def reference_inputs_addcmul_addcdiv(op_info, device, dtype, requires_grad, **kwargs):
    """Yield the regular samples, then mixed-dtype samples over a range of ``value`` scalars."""
    yield from sample_inputs_addcmul_addcdiv(
        op_info, device, dtype, requires_grad, **kwargs)

    # type promotion cases
    supported_dtypes = op_info.supported_dtypes(device)
    new_tensor = partial(make_tensor, device=device, requires_grad=requires_grad)

    # (dtype of the second tensor, dtype of the third tensor)
    dtype_pairs = (
        (torch.float64, torch.complex128),
        (torch.bfloat16, torch.float32),
    )
    # None means the sample is built without a `value` kwarg.
    scalars = (
        None,
        True, False,
        3.14, 3,
        1.0, 1,
        0.0, 0,
        -3.14, -3,
        3.14 + 2.71j,
    )

    for (second_dtype, third_dtype), value in product(dtype_pairs, scalars):
        if not (second_dtype in supported_dtypes and third_dtype in supported_dtypes):
            continue

        # RuntimeError: value cannot be converted without overflow
        if type(value) is complex and second_dtype is not torch.complex128:
            continue

        first = new_tensor([5, 5], dtype=dtype)
        second = new_tensor([5, 5], dtype=second_dtype)
        third = new_tensor([1, 5], dtype=third_dtype)

        # TypeError: addcdiv(): argument 'value' must be Number, not NoneType
        if value is None:
            yield SampleInput(first, args=(second, third))
        else:
            yield SampleInput(first, args=(second, third), kwargs=dict(value=value))
def sample_inputs_baddbmm(op_info, device, dtype, requires_grad, **kwargs):
    """Yield (input, batch1, batch2) samples with ``beta``/``alpha`` kwargs.

    Complex dtypes get an extra complex-scaled sample per case, plus one final
    sample built from transposed (and, for the two batches, conjugated) tensors.
    """
    # (input shape, batch1 shape, batch2 shape, alpha, beta, broadcasts_input)
    cases = [
        ((S, S, M), (S, S, S), (S, S, M), 1, 1, False),
        ((1,), (S, S, S), (S, S, M), 1, 1, True),
        ((S, S, M), (S, S, S), (S, S, M), 0.6, 0.2, False),
        ((1,), (S, S, S), (S, S, M), 0.6, 0.2, True),
        ((), (S, S, S), (S, S, M), 1, 1, True),
        ((), (S, S, S), (S, S, M), 0.6, 0.2, True),
    ]
    new_tensor = partial(
        make_tensor, device=device, dtype=dtype, requires_grad=requires_grad, low=None, high=None
    )
    is_complex = dtype.is_complex

    for input_shape, batch1_shape, batch2_shape, alpha, beta, broadcasts_input in cases:
        real_scaled = SampleInput(
            new_tensor(input_shape),
            new_tensor(batch1_shape),
            new_tensor(batch2_shape),
            beta=beta,
            alpha=alpha
        )
        yield real_scaled.with_metadata(broadcasts_input=broadcasts_input)

        if is_complex:
            complex_scaled = SampleInput(
                new_tensor(input_shape),
                new_tensor(batch1_shape),
                new_tensor(batch2_shape),
                beta=beta * (1 + 2j),
                alpha=alpha * (2 + 3j),
            )
            yield complex_scaled.with_metadata(broadcasts_input=broadcasts_input)

    if is_complex:
        input_tensor, batch1, batch2 = (
            new_tensor(shape) for shape in [(S, S, S), (S, M, S), (S, S, M)]
        )
        # `alpha` and `beta` here are the values left bound by the last
        # iteration of the loop above.
        yield SampleInput(
            input_tensor.transpose_(-1, 1),
            batch1.transpose(-1, 1).conj().requires_grad_(requires_grad),
            batch2.transpose(-1, 1).conj().requires_grad_(requires_grad),
            beta=beta * (1 + 2j),
            alpha=alpha * (2 + 3j),
        )
# TODO: add reduction kwargs
def sample_inputs_multilabel_soft_margin_loss(op_info, device, dtype, requires_grad, **kwargs):
    """Yield (input, target) samples, once without and once with a ``weight`` kwarg.

    Targets and weights are created with ``requires_grad=False``.
    """
    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
    no_grad_tensor = partial(new_tensor, requires_grad=False)

    for shape in ((S,), (S, S)):
        # Produce one with weight and one without.
        yield SampleInput(new_tensor(shape), args=(no_grad_tensor(shape),), kwargs={})

        weighted_input = new_tensor(shape)
        weighted_target = no_grad_tensor(shape)
        yield SampleInput(
            weighted_input,
            args=(weighted_target,),
            kwargs={'weight': no_grad_tensor(shape)},
        )
def sample_inputs_addr(op_info, device, dtype, requires_grad, **kwargs):
    """Yield (input, vec1, vec2) samples, with and without ``beta``/``alpha``.

    For floating-point dtypes without ``requires_grad``, two extra samples
    containing NaN values with ``beta=alpha=0.0`` are appended.
    """
    new_tensor = partial(
        make_tensor, device=device, dtype=dtype, requires_grad=requires_grad, low=None, high=None
    )

    # Default scaling: matrix input, then a tensor built with no shape
    # arguments that is marked as broadcast.
    yield SampleInput(new_tensor(S, M), new_tensor(S), new_tensor(M))
    yield SampleInput(new_tensor(), new_tensor(S), new_tensor(M)).with_metadata(broadcasts_input=True)

    if dtype.is_complex:
        alpha, beta = 0.1 + 0.3j, 0.4 + 0.6j
    elif dtype.is_floating_point:
        alpha, beta = 0.2, 0.6
    else:
        alpha, beta = 2, 3

    # Same two layouts with explicit scaling factors.
    yield SampleInput(new_tensor(S, M), new_tensor(S), new_tensor(M), beta=beta, alpha=alpha)
    scaled_broadcast = SampleInput(
        new_tensor(),
        new_tensor(S),
        new_tensor(M),
        beta=beta,
        alpha=alpha,
    )
    yield scaled_broadcast.with_metadata(broadcasts_input=True)

    # These samples fail gradcheck
    if not dtype.is_floating_point or requires_grad:
        return

    literal = partial(torch.tensor, device=device, dtype=dtype, requires_grad=requires_grad)
    # NaN in the input
    yield SampleInput(
        literal([[math.nan]]),
        literal([0.0]),
        literal([0.0]),
        beta=0.0,
        alpha=0.0,
    ).with_metadata(broadcasts_input=True)
    # NaN in both vectors
    yield SampleInput(
        literal([[0.0]]),
        literal([math.nan]),
        literal([math.nan]),
        beta=0.0,
        alpha=0.0,
    ).with_metadata(broadcasts_input=True)
def sample_inputs_zero_(op_info, device, dtype, requires_grad, **kwargs):
    """Yield a 0-D, a 3-D and a 1-D random tensor, in that order."""
    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
    yield from (SampleInput(new_tensor(shape)) for shape in ((), (S, S, S), (S,)))
def sample_inputs_multi_margin_loss(op_info, device, dtype, requires_grad, **kwargs):
    """Yield (input, target) samples covering ``p``, ``margin``, ``weight`` and ``reduction``.

    Targets are ``torch.long`` tensors and, like the weight, never require grad.
    All targets are created up front, before the first sample is yielded.
    """
    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
    new_target = partial(new_tensor, dtype=torch.long, requires_grad=False)
    new_weight = partial(new_tensor, requires_grad=False)

    # (input shape, target tensor, keyword arguments)
    cases = (
        ((), new_target([], low=0, high=1), {}),
        ((S,), new_target([], low=0, high=S), {"p": 1}),
        ((S,), new_target([1], low=0, high=S), {"p": 2}),
        ((S, M), new_target([S], low=0, high=M), {"margin": 1.0}),
        ((S, M), new_target([S], low=0, high=M), {"margin": -3.14}),
        ((M, S), new_target([M], low=0, high=S), {"weight": None}),
        ((M, S), new_target([M], low=0, high=S), {"weight": new_weight([S], low=-10., high=10.)}),
        ((M, S), new_target([M], low=0, high=S), {"reduction": "none"}),
        ((M, S), new_target([M], low=0, high=S), {"reduction": "mean"}),
        ((M, S), new_target([M], low=0, high=S), {"reduction": "sum"}),
    )

    for input_shape, target, loss_kwargs in cases:
        yield SampleInput(new_tensor(input_shape), args=(target,), kwargs=loss_kwargs)
def reference_inputs_multi_margin_loss(op_info, device, dtype, requires_grad, **kwargs):
    """Yield the regular samples, then the full product of p/margin/weight/reduction options.

    The same target tensor is reused for every sample generated from a given
    (shape, target) pair.
    """
    yield from sample_inputs_multi_margin_loss(op_info, device, dtype, requires_grad, **kwargs)

    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
    new_target = partial(new_tensor, dtype=torch.long, requires_grad=False)
    new_weight = partial(new_tensor, requires_grad=False)

    # (input shape, target tensor)
    shape_target_pairs = (
        ((), new_target([], low=0, high=1)),
        ((S,), new_target([], low=0, high=S)),
        ((S,), new_target([1], low=0, high=S)),
        ((M, S), new_target([M], low=0, high=S)),
    )
    ps = (1, 2)
    margins = (0, 7, -3.14)
    use_weight_options = (False, True)
    # None means the "reduction" key is left out of the kwargs.
    reductions = (None, "none", "mean", "sum")

    grid = product(shape_target_pairs, ps, margins, use_weight_options, reductions)
    for (input_shape, target), p, margin, use_weight, reduction in grid:
        input_tensor = new_tensor(input_shape)
        # The weight has one entry per element of the last input dimension
        # (a single entry for a 0-dim input).
        if input_tensor.ndim > 0:
            weight_shape = [input_tensor.size(-1)]
        else:
            weight_shape = [1]
        weight = new_weight(weight_shape, low=-10., high=10.) if use_weight else None

        loss_kwargs = {"p": p, "margin": margin, "weight": weight}
        if reduction is not None:
            loss_kwargs["reduction"] = reduction

        yield SampleInput(input_tensor, args=(target,), kwargs=loss_kwargs)
def error_inputs_multi_margin_loss(op, device, **kwargs):
    """Yield invalid reduction/input/target/weight/p cases with their expected errors.

    All tensors, including the targets, are float32.
    """
    new_tensor = partial(make_tensor, device=device, dtype=torch.float32)

    # invalid reduction
    yield ErrorInput(
        SampleInput(new_tensor(5, 4), args=(new_tensor(5,),), kwargs={'reduction': 'abc'}),
        error_type=ValueError,
        error_regex='abc is not a valid value for reduction',
    )

    # invalid input
    yield ErrorInput(
        SampleInput(new_tensor(5, 0), args=(new_tensor(5,),), kwargs={}),
        error_type=RuntimeError,
        error_regex=r'Expected non-empty vector or matrix with optional 0-dim batch size, but got: \[5, 0\]',
    )
    yield ErrorInput(
        SampleInput(new_tensor(0,), args=(new_tensor(5,),), kwargs={}),
        error_type=RuntimeError,
        error_regex=r'Expected non-empty vector or matrix with optional 0-dim batch size, but got: \[0\]',
    )

    # invalid target
    yield ErrorInput(
        SampleInput(new_tensor(5, 4), args=(new_tensor(5, 4),), kwargs={}),
        error_type=RuntimeError,
        error_regex=r'inconsistent target size, expected 5 but got \[5, 4\]',
    )

    # invalid target dtype
    yield ErrorInput(
        SampleInput(new_tensor(5, 4), args=(new_tensor(5,),), kwargs={}),
        error_type=RuntimeError,
        error_regex='expected scalar type Long but found Float',
    )

    # invalid weight
    yield ErrorInput(
        SampleInput(new_tensor(5, 4), args=(new_tensor(5,),), kwargs={'weight': new_tensor(())}),
        error_type=ValueError,
        error_regex='weight must be one-dimensional',
    )
    yield ErrorInput(
        SampleInput(new_tensor(5, 4), args=(new_tensor(5,),), kwargs={'weight': new_tensor(5, 4)}),
        error_type=ValueError,
        error_regex='weight must be one-dimensional',
    )
    yield ErrorInput(
        SampleInput(new_tensor(5, 4), args=(new_tensor(5,),), kwargs={'weight': new_tensor(5,)}),
        error_type=RuntimeError,
        error_regex=r'inconsistent weight size, expected 4 but got \[5\]',
    )

    # invalid p
    yield ErrorInput(
        SampleInput(new_tensor(5, 4), args=(new_tensor(5,),), kwargs={'p': 3}),
        error_type=ValueError,
        error_regex='only p == 1 and p == 2 supported',
    )
def sample_inputs_logsumexp(self, device, dtype, requires_grad, **kwargs):
    """Yield (tensor, dim, keepdim) samples, repeated at larger magnitudes for wide dtypes."""
    # (tensor shape, dim, keepdim)
    cases = (
        ((), (0,), True),
        ((S, S), (1,), True),
        ((S, S), (1,), False),
        ((S, S), (-2,), False),
        ((S, S), (0, 1), False),
    )
    # Test large inputs to check numerical stability
    wide_dtypes = (torch.float32, torch.float64, torch.complex64, torch.complex128)
    lower_bounds = (None, 1e3, 1e6) if dtype in wide_dtypes else (None,)

    for low in lower_bounds:
        # The upper bound is twice the lower bound, or unset when there is none.
        high = None if low is None else low * 2
        for shape, dim, keepdim in cases:
            tensor = make_tensor(
                shape,
                dtype=dtype,
                device=device,
                low=low,
                high=high,
                requires_grad=requires_grad,
            )
            yield SampleInput(tensor, dim, keepdim)
def reference_inputs_logsumexp(op, device, dtype, requires_grad, **kwargs):
    """Yield the regular logsumexp samples plus a few hand-written edge cases.

    The extra samples are a fixed three-element tensor, an empty tensor, and
    a 0-dim tensor holding ``inf``.
    """
    yield from sample_inputs_logsumexp(op, device, dtype, requires_grad, **kwargs)

    # https://github.com/pytorch/pytorch/issues/91843
    t = torch.tensor([20, 30, 100], dtype=dtype, device=device, requires_grad=requires_grad)
    yield SampleInput(t, 0, False)

    t = torch.tensor((), dtype=dtype, device=device, requires_grad=requires_grad)
    yield SampleInput(t, 0, False)

    # tests masking
    # https://github.com/pytorch/pytorch/pull/91860#pullrequestreview-1241344073
    # Build the sample on the requested device (it used to be created on the
    # default device regardless of `device`). The requested dtype is used only
    # when it is a floating-point type; for every other dtype the default
    # floating-point dtype is kept, exactly as before.
    inf_dtype = dtype if dtype.is_floating_point else None
    t = torch.tensor(float("inf"), dtype=inf_dtype, device=device)
    yield SampleInput(t, 0, True)
def sample_inputs_like_fns(self, device, dtype, requires_grad, **kwargs):
    """Yield a random tensor per case, with optional dtype/device override kwargs.

    A ``device='cuda'`` case is added when CUDA is available.
    """
    vec = (S,)
    # (tensor shape, keyword arguments forwarded to the sample)
    cases = [
        ((), {}),
        ((S, S), {}),
        ((0, S, 0), {}),
        (vec, {'dtype': dtype, 'device': device}),
        # Hard-code some dtypes/devices. We want to test cases where the
        # (dtype, device) is different from the input's (dtype, device)
        (vec, {'dtype': torch.double}),
        (vec, {'device': 'cpu'}),
        (vec, {'dtype': torch.double, 'device': 'cpu'}),
    ]
    if torch.cuda.is_available():
        cases.append((vec, {'device': 'cuda'}))

    new_tensor = partial(
        make_tensor, dtype=dtype, device=device, low=None, high=None, requires_grad=requires_grad
    )
    for shape, overrides in cases:
        yield SampleInput(new_tensor(shape), **overrides)
def reference_inputs_like_fns(op, device, dtype, requires_grad, **kwargs):
    """Yield the regular samples, then four layout variants of a tensor per extra shape.

    The variants are: as created, transposed, noncontiguous, and
    noncontiguous then transposed.
    """
    yield from sample_inputs_like_fns(op, device, dtype, requires_grad, **kwargs)

    new_tensor = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)
    # shape
    shapes = ((), (0,), (1, 0), (1, 1, 4, 5), (5, 3, 0, 1), (1, 4, 3, 1, 1))
    for shape in shapes:
        for noncontiguous in (False, True):
            # Each variant is built from a freshly drawn tensor. The
            # contiguous variants are created without passing the
            # `noncontiguous` keyword at all.
            layout = {'noncontiguous': True} if noncontiguous else {}
            yield SampleInput(new_tensor(shape, **layout))
            yield SampleInput(new_tensor(shape, **layout).transpose(0, -1))
def sample_inputs_multilabel_margin_loss(op_info, device, dtype, requires_grad, **kwargs):
    """Yield (input, target) samples, including one per ``reduction`` mode.

    Targets are ``torch.long`` tensors that never require grad; all of them
    are created before the first sample is yielded.
    """
    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
    new_target = partial(new_tensor, dtype=torch.long, requires_grad=False)

    # (input shape, target tensor, keyword arguments)
    cases = (
        ([], new_target([], low=0, high=1), {}),
        ([S], new_target([S], low=0, high=S), {}),
        ([M, S], new_target([M, S], low=0, high=S), {}),
        ([M, S], new_target([M, S], low=0, high=S), {"reduction": "none"}),
        ([M, S], new_target([M, S], low=0, high=S), {"reduction": "mean"}),
        ([M, S], new_target([M, S], low=0, high=S), {"reduction": "sum"}),
    )

    for input_shape, target, loss_kwargs in cases:
        yield SampleInput(new_tensor(input_shape), args=(target,), kwargs=loss_kwargs)
def reference_inputs_multilabel_margin_loss(op_info, device, dtype, requires_grad, **kwargs):
    """Yield the regular samples, then targets containing -1 across all reduction options."""
    yield from sample_inputs_multilabel_margin_loss(op_info, device, dtype, requires_grad, **kwargs)

    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
    random_target = partial(new_tensor, dtype=torch.long, requires_grad=False)
    literal_target = partial(torch.tensor, device=device, dtype=torch.long, requires_grad=False)

    # (input shape, target tensor)
    shape_target_pairs = (
        # random tests including -1 target labels
        ([], random_target([], low=-1, high=1)),
        ([S], random_target([S], low=-1, high=S)),
        ([M, S], random_target([M, S], low=-1, high=S)),
        # repeated target labels and -1 (labels after the first -1 are ignored)
        ([], literal_target(-1)),
        ([7], literal_target([2, 0, 6, -1, 4, -1, 6])),
        ([4, 5], literal_target([[4, -1, 0, -1, 2], [0, 0, 4, 1, 4], [-1, 3, -1, 1, 0], [4, 3, 2, 1, 0]])),
    )
    # None means no "reduction" key is passed.
    reductions = (None, "none", "mean", "sum")

    for input_shape, target in shape_target_pairs:
        for reduction in reductions:
            loss_kwargs = {} if reduction is None else {"reduction": reduction}
            yield SampleInput(new_tensor(input_shape), args=(target,), kwargs=loss_kwargs)
def error_inputs_multilabel_margin_loss(op, device, **kwargs):
    """Yield invalid reduction/input/target cases with their expected errors.

    All tensors, including the targets, are float32.
    """
    new_tensor = partial(make_tensor, device=device, dtype=torch.float32)

    # invalid reduction
    yield ErrorInput(
        SampleInput(new_tensor(5, 4), args=(new_tensor(5, 4),), kwargs={'reduction': 'abc'}),
        error_type=ValueError,
        error_regex='abc is not a valid value for reduction',
    )

    # invalid input
    yield ErrorInput(
        SampleInput(new_tensor(5, 0), args=(new_tensor(5, 4),), kwargs={}),
        error_type=RuntimeError,
        error_regex=r'Expected non-empty vector or matrix with optional 0-dim batch size, but got: \[5, 0\]',
    )
    yield ErrorInput(
        SampleInput(new_tensor(0,), args=(new_tensor(0,),), kwargs={}),
        error_type=RuntimeError,
        error_regex=r'Expected non-empty vector or matrix with optional 0-dim batch size, but got: \[0\]',
    )

    # invalid target
    yield ErrorInput(
        SampleInput(new_tensor(5, 4), args=(new_tensor(4,),), kwargs={}),
        error_type=RuntimeError,
        error_regex=r'inconsistent target size: \[4\] for input of size: \[5, 4\]',
    )
    yield ErrorInput(
        SampleInput(new_tensor(5, 4), args=(new_tensor((),),), kwargs={}),
        error_type=RuntimeError,
        error_regex=r'inconsistent target size: \[\] for input of size: \[5, 4\]',
    )
def get_independent_tensor(tensor):
    """Return a clone of ``tensor`` carrying the same ``requires_grad`` flag."""
    cloned = tensor.clone()
    return cloned.requires_grad_(tensor.requires_grad)
def sample_inputs_randint(self, device, dtype, requires_grad, **kwargs):
    """Yield ``(high, shape)`` and ``(low, high, shape)`` samples.

    Shapes, extra args and kwargs are taken from ``sample_inputs_like_fns``;
    ``device`` is filled into the kwargs when a sample does not set it.
    """
    low, high = 2, 10

    for like_sample in sample_inputs_like_fns(self, device, dtype, requires_grad, **kwargs):
        like_sample.kwargs.setdefault('device', device)
        shape = like_sample.input.shape
        # With high
        yield SampleInput(high, shape, *like_sample.args, **like_sample.kwargs)
        # With low and high
        yield SampleInput(low, high, shape, *like_sample.args, **like_sample.kwargs)
def sample_inputs_randint_like(self, device, dtype, requires_grad, **kwargs):
    """Yield ``(tensor, high)`` and ``(tensor, low, high)`` samples.

    Tensors, extra args and kwargs are taken from ``sample_inputs_like_fns``.
    The second sample of each pair uses a clone of the tensor rather than
    the tensor itself.
    """
    low, high = 2, 10

    for like_sample in sample_inputs_like_fns(self, device, dtype, requires_grad, **kwargs):
        extra_args = like_sample.args
        extra_kwargs = like_sample.kwargs
        # With high
        yield SampleInput(like_sample.input, high, *extra_args, **extra_kwargs)
        # With low and high
        cloned_input = get_independent_tensor(like_sample.input)
        yield SampleInput(cloned_input, low, high, *extra_args, **extra_kwargs)
def sample_inputs_margin_ranking_loss(op_info, device, dtype, requires_grad, **kwargs):
    """Yield three same-shaped tensors for every shape/margin/reduction combination.

    Only the first tensor honours ``requires_grad``; the two tensors passed
    through ``args`` never require grad.
    """
    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
    no_grad_tensor = partial(new_tensor, requires_grad=False)

    shapes = ((), (S,), (S, S), (S, S, S))
    margins = (0., 1.)
    reductions = ('sum', 'mean', 'none')

    for shape, margin, reduction in product(shapes, margins, reductions):
        loss_kwargs = {'margin': margin, 'reduction': reduction}
        first = new_tensor(shape)
        yield SampleInput(
            first,
            args=(no_grad_tensor(shape), no_grad_tensor(shape)),
            kwargs=loss_kwargs,
        )
def reference_inputs_margin_ranking_loss(op, device, dtype, requires_grad, **kwargs):
    """Yield the regular samples, then NaN, inf and broadcasting cases per reduction.

    The NaN and inf cases are only produced for floating-point dtypes. Each
    places a special value at a different index in ``inp1``, ``inp2`` and
    ``target``. Those tensors are written in place right after creation, so
    this path presumably expects ``requires_grad=False`` -- TODO confirm.
    """
    yield from sample_inputs_margin_ranking_loss(op, device, dtype, requires_grad, **kwargs)
    make_input = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    for reduction in ('sum', 'mean', 'none'):
        if dtype.is_floating_point:  # only supports ints and floats
            # NaN propagation
            inp1 = make_input((10, ))
            inp1[2] = float('nan')
            inp2 = make_input((10, ))
            inp2[4] = float('nan')
            target = make_input((10, ))
            # Was `inp2[9]`: the NaN belongs in the freshly created target.
            target[9] = float('nan')
            yield SampleInput(inp1, args=(inp2, target), kwargs={'reduction': reduction})

            # Inf handling
            inp1 = make_input((10, ))
            # Was `inp2[1]`, which wrote into the `inp2` of the NaN sample
            # that had already been yielded and left `inp1` without an inf.
            inp1[1] = float('inf')
            inp2 = make_input((10, ))
            inp2[4] = float('inf')
            target = make_input((10, ))
            # Was `inp2[7]`: the inf belongs in the freshly created target.
            target[7] = float('inf')
            yield SampleInput(inp1, args=(inp2, target), kwargs={'reduction': reduction})

        # Broadcasting
        inp1 = make_input((5, 2))
        inp2 = make_input((5, 1))
        target = make_input((1, 2))
        yield SampleInput(inp1, args=(inp2, target), kwargs={'reduction': reduction})
def error_inputs_margin_ranking_loss(op, device, **kwargs):
    """Yield error cases for an invalid reduction string and for mismatched shapes."""
    new_tensor = partial(make_tensor, device=device, dtype=torch.float32)

    # invalid reduction value.
    bad_reduction = SampleInput(
        new_tensor(5, 4),
        args=(new_tensor(5, 4), new_tensor(5, 4),),
        kwargs={'reduction': 'abc'},
    )
    yield ErrorInput(bad_reduction, error_type=ValueError, error_regex='is not a valid value')

    # invalid input shapes
    bad_shapes = SampleInput(new_tensor(5, 4), args=(new_tensor(5, 4), new_tensor(5,),))
    yield ErrorInput(bad_shapes, error_regex='margin_ranking_loss : All input tensors should')
def sample_inputs_new_fns(self, device, dtype, requires_grad, *, is_strided=False, **kwargs):
    """Yield ``(tensor, output_shape[, strides])`` samples with optional dtype/device kwargs.

    Strides are included as an extra positional argument only when
    ``is_strided`` is true. A ``device='cuda'`` case is added when CUDA is
    available.
    """
    # The alternate dtype is half when the MPS backend is available, double otherwise.
    other_dtype = torch.half if torch.backends.mps.is_available() else torch.double

    # input_shape, output_shape, strides, kwargs
    # lengths of output_shape and strides must be equal
    cases = [
        ((), (), (), {}),
        ((S, S), (2, 0), (3, 4), {}),
        ((0, S, 0), (3, 2, 2), (1, 2, 3), {}),
        ((S,), (2, 3), (7, 8), {'dtype': dtype, 'device': device}),
        # Hard-code some dtypes/devices. We want to test cases where the
        # (dtype, device) is different from the input's (dtype, device)
        ((S,), (10,), (S,), {'dtype': other_dtype}),
        ((S,), (1, 1, 12), (S, L, M), {'device': 'cpu'}),
        ((S,), (2, 2, 2), (L, M, S), {'dtype': other_dtype, 'device': 'cpu'}),
    ]
    if torch.cuda.is_available():
        cases.append(((S,), (7, 2), (3, 4), {'device': 'cuda'}))

    for input_shape, output_shape, strides, overrides in cases:
        tensor = make_tensor(
            input_shape,
            dtype=dtype,
            device=device,
            low=None,
            high=None,
            requires_grad=requires_grad,
        )
        positional = (output_shape, strides) if is_strided else (output_shape,)
        yield SampleInput(tensor, *positional, **overrides)
def sample_inputs_empty_strided(op, device, dtype, requires_grad=False, **kwargs):
    """Yield SampleInputs of (shape, strides) with requires_grad/dtype/device passed as keywords."""
    shape_stride_pairs = (
        ((), ()),
        ((S,), (4,)),
        ((S, S), (2, 1)),
        ((S, S, S), (2, 0, 1)),
    )
    for shape, strides in shape_stride_pairs:
        yield SampleInput(shape, strides, requires_grad=requires_grad, dtype=dtype, device=device)
def sample_inputs_empty(op, device, dtype, requires_grad, **kwargs):
    """Yield one SampleInput per shape, with the shape itself as the input."""
    shapes = ((), (0,), (1,), (1, 3, 5), (5, 3, 1), (1, 0, 5, 1))
    yield from (
        SampleInput(shape, device=device, dtype=dtype, requires_grad=requires_grad)
        for shape in shapes
    )
def sample_inputs_empty_permuted(op, device, dtype, requires_grad, **kwargs):
    """Yield a SampleInput for every (shape, permutation of that shape's dimension indices) pair."""
    shapes = ((), (0,), (1,), (1, 3, 5), (5, 3, 1), (1, 0, 5, 1))
    for shape in shapes:
        dim_indices = range(len(shape))
        for physical_layout in itertools.permutations(dim_indices):
            yield SampleInput(shape, physical_layout, device=device, dtype=dtype, requires_grad=requires_grad)
def error_inputs_empty_permuted(op_info, device, **kwargs):
    """Yield ErrorInputs pairing a size with a physical layout that should raise RuntimeError."""
    # Each entry: (size, physical_layout, expected error regex)
    failing_cases = (
        ((2,), (0, 1), "Number of dimensions in size does not match the length of the physical_layout"),
        ((2,), (3,), "Dimension out of range"),
        ((2, 3), (0, 0), "Duplicate dim not allowed"),
    )
    for size, physical_layout, message in failing_cases:
        yield ErrorInput(
            SampleInput(size, args=(physical_layout,)),
            error_type=RuntimeError,
            error_regex=message,
        )
def sample_inputs_scalar_tensor(op, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs whose input is a plain Python scalar."""
    # Not including a scalar tensor in vals because meta tests start failing due to
    # lack of meta support for _local_scalar_dense
    # torch.tensor(2, device=device)
    yield from (
        SampleInput(scalar, device=device, dtype=dtype, requires_grad=requires_grad)
        for scalar in (-5, 0, 1)
    )
def sample_inputs_eye(op, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs of n (input) and optionally m (single positional arg)."""
    # only ints >= 0 are allowed for both arguments, unless m is omitted
    sizes = (None, 0, 1, 2, 3, 4, 7, L, M, S)
    # n can never be omitted, so drop the None placeholder for it.
    row_counts = tuple(size for size in sizes if size is not None)
    for n, m in product(row_counts, sizes):
        # TODO: no layout
        _kwargs = {'device': device, 'dtype': dtype, 'requires_grad': requires_grad}
        extra_args = () if m is None else (m,)
        yield SampleInput(n, args=extra_args, kwargs=_kwargs)
def error_inputs_eye(op_info, device, **kwargs):
    """Yield ErrorInputs with a negative n or m."""
    # TODO: no layout
    _kwargs = {'device': device, 'dtype': torch.float32}
    # Each entry: (n, remaining positional args, expected error regex)
    failing_cases = (
        (-1, (), "n must be greater or equal to 0, got -1"),
        (-7, (42,), "n must be greater or equal to 0, got -7"),
        (0, (-3,), "m must be greater or equal to 0, got -3"),
    )
    for n, extra_args, message in failing_cases:
        yield ErrorInput(
            SampleInput(n, args=extra_args, kwargs=_kwargs),
            error_regex=message,
        )
def sample_inputs_new_full(self, device, dtype, requires_grad, **kwargs):
    """Extend each sample from sample_inputs_new_fns with a trailing fill-value scalar."""
    def random_scalar(scalar_dtype):
        # Python scalar taken from a random 0-d CPU tensor of the given dtype.
        return make_tensor([], dtype=scalar_dtype, device="cpu").item()

    for base in sample_inputs_new_fns(self, device, dtype, requires_grad, **kwargs):
        # The scalar we are passing to new_full must be the same dtype
        # as the one of the resulting tensor
        fill_dtype = base.kwargs.get('dtype', dtype)
        yield SampleInput(
            base.input, *base.args, random_scalar(fill_dtype), **base.kwargs)
def sample_inputs_full_like(self, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs of (tensor, fill_value, **overrides)."""
    def random_scalar(scalar_dtype):
        # Python scalar taken from a random 0-d CPU tensor of the given dtype.
        return make_tensor([], dtype=scalar_dtype, device="cpu").item()

    # Each entry: (shape, fill_value, kwargs forwarded with the sample)
    configs = [
        ((), random_scalar(dtype), {}),
        ((S, S), random_scalar(dtype), {}),
        ((0, S, 0), random_scalar(dtype), {}),
        ((S,), random_scalar(dtype), {'dtype': dtype, 'device': device}),
        # Hard-code some dtypes/devices. We want to test cases where the
        # (dtype, device) is different from the input's (dtype, device)
        ((S,), random_scalar(torch.double), {'dtype': torch.double}),
        ((S,), random_scalar(dtype), {'device': 'cpu'}),
        ((S,), random_scalar(torch.double), {'dtype': torch.double, 'device': 'cpu'}),
    ]
    if torch.cuda.is_available():
        configs.append(((S,), random_scalar(dtype), {'device': 'cuda'}))

    if not dtype.is_signed:
        # For unsigned dtypes, negative values are converted.
        configs.append(((S,), -random_scalar(dtype), {}))

    for shape, fill_value, overrides in configs:
        like = make_tensor(shape, dtype=dtype, device=device,
                           low=None, high=None,
                           requires_grad=requires_grad)
        yield SampleInput(like, fill_value, **overrides)
def sample_inputs_multinomial(self, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs of (non-negative tensor, num_samples, **overrides)."""
    # Each entry: (shape, num_samples, kwargs forwarded with the sample)
    configs = [
        ([3], 3, {}),
        ([10], 3, {}),
        ([3, 10], 3, {}),
        ([3], 3, {'replacement': False}),
        ([3], 3, {'replacement': True}),
        ([3, 4], 4, {'replacement': True}),
        ([3, 4], 4, {'replacement': False}),
    ]
    for shape, num_samples, overrides in configs:
        # low=0 keeps the generated values non-negative.
        weights = make_tensor(shape, dtype=dtype, device=device,
                              low=0, high=None,
                              requires_grad=requires_grad)
        yield SampleInput(weights, num_samples, **overrides)
def sample_inputs_normal_common(self, device, dtype, requires_grad, cases, **kwargs):
    """Yield SampleInput(mean, std, **case_kwargs) for each (mean, std, kwargs) entry in ``cases``.

    A ``list`` entry is treated as a shape and replaced by a tensor of that
    shape; any other value is passed through unchanged.
    """
    def materialize(spec):
        if not isinstance(spec, list):
            return spec
        return make_tensor(spec, dtype=dtype, device=device,
                           low=0, high=None,
                           requires_grad=requires_grad)

    for mean_spec, std_spec, case_kwargs in cases:
        mean = materialize(mean_spec)
        std = materialize(std_spec)
        yield SampleInput(mean, std, **case_kwargs)
def sample_inputs_normal_tensor_first(self, device, dtype, requires_grad, **kwargs):
    """Yield samples from sample_inputs_normal_common where the first argument is always a tensor."""
    # Each entry: (value_or_size, value_or_size, kwargs)
    tensor_first_cases = [
        ([], [], {}),
        ([3], [3], {}),
        ([3, 4, 2], [3, 4, 2], {}),
        ([2, 3], 1.1, {}),
        ([1, 2, 3], [5, 2, 3], {}),  # broadcasting
    ]
    yield from sample_inputs_normal_common(self, device, dtype, requires_grad, tensor_first_cases, **kwargs)
def sample_inputs_normal_tensor_second(self, device, dtype, requires_grad, **kwargs):
    """Yield samples whose first argument is a float; the last one passes a tensor as second argument."""
    mean, std = 1.6, 0.3
    yield SampleInput(mean, std, [2, 3], dtype=dtype, device=device)
    yield SampleInput(mean, std, [2, 2, 2], dtype=dtype, layout=torch.strided, device=device)

    std_tensor = make_tensor([4, 3], dtype=dtype, device=device, low=0, high=None, requires_grad=requires_grad)
    yield SampleInput(2.7, std_tensor)
def sample_inputs_bernoulli(self, device, dtype, requires_grad, **kwargs):
    """Yield one SampleInput per shape, with values generated between 0 and 1."""
    make_probs = partial(make_tensor, dtype=dtype, device=device,
                         low=0, high=1,
                         requires_grad=requires_grad)
    for shape in ([3], [], [0, 3], [2, 3, 4]):
        yield SampleInput(make_probs(shape))
def error_inputs_bernoulli(op_info, device, **kwargs):
    """Yield an ErrorInput whose ``out`` tensor is an expanded (self-overlapping) view."""
    # more than one element of the written-to tensor refers to a single memory location
    overlapping_out = torch.rand((1,), device=device).expand((6,))
    sample = SampleInput(torch.rand_like(overlapping_out), kwargs={'out': overlapping_out})
    yield ErrorInput(sample, error_regex='unsupported operation')
def sample_inputs_logcumsumexp(self, device, dtype, requires_grad, **kwargs):
    """Yield (tensor, dim) samples, once with a large value written into ``t[0]`` and once without."""
    shape_dim_pairs = (
        ((S, S, S), 0),
        ((S, S, S), 1),
        ((), 0),
    )
    for large_number, (shape, dim) in product((True, False), shape_dim_pairs):
        t = make_tensor(shape, dtype=dtype, device=device,
                        low=None, high=None,
                        requires_grad=requires_grad)
        # 0-d tensors cannot be indexed, so they are left untouched.
        if large_number and t.dim() > 0:
            t[0] = 10000
        yield SampleInput(t, dim)
def sample_inputs_trace(self, device, dtype, requires_grad, **kwargs):
    """Yield a single SampleInput holding an (S, S) tensor."""
    square = make_tensor((S, S), dtype=dtype, device=device,
                         low=None, high=None,
                         requires_grad=requires_grad)
    yield SampleInput(square)
def error_inputs_trace(op, device):
    """Yield an ErrorInput with a 3-D tensor, expected to fail with "expected a matrix"."""
    not_a_matrix = make_tensor((3, 4, 5), dtype=torch.float32, device=device)
    yield ErrorInput(SampleInput(not_a_matrix), error_regex="expected a matrix")
def sample_inputs_renorm(self, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs of an (S, S, S) tensor with three positional args per case."""
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)
    shape = (S, S, S)
    arg_triples = (
        (2, 1, 0.5),
        (2, -1, 0.5),
        (1, 2, 3),
        (float('inf'), 2, 0.5),
    )
    for op_args in arg_triples:
        yield SampleInput(make_arg(shape), args=op_args)
def sample_inputs_transpose_swapdims(self, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs of a tensor plus the two dims to exchange."""
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    # Each entry: (shape, (dim0, dim1))
    shape_and_dims = (
        ((1, 2, 3), (-1, -2)),
        ((1, 2, 3), (-1, 2)),
        ((1, 2, 3), (1, -2)),
        ((1, 2, 3), (1, 2)),
        ((), (0, 0)),
        ((1, ), (0, 0)),
        ((M, M), (0, 1)),
        ((S, S, S), (2, 0)),
    )
    for shape, dims in shape_and_dims:
        yield SampleInput(make_arg(shape), args=dims)
def _numpy_ref_transpose(a, dim0, dim1):
if a.ndim <= 1:
return a
return np.swapaxes(a, dim0, dim1)
def sample_inputs_adjoint(self, device, dtype, requires_grad, **kwargs):
    """Yield one SampleInput per shape; every shape has at least two dimensions."""
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)
    for shape in ((1, 2, 3), (M, M), (S, S, S), (S, M, S), (M, S, M, S)):
        yield SampleInput(make_arg(shape))
def sample_inputs_T(self, device, dtype, requires_grad, **kwargs):
    """Yield one SampleInput per 2-D shape."""
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)
    for shape in ((M, M), (M, L)):
        yield SampleInput(make_arg(shape))
def error_inputs_T(self, device, has_ndims_error=False):
    """Yield ErrorInputs with 1-D and 3-D tensors, but only when ``has_ndims_error`` is true."""
    make_arg = partial(make_tensor, device=device, dtype=torch.float32)

    # Deprecated behavior in regular PyTorch, but throws an error in primTorch:
    # https://github.com/pytorch/pytorch/issues/86968
    if not has_ndims_error:
        return

    ndims_error = (r'The use of `x\.T` on tensors of dimension other than 0 or 2 '
                   r'to reverse their shape is not supported\.')
    # ndims == 1, then ndims > 2
    for dims in ((M,), (M, S, L)):
        yield ErrorInput(SampleInput(make_arg(*dims)), error_regex=ndims_error)
def sample_inputs_singular_matrix_factors(op_info, device, dtype, requires_grad=False):
    """
    Yield pairs of tensors with shapes (*, m, k) and (*, n, k), where k <= min(m, n).

    The matrix product of a pair can be used to build a tensor of shape
    (*, m, n) of rank k.
    """
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    rank = 2
    batch_shapes = [(), (2,)]
    matrix_sizes = [3, 4]
    for batch, m, n in product(batch_shapes, matrix_sizes, matrix_sizes):
        left = make_arg((*batch, m, rank))
        right = make_arg((*batch, n, rank))
        yield left, right
def sample_inputs_svd_lowrank(op_info, device, dtype, requires_grad=False, **kwargs):
    """Yield (a, b, q=k, M=...) samples: first every pair with M=None, then every pair with a tensor M."""
    # Function that's well defined on the outputs for complex inputs
    def _grad_outputs(usv):
        u, sigma, v = usv
        return u @ v.mH, sigma

    for a, b in sample_inputs_singular_matrix_factors(op_info, device, dtype, requires_grad):
        k = a.shape[-1]

        # NOTE: since svd_lowrank relies on non rank-revealing SVD,
        # it inherits the problem of unstable behavior with repeated
        # singular values including zeros.
        # Since we want to avoid (repeated) zeros as singular values,
        # we can only use k for q.
        # This issue could be resolved by using a rank-revealing SVD
        # which does not include "zero" singular values.
        sample = SampleInput(a, b, q=k, M=None)
        yield sample.with_metadata(output_process_fn_grad=_grad_outputs)

    for a, b in sample_inputs_singular_matrix_factors(op_info, device, dtype, requires_grad):
        *batch, m, k = a.shape
        n = b.shape[-2]
        offset = make_tensor((*batch, m, n), dtype=dtype, device=device, requires_grad=requires_grad)
        sample = SampleInput(a, b, q=k, M=offset)
        yield sample.with_metadata(output_process_fn_grad=_grad_outputs)
def chunk_iter(iterable, size):
    """Yield consecutive tuples of up to ``size`` items from ``iterable``; the last one may be shorter."""
    source = iter(iterable)
    # Two-argument iter() keeps calling the lambda until it returns the
    # sentinel, i.e. an empty tuple once the source is exhausted.
    yield from iter(lambda: tuple(islice(source, size)), ())
def sample_inputs_pca_lowrank(op_info, device, dtype, requires_grad=False, **kwargs):
    """Reuse the svd_lowrank samples two at a time, replacing ``M`` with ``center`` False / True."""
    # The svd_lowrank samples all carry an 'M' kwarg, which is dropped here.
    samples = sample_inputs_svd_lowrank(op_info, device, dtype, requires_grad, **kwargs)
    for uncentered, centered in chunk_iter(samples, 2):
        for sample, center in ((uncentered, False), (centered, True)):
            del sample.kwargs['M']
            sample.kwargs['center'] = center
        yield uncentered
        yield centered
def np_sinc_with_fp16_as_fp32(x):
    """Apply ``np.sinc`` to ``x``, converting float16 input to float32 first."""
    # Context: numpy's sinc returns NaN when evaluated at 0 for fp16, so
    # fp16 values are promoted to fp32 before sinc is invoked.
    promoted = x.astype(np.float32) if x.dtype == np.float16 else x
    return np.sinc(promoted)
def sample_inputs_broadcast_to(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs of (tensor of ``size``, target ``shape``)."""
    # Each entry: (input size, shape to broadcast to)
    size_shape_pairs = (
        ((S, 1, 1), (S, S, S)),
        ((S, 1, S), (S, S, S)),
        ((S, 1), (S, S, S)),
        ((1,), (S, S, S)),
        ((1, S), (1, 1, S)),
        ((), ()),
        ((), (1, 3, 2)),
    )
    for size, shape in size_shape_pairs:
        source = make_tensor(size, dtype=dtype, device=device, low=None, high=None, requires_grad=requires_grad)
        yield SampleInput(source, shape)
def sample_inputs_broadcast_tensors(op_info, device, dtype, requires_grad, **kwargs):
    """Yield one SampleInput: the first shape is the input, the rest become positional tensors."""
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)
    shape_groups: tuple[tuple] = (((3,), (1, 2, 1), (1, 1), (5, 1, 1),),)

    for first_shape, *remaining_shapes in shape_groups:
        first = make_arg(first_shape)
        yield SampleInput(first, args=tuple(make_arg(shape) for shape in remaining_shapes))
def reference_inputs_broadcast_tensors(op, device, dtype, requires_grad, **kwargs):
    """Yield the regular samples, then contiguous and noncontiguous four-tensor cases."""
    yield from sample_inputs_broadcast_tensors(op, device, dtype, requires_grad, **kwargs)

    make_contiguous = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)
    make_noncontiguous = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad, noncontiguous=True)

    shape_groups = (
        ((), (1, 1), (1, 1, 7, 1), (3, 1, 1)),
        ((3, 5, 6), (1, 3, 5, 6), (1, 1, 1, 1, 6), (8, 3, 5, 6))
    )
    for first_shape, *remaining_shapes in shape_groups:
        for factory in (make_contiguous, make_noncontiguous):
            first = factory(first_shape)
            yield SampleInput(first, args=tuple(factory(shape) for shape in remaining_shapes))
def sample_inputs_block_diag(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs built from groups of shapes, plus a mixed real/complex variant for complex32/64."""
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)
    shape_groups: tuple[tuple] = (
        ((1, S), (2, S), (3, S),),
        ((S, 1), (S, 2), (S, 3),),
        ((1,), (2,), (3,),),
        ((2, S), (S,))
    )
    wants_mixed = dtype in (torch.complex32, torch.complex64)

    for shape, *other_shapes in shape_groups:
        yield SampleInput(make_arg(shape), args=tuple(make_arg(s) for s in other_shapes))

        # We also want to test mixed complex-non-complex inputs to block_diag
        if wants_mixed:
            if dtype == torch.complex32:
                non_complex_dtype = torch.float32
            else:
                non_complex_dtype = torch.float64
            make_arg_non_complex = partial(make_tensor, dtype=non_complex_dtype, device=device, requires_grad=requires_grad)
            yield SampleInput(make_arg_non_complex(shape), args=tuple(make_arg(s) for s in other_shapes))
def sample_inputs_cdist(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs of (x1, x2, p, compute_mode) for every compute mode, p and shape pair."""
    small_S = 2
    shape_pairs = (
        ((S, S, 2), (S, S + 1, 2)),
        ((S, S), (S, S)),
        ((S, S, S), (S, S, S)),
        ((3, 5), (3, 5)),
        ((2, 3, 5), (2, 3, 5)),
        ((1, 2, 3), (1, 2, 3)),
        ((1, 1), (S, 1)),
        ((0, 5), (4, 5)),
        ((4, 5), (0, 5)),
        ((0, 4, 5), (3, 5)),
        ((4, 5), (0, 3, 5)),
        ((0, 4, 5), (1, 3, 5)),
        ((1, 4, 5), (0, 3, 5)),
        # Using S here would make this one test take 9s
        ((small_S, small_S, small_S + 1, 2), (small_S, small_S, small_S + 2, 2)),
        ((small_S, 1, 1, small_S), (1, small_S, small_S)),
        ((1, 1, small_S), (small_S, 1, small_S, small_S)),
    )
    compute_modes = ['use_mm_for_euclid_dist', 'donot_use_mm_for_euclid_dist']
    # FIXME add an override for JIT and revert 0. back to 0
    # since it's accepted by eager
    p_values = [0., 1., 2., 3., 0.5, 1.5, 2.5, float("inf")]

    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    for cm, p, (t1_size, t2_size) in product(compute_modes, p_values, shape_pairs):
        # The args should never be non-contiguous as this is not supported in the backward
        yield SampleInput(make_arg(t1_size), make_arg(t2_size), p, cm)
def _fill_np(a, value):
a = a.copy()
a.fill(value)
return a
def _fill_sample_kwargs(device, dtype, input):
if dtype is torch.bool:
value = True
else:
value = 3
return ({'value': value}, {'value': value})
def sample_inputs_comparison_ops(op, device, dtype, requires_grad, **kwargs):
    """Yield the elementwise-binary samples, then one sample comparing a tensor with its clone."""
    yield from sample_inputs_elementwise_binary(op, device, dtype, requires_grad, **kwargs)

    # Adds a sample input where both tensors have the same values
    lhs = make_tensor((S, S), device=device, dtype=dtype, requires_grad=requires_grad)
    rhs = lhs.clone()
    yield SampleInput(lhs, args=(rhs,))
def sample_inputs_stack(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs of (list of same-shape tensors, dim) for dim in [-1, len(shape) - 1)."""
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # Each entry: (shape, number of tensors)
    shape_and_count = (
        ((3, 4), 1),
        ((1, 2, 1, 4), 3),
        ((0, 1, 0), 2),
    )
    for shape, num_tensors in shape_and_count:
        # The same list object is shared by every sample generated for this shape.
        tensors = [make_arg(shape) for _ in range(num_tensors)]
        yield from (
            SampleInput(tensors, args=(dim,))
            for dim in range(-1, len(shape) - 1)
        )
def sample_inputs_chunk_cat(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs of (list of tensors, dim, num_chunks).

    Covers same-ndim inputs, different-ndim inputs, and different-ndim
    inputs whose last two dims are laid out column-major.
    """
    # 1. If input tensors have different ndims, dim should be non-negative and be less than the ndims of every input tensors.
    #    If all input tensors have the same ndims, we support both negative and non-negative dim.
    # 2. For wrapped_dim, all tensors should have the same size for 0,...,wrapped_dim-1 dimensions.
    #    No requirements for (wrapped_dim, ...)-th dimension.
    # 3. Expect positive num_chunks
    # 4. Expect non-empty input tensor list and each input tensor should have at least 1 element
    # 5. Non-contiguous input tensors are allowed.
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # Each entry: (sizes, dim, num_chunks)
    same_ndim_cases = (
        ([torch.Size([1, 2, 3]), torch.Size([1, 2, 3])], -1, 5),
        ([torch.Size([1, 2, 129]), torch.Size([1, 2, 297])], -1, 5),
        ([torch.Size([1, 2, 3]), torch.Size([1, 2, 3])], 1, 5),
        (
            [
                torch.Size([3, 3, 2, 1]),
                torch.Size([1, 4, 2, 2]),
                torch.Size([2, 1, 3, 3]),
            ], 0, 2
        ),
    )
    for sizes, dim, num_chunks in same_ndim_cases:
        yield SampleInput([make_arg(size) for size in sizes], args=(dim, num_chunks))

    different_ndim_case = [
        torch.Size([2, 3, 3]),
        torch.Size([2, 3, 1, 2]),
        torch.Size([2, 3]),
        torch.Size([2, 3, 2]),
        torch.Size([2, 3, 271]),
    ]
    max_dim, num_chunks = 2, 3

    for dim in range(max_dim):
        contiguous = [make_arg(size) for size in different_ndim_case]
        yield SampleInput(contiguous, args=(dim, num_chunks))

    # non-contiguous
    for dim in range(max_dim):
        # make the last 2 dims column-major (i.e. non-contiguous)
        column_major = [
            make_arg(size).transpose(-2, -1).contiguous().transpose(-2, -1)
            for size in different_ndim_case
        ]
        yield SampleInput(column_major, args=(dim, num_chunks))
def error_inputs_chunk_cat(op_info, device, **kwargs):
    """Yield ErrorInputs of (list of tensors, dim, num_chunks), each with its expected error regex."""
    make_arg = partial(make_tensor, device=device, dtype=torch.float32)

    # Each entry: (sizes, dim, num_chunks, expected error regex)
    failing_cases = (
        # input tensors have different ndims but dim is negative
        ([torch.Size([2, 3]), torch.Size([4,])], -1, 3,
         '_chunk_cat expects non-negative dim when input tensors have different ndims'),
        # input tensors have different ndims but dim >= ndim of some input tensors
        ([torch.Size([2, 3]), torch.Size([4,])], 1, 3,
         '_chunk_cat expects dim < ndim for all input tensors'),
        # some tensors have different sizes for 0, ..., dim-1 dimensions.
        ([torch.Size([2, 3, 4]), torch.Size([4, 3])], 1, 3,
         '_chunk_cat expects same sizes of 0,...,dim-1 dimensions for all tensors'),
        # negative num_chunks
        ([torch.Size([2,]), torch.Size([3,])], 0, -1,
         '_chunk_cat expects positive num_chunks'),
        # zero as num_chunks
        ([torch.Size([2,]), torch.Size([3,])], 0, 0,
         '_chunk_cat expects positive num_chunks'),
        # empty input tensor list
        ([], 0, 1,
         '_chunk_cat expects a non-empty input tensor list'),
        # empty input tensor with 0 elements
        ([torch.Size([0,]), torch.Size([3,])], 0, 1,
         '_chunk_cat expects non-empty tensor'),
    )
    for sizes, dim, num_chunks, message in failing_cases:
        tensors = [make_arg(size) for size in sizes]
        yield ErrorInput(
            SampleInput(tensors, args=(dim, num_chunks)),
            error_regex=message,
        )
def sample_inputs_cat_concat(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs whose input is a list of tensors, with ``dim`` passed by keyword or position."""
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # Each entry: (first shape, second shape, kwargs forwarded with the sample)
    pair_cases: tuple[tuple, tuple, dict] = (  # type: ignore[assignment]
        ((S, S), (S, S), {'dim': -1}),
        ((S, S), (S, S), {'dim': 1}),
        ((M, S), (S, S), {'dim': 0}),  # different shapes
        ((1, 2, 3), (1, 2, 3), {'dim': -2}),
        ((0,), (0,), {'dim': 0}),  # empty tensor
        ((0,), (S, S), {'dim': 1}),  # empty tensor with unempty and dim=1 (special case for legacy_cat_wrap_dim)
        ((0, S), (S, S), {'dim': 0}),
        ((1,), (1,), {})  # dim not passed, fallback to default
    )
    for first_shape, second_shape, cat_kwargs in pair_cases:
        pair = [make_arg(first_shape), make_arg(second_shape)]
        yield SampleInput(pair, kwargs=cat_kwargs)

    # from coat_lite_mini
    channels_last = make_arg((2, 2, 2, 2), memory_format=torch.channels_last)
    yield SampleInput([channels_last], args=(1,),)
def error_inputs_cat(op_info, device, **kwargs):
    """Yield ErrorInputs for cat: overlapping ``out``, empty list, mismatched sizes/ndims/devices/dtypes, None."""
    make_arg = partial(make_tensor, device=device, dtype=torch.float32)

    size_mismatch = 'Sizes of tensors must match except in dimension'
    ndim_mismatch = 'Tensors must have same number of dimensions'

    # error inputs for more than one element of the written-to tensor refer to a single memory location
    pair = [make_arg((S, S)), make_arg((S, S))]
    expanded_out = make_arg((1, S)).expand((2 * S, S))
    yield ErrorInput(SampleInput(pair, kwargs={'out': expanded_out}),
                     error_regex='unsupported operation')

    # error inputs for empty tensors
    yield ErrorInput(SampleInput([], kwargs={'dim': 1}),
                     error_regex='non-empty list of Tensors')

    # error inputs for different sizes
    tensors = [make_arg((S, S, L, L)), make_arg((S, 0, L - 1, L))]
    yield ErrorInput(SampleInput(tensors, kwargs={'dim': 1}), error_regex=size_mismatch)
    tensors = [make_arg((S, 0, L - 1, L)), make_arg((S, S, L, L))]
    yield ErrorInput(SampleInput(tensors, kwargs={'dim': 1}), error_regex=size_mismatch)

    # error inputs for different dimensions
    tensors = [make_arg((S - 1, 0)), make_arg((S, 0, L - 1, L))]
    yield ErrorInput(SampleInput(tensors, kwargs={'dim': 1}), error_regex=ndim_mismatch)
    tensors = [make_arg((S, 0, L - 1, L)), make_arg((S - 1, 0))]
    yield ErrorInput(SampleInput(tensors, kwargs={'dim': 1}), error_regex=ndim_mismatch)

    # error inputs for same memory locations
    x = torch.zeros((0), device=device)
    y = torch.randn((4, 6), device=device)

    err_msg = "the written-to tensor refer to a single memory location"

    for aliased_out in (x, y):
        yield ErrorInput(SampleInput((x, y), kwargs={'dim': 0, 'out': aliased_out}),
                         error_regex=err_msg)

    z = torch.zeros((4, 6), device=device)
    yield ErrorInput(SampleInput((y, z), kwargs={'out': z[:2, :]}),
                     error_regex=err_msg)

    # error inputs for different devices
    if torch.device(device).type == 'cuda':
        x_cuda = make_tensor((3, 3), device=device, dtype=torch.float32)
        y_cpu = make_tensor((3, 3), device='cpu', dtype=torch.float32)
        yield ErrorInput(SampleInput((x_cuda, y_cpu)),
                         error_regex='Expected all tensors to be on the same device')

    # error inputs for different input sizes for more than 2 tensors
    tensors = [make_arg((L, 1)), make_arg((L, 1, 1)), make_arg((L, 1, 1))]
    yield ErrorInput(SampleInput(tensors), error_regex=ndim_mismatch)

    tensors = [make_arg((S, 1, M)), make_arg((S, 1, 1)), make_arg((S, M, 1))]
    yield ErrorInput(SampleInput(tensors, kwargs={'dim': 1}),
                     error_regex='Sizes of tensors must match')

    # error inputs for None input
    yield ErrorInput(SampleInput((make_arg((S, 1, 1)), None)), error_type=TypeError,
                     error_regex='got None')

    # error inputs for zero-dimensional tensors
    yield ErrorInput(SampleInput([make_arg(()), make_arg(())]),
                     error_regex='zero-dimensional.*cannot be concatenated')

    # error inputs for different dtype of out tensors
    d = make_tensor((2, 3), device=device, dtype=torch.double)
    x = make_tensor((2, 3), device=device, dtype=torch.float32)
    yield ErrorInput(SampleInput(x, kwargs={'out': d}), error_type=TypeError,
                     error_regex='invalid combination of arguments')
def reference_inputs_cat(op, device, dtype, requires_grad, **kwargs):
    """Yield the regular cat samples, then mixed-dtype/noncontiguous and empty-1D cases."""
    yield from sample_inputs_cat_concat(op, device, dtype, requires_grad, **kwargs)

    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # Noncontiguous type promoting tensors
    base = make_arg((3, 4, 2))
    noncontig_double = make_arg((3, 2, 2), noncontiguous=True, dtype=torch.double)
    permuted_half = make_arg((3, 3, 2), dtype=torch.float16).permute(1, 0, 2)
    yield SampleInput((base, noncontig_double, permuted_half), kwargs={'dim': 1})

    # Special 1D tensor with dim length of 0 case
    empty_1d = make_arg((0,))
    full_3d = make_arg((3, 2, 2))
    yield SampleInput((empty_1d, full_3d, empty_1d))
    yield SampleInput((empty_1d, empty_1d, empty_1d))
def _elementwise_type_promo_np(*args, type_promotion_kind):
    """Return the NumPy dtype matching the result dtype computed by ``prims.utils.elementwise_dtypes``.

    All pytree leaves of ``args`` are considered; NumPy arrays are converted
    with ``torch.from_numpy`` first and every other leaf is passed as-is.
    """
    leaves = pytree.arg_tree_leaves(*args)
    as_torch = tuple(
        torch.from_numpy(leaf) if isinstance(leaf, np.ndarray) else leaf
        for leaf in leaves
    )
    result_dtype, _ = prims.utils.elementwise_dtypes(
        *as_torch,
        type_promotion_kind=type_promotion_kind)
    return torch_to_numpy_dtype_dict[result_dtype]
def _cat_np(input_seq, dim=0):
inputs = tuple(a for a in input_seq if not (a.ndim == 1 and a.size == 0))
if len(inputs) == 0:
np_dtype = _elementwise_type_promo_np(
input_seq,
type_promotion_kind=prims.utils.ELEMENTWISE_TYPE_PROMOTION_KIND.NO_OPMATH)
return np.empty(0, dtype=np_dtype)
return np.concatenate(inputs, axis=dim)
def _floor_divide_np(a, b):
    """Floor-divide ``a`` by ``b`` with NumPy, after casting array operands to the promoted dtype."""
    dtype = _elementwise_type_promo_np(
        a,
        b,
        type_promotion_kind=prims.utils.ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT)

    def cast_if_array(operand):
        # Only ndarrays are cast; other operands are passed through unchanged.
        return operand.astype(dtype) if isinstance(operand, np.ndarray) else operand

    return np.floor_divide(cast_if_array(a), cast_if_array(b))
def sample_inputs_hstack_dstack_vstack(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs whose input is a tuple of three tensors."""
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)
    shape_triples = (
        # First Tensor being 1-D is special
        # case for hstack
        ((S,), (S,), (S,)),
        ((S, S), (S, S), (S, S)),
    )
    for shapes in shape_triples:
        yield SampleInput(tuple(make_arg(shape) for shape in shapes))
def error_inputs_hstack_dstack_vstack(op, device):
    """Yield ErrorInputs with tensors of differing ndim and with an empty tensor tuple."""
    make_arg = partial(make_tensor, dtype=torch.int32, device=device, requires_grad=False)
    shape_triples = (
        ((S,), (S, S, S, S), (S,)),
    )
    for shapes in shape_triples:
        tensors = tuple(make_arg(shape) for shape in shapes)
        # Different dimension tensor
        yield ErrorInput(SampleInput(tensors), error_regex="Tensors must have same number of dimensions")

    # empty tensor list
    yield ErrorInput(SampleInput(()), error_regex="expects a non-empty TensorList")
def sample_inputs_unbind(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs of (tensor, dim)."""
    # Note: we don't do any tests where we unbind along 0-length dims
    # because in that case unbind returns an empty tuple, and that breaks
    # some assumptions in some backward tests in test_ops.py
    shape_dims = (
        ((S,), 0),
        ((S, S), 0),
        ((S, S), 1),
        ((S, S), -1),
        ((S, 0, S), 0),
        ((S, S, S), 1),
    )
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)
    for shape, dim in shape_dims:
        yield SampleInput(make_arg(shape), args=(dim,))
def error_inputs_unbind(op_info, device):
    """Yield ErrorInputs expecting IndexError: a 0-d tensor, and a dim beyond the tensor's rank."""
    make_arg = partial(make_tensor, dtype=torch.int32, device=device, requires_grad=False)

    zero_dim = SampleInput(make_arg(()), args=(0,))
    yield ErrorInput(zero_dim, error_type=IndexError,
                     error_regex="Dimension specified as 0 but tensor has no dimensions")

    dim_too_large = SampleInput(make_arg((2,)), args=(2,))
    yield ErrorInput(dim_too_large, error_type=IndexError,
                     error_regex="Dimension out of range")
def reference_unbind(t, dim):
    """A numpy implementation of torch.unbind

    Returns a tuple with one array per index along ``dim``, each with that
    dimension removed. A zero-length ``dim`` produces an empty tuple.
    """
    num_slices = t.shape[dim]
    if num_slices == 0:
        # np.split rejects a section count of 0, so handle the empty case
        # here instead of letting it raise.
        return ()
    return tuple(s.squeeze(dim) for s in np.split(t, num_slices, dim))
def sample_inputs_gather(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs of (input, dim, index), including empty-index and 0-d input cases."""
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad, low=None, high=None)

    def index_tensor(values, index_dtype):
        return torch.tensor(values, dtype=index_dtype, device=device)

    yield SampleInput(
        make_arg((M, S)),
        0,
        gather_variable((S, S), 1, M, True, device=device))

    # Same kind of case with an int32 index
    yield SampleInput(
        make_arg((M, S)),
        0,
        gather_variable((S, S), 1, M, True, device=device).to(torch.int32))

    yield SampleInput(
        make_arg((M, S)),
        1,
        gather_variable((M, S // 2), 0, S, True, device=device))

    # Empty index tensor case, see: https://github.com/pytorch/pytorch/pull/65006
    for empty_index in ([], [[], []]):
        yield SampleInput(make_arg((S,)), 0, index_tensor(empty_index, torch.uint8))

    # 0D tensor case
    for scalar_index in ([0], 0):
        yield SampleInput(make_arg(()), 0, index_tensor(scalar_index, torch.int64))
def _fill_indices(idx, dim, dim_size, elems_per_row, m, n, o):
for i in range(1 if dim == 0 else m):
for j in range(1 if dim == 1 else n):
for k in range(1 if dim == 2 else o):
ii = [i, j, k]
ii[dim] = slice(0, idx.size(dim) + 1)
idx[tuple(ii)] = torch.randperm(dim_size)[0:elems_per_row]
def error_inputs_gather(op_info, device, **kwargs):
    """Yield ErrorInputs for gather: size/ndim/dtype mismatches, out-of-bounds index (CPU), overlapping ``out``."""
    def float_tensor(values):
        return torch.tensor(values, device=device, dtype=torch.float32)

    def long_tensor(values):
        return torch.tensor(values, device=device, dtype=torch.long)

    # src is [1, 2]
    #        [3, 4]
    square_values = ((1, 2), (3, 4))
    # idx is [0, 0]
    #        [1, 0]
    square_index = ((0, 0), (1, 0))

    # Index should be smaller than self except on dimension 1
    idx = long_tensor(square_index)
    bad_src = make_tensor((1, 1), device=device, dtype=torch.float32)
    yield ErrorInput(SampleInput(bad_src, args=(1, idx,)),
                     error_regex="Size does not match at dimension 0")

    # TODO: FIXME
    # out.dtype must match src.dtype
    # Creates new src & idx since SampleInputs can't share tensors
    src = float_tensor(square_values)
    idx = long_tensor(square_index)
    out = torch.empty((2, 2), device=device, dtype=torch.float64)
    yield ErrorInput(SampleInput(src, args=(1, idx), kwargs={'out': out}),
                     error_regex="Expected out tensor to have dtype")

    ndim_mismatch = "Index tensor must have the same number of dimensions"

    # src and index tensors must have the same # of dimensions
    # idx too few dimensions
    src = float_tensor(square_values)
    idx = long_tensor((0, 0))
    yield ErrorInput(SampleInput(src, args=(1, idx)), error_regex=ndim_mismatch)

    # src too few dimensions
    src = float_tensor((1, 2))
    idx = long_tensor(square_index)
    yield ErrorInput(SampleInput(src, args=(0, idx)), error_regex=ndim_mismatch)

    # index out of bounds
    # NOTE: this ErrorInput is guarded because bounds checking does not occur on CUDA devices
    if torch.device(device).type == 'cpu':
        src = float_tensor(square_values)
        idx = long_tensor(((0, 23), (1, 0)))
        yield ErrorInput(SampleInput(src, args=(1, idx,)),
                         error_regex="index 23 is out of bounds for dimension")

    # ``out`` is an expanded view, is the input itself, or shares storage with the index
    x = torch.rand((1,), device=device).expand((3,))
    src = torch.rand((6,), device=device)
    ind = long_tensor([2, 1, 0])

    yield ErrorInput(SampleInput(src, args=(0, ind,), kwargs={'out': x}),
                     error_type=RuntimeError,
                     error_regex='unsupported operation')

    yield ErrorInput(SampleInput(src, args=(0, ind,), kwargs={'out': src}),
                     error_type=RuntimeError,
                     error_regex='unsupported operation')

    yield ErrorInput(SampleInput(ind.clone(), args=(0, ind[1:],), kwargs={'out': ind[:1]}),
                     error_type=RuntimeError,
                     error_regex='unsupported operation')
def error_inputs_take(op_info, device, **kwargs):
    """Yield ErrorInputs for take whose ``out`` is an expanded view, the input, or a slice of the index."""
    overlap_error = {'error_type': RuntimeError, 'error_regex': 'unsupported operation'}

    x = torch.rand((1,), device=device).expand((3,))
    src = torch.rand((6,), device=device)
    ind = torch.tensor([2, 1, 0], device=device, dtype=torch.int64)

    # out is an expanded view
    yield ErrorInput(SampleInput(src, args=(ind,), kwargs={'out': x}), **overlap_error)

    # out is the input tensor itself
    yield ErrorInput(SampleInput(src, args=(ind,), kwargs={'out': src}), **overlap_error)

    # out is a slice of the index tensor
    yield ErrorInput(SampleInput(ind.clone(), args=(ind[1:],), kwargs={'out': ind[:-1]}), **overlap_error)
# Error inputs for scatter
def error_inputs_scatter_and_scatter_add(op_info, device, **kwargs):
    """Yield ``ErrorInput`` cases shared by ``scatter`` and ``scatter_add``.

    Every sample has the form ``SampleInput(dst, args=(0, idx, src))``: a
    float32 ``src`` from ``make_tensor``, a long ``idx`` and a zero-filled
    ``dst``.
    """
    plain_rows = ((0, 1), (1, 2))
    rows_with_34 = ((34, 1), (1, 2))

    # (src shape, index rows, dst shape, dst dtype, expected error regex)
    cases = [
        # Error when self.dtype != src.dtype (and src is not a scalar)
        ((2, 5), plain_rows, (3, 5), torch.double,
         "Expected self.dtype to be equal to src.dtype"),
        # Index and destination must have the same number of dimensions
        ((2, 5), plain_rows, (3, 5, 3), torch.float32,
         "Index tensor must have the same number of dimensions as self tensor"),
        # Index and src must have the same number of dimensions when src is not a scalar
        ((2, 5, 2), rows_with_34, (3, 5), torch.float32,
         "Index tensor must have the same number of dimensions as src tensor"),
    ]

    # Index out of bounds
    # NOTE: this ErrorInput is guarded because bounds checking does not occur on CUDA devices
    if torch.device(device).type == 'cpu':
        cases.append(
            ((2, 5), rows_with_34, (3, 5), torch.float32,
             "index 34 is out of bounds for dimension 0 with size 3"))

    for src_shape, idx_rows, dst_shape, dst_dtype, regex in cases:
        src = make_tensor(src_shape, device=device, dtype=torch.float32)
        idx = torch.tensor(idx_rows, device=device, dtype=torch.long)
        dst = torch.zeros(dst_shape, device=device, dtype=dst_dtype)
        yield ErrorInput(SampleInput(dst, args=(0, idx, src)), error_regex=regex)
def error_inputs_renorm(op_info, device, **kwargs):
    """Yield one ``ErrorInput`` for ``renorm`` called on a 0-dimensional tensor."""
    scalar_input = torch.randn((), device=device)
    sample = SampleInput(scalar_input, args=(0.5, 0, 1.0))
    yield ErrorInput(
        sample,
        error_type=RuntimeError,
        error_regex="needs at least 2 dimensions, got 0 dimensions")
def error_inputs_ormqr(op_info, device, **kwargs):
    """Yield ``ErrorInput`` cases for ``ormqr``.

    Covers a 0-dimensional input and the shape combination reported in
    https://github.com/pytorch/pytorch/issues/85218.
    """
    scalar = torch.randn((), device=device)
    yield ErrorInput(
        SampleInput(scalar, args=(scalar, scalar)),
        error_type=RuntimeError,
        error_regex="input must have at least 2 dimensions")

    # https://github.com/pytorch/pytorch/issues/85218
    empty_cols = torch.full((5, 0,), 1, device=device)
    vector = torch.full((5,), 1, device=device)
    square = torch.full((5, 5,), 1, device=device)
    # Two positional boolean flags, both set, exactly as in the issue's reproducer
    first_flag, second_flag = True, True
    yield ErrorInput(
        SampleInput(empty_cols, args=(vector, square, first_flag, second_flag)),
        error_type=RuntimeError,
        error_regex=r"tau.shape\[-1\] must be equal to min\(other.shape\[-2\], input.shape\[-1\]\)")
def error_inputs_diag(op_info, device, **kwargs):
    """Yield ``ErrorInput`` cases for ``diag`` on inputs that are neither 1D nor 2D."""
    # a 0-dimensional tensor followed by a 3-dimensional one
    for bad_shape in ((), (1, 1, 1)):
        bad_input = torch.randn(bad_shape, device=device)
        yield ErrorInput(
            SampleInput(bad_input, args=(0,)),
            error_type=RuntimeError,
            error_regex="1D or 2D")
def error_inputs_embedding(op_info, device, **kwargs):
    """Yield ``ErrorInput`` cases for ``embedding`` whose weight is not 2-D."""
    indices = torch.rand(2, 2, device=device).long()
    scalar_weight = torch.tensor(1.0, device=device)
    weight_3d = torch.tensor(1.0, device=device).reshape(1, 1, 1)

    for weight in (scalar_weight, weight_3d):
        yield ErrorInput(
            SampleInput(weight, args=(indices,)),
            error_type=RuntimeError,
            error_regex="'weight' must be 2-D")
def error_inputs_t(op_info, device, **kwargs):
    """Yield one ``ErrorInput`` for ``t`` applied to a 4-dimensional tensor."""
    four_d = torch.randn(2, 3, 4, 5, device=device)
    yield ErrorInput(SampleInput(four_d), error_regex="expects a tensor with <= 2")
def error_inputs_multinomial(op_info, device, **kwargs):
    """Yield ``ErrorInput`` cases for ``multinomial``.

    The first group checks argument validation (input rank, input dtype,
    ``out`` dtype, sample count, number of categories) on any device. The
    second group, produced only when ``device`` is a CPU device, checks
    invalid probability values for both ``replacement`` settings.
    """
    x = torch.empty(1, 2, 3, dtype=torch.double, device=device)
    yield ErrorInput(SampleInput(x, args=(2,)),
                     error_regex="prob_dist must be 1 or 2 dim")

    x = torch.empty(1, 2, dtype=torch.long, device=device)
    yield ErrorInput(SampleInput(x, args=(2,)),
                     error_regex="multinomial only supports floating-point dtypes for input")

    x = torch.empty(1, 2, dtype=torch.double, device=device)
    y = torch.empty(1, 2, dtype=torch.double, device=device)
    yield ErrorInput(SampleInput(x, args=(2,), kwargs=dict(out=y)),
                     error_regex="multinomial expects Long tensor out")

    # non-positive sample counts
    for num_samples in (0, -1):
        x = torch.empty(2, dtype=torch.double, device=device)
        yield ErrorInput(SampleInput(x, args=(num_samples,)),
                         error_regex="cannot sample n_sample <= 0 samples")

    # more samples than categories, with the second positional argument False
    x = torch.empty(2, dtype=torch.double, device=device)
    yield ErrorInput(SampleInput(x, args=(3, False,)),
                     error_regex="cannot sample n_sample > prob_dist")

    x = torch.empty(16777217, dtype=torch.double, device=device)
    yield ErrorInput(SampleInput(x, args=(3,)),
                     error_regex="number of categories cannot exceed")

    # The remaining cases are only produced for CPU devices.
    if torch.device(device).type != 'cpu':
        return

    invalid_probs = ((1., -1., 1.), (1., inf, 1.), (1., -inf, 1.), (1., 1., nan))
    err_msg1 = "probability tensor contains either `inf`, `nan` or element < 0"
    err_msg2 = "invalid multinomial distribution"

    for replacement in (False, True):
        # Named `sample_kwargs` so the function's own **kwargs is not shadowed.
        sample_kwargs = {'num_samples': 2, 'replacement': replacement}

        for probs in invalid_probs:
            # error case when input tensor contains `inf`, `nan` or negative element
            yield ErrorInput(SampleInput(torch.tensor(probs, device=device), kwargs=sample_kwargs),
                             error_regex=err_msg2 if replacement else err_msg1)

        # error case for the invalid multinomial distribution (sum of probabilities <= 0), 1-D input
        x = torch.zeros(3, device=device)
        yield ErrorInput(SampleInput(x, kwargs=sample_kwargs),
                         error_regex=err_msg2)

        # error case for the invalid multinomial distribution (sum of probabilities <= 0), 2-D input
        x = torch.zeros(3, 3, device=device)
        yield ErrorInput(SampleInput(x, kwargs=sample_kwargs),
                         error_regex=err_msg2)

        # error case for the invalid multinomial distribution: only row 1 is non-zero.
        # A fresh tensor is built instead of mutating the one yielded just above,
        # so a consumer that collects all inputs first still sees the all-zero case.
        x = torch.zeros(3, 3, device=device)
        x[1, :] = 1
        yield ErrorInput(SampleInput(x, kwargs=sample_kwargs),
                         error_regex=err_msg2)
def error_inputs_gradient(op_info, device, **kwargs):
    """Yield ``ErrorInput`` cases for ``torch.gradient``.

    The same set of cases is produced for long, float32 and complex64 inputs.
    All input tensors are created on ``device`` with the loop's dtype; only
    the coordinate tensors of the "same device" case are deliberately placed
    on ``'cpu'`` and ``'meta'``.
    """
    for dtype in [torch.long, torch.float32, torch.complex64]:
        t = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]], device=device, dtype=dtype)

        # a one-element spacing list together with two dims
        dim = (1, 0)
        spacing = [0.1]
        yield ErrorInput(SampleInput(t, kwargs=dict(spacing=spacing, dim=dim, edge_order=1)),
                         error_type=RuntimeError,
                         error_regex='torch.gradient expected spacing to be unspecified, a scalar ')

        yield ErrorInput(SampleInput(t, kwargs=dict(edge_order=3)),
                         error_type=RuntimeError,
                         error_regex='torch.gradient only supports edge_order=1 and edge_order=2.')

        # the same dim listed twice
        dim = (1, 1)
        spacing = 0.1
        yield ErrorInput(SampleInput(t, kwargs=dict(spacing=spacing, dim=dim, edge_order=1)),
                         error_type=RuntimeError,
                         error_regex='dim 1 appears multiple times in the list of dims')

        # coordinate tensors intentionally placed on two different devices
        dim = (0, 1)
        coordinates = [torch.tensor([1, 2, 4], device='cpu'), torch.tensor([1, 2, 4], device='meta')]
        yield ErrorInput(SampleInput(t, kwargs=dict(spacing=coordinates, dim=dim, edge_order=1)),
                         error_type=RuntimeError,
                         error_regex='torch.gradient expected each tensor to be on the same device,')

        # dim=3 on a 2-D input
        yield ErrorInput(SampleInput(t, kwargs=dict(dim=3)),
                         error_type=IndexError, error_regex='')

        # Inputs with a dimension of size 1 (edge_order=1) or size 2 (edge_order=2).
        # These previously ignored `device` and `dtype`, so they were always
        # default-dtype CPU tensors regardless of the device under test.
        t = torch.tensor([[1], [2], [3]], device=device, dtype=dtype)
        yield ErrorInput(SampleInput(t, kwargs=dict(edge_order=1)),
                         error_type=RuntimeError,
                         error_regex='torch.gradient expected each dimension size to be at least')

        t = torch.tensor([[1, 2], [3, 4]], device=device, dtype=dtype)
        yield ErrorInput(SampleInput(t, kwargs=dict(edge_order=2)),
                         error_type=RuntimeError,
                         error_regex='torch.gradient expected each dimension size to be at least')
def sample_inputs_rrelu(op_info, device, dtype, requires_grad, **kwargs):
    """Yield ``SampleInput`` cases for ``rrelu``.

    First forwards everything from ``sample_inputs_elementwise_unary`` with
    ``lower=0., upper=1., training=True``, then adds two 1-D samples: one with
    no extra arguments and one with ``training=False``.
    """
    unary_kwargs = dict(lower=0., upper=1., training=True)
    yield from sample_inputs_elementwise_unary(
        op_info, device, dtype, requires_grad, op_kwargs=unary_kwargs)

    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
    yield SampleInput(make_arg(S))
    yield SampleInput(make_arg(S), training=False)
def error_inputs_rrelu(op_info, device, **kwargs):
    """Yield one ``ErrorInput`` for ``rrelu`` with ``lower`` greater than ``upper``."""
    bad_bounds = {'lower': 0.3, 'upper': 0.1}
    float_input = make_tensor((S, S), device=device, dtype=torch.float32)
    yield ErrorInput(
        SampleInput(float_input, kwargs=bad_bounds),
        error_regex='Lower bound should be less than or equal to the upper bound')
def error_inputs_masked_select(op_info, device, **kwargs):
    """Yield ``ErrorInput`` cases for ``masked_select`` with a problematic ``out=`` tensor.

    Every case is expected to raise ``RuntimeError`` matching
    ``'unsupported operation'``.
    """
    expected = dict(error_type=RuntimeError, error_regex='unsupported operation')

    expanded_out = torch.rand((1,), device=device).expand((3,))
    values = torch.rand((6,), device=device)
    mask = torch.tensor([True, False, True, True, False, False], device=device)

    # `out` is a single element expanded to three entries
    yield ErrorInput(
        SampleInput(values, args=(mask,), kwargs={'out': expanded_out}), **expected)
    # `out` is the input tensor itself
    yield ErrorInput(
        SampleInput(values, args=(mask,), kwargs={'out': values}), **expected)
    # `out` is the mask tensor itself
    yield ErrorInput(
        SampleInput(mask.clone(), args=(mask,), kwargs={'out': mask}), **expected)
def error_inputs_median(op_info, device, **kwargs):
    """Yield one ``ErrorInput`` for ``median`` on a deeply nested tensor (CUDA only).

    Nothing is produced for non-CUDA devices.
    """
    # Compare the device *type*: a plain `device == 'cuda'` test does not match
    # indexed device strings such as 'cuda:0', so the case was silently skipped.
    if torch.device(device).type != 'cuda':
        return

    x = torch.tensor([[[[[[[[[[[[[[[[[[[[[[[[[nan],
                               [nan]]]]]]]]]]]]]]]]]]]]]]]]], device=device)
    yield ErrorInput(SampleInput(x, kwargs=dict(dim=-1)),
                     error_type=RuntimeError,
                     error_regex='CUDA Tensors cannot have more than 25 dimensions')
def error_inputs_index_select(op_info, device, **kwargs):
    """Yield one ``ErrorInput`` for ``index_select`` writing into an expanded ``out=`` tensor."""
    expanded_out = torch.rand((1, 6), device=device).expand((2, 6))
    source = torch.rand((3, 6), device=device)
    index = torch.tensor([0, 1], dtype=torch.int64, device=device)

    sample = SampleInput(source, args=(1, index,), kwargs={'out': expanded_out})
    yield ErrorInput(sample, error_type=RuntimeError, error_regex='unsupported operation')
def error_inputs_index_add(op_info, device, **kwargs):
    """Yield one ``ErrorInput`` for ``index_add`` with a mis-shaped source.

    ``self`` has shape ``[3, 2]`` while ``source`` has shape ``[2]``. All
    tensors are created on ``device`` so the case runs on the device under
    test; previously they were always CPU tensors.
    """
    result = torch.tensor([[1., 2.], [4., 5.], [7., 8.]], device=device)
    source = torch.tensor([2., 4.], device=device)
    index = torch.tensor([0, 2], device=device)

    yield ErrorInput(SampleInput(result, args=(0, index, source)),
                     error_type=RuntimeError,
                     error_regex=r'source tensor shape must match self tensor shape, '
                     r'excluding the specified dimension. Got self.shape = \[3, 2\] source.shape = \[2\]')
def error_inputs_logcumsumexp(op_info, device, **kwargs):
    """Yield ``ErrorInput`` cases for ``logcumsumexp`` with ``dim=3`` on 2-D inputs.

    One input is non-empty (5x2) and one has zero rows (0x2).
    """
    out_of_range_dim = 3
    inputs = [torch.randn(rows, 2, device=device) for rows in (5, 0)]
    for tensor_2d in inputs:
        yield ErrorInput(
            SampleInput(tensor_2d, args=(out_of_range_dim,)),
            error_type=IndexError,
            error_regex='Dimension out of range')
def sample_inputs_take_along_dim(op_info, device, dtype, requires_grad, **kwargs):
    """Yield ``SampleInput`` cases for ``take_along_dim``.

    Index tensors come from ``gather_variable``; the trailing positional
    argument, when present, is the ``dim`` passed to the op.
    """
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad, low=None, high=None)

    # (self shape, index shape, second gather_variable argument, positional `dim` args)
    cases = (
        ((S, S), (S, S), 1, (0,)),
        # `indices` broadcast
        ((S, S), (1, S // 2), 0, (1,)),
        # `self` broadcast
        ((1, S), (S, S // 2), 0, (1,)),
        # without `dim` arg
        ((S, S), (S, S // 2), 0, ()),
    )
    for self_shape, index_shape, index_dim, dim_args in cases:
        yield SampleInput(
            make_arg(self_shape),
            gather_variable(index_shape, index_dim, S, True, device=device),
            *dim_args)
def error_inputs_aminmax_amax_amin(op_info, device, is_ref=False, **kwargs):
    """Yield ``ErrorInput`` cases shared by ``amax``, ``amin``, their refs, and ``aminmax``.

    ``op_info.name`` selects which op-specific cases are produced, and
    ``is_ref`` selects the expected message for the out-dtype mismatch case.
    """
    op_name = op_info.name
    is_amax_or_amin = op_name in ['amax', 'amin', '_refs.amax', '_refs.amin']
    is_aminmax = op_name in ['aminmax']

    # Inputs with a zero-sized dimension, when the 'dim' arg is not provided.
    empty_dim_shape = (S, 0, S)
    if is_amax_or_amin:
        yield ErrorInput(SampleInput(torch.rand(empty_dim_shape, device=device)),
                         error_regex="reduction")
    elif is_aminmax:
        yield ErrorInput(
            SampleInput(torch.rand(empty_dim_shape, device=device)),
            error_regex="cannot compute aminmax over an empty dimension as the operation has no identity")

    # Tensors with more than 64 dimensions
    too_many_dims = [1] * 65
    for reduce_dim in (-1, 64):
        yield ErrorInput(
            SampleInput(torch.randn(too_many_dims, device=device), kwargs={'dim': reduce_dim}),
            error_regex="only tensors with up to 64 dims are supported")

    # Repeated 'dim'
    if is_amax_or_amin:
        four_d = torch.randn(S, S, S, S, device=device)
        for repeated_dims in [(0, 0), (0, -4)]:
            yield ErrorInput(SampleInput(four_d, kwargs={'dim': repeated_dims}),
                             error_regex="in the list of dims")

    # Illegal `out` dtype
    float_input = torch.randn(L, L, dtype=torch.float32, device=device)
    max_values = torch.empty(L, dtype=torch.float32, device=device)
    min_values = torch.empty(L, dtype=torch.double, device=device)
    illegal_values = torch.empty(L, dtype=torch.int, device=device)

    # Unlike regular PyTorch, amax and amin refs don't require input and out
    # dtypes to match exactly:
    # https://github.com/pytorch/pytorch/pull/87765#pullrequestreview-1162023824
    if is_ref:
        out_dtype_msg = ("Attempting to cast from torch.float32 to out tensor with dtype "
                         "torch.int32, but this can't be cast because it is not safe!")
    else:
        out_dtype_msg = ("Expected the dtype for input and out to match, but got Float "
                         "for input's dtype and Int for out's dtype.")

    if is_amax_or_amin:
        yield ErrorInput(SampleInput(float_input, kwargs={'dim': 0, 'out': illegal_values}),
                         error_regex=out_dtype_msg)
    elif is_aminmax:
        yield ErrorInput(
            SampleInput(float_input, kwargs={'dim': 0, 'out': (max_values, min_values)}),
            error_regex="Expected out tensor to have dtype float, but got double instead")

    # Reducing over the zero-sized dimension itself
    # FIXME: eager and ref impl throw different types of errors
    if 'refs' in op_name:
        expected_type = RuntimeError
    else:
        expected_type = IndexError
    yield ErrorInput(SampleInput(torch.rand(empty_dim_shape, device=device), kwargs={'dim': 1}),
                     error_type=expected_type, error_regex="reduction")
def sample_inputs_aminmax(op_info, device, dtype, requires_grad, **kwargs):
    """Yield ``SampleInput`` cases for ``aminmax`` over several shapes and ``dim``/``keepdim`` settings."""
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    # (input shape, keyword arguments for the op)
    shapes_and_kwargs = (
        ((S, S, S), {}),
        ((S, S, S), {'dim': 1}),
        ((S, S, S), {'dim': 1, 'keepdim': True}),
        ((), {'dim': 0}),
        ((), {}),
        ((), {'dim': 0, 'keepdim': True}),
        ((S, 0, S), {'dim': 0}),
    )
    for input_shape, op_kwargs in shapes_and_kwargs:
        yield SampleInput(make_arg(input_shape), **op_kwargs)
def error_inputs_diff(op_info, device, **kwargs):
    """Yield one ``ErrorInput`` for ``diff`` with a negative order.

    The function's own ``**kwargs`` are forwarded as the sample's kwargs.
    """
    negative_order = -1
    row = torch.rand((1, 3), device=device)
    sample = SampleInput(row, args=(negative_order, ), kwargs=kwargs)
    yield ErrorInput(
        sample,
        error_type=RuntimeError,
        error_regex=f'order must be non-negative but got {negative_order}')
def sample_inputs_diff(op_info, device, dtype, requires_grad, **kwargs):
    """Yield ``SampleInput`` cases for ``diff``.

    For each case, one sample is produced for every order ``n`` below the
    combined length of input, prepend and append along ``dim``; two extra
    samples use an order larger than that length.
    """
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    # (input size, dim, prepend size or None, append size or None)
    test_cases = (
        ((1,), 0, None, None),
        ((S,), 0, None, None),
        ((S, 1), 0, None, None),
        ((S, 1), 1, None, None),
        ((S, S), 0, None, None),
        ((S, S), 1, None, None),
        ((S, S), 0, (1, S), (2, S)),
        ((S, S), 0, None, (2, S)),
        ((XS, XS, XS), 1, None, None),
        ((XS, XS, XS), 2, None, None),
        ((XS, XS, XS), 1, (XS, 1, XS), (XS, 1, XS)),
        ((XS, XS, XS), 2, (XS, XS, 1), (XS, XS, 1)),
        ((XS, XS, XS), 2, (XS, XS, XS), (XS, XS, XS)),)

    for size, dim, size_prepend, size_append in test_cases:
        # total length along `dim` once prepend/append are attached
        total_len = size[dim]
        if size_prepend is not None:
            total_len += size_prepend[dim]
        if size_append is not None:
            total_len += size_append[dim]

        for order in range(total_len):
            input_tensor = make_arg(size)
            prepend = None if not size_prepend else make_arg(size_prepend)
            append = None if not size_append else make_arg(size_append)
            yield SampleInput(input_tensor, order, dim, prepend, append)

    # add some samples with n > dim_size
    cube = (XS, XS, XS)
    yield SampleInput(make_arg(cube), S + 1, 1)
    yield SampleInput(make_arg(cube), S * 3 + 2, 2, make_arg(cube), make_arg(cube))
def sample_inputs_histogram(op_info, device, dtype, requires_grad, **kwargs):
    """Yield ``SampleInput`` cases for ``histogram``.

    Each combination of input size, bin count, weighting and density gives
    two samples: one passing the bin count and one passing a sorted tensor of
    ``bin_ct + 1`` bin edges.
    """
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)
    sizes = ((), (S,), (S, S), (S, S, S), (S, 1, S), (S, 0, S))
    on_off = [False, True]

    for size, bin_ct, weighted, density in product(sizes, range(1, 5), on_off, on_off):
        input_tensor = make_arg(size)
        weight_tensor = make_arg(size) if weighted else None
        shared_kwargs = dict(weight=weight_tensor, density=density)

        # bins given as an integer count
        yield SampleInput(input_tensor, bin_ct, **shared_kwargs)

        # bins given as a sorted tensor of edges
        sorted_bins, _ = torch.sort(make_arg((bin_ct + 1,)))
        yield SampleInput(input_tensor, sorted_bins, **shared_kwargs)
def sample_inputs_histogramdd(op_info, device, dtype, requires_grad, **kwargs):
    """Yield ``SampleInput`` cases for ``histogramdd``.

    The bin pattern is truncated to the size of the input's last dimension.
    Each combination gives one sample with per-dimension bin counts and one
    with a list of per-dimension bin tensors.
    """
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)
    sizes = ((S, S), (S, S, S), (S, 1, S), (S, 0, S))
    bin_ct_patterns = ((1, 1, 1, 1, 1), (2, 3, 2, 3, 2), (3, 2, 3, 2, 3))
    on_off = [False, True]

    for size, pattern, weighted, density in product(sizes, bin_ct_patterns, on_off, on_off):
        input_tensor = make_arg(size)
        bin_ct = pattern[:size[-1]]
        # the weight drops the input's last dimension
        weight_tensor = make_arg(size[:-1]) if weighted else None
        shared_kwargs = dict(weight=weight_tensor, density=density)

        yield SampleInput(input_tensor, bin_ct, **shared_kwargs)

        bins_tensor = [make_arg(ct + 1) for ct in bin_ct]
        yield SampleInput(input_tensor, bins_tensor, **shared_kwargs)
def error_inputs_histogramdd(opinfo, device, **kwargs):
    """Yield one ``ErrorInput`` for ``histogramdd``: five bin counts for a 5x6 input."""
    make_arg = partial(make_tensor, dtype=torch.float, device=device, requires_grad=False)
    five_bins = [1, 1, 1, 1, 1]
    sample = SampleInput(make_arg(5, 6), five_bins)
    yield ErrorInput(
        sample,
        error_regex="histogramdd: The size of bins must be equal to the innermost dimension of the input.")
def sample_inputs_histc(op_info, device, dtype, requires_grad, **kwargs):
    """Yield ``SampleInput`` cases for ``histc`` over sizes and ``min``/``max`` bounds.

    For each combination there is one sample without ``bins`` and one for
    each of a few explicit ``bins`` values.
    """
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)
    sizes = ((), (S,), (S, S), (S, S, S), (S, 1, S), (S, 0, S))

    for size, lower, upper in product(sizes, [0, -10], [0, 10]):
        # construct sample input omitting bins arg
        yield SampleInput(make_arg(size), min=lower, max=upper)

        # construct sample inputs with a few different bins values
        for bin_count in (1, 3, 10):
            yield SampleInput(make_arg(size), bins=bin_count, min=lower, max=upper)
def sample_inputs_bincount(op_info, device, dtype, requires_grad, **kwargs):
    """Yield ``SampleInput`` cases for ``bincount``.

    Inputs are 1-D tensors of length ``size`` drawn with
    ``torch.randint(0, size, ...)``. Each is paired with optional weights and
    several ``minlength`` values derived from the input's maximum.
    """
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    for length, weighted in product((S, M), [False, True]):
        input_tensor = torch.randint(0, length, (length,), dtype=dtype, device=device)
        weight_tensor = make_arg((length,)) if weighted else None
        largest = int(input_tensor.max().item())
        candidate_minlengths = (0, largest // 2, largest, 2 * largest)
        for minlength in candidate_minlengths:
            yield SampleInput(
                input_tensor, weights=weight_tensor, minlength=minlength)
def sample_inputs_bucketize(op_info, device, dtype, requires_grad, reference_inputs_mode=False, **kwargs):
    """Yield ``SampleInput`` cases for ``bucketize``.

    Each case pairs an input shape with a number of boundaries; boundaries
    are sorted with ``msort``. With ``reference_inputs_mode=True`` a few
    larger cases are appended.
    """
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    # (input shape, number of boundaries)
    sizes = (((), S), ((S,), S), ((S, S), S), ((S, S, S), S), ((S, 1, S), S), ((S, 0, S), S))
    if reference_inputs_mode:
        sizes = sizes + (((256,), 128), ((128,), 256), ((32, 32), 11), ((32, 4, 32), 33))

    on_off = [False, True]
    for (input_shape, num_boundaries), out_int32, right in product(sizes, on_off, on_off):
        input_tensor = make_arg(input_shape)
        boundaries = make_arg(num_boundaries).msort()
        yield SampleInput(input_tensor, boundaries,
                          out_int32=out_int32, right=right)


# Same generator with the extra reference-mode cases enabled.
reference_inputs_bucketize = partial(sample_inputs_bucketize, reference_inputs_mode=True)
def error_inputs_bucketize(opinfo, device, **kwargs):
    """Yield one ``ErrorInput`` for ``bucketize`` with a 2-D boundaries tensor."""
    make_arg = partial(make_tensor, dtype=torch.float, device=device, requires_grad=False)
    input_tensor = make_arg((S, S, S))
    boundaries_2d = make_arg((S, S))
    yield ErrorInput(
        SampleInput(input_tensor, boundaries_2d),
        error_regex="boundaries tensor must be 1 dimension")
def sample_inputs_searchsorted(op_info, device, dtype, requires_grad, **kwargs):
    """Yield ``SampleInput`` cases for ``searchsorted``.

    Each combination gives four samples: a pre-sorted boundary tensor with
    ``right=`` and with ``side=``, then the unsorted tensor with an explicit
    ``sorter`` and the same two spellings.
    """
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    # (unsorted tensor size, (input sizes,), is_scalar)
    sizes = (
        ((0,), ((0,),), False),
        ((M,), ((), (M,), (M, M)), False),
        ((0, 0), ((0, 0),), False),
        ((M, M), ((M, M),), False),
        ((0, 0, 0), ((0, 0, 0),), False),
        ((M, M, M), ((M, M, M),), False),
        ((L,), ((),), True),
    )
    on_off = [False, True]

    for (size, input_sizes, is_scalar), noncontiguous, out_int32, right in product(
        sizes, on_off, on_off, on_off
    ):
        unsorted_tensor = make_arg(size, noncontiguous=noncontiguous)
        side = "right" if right else "left"

        for input_size in input_sizes:
            values = make_arg(input_size, noncontiguous=noncontiguous)
            if is_scalar:
                values = values.item()

            if np.prod(size) == 0:
                # empty tensor: used as-is, with a separately generated int64 sorter
                boundary_tensor = unsorted_tensor
                sorter = make_tensor(size, dtype=torch.int64, device=device, noncontiguous=noncontiguous)
            else:
                boundary_tensor, sorter = torch.sort(unsorted_tensor)

            variants = (
                (boundary_tensor, {}),
                (unsorted_tensor, {'sorter': sorter}),
            )
            for sequence, sorter_kwargs in variants:
                yield SampleInput(sequence, values, out_int32=out_int32, right=right, **sorter_kwargs)
                yield SampleInput(sequence, values, out_int32=out_int32, side=side, **sorter_kwargs)
def sample_inputs_gradient(op_info, device, dtype, requires_grad, **kwargs):
    """Yield ``SampleInput`` cases for ``torch.gradient``.

    The first group passes ``spacing`` as ``None``, a scalar or a list of
    scalars; the second group passes a list of coordinate tensors.
    """
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad, low=None, high=None)

    # (input size, spacing, dim, edge_order)
    scalar_spacing_cases = (
        ((S,), None, None, 1),
        ((S,), 2., None, 1),
        ((S, S), None, None, 2),
        ((S, S), [2.0, 2.1], None, 1),
        ((S, S), [2.0, 2.1], (0, 1), 1),
        ((4, 4, 4), [2., 1.], (0, 1), 2),
    )
    for size, spacing, dim, edge_order in scalar_spacing_cases:
        yield SampleInput(make_arg(size), dim=dim, spacing=spacing, edge_order=edge_order)

    # (input size, coordinates per dim, dim, edge_order)
    coordinate_cases = (
        ((3, 3, 3), ((1.1, 2.0, 3.5), (4.0, 2, 6.0)), (0, -1), 1),
        ((3, 3, 3), ((1.0, 3.0, 2.0), (8.0, 6.0, 1.0)), (0, 1), 2),
    )
    for size, coordinates, dim, edge_order in coordinate_cases:
        t = make_arg(size)
        # `coords` will always contain floating point values and Python 3.10 does not support this
        # implicit conversion to an integer using `__int__`, so each tensor is created with the
        # default dtype first and only then converted to `dtype`.
        # TODO: this can be simplified after https://github.com/pytorch/pytorch/issues/69316 is fixed
        coordinate_tensors = [
            torch.tensor(coords, device=device).to(dtype) for coords in coordinates
        ]
        yield SampleInput(t, dim=dim, spacing=coordinate_tensors, edge_order=edge_order)
def sample_inputs_getitem(op_info, device, dtype, requires_grad, **kwargs):
    """Yield ``SampleInput`` cases for ``__getitem__``.

    Each index expression is applied to an ``(S, S, S)`` tensor; one final
    sample indexes an ``(S, S, S, S)`` tensor.
    """
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    # Each entry is the index expression handed to the op.
    index_expressions = [
        [1, 2],
        slice(0, 3),
        (slice(0, 3), 1),
        ([0, 2, 3], [1, 3, 3], [0, 0, 2]),
        ([0, 0, 3], [1, 1, 3], [0, 0, 2]),
        (slice(None), slice(None), [0, 3]),
        (slice(None), [0, 3], slice(None)),
        ([0, 3], slice(None), slice(None)),
        ([0, 3], [1, 2], slice(None)),
        ([0, 3], ),
        ([0, 3], slice(None)),
        ([0, 3], Ellipsis),
        ([0, 2, 3], [1, 3, 3], torch.LongTensor([0, 0, 2])),
        index_variable(2, S, device=device),
        mask_not_all_zeros((S,)),
    ]

    for index in index_expressions:
        yield SampleInput(make_arg((S, S, S)), args=(index,))

    # list indices on dims 1 and 3, full slices on dims 0 and 2
    four_d_index = (slice(None), [0, 1], slice(None), [0, 1])
    yield SampleInput(make_arg((S, S, S, S)), args=(four_d_index,))
def sample_inputs_index_put(op_info, device, dtype, requires_grad, **kwargs):
    """Yield ``SampleInput`` cases for ``index_put``.

    For each ``accumulate`` setting there is one sample indexed by an index
    tensor and one indexed by a boolean mask.
    """
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    for accumulate in (False, True):
        # As defined in the docs, if accumulate is false, duplicate indices are not supported,
        # so only a single index is generated in that case.
        num_indices = 2 if accumulate else 1

        # Test with indices arg
        yield SampleInput(
            make_arg((S, S,)),
            (index_variable(num_indices, S, device=device),),
            make_arg((num_indices, S)),
            accumulate=accumulate)

        # Test with mask arg
        if accumulate:
            mask = torch.zeros(S, dtype=torch.bool)
        else:
            mask = mask_not_all_zeros((S,))
        yield SampleInput(
            make_arg((S, S)), (mask, ), make_arg((S,)), accumulate=accumulate)
def sample_inputs_sort(op_info, device, dtype, requires_grad, **kwargs):
    """Yield ``SampleInput`` cases for ``sort``.

    Covers a large 1-D tensor, small 3-D tensors over every ``dim`` and
    ``descending`` setting, scalar and empty tensors, and explicit
    ``stable=True`` calls.
    """
    device_type = torch.device(device).type

    def from_permutation(numel, view_shape=None):
        # Start from a random permutation of int64 values, then convert to `dtype`.
        res = torch.randperm(numel, dtype=torch.int64, device=device)
        if view_shape is not None:
            res = res.view(*view_shape)
        return res.to(dtype).requires_grad_(requires_grad)

    def small_3d_unique():
        return from_permutation(S * S * S, (S, S, S))

    def large_1d_unique():
        return from_permutation(L * L * L)

    # Test case for large tensor.
    yield SampleInput(large_1d_unique())

    # Test cases for small 3d tensors.
    # Imitates legacy tests from test/test_torch.py
    # bool and cuda requires stable sort for stable results, at least
    # for the return index
    skip_default_schema = dtype == torch.bool and device_type == 'cuda'
    flags = (True, False)
    for dim, descending, stable in product(range(-3, 3), flags, flags):
        # default schema without stable sort
        if not skip_default_schema:
            yield SampleInput(small_3d_unique(), dim, descending)
        # schema with stable sort, no CUDA support yet
        if device_type == 'cpu':
            yield SampleInput(
                small_3d_unique(), dim=dim, descending=descending, stable=stable)

    # Test cases for scalar tensor (data=1), then empty tensor (data=())
    tensor_opt = dict(dtype=dtype, device=device, requires_grad=requires_grad)
    for data in (1, ()):
        for positional in ((), (0,), (0, True)):
            yield SampleInput(torch.tensor(data, **tensor_opt), *positional)

    # Test cases for stable sort
    for extra_kwargs in ({}, {'dim': 0}, {'dim': 0, 'descending': True}):
        yield SampleInput(small_3d_unique(), **extra_kwargs, stable=True)
def sample_inputs_threshold(op_info, device, dtype, requires_grad, **kwargs):
    """Yield ``SampleInput`` cases for ``threshold`` on 0-D to 3-D inputs."""
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    for x_size in ((), (S,), (S, S), (S, S, S)):
        x = make_arg(x_size)
        # threshold and values args must be numbers
        threshold = make_arg(()).item()
        value = make_arg(()).item()
        yield SampleInput(x, threshold, value)
def sample_inputs_unique(op_info, device, dtype, requires_grad, **kwargs):
    """Yield ``SampleInput`` cases for ``unique``.

    Iterates over shapes, the ``sorted`` / ``return_inverse`` /
    ``return_counts`` flags and ``dim``. Each accepted combination gives
    three inputs: all zeros, mixed 0s and 1s, and many different values.
    """
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
    sizes = ((), (S,), (S, S), (S, S, S), (S, 1, S), (S, 0, S))
    on_off = [False, True]
    dims = [None, -2, -1, 0, 1, 2]

    for shape, is_sorted, return_inverse, return_counts, dim in \
            product(sizes, on_off, on_off, on_off, dims):
        # torch.unique cannot be called if the input tensor has a zero dimension which isn't the selected dim.
        # Compared by value: `is not` on ints only worked via CPython's small-int cache.
        if 0 in shape and shape.index(0) != dim:
            continue

        # skip invalid dim args
        if dim is not None and (dim < -len(shape) or dim >= len(shape)):
            continue

        # Named `sample_kwargs` / `is_sorted` so neither the function's **kwargs
        # nor the builtin `sorted` is shadowed.
        sample_kwargs = dict(sorted=is_sorted, return_inverse=return_inverse,
                             return_counts=return_counts, dim=dim)

        # construct a test case with only one distinct value
        input_t = torch.zeros(shape, dtype=dtype, device=device, requires_grad=requires_grad)
        yield SampleInput(input_t, **sample_kwargs)

        # construct a test case with mixed 0s and 1s
        input_t = make_arg(shape, dtype=torch.bool, requires_grad=False)\
            .to(dtype).requires_grad_(requires_grad)
        yield SampleInput(input_t, **sample_kwargs)

        # construct a test case with many different values
        yield SampleInput(make_arg(shape), **sample_kwargs)
def sample_inputs_unique_consecutive(*args, **kwargs):
    """Yield ``SampleInput`` cases for ``unique_consecutive``.

    Reuses the samples of ``sample_inputs_unique`` whose ``sorted`` kwarg is
    falsy, removing that kwarg before yielding. Samples with a truthy
    ``sorted`` are dropped.
    """
    for candidate in sample_inputs_unique(*args, **kwargs):
        if candidate.kwargs["sorted"]:
            continue
        del candidate.kwargs["sorted"]
        yield candidate
def sample_inputs_adaptive_avg_pool1d(op_info, device, dtype, requires_grad, **kwargs):
    """Yield batched and unbatched ``SampleInput`` cases for ``adaptive_avg_pool1d``."""
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # Ordered as (input shape, output size)
    cases = (
        ((0, 8, 8), (5,)),
        ((3, 8, 8), 5),
        ((3, 8, 8), 1)
    )

    for batched_shape, output_size in cases:
        # Batched first, then unbatched (leading dimension dropped)
        for shape in (batched_shape, batched_shape[1:]):
            yield SampleInput(make_arg(shape), args=(output_size,))
def error_inputs_adaptive_avg_pool1d(opinfo, device, **kwargs):
    """Yield ``ErrorInput`` cases for ``adaptive_avg_pool1d`` with invalid ``output_size``."""
    make_arg = partial(make_tensor, device=device, dtype=torch.float32)

    # error inputs for empty output
    empty_size_sample = SampleInput(make_arg((1, 2, 3)), output_size=())
    yield ErrorInput(empty_size_sample,
                     error_regex="'output_size' should contain one int")

    # error inputs for output_size lesser than 0
    negative_size_sample = SampleInput(make_arg((1, 1, 1)), output_size=(-1,))
    yield ErrorInput(negative_size_sample,
                     error_regex="elements of output_size must be greater than or equal to 0")
def sample_inputs_adaptive_avg_pool2d(op_info, device, dtype, requires_grad, **kwargs):
    """Yield batched and unbatched ``SampleInput`` cases for ``adaptive_avg_pool2d``."""
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # Ordered as (input shape, output size)
    cases = (
        ((1, 8, 8, 8), (5, 7)),
        ((2, 8, 8, 8), (None, 7)),
        ((1, 8, 4, 3), (5, None)),
        ((1, 8, 4, 3), (None, None)),
        # a bare int, not a one-element tuple
        ((1, 8, 4, 3), 5),
    )

    for batched_shape, output_size in cases:
        # Batched first, then unbatched (leading dimension dropped)
        for shape in (batched_shape, batched_shape[1:]):
            yield SampleInput(make_arg(shape), args=(output_size,))
def error_inputs_adaptive_avg_pool2d(opinfo, device, **kwargs):
    """Yield ``ErrorInput`` cases for ``adaptive_avg_pool2d``: bad input rank and invalid ``output_size``."""
    make_arg = partial(make_tensor, device=device, dtype=torch.float32)

    # error inputs for incorrect input dimension
    too_few_dims = SampleInput(make_arg((2, 2)), output_size=(2, 2))
    yield ErrorInput(too_few_dims,
                     error_type=ValueError, error_regex="Input dimension should be at least 3")

    # error inputs for empty output
    empty_size = SampleInput(make_arg((1, 2, 3, 4)), output_size=())
    yield ErrorInput(empty_size,
                     error_regex="output_size must be 2")

    # error inputs for output_size lesser than 0
    negative_size = SampleInput(make_arg((1, 1, 1, 1)), output_size=(-1, 0))
    yield ErrorInput(negative_size,
                     error_regex="elements of output_size must be greater than or equal to 0")
def sample_inputs_adaptive_avg_pool3d(op_info, device, dtype, requires_grad, **kwargs):
    """Yield batched and unbatched ``SampleInput`` cases for ``adaptive_avg_pool3d``."""
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # Ordered as (input shape, output size)
    cases = (
        ((0, 8, 8, 8, 8), (5, 7, 4)),
        ((1, 8, 4, 3, 7), (None, None, None)),
        ((1, 8, 4, 3, 7), (1, 1, 1)),
        ((3, 3, 8, 8, 6), (5, 7, None)),
        ((1, 3, 8, 8, 6), (5, None, 2)),
        ((3, 3, 8, 8, 6), (None, 3, 2)),
    )

    for batched_shape, output_size in cases:
        # Batched first, then unbatched (leading dimension dropped)
        for shape in (batched_shape, batched_shape[1:]):
            yield SampleInput(make_arg(shape), args=(output_size,))
def error_inputs_adaptive_avg_pool3d(opinfo, device, **kwargs):
    """Yield ``ErrorInput`` cases for ``adaptive_avg_pool3d``: bad input rank and invalid ``output_size``."""
    make_arg = partial(make_tensor, device=device, dtype=torch.float32)

    # error inputs for incorrect input dimension
    too_few_dims = SampleInput(make_arg((2, 2, 2)), output_size=(2, 2, 2))
    yield ErrorInput(too_few_dims,
                     error_type=ValueError, error_regex="Input dimension should be at least 4")

    # error inputs for empty output
    empty_size = SampleInput(make_arg((1, 2, 3, 4)), output_size=())
    yield ErrorInput(empty_size,
                     error_regex="output_size must be 3")

    # error inputs for output_size lesser than 0
    negative_size = SampleInput(make_arg((1, 1, 1, 1, 1)), output_size=(-1, 0, 2))
    yield ErrorInput(negative_size,
                     error_regex="elements of output_size must be greater than or equal to 0")
def sample_inputs_adaptive_max_pool1d(op_info, device, dtype, requires_grad, **kwargs):
    """Yield batched and unbatched ``SampleInput`` cases for ``adaptive_max_pool1d``.

    Each case is produced with the second positional argument set to
    ``True`` and to ``False``.
    """
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # Ordered as (input shape, output size)
    cases = (
        # ((0, 8, 8), (5,)),
        # 0 batch size doesn't work,  cannot reshape tensor of 0 elements into shape [0, 8, -1]
        ((3, 4, 4), 3),
        ((3, 4, 4), 1)
    )

    for (batched_shape, output_size), return_idx in product(cases, (True, False)):
        # Batched first, then unbatched (leading dimension dropped)
        for shape in (batched_shape, batched_shape[1:]):
            yield SampleInput(make_arg(shape), args=(output_size, return_idx))
def error_inputs_adaptive_max_pool1d(opinfo, device, **kwargs):
    """Yield ``ErrorInput`` cases for ``adaptive_max_pool1d`` with invalid ``output_size``."""
    make_arg = partial(make_tensor, device=device, dtype=torch.float32)

    # error inputs for empty output
    empty_size = SampleInput(make_arg((1, 2, 3)), output_size=())
    yield ErrorInput(empty_size,
                     error_regex="'output_size' should contain one int")

    # error inputs for output_size lesser than 0
    negative_size = SampleInput(make_arg((1, 1, 1)), output_size=(-1,))
    yield ErrorInput(negative_size,
                     error_regex="Trying to create tensor with negative dimension")
def sample_inputs_adaptive_max_pool2d(op_info, device, dtype, requires_grad, **kwargs):
    """Yield batched and unbatched ``SampleInput`` cases for ``adaptive_max_pool2d``.

    Each case is produced with the second positional argument set to
    ``True`` and to ``False``.
    """
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # Ordered as (input shape, output size)
    cases = (
        # ((0, 8, 8, 8), (5, 7)),
        # 0 batch size doesn't work,  cannot reshape tensor of 0 elements into shape [0, 8, -1]
        ((1, 4, 4, 4), (2, 3)),
        ((2, 4, 4, 4), (None, 3)),
        ((2, 4, 4, 4), (1, 1)),
        ((1, 4, 4, 3), (3, None)),
        ((1, 4, 4, 3), (None, None)),
        # a bare int, not a one-element tuple
        ((1, 4, 4, 3), 3),
    )

    for (batched_shape, output_size), return_idx in product(cases, (True, False)):
        # Batched first, then unbatched (leading dimension dropped)
        for shape in (batched_shape, batched_shape[1:]):
            yield SampleInput(make_arg(shape), args=(output_size, return_idx))
def error_inputs_adaptive_max_pool2d(opinfo, device, **kwargs):
    """Yield ErrorInputs (float32 tensors) for invalid 2d adaptive max pool calls."""
    new_tensor = partial(make_tensor, device=device, dtype=torch.float32)

    # Ordered as (input shape, output_size, keyword arguments for ErrorInput)
    failing = (
        # incorrect input dimension
        ((2, 2), (2, 2),
         {'error_type': ValueError, 'error_regex': "Input dimension should be at least 3"}),
        # empty output_size
        ((1, 2, 3, 4), (),
         {'error_regex': "internal error"}),
        # output_size less than 0
        ((1, 1, 1, 1), (-1, 0),
         {'error_regex': "Trying to create tensor with negative dimension"}),
    )

    for shape, output_size, expectation in failing:
        sample = SampleInput(new_tensor(shape), output_size=output_size)
        yield ErrorInput(sample, **expectation)
def sample_inputs_adaptive_max_pool3d(op_info, device, dtype, requires_grad, **kwargs):
    """Yield batched and unbatched SampleInputs for the 3d adaptive max pool cases.

    Every (input shape, output size) case is combined with both boolean values
    of the second positional argument (the return-index flag). The unbatched
    sample uses the case's input shape without its leading dimension.
    """
    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # Ordered as (input shape, output size).
    # A zero batch size, e.g. ((0, 8, 8, 8, 8), (5, 7, 4)), is deliberately left out:
    # it doesn't work, cannot reshape tensor of 0 elements into shape [0, 8, -1]
    shape_and_size = (
        ((1, 4, 4, 3, 5), (None, None, None)),
        ((1, 4, 4, 3, 5), (1, 1, 1)),
        ((3, 3, 4, 4, 6), (2, 3, None)),
        ((1, 3, 4, 4, 6), (3, None, 2)),
        ((3, 3, 4, 4, 6), (None, 3, 2)),
    )

    for input_shape, output_size in shape_and_size:
        for return_idx in (True, False):
            pool_args = (output_size, return_idx)
            # Batched
            yield SampleInput(new_tensor(input_shape), args=pool_args)
            # Unbatched
            yield SampleInput(new_tensor(input_shape[1:]), args=pool_args)
def error_inputs_adaptive_max_pool3d(opinfo, device, **kwargs):
    """Yield ErrorInputs (float32 tensors) for invalid 3d adaptive max pool calls."""
    new_tensor = partial(make_tensor, device=device, dtype=torch.float32)

    # Ordered as (input shape, output_size, keyword arguments for ErrorInput)
    failing = (
        # incorrect input dimension
        ((2, 2, 2), (2, 2, 2),
         {'error_type': ValueError, 'error_regex': "Input dimension should be at least 4"}),
        # empty output_size
        ((1, 2, 3, 4), (),
         {'error_regex': "internal error"}),
        # output_size less than 0
        ((1, 1, 1, 1, 1), (-1, 0, 2),
         {'error_regex': "Trying to create tensor with negative dimension"}),
    )

    for shape, output_size, expectation in failing:
        sample = SampleInput(new_tensor(shape), output_size=output_size)
        yield ErrorInput(sample, **expectation)
class _TestParamsMaxPoolBase:
def __init__(self) -> None:
self.kwargs = {
'kernel_size': [3],
'stride': [2, None],
'ceil_mode': [True, False],
'padding': [0, 1],
'dilation': [1],
'return_indices': [True, False]
}
self.shapes = [
[1, 2, None], # batch
[2], # channels
[3, 6] # signal
]
def _gen_shape(self):
for shape in product(*self.shapes):
# shape[0] is None indicates missing batch dimension
if shape[0] is None:
shape = shape[1:]
yield shape, torch.contiguous_format
# only 2d (N, C, H, W) rank 4 tensors support channels_last memory format
if len(self.shapes) == 4 and len(shape) == 4:
yield shape, torch.channels_last
def _gen_kwargs(self):
keys = self.kwargs.keys()
for values in product(*self.kwargs.values()):
yield dict(zip(keys, values))
def gen_input_params(self):
yield from product(self._gen_shape(), self._gen_kwargs())
class _TestParamsMaxPool1d(_TestParamsMaxPoolBase):
    """Parameter generator that also tries 1-tuple forms of the pooling arguments."""

    def __init__(self) -> None:
        super().__init__()
        # Add a single-element tuple variant to each of these value lists (in place).
        tuple_variants = (
            ('kernel_size', (3,)),
            ('stride', (2,)),
            ('padding', (1,)),
            ('dilation', (1,)),
        )
        for name, variant in tuple_variants:
            self.kwargs[name].append(variant)
class _TestParamsMaxPool2d(_TestParamsMaxPoolBase):
    """Parameter generator that adds 2-tuple pooling arguments and one extra shape dimension."""

    def __init__(self) -> None:
        super().__init__()
        # Add a two-element tuple variant to each of these value lists (in place).
        tuple_variants = (
            ('kernel_size', (3, 2)),
            ('stride', (2, 1)),
            ('padding', (1, 1)),
            ('dilation', (1, 2)),
        )
        for name, variant in tuple_variants:
            self.kwargs[name].append(variant)
        # One more dimension list, giving four in total.
        self.shapes.append([6])
class _TestParamsMaxPool3d(_TestParamsMaxPoolBase):
    """Parameter generator that adds 3-tuple pooling arguments and two extra shape dimensions."""

    def __init__(self) -> None:
        super().__init__()
        # Add a three-element tuple variant to each of these value lists (in place).
        # NOTE: 'padding' gets no tuple variant here; only the base values are used.
        tuple_variants = (
            ('kernel_size', (3, 2, 3)),
            ('stride', (2, 1, 2)),
            ('dilation', (1, 2, 1)),
        )
        for name, variant in tuple_variants:
            self.kwargs[name].append(variant)
        # Two more dimension lists, giving five in total.
        self.shapes.extend([[6], [5]])
def sample_inputs_max_pool(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for the max-pool ops listed in the dispatch table below.

    The parameter generator class is chosen by ``op_info.name``; a name that is
    not in the table raises ``KeyError``.
    """
    # Tensors are created without grad; `requires_grad` is applied only after
    # the memory-format conversion.
    plain_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=False)

    generator_by_op = {
        'nn.functional.max_pool1d': _TestParamsMaxPool1d,
        'nn.functional.max_pool2d': _TestParamsMaxPool2d,
        'nn.functional.max_pool3d': _TestParamsMaxPool3d,
        'max_pool2d_with_indices_backward': _TestParamsMaxPool2d,
    }
    param_source = generator_by_op[op_info.name]()

    for (shape, memory_format), pool_kwargs in param_source.gen_input_params():
        tensor = plain_tensor(shape)
        tensor = tensor.to(memory_format=memory_format)
        tensor = tensor.requires_grad_(requires_grad)
        yield SampleInput(tensor, kwargs=pool_kwargs)
def max_pool2d_backward(*args, kernel_size=(), stride=(), padding=(0,), dilation=(1,), ceil_mode=False, **kwargs):
    """Run max_pool2d forward with indices, then its aten backward with an all-ones gradient.

    The positional ``args`` are forwarded to both the forward call and the
    backward op. Extra ``kwargs`` are accepted but not used. Returns the
    result of ``aten.max_pool2d_with_indices_backward``.
    """
    pooling = dict(
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        dilation=dilation,
        ceil_mode=ceil_mode,
    )
    pooled, argmax = torch.nn.functional.max_pool2d_with_indices(*args, return_indices=True, **pooling)

    upstream_grad = torch.ones_like(pooled)
    # The forward call receives `stride` as given; only the backward op gets
    # the kernel size substituted when stride is None.
    backward_stride = kernel_size if stride is None else stride

    return torch.ops.aten.max_pool2d_with_indices_backward.default(
        upstream_grad, *args, kernel_size, backward_stride, padding, dilation, ceil_mode, argmax)
def error_inputs_max_pool1d(op_info, device, **kwargs):
    """Yield ErrorInputs for max_pool1d, once with and once without ``requires_grad``."""
    pad_too_large = 'pad should be at most half of effective kernel size'
    bad_input = r'Expected 2D or 3D \(batch mode\) tensor with optional 0 dim batch size for input'

    # Toggle requires_grad because `max_pool1d` has different path
    # based on whether `requires_grad` is set or not.
    for requires_grad in (True, False):
        make_arg = partial(make_tensor, device=device, dtype=torch.float, requires_grad=requires_grad)
        x = make_arg((0, 1, 49))

        # Padding violations, all on the shared input `x`.
        padding_cases = (
            # pad is negative
            ({'kernel_size': 2, 'stride': 50, 'padding': -1, 'return_indices': True},
             'pad must be non-negative'),
            # pad > kernel_size / 2
            ({'kernel_size': 2, 'stride': 50, 'padding': 4, 'return_indices': True},
             pad_too_large),
            # pad > ((kernel_size - 1) * dilation + 1) / 2, when dilation is not default
            ({'kernel_size': 3, 'dilation': 2, 'stride': 1, 'padding': 3, 'return_indices': True},
             pad_too_large),
        )
        for pool_kwargs, regex in padding_cases:
            yield ErrorInput(SampleInput(x, kwargs=pool_kwargs), error_regex=regex)

        # Inputs whose shape is rejected. Each tensor is built lazily, right
        # before its sample is yielded.
        rejected_inputs = (
            # 0-dim input tensor
            lambda: make_arg((), requires_grad=requires_grad),
            # empty input
            lambda: torch.tensor([], device=device, requires_grad=requires_grad),
            # unbatched input with 0 sized non-batch dims
            lambda: make_arg((0, 10), requires_grad=requires_grad),
            # batched input with 0 sized non-batch dims
            lambda: make_arg((1, 10, 0), requires_grad=requires_grad),
        )
        for build in rejected_inputs:
            yield ErrorInput(SampleInput(build(), kwargs={'kernel_size': 1}), error_regex=bad_input)

        # Ordered as (input shape, pooling kwargs, expected error regex)
        shaped_cases = (
            # stride=0
            ((3, 3, 3), {'kernel_size': 1, 'stride': 0},
             'stride must be greater than zero, but got 0'),
            # dilation=0
            ((3, 3, 3), {'kernel_size': 1, 'stride': 1, 'padding': 0, 'dilation': 0},
             'dilation must be greater than zero, but got 0'),
            # invalid output size
            ((2, 2, 2), {'kernel_size': 5, 'stride': 1, 'padding': 0, 'dilation': 1},
             'Invalid computed output size: -2'),
        )
        for shape, pool_kwargs, regex in shaped_cases:
            yield ErrorInput(SampleInput(make_arg(shape), kwargs=pool_kwargs), error_regex=regex)

        # kernel_size=0 on the shared input
        yield ErrorInput(SampleInput(x, kwargs={'kernel_size': 0}),
                         error_regex='kernel_size must be greater than zero')

        # stride=0 on the shared input
        yield ErrorInput(SampleInput(x, kwargs={'kernel_size': 2, 'stride': 0}),
                         error_regex='stride must be greater than zero')
def error_inputs_max_pool2d(op_info, device, **kwargs):
    """Yield ErrorInputs for max_pool2d: padding violations and zero-sized non-batch dims."""
    make_arg = partial(make_tensor, device=device, dtype=torch.float, requires_grad=False)
    pad_negative = 'pad must be non-negative'
    pad_too_large = 'pad should be at most half of effective kernel size'

    x = make_arg((0, 1, 49))

    # Ordered as (kernel_size, padding, expected error regex); all use input `x`
    padding_cases = (
        # pad is negative (int kernel)
        (2, -1, pad_negative),
        # pad is negative (2-dimensional kernel)
        ((3, 2), -1, pad_negative),
        # pad > kernel_size / 2 (kernel_size : int)
        (2, 4, pad_too_large),
        # pad > kernel_size / 2 (kernel_size : tuple)
        ((3, 2), 4, pad_too_large),
    )
    for kernel_size, padding, regex in padding_cases:
        pool_kwargs = {'kernel_size': kernel_size, 'stride': 50, 'padding': padding, 'return_indices': True}
        yield ErrorInput(SampleInput(x, kwargs=pool_kwargs), error_regex=regex)

    err_msg = r'Expected 3D or 4D \(batch mode\) tensor with optional 0 dim batch size for input'
    zero_sized_shapes = (
        # unbatched input with 0 sized non-batch dims.
        (1, 0, 10),
        # batched input with 0 sized non-batch dims.
        (2, 1, 10, 0),
    )
    for shape in zero_sized_shapes:
        yield ErrorInput(SampleInput(make_arg(shape), kwargs={'kernel_size': 1}), error_regex=err_msg)
def error_inputs_max_pool3d(op_info, device, **kwargs):
    """Yield ErrorInputs for max_pool3d: padding violations and zero-sized non-batch dims."""
    make_arg = partial(make_tensor, device=device, dtype=torch.float, requires_grad=False)
    pad_negative = 'pad must be non-negative'
    pad_too_large = 'pad should be at most half of effective kernel size'

    x = make_arg((0, 1, 49, 50))

    # Ordered as (kernel_size, padding, expected error regex); all use input `x`
    padding_cases = (
        # pad is negative (int kernel)
        (2, -1, pad_negative),
        # pad is negative (3-dimensional kernel)
        ((3, 2, 2), -1, pad_negative),
        # pad > kernel_size / 2 (kernel_size: int)
        (2, 4, pad_too_large),
        # pad > kernel_size / 2 (kernel_size: tuple)
        ((3, 2, 2), 4, pad_too_large),
    )
    for kernel_size, padding, regex in padding_cases:
        pool_kwargs = {'kernel_size': kernel_size, 'stride': 50, 'padding': padding, 'return_indices': True}
        yield ErrorInput(SampleInput(x, kwargs=pool_kwargs), error_regex=regex)

    err_msg = r'Expected input\'s non-batch dimensions to have positive length'
    zero_sized_shapes = (
        # unbatched input with 0 sized non-batch dims.
        (0, 1, 2, 10),
        # batched inputs with 0 sized non-batch dims.
        (2, 1, 0, 1, 2),
    )
    for shape in zero_sized_shapes:
        yield ErrorInput(SampleInput(make_arg(shape), kwargs={'kernel_size': 1}), error_regex=err_msg)
def sample_inputs_normalize(self, device, dtype, requires_grad, **kwargs):
    """Yield one SampleInput per (input shape, keyword arguments) case.

    Input tensors are created with ``low=-1`` and ``high=1``.
    """
    bounded_tensor = partial(make_tensor, low=-1, high=1, device=device, dtype=dtype, requires_grad=requires_grad)

    # Ordered as (input shape, keyword arguments)
    shape_and_kwargs = (
        ((2, 1, 4, 5), {'p': 1., 'dim': 2}),
        ((2, 3, 4, 5), {'p': 2., 'dim': 1}),
        ((1, 2, 4, 5), {'p': 0.5, 'dim': 0}),
        ((1, 3, 4, 5), {'p': -1., 'dim': 1}),
        ((1, 3, 4, 5), {'p': 0., 'dim': -1}),
        ((), {'p': 1.2, 'dim': 0}),
        # no keyword arguments at all
        ((2, 3, 4, 5), {}),
        ((2, 3, 4, 5), {'eps': 1e-4}),
    )

    for shape, norm_kwargs in shape_and_kwargs:
        tensor = bounded_tensor(shape)
        yield SampleInput(tensor, kwargs=norm_kwargs)
def complex_conv(fn, input_size, weight, grad_output, stride, padding, dilation, groups):
    """Evaluate ``fn`` on complex ``weight``/``grad_output`` using three real-valued calls.

    ``fn`` is called as ``fn(input_size, weight, grad_output, stride, padding,
    dilation, groups)`` with real tensors; the three results are combined into
    a complex tensor.
    """
    # conv(W, x, b) = conv(Wr, xr, br) - conv(Wi, xi, 0) + i(conv(Wi, xr, bi) + conv(Wr, xi, 0))
    # With
    #   a = conv(Wr, xr, br),
    #   b = conv(Wi, xi, 0),
    #   c = conv(Wr + Wi, xr + xi, br + bi)
    # this becomes conv(W, x, b) = a - b + i(c - a - b)
    grad_re, grad_im = torch.view_as_real(grad_output).unbind(-1)
    weight_re, weight_im = torch.view_as_real(weight).unbind(-1)

    def real_call(w, g):
        return fn(input_size, w, g, stride, padding, dilation, groups)

    a = real_call(weight_re, grad_re)
    b = real_call(weight_im, grad_im)
    c = real_call(weight_re + weight_im, grad_re + grad_im)

    real_part = a - b
    imag_part = c - a - b
    return real_part + 1j * imag_part
def conv_transpose_ref(input, weight, bias, stride=1, padding=0,
                       output_padding=0, dilation=1, groups=1,
                       fn=None):
    """Reference for the conv_transpose ops built on ``torch.nn.grad.conv*_input``.

    ``input``, ``weight`` and (optionally) ``bias`` are converted with
    ``torch.from_numpy``. ``fn`` must be one of
    ``torch.nn.functional.conv_transpose{1,2,3}d``; it selects the matching
    gradient function and is also run on meta tensors to get the output shape.
    Returns a tensor; the batch dimension added for unbatched input is removed.
    """
    # Derivative of `conv` is `conv_transpose`.
    # To verify the correctness of `conv_transpose`,
    # we rely `torch.nn.grad` implementation (which is tested in test_nn.py)
    # for floating dtypes.
    assert fn is not None

    functional = torch.nn.functional
    # fn -> (gradient-w.r.t.-input function, rank of a batched input)
    dispatch = {
        functional.conv_transpose1d: (torch.nn.grad.conv1d_input, 3),
        functional.conv_transpose2d: (torch.nn.grad.conv2d_input, 4),
        functional.conv_transpose3d: (torch.nn.grad.conv3d_input, 5),
    }

    # Input for `ref` is ndarray.
    input = torch.from_numpy(input)
    weight = torch.from_numpy(weight)

    grad_fn, batched_rank = dispatch[fn]
    is_batched = len(input.shape) == batched_rank
    if not is_batched:
        input = input.unsqueeze(0)

    if bias is not None:
        bias = torch.from_numpy(bias)
        # Insert one size-1 dim after dim 0 for every dim of `input` beyond the first two.
        trailing_dims = input.ndim - 2
        bias = bias[(slice(None),) + (None,) * trailing_dims]

    grad_output = input

    # Get the input shape for grad_fn.
    meta_out = fn(grad_output.to('meta'), weight.to('meta'), None,
                  stride=stride, padding=padding, output_padding=output_padding,
                  groups=groups, dilation=dilation)
    input_size = meta_out.shape

    conv_args = (input_size, weight, grad_output, stride, padding, dilation, groups)
    if weight.dtype.is_complex:
        out = complex_conv(grad_fn, *conv_args)
    else:  # Floating
        out = grad_fn(*conv_args)

    if bias is not None:
        out = out + bias

    if is_batched:
        return out
    return out.squeeze(0)
def sample_inputs_conv_transpose1d(op_info, device, dtype, requires_grad, **kwargs):
    """Yield a batched and an unbatched SampleInput for every conv_transpose1d case."""
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    def build_sample(input_shape, weight_shape, bias_shape, conv_kwargs):
        # Tensors are created in the order input, weight, bias.
        inp = make_arg(input_shape)
        weight = make_arg(weight_shape)
        bias = None if bias_shape is None else make_arg(bias_shape)
        return SampleInput(inp, args=(weight, bias), kwargs=conv_kwargs)

    # Ordered as shapes for input, weight, bias
    # and a dict of values of (stride, padding, output_padding, groups, dilation)
    cases = (
        ((1, 3, 4), (3, 3, 3), (3,),
         {'stride': (2,), 'padding': 2, 'output_padding': (1,), 'groups': 1}),
        ((2, 2, 4), (2, 2, 4), (4,),
         {'stride': (3,), 'padding': (1,), 'output_padding': (2,), 'groups': 2, 'dilation': (4,)}),
        ((1, 1, 4), (1, 1, 4), (1,),
         {'stride': 2, 'padding': 1, 'output_padding': 1, 'groups': 1, 'dilation': (2,)}),
        ((1, 1, 4), (1, 2, 3), None,
         {'stride': 2, 'padding': 1, 'output_padding': 1, 'groups': 1}),
        ((1, 4, 5), (4, 8, 3), None,
         {}),
    )

    for input_shape, weight_shape, bias_shape, conv_kwargs in cases:
        # Batched
        yield build_sample(input_shape, weight_shape, bias_shape, conv_kwargs)
        # Unbatched
        yield build_sample(input_shape[1:], weight_shape, bias_shape, conv_kwargs)
def sample_inputs_conv_transpose2d(op_info, device, dtype, requires_grad, **kwargs):
    """Yield a batched and an unbatched SampleInput for every conv_transpose2d case."""
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # Ordered as shapes for input, weight, bias
    # and a dict of values of (stride, padding, output_padding, groups, dilation)
    cases = (
        ((1, 3, 4, 4), (3, 3, 3, 3), (3,),
         {'stride': (2, 2), 'padding': 2, 'output_padding': (1, 1), 'groups': 1}),
        ((2, 2, 4, 4), (2, 2, 4, 5), (4,),
         {'stride': (3, 2), 'padding': (1, 2), 'output_padding': (2, 3), 'groups': 2, 'dilation': (4, 4)}),
        ((1, 1, 4, 5), (1, 1, 4, 3), (1,),
         {'stride': 2, 'padding': 1, 'output_padding': 1, 'groups': 1, 'dilation': (2, 3)}),
        ((1, 1, 4, 3), (1, 2, 3, 4), None,
         {'stride': 2, 'padding': 1, 'output_padding': 1, 'groups': 1}),
        ((2, 4, 4, 4), (4, 1, 3, 3), None, {'groups': 4}),
        ((1, 2, 5, 5), (2, 4, 3, 3), None, {}),
    )

    for input_shape, weight_shape, bias_shape, conv_kwargs in cases:
        # Batched shape first, then the unbatched one (leading dim dropped).
        for shape in (input_shape, input_shape[1:]):
            # Tensors are created in the order input, weight, bias.
            inp = make_arg(shape)
            weight = make_arg(weight_shape)
            bias = None if bias_shape is None else make_arg(bias_shape)
            yield SampleInput(inp, args=(weight, bias), kwargs=conv_kwargs)
def sample_inputs_conv_transpose3d(op_info, device, dtype, requires_grad, **kwargs):
    """Yield a batched and an unbatched SampleInput for every conv_transpose3d case."""
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    def build_sample(input_shape, weight_shape, bias_shape, conv_kwargs):
        # Tensors are created in the order input, weight, bias.
        inp = make_arg(input_shape)
        weight = make_arg(weight_shape)
        bias = None if bias_shape is None else make_arg(bias_shape)
        return SampleInput(inp, args=(weight, bias), kwargs=conv_kwargs)

    # Ordered as shapes for input, weight, bias
    # and a dict of values of (stride, padding, output_padding, groups, dilation)
    cases = (
        ((1, 3, 4, 4, 4), (3, 3, 3, 3, 3), (3,),
         {'stride': (2, 2, 2), 'padding': 2, 'output_padding': (1, 1, 1), 'groups': 1}),
        ((2, 2, 4, 4, 4), (2, 2, 4, 5, 6), (4,),
         {'stride': (3, 2, 1), 'padding': (1, 2, 3), 'output_padding': (2, 3, 1), 'groups': 2, 'dilation': (4, 4, 4)}),
        ((1, 1, 4, 5, 2), (1, 1, 4, 3, 1), (1,),
         {'stride': 2, 'padding': 1, 'output_padding': 1, 'groups': 1, 'dilation': (2, 3, 2)}),
        ((1, 1, 4, 3, 4), (1, 2, 3, 4, 5), None,
         {'stride': 2, 'padding': 1, 'output_padding': 1, 'groups': 1}),
        ((1, 4, 5, 5, 5), (4, 8, 3, 3, 3), None,
         {}),
    )

    for input_shape, weight_shape, bias_shape, conv_kwargs in cases:
        # Batched
        yield build_sample(input_shape, weight_shape, bias_shape, conv_kwargs)
        # Unbatched
        yield build_sample(input_shape[1:], weight_shape, bias_shape, conv_kwargs)
def sample_inputs_conv1d(op_info, device, dtype, requires_grad, **kwargs):
    """Yield a batched and an unbatched SampleInput for every conv1d case."""
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # Ordered as shapes for input, weight, bias,
    # and a dict of values of (stride, padding, dilation, groups)
    cases = (
        ((1, 3, 4), (3, 3, 3), (3,), {'stride': (2,), 'padding': 2, 'groups': 1}),
        ((2, 4, 8), (2, 2, 3), (2,), {'stride': 3, 'padding': 1, 'groups': 2, 'dilation': 2}),
        ((1, 4, 5), (1, 4, 3), None, {'stride': (2,), 'padding': 'valid'}),
        ((2, 2, 4), (2, 1, 4), (2,), {'stride': (1,), 'padding': 'same', 'groups': 2, 'dilation': (2,)}),
        # With defaults
        ((1, 4, 5), (3, 4, 3), None, {}),
    )

    for input_shape, weight_shape, bias_shape, conv_kwargs in cases:
        # Batched shape first, then the unbatched one (leading dim dropped).
        for shape in (input_shape, input_shape[1:]):
            # Tensors are created in the order input, weight, bias.
            inp = make_arg(shape)
            weight = make_arg(weight_shape)
            bias = None if bias_shape is None else make_arg(bias_shape)
            yield SampleInput(inp, args=(weight, bias), kwargs=conv_kwargs)
def error_inputs_conv1d(opinfo, device, **kwargs):
    """Yield ErrorInputs for conv1d: dtype mismatches, bad hyper-parameters and bad shapes."""
    f64 = partial(make_tensor, device=device, dtype=torch.float64)
    i64 = partial(make_tensor, device=device, dtype=torch.int64)
    c128 = partial(make_tensor, device=device, dtype=torch.complex128)

    def expect_failure(inp, weight, bias, regex, **conv_kwargs):
        # `kwargs` is passed to SampleInput only when a convolution keyword is given.
        sample_kwargs = {'args': (weight, bias)}
        if conv_kwargs:
            sample_kwargs['kwargs'] = conv_kwargs
        return ErrorInput(SampleInput(inp, **sample_kwargs), error_regex=regex)

    too_few_weights = "expected weight to be at least 3 at dimension 0"
    bad_groups = "non-positive groups is not supported"

    # int64 input and weight with a float64 bias
    yield expect_failure(i64((1, 1, 4)), i64((1, 1, 2)), f64((1,)), "should be the same")
    # float64 input and weight with a complex128 bias
    yield expect_failure(f64((1, 1, 4)), f64((1, 1, 2)), c128((1,)), "should be the same")

    # negative strides
    yield expect_failure(f64((1, 1, 4)), f64((1, 2, 2)), f64((1,)),
                         "non-positive stride is not supported", stride=(-1,))
    # negative padding
    yield expect_failure(f64((1, 1, 4)), f64((1, 2, 2)), f64((1,)),
                         "negative padding is not supported", padding=(-1,))
    # negative dilation
    yield expect_failure(f64((1, 1, 4)), f64((1, 1, 2)), f64((1,)),
                         "dilation should be greater than zero", dilation=(-1,))

    # FIXME: https://github.com/pytorch/pytorch/issues/85656
    # error inputs for bias shape not equal to the output channels
    # yield ErrorInput(SampleInput(make_arg((1, 1, 4)), args=(make_arg((1, 1, 3)), make_arg((2,)))),
    # error_regex="expected bias to be 1-dimensional with 1 elements")

    # input.ndim != weight.ndim
    yield expect_failure(f64((1, 1, 4)), f64((1, 2)), f64((1,)),
                         "weight should have at least three dimensions")

    # weight[0] is less than the number of groups
    yield expect_failure(f64((2, 2, 4)), f64((2, 2, 2)), f64((2,)),
                         too_few_weights, padding='same', groups=3)
    yield expect_failure(f64((2, 2, 4)), f64((2, 2, 2)), f64((2,)),
                         too_few_weights, groups=3)

    # invalid groups
    yield expect_failure(f64((2, 2, 4)), f64((2, 2, 2)), f64((2,)),
                         bad_groups, padding='same', groups=-1)
    yield expect_failure(f64((2, 2, 4)), f64((2, 2, 2)), f64((2,)),
                         bad_groups, padding='same', groups=0)
def error_inputs_conv2d(opinfo, device, **kwargs):
    """Yield ErrorInputs for conv2d: dtype mismatches, bad hyper-parameters and bad shapes."""
    f64 = partial(make_tensor, device=device, dtype=torch.float64)
    i64 = partial(make_tensor, device=device, dtype=torch.int64)
    c128 = partial(make_tensor, device=device, dtype=torch.complex128)

    def expect_failure(inp, weight, bias, regex, **conv_kwargs):
        # `kwargs` is passed to SampleInput only when a convolution keyword is given.
        sample_kwargs = {'args': (weight, bias)}
        if conv_kwargs:
            sample_kwargs['kwargs'] = conv_kwargs
        return ErrorInput(SampleInput(inp, **sample_kwargs), error_regex=regex)

    too_few_weights = "expected weight to be at least 3 at dimension 0"
    bad_groups = "non-positive groups is not supported"

    # int64 input and weight with a float64 bias
    yield expect_failure(i64((2, 4, 4)), i64((3, 2, 3, 3)), f64((3,)), "should be the same")
    # float64 input and weight with a complex128 bias
    yield expect_failure(f64((2, 4, 4)), f64((3, 2, 3, 3)), c128((3,)), "should be the same")

    # negative strides
    yield expect_failure(f64((1, 1, 4, 4)), f64((1, 2, 2, 3)), f64((1,)),
                         "non-positive stride is not supported", stride=(-1,))
    # negative padding
    yield expect_failure(f64((1, 1, 4, 3)), f64((1, 2, 2, 4)), f64((1,)),
                         "negative padding is not supported", padding=(-1,))
    # negative dilation
    yield expect_failure(f64((1, 1, 4, 2)), f64((1, 1, 2, 5)), f64((1,)),
                         "dilation should be greater than zero", dilation=(-1,))

    # FIXME: https://github.com/pytorch/pytorch/issues/85656
    # error inputs for bias shape not equal to the output channels
    # yield ErrorInput(SampleInput(make_arg((1, 1, 4, 4)), args=(make_arg((1, 1, 3, 2)), make_arg((2,)))),
    # error_regex="expected bias to be 1-dimensional with 1 elements")

    # input.ndim != weight.ndim
    yield expect_failure(f64((1, 1, 4, 3)), f64((1, 2, 2)), f64((1,)),
                         "Expected 3-dimensional input for 3-dimensional weight", padding='same')

    # weight[0] is less than the number of groups
    yield expect_failure(f64((2, 2, 4, 3)), f64((2, 2, 1, 3)), f64((2,)),
                         too_few_weights, groups=3)
    yield expect_failure(f64((2, 2, 4, 3)), f64((2, 2, 1, 3)), f64((2,)),
                         too_few_weights, padding='same', groups=3)

    # invalid groups
    yield expect_failure(f64((2, 2, 4, 5)), f64((2, 2, 1, 4)), f64((2,)),
                         bad_groups, padding='same', groups=-1)
    yield expect_failure(f64((2, 2, 4, 3)), f64((2, 2, 4, 3)), f64((2,)),
                         bad_groups, padding='same', groups=0)
def sample_inputs_conv2d(op_info, device, dtype, requires_grad, jit_fail_sample=False, **kwargs):
    """Yield a batched and an unbatched SampleInput for every conv2d case.

    ``jit_fail_sample`` is accepted but not used by this function.
    """
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    def build_sample(input_shape, weight_shape, bias_shape, conv_kwargs):
        # Tensors are created in the order input, weight, bias.
        inp = make_arg(input_shape)
        weight = make_arg(weight_shape)
        bias = None if bias_shape is None else make_arg(bias_shape)
        return SampleInput(inp, args=(weight, bias), kwargs=conv_kwargs)

    # Ordered as shapes for input, weight, bias
    # and a dict of values of (stride, padding, groups, dilation)
    cases = (
        ((1, 3, 4, 4), (3, 3, 3, 3), (3,),
         {'stride': (2, 2), 'padding': 2, 'groups': 1}),
        ((2, 4, 8, 8), (2, 2, 3, 3), (2,),
         {'stride': (3, 2), 'padding': (2, 1), 'groups': 2, 'dilation': (4, 4)}),
        ((1, 4, 5, 5), (1, 4, 2, 3), (1,),
         {'stride': 2, 'padding': 1, 'groups': 1, 'dilation': (2, 3)}),
        ((1, 4, 5, 5), (1, 4, 2, 3), (1,),
         {'stride': 2, 'padding': 1, 'groups': 1, 'dilation': (2, 3)}),
        ((1, 2, 4, 3), (4, 2, 3, 4), None,
         {'stride': 2, 'padding': 1, 'groups': 1}),
        ((1, 4, 5, 5), (1, 4, 2, 3), (1,),
         {'stride': 2, 'padding': "valid"}),
        ((1, 4, 5, 5), (1, 4, 2, 3), (1,),
         {'stride': 1, 'padding': "same", 'dilation': 3}),
        # Below are the group related samples from common_nn.py
        ((2, 4, 6, 6), (4, 1, 3, 3), (4,), {'groups': 4}),
        ((2, 4, 6, 6), (8, 1, 3, 3), (8,), {'groups': 4}),
        ((2, 4, 6, 6), (8, 1, 3, 3), None, {'groups': 4}),
        ((2, 4, 6, 6), (4, 1, 3, 3), (4,), {'groups': 4, 'stride': (3, 2)}),
        ((2, 4, 6, 6), (4, 1, 3, 3), (4,), {'groups': 4, 'padding': (1, 1)}),
        ((2, 4, 5, 5), (4, 1, 2, 2), (4,), {'groups': 4, 'dilation': (2, 2)}),
        ((2, 4, 6, 5), (6, 2, 3, 2), (6,), {'groups': 2}),
        # With defaults
        ((1, 4, 5, 5), (3, 4, 3, 3), None, {}),
    )

    for input_shape, weight_shape, bias_shape, conv_kwargs in cases:
        # Batched
        yield build_sample(input_shape, weight_shape, bias_shape, conv_kwargs)
        # Unbatched
        yield build_sample(input_shape[1:], weight_shape, bias_shape, conv_kwargs)
def sample_inputs_conv3d(opinfo, device, dtype, requires_grad, **kwargs):
    """Yield a batched and an unbatched SampleInput for every conv3d case."""
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # Ordered as shapes for input, weight, bias
    # and dict of values of (stride, padding, dilation, groups)
    cases = (
        ((1, 1, 4, 4, 4), (1, 1, 1, 1, 1), (1,), {'padding': 'same'}),
        ((1, 1, 4, 4, 4), (1, 1, 4, 4, 4), (1,), {'stride': (2, 2, 2)}),
        ((1, 1, 5, 5, 5), (1, 1, 3, 3, 3), (1,), {'dilation': 2}),
        ((1, 1, 1, 1, 10), (1, 1, 1, 1, 4), None, {'padding': 'valid'}),
        ((1, 1, 10, 11, 12), (1, 1, 1, 2, 5), None, {'padding': 'same'}),
        ((1, 1, 10, 11, 12), (1, 1, 1, 2, 5), None, {'padding': 'same', 'dilation': 2}),
        ((1, 1, 10, 11, 12), (1, 1, 4, 4, 4), None, {'padding': 'same', 'dilation': 3}),
        ((1, 1, 1, 1, 10), (1, 1, 1, 1, 4), None, {'padding': 'valid'}),
        ((3, 9, 3, 1, 9), (3, 3, 3, 1, 9), (3,), {'groups': 3}),
        ((3, 9, 3, 1, 9), (3, 3, 3, 1, 9), (3,), {'stride': (2, 2, 2), 'dilation': 1, 'groups': 3}),
    )

    for input_shape, weight_shape, bias_shape, conv_kwargs in cases:
        # Batched shape first, then the unbatched one (leading dim dropped).
        for shape in (input_shape, input_shape[1:]):
            # Tensors are created in the order input, weight, bias.
            inp = make_arg(shape)
            weight = make_arg(weight_shape)
            bias = None if bias_shape is None else make_arg(bias_shape)
            yield SampleInput(inp, args=(weight, bias), kwargs=conv_kwargs)
def error_inputs_conv3d(opinfo, device, **kwargs):
    """Yield ErrorInputs for conv3d: dtype mismatches, bad hyper-parameters and bad shapes."""
    f64 = partial(make_tensor, device=device, dtype=torch.float64)
    i64 = partial(make_tensor, device=device, dtype=torch.int64)
    c128 = partial(make_tensor, device=device, dtype=torch.complex128)

    def expect_failure(inp, weight, bias, regex, **conv_kwargs):
        # `kwargs` is passed to SampleInput only when a convolution keyword is given.
        sample_kwargs = {'args': (weight, bias)}
        if conv_kwargs:
            sample_kwargs['kwargs'] = conv_kwargs
        return ErrorInput(SampleInput(inp, **sample_kwargs), error_regex=regex)

    too_few_weights = "expected weight to be at least 3 at dimension 0"

    # int64 input and weight with a float64 bias
    yield expect_failure(i64((1, 1, 4, 4, 4)), i64((1, 1, 2, 2, 2)), f64((1,)), "should be the same")
    # float64 input and weight with a complex128 bias
    yield expect_failure(f64((1, 1, 4, 4, 4)), f64((1, 1, 2, 2, 2)), c128((1,)), "should be the same")

    # negative strides
    yield expect_failure(f64((1, 1, 4, 4, 4)), f64((1, 1, 2, 2, 2)), f64((1,)),
                         "non-positive stride is not supported", stride=(-1,))
    # negative padding
    yield expect_failure(f64((1, 1, 4, 4, 4)), f64((1, 1, 2, 2, 2)), f64((1,)),
                         "negative padding is not supported", padding=(-1,))
    # negative dilation
    yield expect_failure(f64((1, 1, 4, 4, 4)), f64((1, 1, 2, 2, 2)), f64((1,)),
                         "dilation should be greater than zero", dilation=(-1,))

    # FIXME: https://github.com/pytorch/pytorch/issues/85656
    # error inputs for bias shape not equal to the output channels
    # yield ErrorInput(SampleInput(make_arg((1, 1, 4, 4, 4)), args=(make_arg((1, 1, 3, 3, 3)), make_arg((2,)))),
    # error_regex="expected bias to be 1-dimensional with 1 elements")

    # input.ndim != weight.ndim
    yield expect_failure(f64((1, 1, 3, 4, 5)), f64((1, 1, 4, 3)), f64((1,)),
                         "Expected 4-dimensional input for 4-dimensional weight", padding='same')

    # weight[0] is less than the number of groups
    yield expect_failure(f64((2, 2, 3, 4, 5)), f64((2, 2, 4, 3, 3)), f64((2,)),
                         too_few_weights, groups=3)
    yield expect_failure(f64((2, 2, 3, 4, 5)), f64((2, 2, 4, 3, 3)), f64((2,)),
                         too_few_weights, padding='same', groups=3)

    # invalid groups
    yield expect_failure(f64((2, 2, 3, 4, 5)), f64((2, 2, 4, 3, 3)), f64((2,)),
                         "non-positive groups is not supported", padding='same', groups=0)

    # padding='same' not supported by strided convolutions
    yield expect_failure(f64((18, 27, 9, 1, 9)), f64((9, 9, 9, 1, 9)), f64((9,)),
                         "padding='same' is not supported for strided convolutions",
                         stride=2, padding='same', groups=3)
def sample_inputs_group_norm(opinfo, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for group_norm over all weight/bias ``None`` permutations.

    For every (input shape, num groups, eps kwargs) case, four samples are
    produced: (weight, bias), (weight, None), (None, bias) and (None, None).
    A final sample passes no optional arguments.
    """
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # Ordered as input shape, num groups, and kwargs for eps
    cases: tuple[tuple[tuple[int, ...], int, dict], ...] = (
        ((1, 6, 3), 2, {'eps' : 0.5}),
        ((2, 6, 3), 2, {'eps' : -0.5}),
        ((1, 3), 1, {'eps' : 1e-5}),
        ((0, 2), 1, {'eps' : 1e-5}),
        ((S, S, S), 1, {'eps' : 0.5}),
    )

    # num_channels is inferred to be input.shape[1] dimension
    for input_shape, num_groups, eps_kwargs in cases:
        # Shape of weight and bias should be the same as num_channels
        channels = input_shape[1] if len(input_shape) > 1 else 0
        weight_tensor = make_arg(channels)
        bias_tensor = make_arg(channels)

        # Checking for permutations of weights and biases as `None`
        weights = [weight_tensor, None]
        biases = [bias_tensor, None]
        for weight, bias in itertools.product(weights, biases):
            # Build a fresh dict per permutation. Re-binding the case's kwargs
            # dict (as `{'weight': ..., 'bias': ..., **kwargs}`) would let the
            # previous iteration's 'weight'/'bias' override the new ones, so
            # the `None` permutations would never be produced.
            sample_kwargs = {
                'weight': weight,
                'bias': bias,
                **eps_kwargs
            }
            yield SampleInput(make_arg(input_shape), num_groups, **sample_kwargs)

    # Without any optional args
    yield SampleInput(make_arg((1, 2)), args=(1,))
def reference_inputs_group_norm(op_info, device, dtype, requires_grad, **kwargs):
    """Yield the regular group_norm samples followed by larger reference cases.

    For every extra (input shape, num groups, eps kwargs) case, one input
    tensor is shared by four samples covering the (weight, bias),
    (weight, None), (None, bias) and (None, None) permutations.
    """
    yield from sample_inputs_group_norm(
        op_info, device, dtype, requires_grad, **kwargs)

    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # Ordered as input shape, num groups, and kwargs for eps
    cases: tuple[tuple[tuple[int, ...], int, dict], ...] = (
        ((20, 6, 10, 10), 3, {'eps' : 1e-5}),
        # equivalent with InstanceNorm
        # GroupNorm(C, num_groups=C) == InstanceNorm(num_features=C)
        ((20, 6, 10, 10), 6, {'eps' : 1e-5}),
        # equivalent with LayerNorm
        # GroupNorm(C, num_groups=1, affine=False) == LayerNorm(normalized_shape=[C, H, W], elementwise_affine=False)
        ((20, 6, 10, 10), 1, {'eps' : 1e-5}),
    )

    # num_channels is inferred to be input.shape[1] dimension
    for input_shape, num_groups, eps_kwargs in cases:
        # Shape of weight and bias should be the same as num_channels
        channels = input_shape[1] if len(input_shape) > 1 else 0
        input_tensor = make_arg(input_shape)
        weight_tensor = make_arg(channels)
        bias_tensor = make_arg(channels)

        # Checking for permutations of weights and biases as `None`
        weights = [weight_tensor, None]
        biases = [bias_tensor, None]
        for weight, bias in itertools.product(weights, biases):
            # Build a fresh dict per permutation. Re-binding the case's kwargs
            # dict (as `{'weight': ..., 'bias': ..., **kwargs}`) would let the
            # previous iteration's 'weight'/'bias' override the new ones, so
            # the `None` permutations would never be produced.
            sample_kwargs = {
                'weight': weight,
                'bias': bias,
                **eps_kwargs
            }
            yield SampleInput(input_tensor, num_groups, **sample_kwargs)
def sample_inputs_instance_norm(opinfo, device, dtype, requires_grad, **kwargs):
    """Yield sample inputs for instance_norm.

    Produces one fully-specified sample per case, then two samples built from
    the last case's shape (weight only, and neither weight nor bias), and
    finally a sample that passes no optional kwargs at all.
    """
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
    # Running statistics are always created with requires_grad=False and low=0.
    make_stat = partial(make_tensor, device=device, dtype=dtype, requires_grad=False, low=0)

    # Each entry: (input shape, kwargs for momentum / eps / use_input_stats)
    cases = (
        ((S, S, S), {'momentum': 0.5, 'eps': 0.6}),
        ((S, S, S), {'momentum': 0.5, 'eps': 0.6, 'use_input_stats': True}),
        ((3, 2, 4), {'momentum': -1.2}),
        ((3, 2, 4), {'momentum': 0.0}),
        ((3, 2, 3, 4), {'momentum': -1.0, 'eps': 0.5}),
        ((3, 2, 3, 4), {'momentum': -1.0, 'eps': 0.5}),
    )

    for case_shape, norm_kwargs in cases:
        # running mean, running var, weight and bias are all of shape (channels,)
        num_channels = case_shape[1]
        affine_weight = make_arg(num_channels)
        affine_bias = make_arg(num_channels)
        mean = make_stat(num_channels)
        var = make_stat(num_channels)
        full_kwargs = {
            'running_mean': mean,
            'running_var': var,
            'weight': affine_weight,
            'bias': affine_bias,
            **norm_kwargs
        }
        yield SampleInput(make_arg(case_shape), args=(), kwargs=full_kwargs)

    # The remaining samples reuse the shape of the final case above.
    input_shape = cases[-1][0]
    channels = input_shape[1]

    # instance_norm assumes that if there's a bias, there's a weight, so only
    # "weight without bias" and "neither" are generated here.
    for weight_channels in (channels, None):
        mean = make_stat(channels)
        var = make_stat(channels)
        input_tensor = make_arg(input_shape)
        weight = None if weight_channels is None else make_arg(weight_channels)
        yield SampleInput(
            input_tensor,
            args=(),
            kwargs={
                'running_mean': mean,
                'running_var': var,
                'weight': weight,
                'bias': None
            }
        )

    # Test case for no optional kwargs
    yield SampleInput(make_arg((1, 2, 3)), kwargs={})
def sample_inputs_safe_softmax(opinfo, device, dtype, requires_grad, **kwargs):
    """Yield sample inputs whose entries are partly (or fully) set to -inf.

    Every input is a random tensor plus an additive mask that is ``-inf``
    where a boolean "keep" mask is False and ``0.0`` where it is True.
    ``requires_grad`` is applied to the masked result, not to the raw values.
    """
    make_values = partial(make_tensor, device=device, dtype=dtype, requires_grad=False)

    def additive_mask(keep):
        # False -> -inf (masked out), True -> 0.0 (left untouched)
        return torch.where(~keep, float('-inf'), 0.0)

    def build_sample(values, keep, dim):
        masked = values + additive_mask(keep)
        return SampleInput(masked.requires_grad_(requires_grad), kwargs={'dim': dim})

    def random_mask_sample(mask_shape, dim):
        # The random mask is drawn before the values are generated.
        keep = torch.randint(0, 2, mask_shape, device=device, dtype=torch.bool)
        return build_sample(make_values(mask_shape), keep, dim)

    def constant_mask(value, mask_shape):
        fill = torch.ones if value else torch.zeros
        return fill(mask_shape, dtype=torch.bool, device=device)

    def two_rows_masked(rows, cols):
        keep = constant_mask(True, (rows, cols))
        keep[rows - 1] = False
        keep[rows - 3] = False
        return keep

    # All samples are materialised up front and then handed out in order.
    all_samples = []
    # Basic 3D tensor with mask
    all_samples.append(random_mask_sample((2, 3, 4), dim=1))
    # 2D tensor with mask, testing different dim
    all_samples.append(random_mask_sample((5, 5), dim=0))
    # 4D tensor, testing with a different dim
    all_samples.append(random_mask_sample((2, 3, 4, 5), dim=2))
    # Edge case: 1D tensor
    all_samples.append(random_mask_sample((10,), dim=0))
    # Edge case: tensor with one dimension of size 1
    all_samples.append(random_mask_sample((1, 5, 5), dim=1))
    # Testing with all elements masked
    all_samples.append(build_sample(make_values((3, 3)), constant_mask(False, (3, 3)), 1))
    # Testing with no elements masked
    all_samples.append(build_sample(make_values((3, 3)), constant_mask(True, (3, 3)), 1))
    # Testing with two rows masked
    all_samples.append(build_sample(make_values((6, 3)), two_rows_masked(6, 3), 1))

    yield from all_samples
def sample_inputs_layer_norm(opinfo, device, dtype, requires_grad, **kwargs):
    """Yield sample inputs for layer_norm.

    Each case passes ``(normalized_shape, weight, bias)`` positionally and an
    optional ``eps`` as a keyword; a final sample passes only
    ``normalized_shape``.
    """
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # Each entry: (input shape, normalized_shape, kwarg dict for eps)
    cases = (
        ((1, 2, 3), (1, 2, 3), {'eps': 0.5}),
        ((2, 2, 3), (2, 3), {'eps': -0.5}),
        ((1,), (1,), {}),
        ((1, 2), (2,), {}),
        ((0, 1), (1,), {}),
    )

    for input_shape, normalized_shape, eps_kwargs in cases:
        # weight and bias (created in that order) share normalized_shape
        affine = (make_arg(normalized_shape), make_arg(normalized_shape))
        yield SampleInput(
            make_arg(input_shape),
            args=(normalized_shape, *affine),
            kwargs=eps_kwargs
        )

    # Without any optional args
    yield SampleInput(make_arg((1, 2)), args=((2,),))

    # TODO: @krshrimali, once to_numpy method in SampleInput class is modified to take None inputs,
    # enable these inputs; see https://github.com/pytorch/pytorch/pull/63276#discussion_r691950400

    # With weight and a `None` bias
    # yield SampleInput(make_arg((1, 2)), args=((2,), make_arg((2,)), None))

    # With a `None` weight and a bias (tests failing for this, see the link above)
    # yield SampleInput(make_arg((1, 2)), args=((2,), None, make_arg((2,))))
def sample_inputs_native_layer_norm(opinfo, device, dtype, requires_grad, **kwargs):
    """Yield sample inputs for native_layer_norm.

    For every case, four samples are produced covering each present/``None``
    combination of weight and bias; all arguments are passed positionally as
    ``(normalized_shape, weight, bias, eps)``.
    """
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # Each entry: (input shape, normalized_shape, eps)
    cases = (
        ((1, 2, 3), (1, 2, 3), 0.5),
        ((2, 2, 3), (2, 3), -0.5),
        ((1,), (1,), 1e-5),
        ((1, 2), (2,), 1e-5),
        ((0, 1), (1,), 1e-5),
    )

    for input_shape, normalized_shape, eps in cases:
        # weight and bias have the same shape as normalized_shape and are
        # shared by the four samples of this case
        weight = make_arg(normalized_shape)
        bias = make_arg(normalized_shape)
        affine_combinations = (
            (weight, bias),
            (None, bias),
            (weight, None),
            (None, None),
        )
        for w, b in affine_combinations:
            # a fresh input tensor is generated for every sample
            yield SampleInput(make_arg(input_shape), args=(normalized_shape, w, b, eps))
def sample_inputs_rms_norm(opinfo, device, dtype, requires_grad, **kwargs):
    """Yield sample inputs for rms_norm.

    Tensors are generated with ``high=1000``. Each case passes
    ``(normalized_shape, weight)`` positionally plus an optional ``eps``
    keyword; a final sample passes only ``normalized_shape``.
    """
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad, high=1000)

    # Each entry: (input shape, normalized_shape, kwarg dict for eps)
    cases = (
        ((1, 2, 3), (1, 2, 3), {'eps': 0.5}),
        ((2, 2, 3), (2, 3), {'eps': -0.5}),
        ((1,), (1,), {}),
        ((1, 2), (2,), {}),
        ((0, 1), (1,), {}),
    )

    for input_shape, normalized_shape, eps_kwargs in cases:
        # weight has the same shape as normalized_shape and is created first
        scale = make_arg(normalized_shape)
        sample_input = make_arg(input_shape)
        yield SampleInput(sample_input, args=(normalized_shape, scale), kwargs=eps_kwargs)

    # Without any optional args
    yield SampleInput(make_arg((1, 2)), args=((2,),))
def error_inputs_group_norm(opinfo, device, **kwargs):
    """Yield float32 inputs paired with the ``error_regex`` given for each."""
    make_arg = partial(make_tensor, device=device, dtype=torch.float32, requires_grad=False)

    # check that input has minimum number of dimensions
    yield ErrorInput(
        SampleInput(make_arg(1), args=(1,)),
        error_regex="Expected at least 2 dimensions for input tensor but received",
    )

    # check that the channels dimension is compatible with number of groups
    # (7 channels, 2 groups)
    yield ErrorInput(
        SampleInput(make_arg((2, 7, 4)), args=(2,)),
        error_regex="Expected number of channels in input to be divisible by num_groups, but got input of shape",
    )
def error_inputs_native_layer_norm(opinfo, device, **kwargs):
    """Yield float32 inputs paired with the ``error_regex`` given for each.

    Arguments are passed positionally as ``(normalized_shape, weight, bias, eps)``.
    """
    make_arg = partial(make_tensor, device=device, dtype=torch.float32, requires_grad=False)
    eps = 1e-5
    input_shape = (1, 2, 3)

    # empty normalized_shape
    yield ErrorInput(
        SampleInput(make_arg(input_shape), args=((), None, None, eps)),
        error_regex="Expected normalized_shape to be at least 1-dimensional",
    )

    normalized_shape = (1, 2, 3)

    # weight whose shape differs from normalized_shape
    mismatched_weight = make_arg((1, 2))
    yield ErrorInput(
        SampleInput(make_arg(input_shape), args=(normalized_shape, mismatched_weight, None, eps)),
        error_regex="Expected weight to be of same shape as normalized_shape",
    )

    # bias whose shape differs from normalized_shape
    mismatched_bias = make_arg((1, 2))
    yield ErrorInput(
        SampleInput(make_arg(input_shape), args=(normalized_shape, None, mismatched_bias, eps)),
        error_regex="Expected bias to be of same shape as normalized_shape",
    )

    # normalized_shape (2, 2) against an input of shape (2, 2, 3)
    yield ErrorInput(
        SampleInput(make_arg((2, 2, 3)), args=((2, 2), None, None, eps)),
        error_regex="Given normalized_shape=",
    )
def error_inputs_rms_norm(opinfo, device, **kwargs):
    """Yield float32 inputs paired with the ``error_regex`` given for each.

    Arguments are passed positionally as ``(normalized_shape, weight, eps)``.
    """
    make_arg = partial(make_tensor, device=device, dtype=torch.float32, requires_grad=False)
    eps = 1e-5
    input_shape = (1, 2, 3)

    # empty normalized_shape
    yield ErrorInput(
        SampleInput(make_arg(input_shape), args=((), None, eps)),
        error_regex="Expected normalized_shape to be at least 1-dimensional",
    )

    # weight whose shape differs from normalized_shape
    normalized_shape = (1, 2, 3)
    mismatched_weight = make_arg((1, 2))
    yield ErrorInput(
        SampleInput(make_arg(input_shape), args=(normalized_shape, mismatched_weight, eps)),
        error_regex="Expected weight to be of same shape as normalized_shape",
    )

    # normalized_shape (2, 2) against an input of shape (2, 2, 3)
    yield ErrorInput(
        SampleInput(make_arg((2, 2, 3)), args=((2, 2), None, eps)),
        error_regex="Given normalized_shape=",
    )
def sample_inputs_local_response_norm(opinfo, device, dtype, requires_grad, **kwargs):
    """Yield sample inputs for local_response_norm.

    ``size`` is passed positionally; ``alpha``, ``beta`` and ``k`` are passed
    as keywords in every subset for the first shape, followed by a few extra
    shapes with all three set.
    """
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
    all_params = {'alpha': 3e-05, 'beta': 0.5, 'k': 1.25}

    # Each entry: (input shape, size, kwarg dict for alpha, beta, and k)
    cases = (
        ((1, 6, 3), 2, dict(all_params)),
        ((1, 6, 3), 2, {'beta': 0.5, 'k': 1.25}),
        ((1, 6, 3), 2, {'alpha': 3e-05, 'k': 1.25}),
        ((1, 6, 3), 2, {'alpha': 3e-05, 'beta': 0.5}),
        ((1, 6, 3), 2, {'alpha': 3e-05}),
        ((1, 6, 3), 2, {'beta': 0.5}),
        ((1, 6, 3), 2, {'k': 1.25}),
        ((1, 6, 3), 2, {}),
        ((2, 6, 3), 2, dict(all_params)),
        ((1, 1, 2), 1, dict(all_params)),
        ((0, 1, 2), 1, dict(all_params)),
    )

    for input_shape, size, norm_kwargs in cases:
        yield SampleInput(make_arg(input_shape), args=(size,), kwargs=norm_kwargs)
def sample_inputs_hardswish(self, device, dtype, requires_grad, **kwargs):
    """Return a generator of four (10, 10) samples with values drawn from [-5, 5)."""
    N = 5
    side = N * 2
    # make sure we are testing -3 -> 3 range. default is -10 -> 10 so maybe unnecessary ?
    make_arg = partial(
        make_tensor,
        device=device,
        dtype=dtype,
        requires_grad=requires_grad,
        low=-5,
        high=5,
    )
    return (SampleInput(make_arg((side, side))) for _ in range(N - 1))
def sample_inputs_linear(self, device, dtype, requires_grad, **kwargs):
    """Yield ``(input, weight[, bias])`` samples for linear.

    Covers every combination of bias/no-bias, two feature sizes and four batch
    shapes (including no batch and an empty batch), followed by two samples
    with a 5D input.
    """
    create_tensor = partial(make_tensor, device=device, dtype=dtype,
                            requires_grad=requires_grad, low=-2, high=2)

    feature_pairs = [[3, 4], [8, 8]]
    batch_shapes = [
        [],  # no batch
        [0],
        [8],
        [2, 3],
    ]

    combos = itertools.product([True, False], feature_pairs, batch_shapes)
    for with_bias, (in_feat, out_feat), batch_shape in combos:
        x = create_tensor(batch_shape + [in_feat])
        w = create_tensor([out_feat, in_feat])
        if with_bias:
            yield SampleInput(x, w, create_tensor([out_feat]))
        else:
            yield SampleInput(x, w)

    # 5D tensor, used to crash on MPS, see https://github.com/pytorch/pytorch/issues/114942
    shape_5d = (2, 1, 2, 1, 2)
    yield SampleInput(create_tensor(shape_5d), create_tensor((4, 2)))
    yield SampleInput(create_tensor(shape_5d), create_tensor((4, 2)), create_tensor((4,)))
def sample_inputs_bilinear(self, device, dtype, requires_grad, **kwargs):
    """Yield ``(input1, input2, weight[, bias])`` samples for bilinear.

    Covers every combination of bias/no-bias, two feature-size triples and
    four batch shapes (including no batch and an empty batch).
    """
    create_tensor = partial(make_tensor, device=device, dtype=dtype,
                            requires_grad=requires_grad, low=-2, high=2)

    # (in_features1, in_features2, out_features)
    feature_triples = [[3, 4, 5], [8, 8, 8]]
    batch_shapes = [
        [],  # no batch
        [0],
        [8],
        [2, 3],
    ]

    combos = itertools.product([True, False], feature_triples, batch_shapes)
    for with_bias, (in_feat1, in_feat2, out_feat), batch_shape in combos:
        left = create_tensor(batch_shape + [in_feat1])
        right = create_tensor(batch_shape + [in_feat2])
        w = create_tensor([out_feat, in_feat1, in_feat2])
        if with_bias:
            yield SampleInput(left, right, w, create_tensor([out_feat]))
        else:
            yield SampleInput(left, right, w)
def sample_inputs_glu(self, device, dtype, requires_grad, **kwargs):
    """Yield ``(input, dim)`` samples for glu.

    For every combination of feature shape and batch shape, one sample is
    produced for each dimension whose size is non-zero and even. The size is
    checked on the shape before any tensor is created, so no tensor is
    generated for a dimension that would be rejected.
    """
    features_options = [[2], [2, 4], [8, 8], [3, 6, 8], [1, 4, 6, 7]]
    batch_options: list[list[int]] = [
        [],  # no batch
        [0],
        [8],
        [2, 3],
    ]
    create_tensor = partial(make_tensor, device=device, dtype=dtype,
                            requires_grad=requires_grad, low=-2, high=2)

    for features, batch_shape in itertools.product(features_options, batch_options):
        full_shape = batch_shape + features
        for dim, dim_size in enumerate(full_shape):
            if dim_size > 0 and dim_size % 2 == 0:
                # a fresh tensor per yielded sample
                yield SampleInput(create_tensor(full_shape), dim)
def sample_inputs_interpolate(mode, self, device, dtype, requires_grad, **kwargs):
    """Yield sample inputs for interpolate with the given ``mode``.

    ``mode`` is forwarded to every sample as the ``mode`` keyword and selects
    which spatial ranks and which ``align_corners`` values are generated.
    Samples either pass an explicit output size positionally (with
    ``scale_factor=None``) or pass ``size=None`` with a ``scale_factor``.
    ``self`` and ``**kwargs`` are not used by this function.
    """
    # Batch and channel sizes used by shape() below
    N, C = 2, 3
    # Spatial size of the generated inputs
    D = 4
    # Two requested output sizes: one smaller and one larger than D
    S = 3
    L = 5

    # align_corners is only varied for the interpolating modes listed here
    align_corners_options: tuple[Any, ...] = (None,)
    if mode in ('linear', 'bilinear', 'bicubic', 'trilinear'):
        align_corners_options = (True, False, None)
    # Number of spatial dimensions generated for each mode
    ranks_for_mode = {
        'nearest': [1, 2, 3],
        'nearest-exact': [1, 2, 3],
        'linear': [1],
        'bilinear': [2],
        'bicubic': [2],
        'trilinear': [3],
        'area': [1, 2, 3]
    }

    def shape(size, rank, with_batch_channel=True):
        # `rank` copies of `size`, optionally prefixed with (N, C)
        if with_batch_channel:
            return tuple([N, C] + ([size] * rank))
        return tuple([size] * rank)

    if mode in ('bilinear', 'bicubic') and dtype == torch.uint8:
        make_arg = partial(
            make_tensor,
            device=device,
            dtype=dtype,
            requires_grad=requires_grad,
            # we pick more realistic upper bound 256 instead of default 10 for uint8 dtype
            high=256 if dtype == torch.uint8 else None,
        )
        # provide a few samples closer to typical image processing usage
        rank = 2
        for memory_format in [torch.contiguous_format, torch.channels_last]:
            yield SampleInput(
                make_arg(shape(270, rank), memory_format=memory_format),
                shape(130, rank, False),
                scale_factor=None,
                mode=mode,
                align_corners=False,
            )

    # From here on tensors use make_tensor's default value range
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    for align_corners in align_corners_options:
        for rank in ranks_for_mode[mode]:
            # explicit output size smaller than the input
            yield SampleInput(
                make_arg(shape(D, rank)),
                shape(S, rank, False),
                scale_factor=None,
                mode=mode,
                align_corners=align_corners,
            )
            # explicit output size larger than the input
            yield SampleInput(
                make_arg(shape(D, rank)),
                shape(L, rank, False),
                scale_factor=None,
                mode=mode,
                align_corners=align_corners,
            )
        # NOTE(review): as laid out here, the scale_factor samples sit outside the
        # rank loop and therefore reuse its last `rank` value -- confirm this
        # nesting is what was intended.
        for recompute_scale_factor in [False, True]:
            for scale_factor in [1.7, 0.6]:
                yield SampleInput(
                    make_arg(shape(D, rank)),
                    size=None,
                    scale_factor=scale_factor,
                    mode=mode,
                    align_corners=align_corners,
                    recompute_scale_factor=recompute_scale_factor,
                )
def reference_inputs_interpolate(mode, self, device, dtype, requires_grad, **kwargs):
    """Yield reference inputs for interpolate with the given ``mode``.

    Re-yields everything from ``sample_inputs_interpolate``; for the
    'bilinear' and 'bicubic' modes it additionally yields larger
    (2, 3, 345, 456) inputs resized to (270, 270) for both memory formats,
    with ``antialias`` on and off.
    """
    yield from sample_inputs_interpolate(mode, self, device, dtype, requires_grad, **kwargs)

    if mode not in ('bilinear', 'bicubic'):
        return

    # we pick more realistic upper bound 256 instead of default 10 for uint8 dtype
    upper_bound = 256 if dtype == torch.uint8 else None
    make_arg = partial(
        make_tensor,
        device=device,
        dtype=dtype,
        requires_grad=requires_grad,
        high=upper_bound,
    )

    # provide few samples for more typical image processing usage
    memory_formats = (torch.contiguous_format, torch.channels_last)
    for memory_format, antialias in product(memory_formats, (True, False)):
        image = make_arg((2, 3, 345, 456), memory_format=memory_format)
        yield SampleInput(
            image,
            (270, 270),
            scale_factor=None,
            mode=mode,
            align_corners=False,
            antialias=antialias,
        )
def sample_inputs_upsample(mode, self, device, dtype, requires_grad, **kwargs):
    """Yield sample inputs for upsample for the 'nearest' or 'bilinear' mode.

    For every spatial rank of the mode, four samples are produced: two with an
    explicit ``size`` (3 and 5 per spatial dim) and two with a ``scale_factor``
    (1.7 and 0.6). Inputs have shape (2, 3, 4, ...).
    """
    batch, channels = 2, 3
    in_size = 4
    small_out, large_out = 3, 5
    ranks_for_mode = {
        'nearest': [1, 2, 3],
        'bilinear': [2],
    }

    def spatial(size, rank):
        return torch.Size([size] * rank)

    def full(size, rank):
        return torch.Size([batch, channels] + ([size] * rank))

    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    for rank in ranks_for_mode[mode]:
        variants = (
            {'size': spatial(small_out, rank)},
            {'size': spatial(large_out, rank)},
            {'scale_factor': 1.7},
            {'scale_factor': 0.6},
        )
        for variant in variants:
            # a fresh input tensor for every sample
            yield SampleInput(make_arg(full(in_size, rank)), **variant)
def reference_inputs_upsample(mode, self, device, dtype, requires_grad, **kwargs):
    """Yield reference inputs for upsample with the given ``mode``.

    Re-yields everything from ``sample_inputs_upsample``; for 'bilinear' it
    additionally yields a (2, 3, 345, 456) input resized to (270, 270) in
    both contiguous and channels-last memory formats.
    """
    yield from sample_inputs_upsample(mode, self, device, dtype, requires_grad, **kwargs)

    if mode not in ('bilinear', ):
        return

    # we pick more realistic upper bound 256 instead of default 10 for uint8 dtype
    upper_bound = 256 if dtype == torch.uint8 else None
    make_arg = partial(
        make_tensor,
        device=device,
        dtype=dtype,
        requires_grad=requires_grad,
        high=upper_bound,
    )

    # provide a single sample per memory format for more typical image processing usage
    for memory_format in (torch.contiguous_format, torch.channels_last):
        image = make_arg((2, 3, 345, 456), memory_format=memory_format)
        yield SampleInput(image, (270, 270))
def sample_inputs_upsample_aa(mode, self, device, dtype, requires_grad, **kwargs):
    """Yield seven keyword-argument variants over one shared (6, 3, 10, 20) input.

    The first four variants use ``output_size``/``scale_factors``; the last
    three use ``output_size`` with ``scales_h``/``scales_w``. ``mode`` and
    ``self`` are not used by this function.
    """
    small = torch.Size([3, 3])
    large = torch.Size([5, 5])
    # The same tensor object is passed to every sample.
    image = make_tensor(torch.Size([6, 3, 10, 20]), device=device, dtype=dtype, requires_grad=requires_grad)

    variants = (
        dict(output_size=small, align_corners=False, scale_factors=None),
        dict(output_size=large, align_corners=False, scale_factors=None),
        dict(output_size=None, align_corners=False, scale_factors=[1.7, 0.9]),
        dict(output_size=None, align_corners=True, scale_factors=[0.8, 1.0]),
        dict(output_size=small, align_corners=False, scales_h=None, scales_w=None),
        dict(output_size=small, align_corners=False, scales_h=1.7, scales_w=0.9),
        dict(output_size=small, align_corners=True, scales_h=1.7, scales_w=0.9),
    )
    for variant in variants:
        yield SampleInput(image, **variant)
def sample_inputs_gelu(self, device, dtype, requires_grad, **kwargs):
    """Yield eight (10, 10) samples in [-3, 3), alternating ``approximate`` 'none'/'tanh'."""
    N = 5
    make_arg = partial(make_tensor, device=device, dtype=dtype,
                       requires_grad=requires_grad, low=-3, high=3)
    shape = (N * 2, N * 2)
    for _, approximate in product(range(1, N), ['none', 'tanh']):
        yield SampleInput(make_arg(shape), approximate=approximate)
def error_inputs_gelu(op, device, **kwargs):
    """Yield a scalar float input with an unknown ``approximate`` value."""
    # Tests that gelu errors out when passed an approximation we don't know.
    scalar = make_tensor((), dtype=torch.float, device=device)
    bad_sample = SampleInput(scalar, kwargs={"approximate": "asdf"})
    yield ErrorInput(bad_sample, error_regex="approximate argument must be either")
def sample_inputs_max_min_reduction_with_dim(op_info, device, dtype, requires_grad, **kwargs):
    """Yield samples that pass ``dim`` (and optionally ``keepdim``) positionally.

    Covers a 3D input and a 0-dim input; tensors are generated with
    ``low=None, high=None``.
    """
    make_arg = partial(make_tensor, dtype=dtype, device=device,
                       low=None, high=None,
                       requires_grad=requires_grad)

    # Each entry: (input shape, positional args after the input)
    shapes_and_args = (
        ((S, S, S), (1,)),
        ((S, S, S), (1, True)),
        ((), (0,)),
        ((), (0, True)),
    )
    for shape, extra_args in shapes_and_args:
        yield SampleInput(make_arg(shape), *extra_args)
def sample_inputs_max_min_reduction_no_dim(op_info, device, dtype, requires_grad, **kwargs):
    """Yield a 3D input and a 0-dim input with no extra arguments."""
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad, low=None, high=None)
    for shape in ((S, S, S), ()):
        yield SampleInput(make_arg(shape))
def _generate_nan_reduction_inputs(device, dtype, requires_grad, **kwargs):
    """Yield the generic reduction inputs, plus NaN-containing tensors for float/complex dtypes.

    ``**kwargs`` is accepted but not forwarded.
    """
    yield from _generate_reduction_inputs(device, dtype, requires_grad)

    # NaN only exists for floating point numbers
    if not (dtype.is_complex or dtype.is_floating_point):
        return

    nan_data = (
        [2, torch.nan, -1],
        [[torch.nan, 2], [0, 1]],
    )
    for data in nan_data:
        yield torch.tensor(data, device=device, dtype=dtype, requires_grad=requires_grad)
def sample_inputs_nan_reduction(supports_multiple_dims):
    """Return a sample-inputs function for reduction ops over NaN-containing inputs.

    The returned function yields, for each generated tensor, one sample with
    no kwargs and one sample per dim/keepdim kwargs combination. If a
    reduction op needs to test additional args/kwargs, create a separate
    sample_inputs function instead.
    """
    def fn(op_info, device, dtype, requires_grad, **kwargs):
        for base in _generate_nan_reduction_inputs(device, dtype, requires_grad):
            def fresh_input():
                # every sample gets its own clone of the base tensor
                return base.clone().requires_grad_(requires_grad)

            # Case without dim and keepdim kwargs
            yield SampleInput(fresh_input())
            for reduction_kwargs in _generate_reduction_kwargs(base.ndim, supports_multiple_dims):
                yield SampleInput(fresh_input(), **reduction_kwargs)

    return fn
def sample_inputs_reduction_quantile(op_info, device, dtype, requires_grad, **kwargs):
    """Yield sample inputs that pass the quantile ``q`` positionally.

    ``q`` is either the scalar 0.5 or a 2-element tensor in [0, 1). For each
    generated input there is one sample without kwargs and, for every
    dim/keepdim kwargs combination, one sample per interpolation mode.
    """
    tensor_q = make_tensor((2,), dtype=dtype, device=device, low=0, high=1, requires_grad=requires_grad)
    interpolation_modes = ('linear', 'midpoint')

    for q in (0.5, tensor_q):
        for base in _generate_reduction_inputs(device, dtype, requires_grad):
            # Case without dim and keepdim kwargs
            yield SampleInput(base.clone().requires_grad_(requires_grad), q)

            for reduction_kwargs in _generate_reduction_kwargs(base.ndim, supports_multiple_dims=False):
                # Interpolation kwarg for now is only supported when providing both dim and keepdim
                reduction_kwargs.setdefault('dim', 0)
                reduction_kwargs.setdefault('keepdim', False)
                for mode in interpolation_modes:
                    reduction_kwargs['interpolation'] = mode
                    sample_tensor = base.clone().requires_grad_(requires_grad)
                    yield SampleInput(sample_tensor, q, **reduction_kwargs)
def sample_inputs_reduction_count_nonzero(*args, **kwargs):
    """Sample inputs for count_nonzero: the generic reduction samples with 'keepdim' removed."""
    for sample in sample_inputs_reduction(*args, **kwargs):
        # count_nonzero does not support keepdim yet
        if 'keepdim' in sample.kwargs:
            del sample.kwargs['keepdim']
        yield sample
def sample_inputs_leaky_relu(op_info, device, dtype, requires_grad, **kwargs):
    """Return a generator of nine (10, 10) samples with no extra arguments."""
    N = 10
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
    shape = (N, N)
    return (SampleInput(make_arg(shape)) for _ in range(N - 1))
def sample_inputs_fractional_max_pool2d(op_info, device, dtype, requires_grad, **kwargs):
    """Yield sample inputs for fractional_max_pool2d.

    For every (input shape, kernel size) case and both ``return_indices``
    values, three samples are produced: an int ``output_size``, a tuple
    ``output_size`` and an ``output_ratio``. A final sample additionally
    passes explicit ``_random_samples``.
    """
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # Each entry: (input_shape, kernel_size)
    cases = (
        ((1, 3, 9, 9), 3),
        ((1, 3, 9, 9), (4, 4)),
        ((1, 3, 9, 9), (6, 6)),
        ((2, 3, 9, 9), (3, 3)),
        ((1, 1, 4, 4), (2, 2)),
        ((1, 2, 6, 6), (4, 4)),
    )
    output_specs = (
        {'output_size': 2},            # a single output size
        {'output_size': (2, 3)},       # a tuple output size
        {'output_ratio': (0.5, 0.5)},  # an output ratio
    )

    for input_shape, kernel_size in cases:
        for return_indices in (False, True):
            for output_spec in output_specs:
                yield SampleInput(
                    make_arg(input_shape),
                    kernel_size,
                    **output_spec,
                    return_indices=return_indices,
                )

    # Sample with caller-supplied random samples (never requiring grad)
    pooled_input = make_arg((1, 1, 16, 16))
    random_samples = make_tensor((1, 1, 2), device=device, dtype=dtype, requires_grad=False)
    yield SampleInput(
        pooled_input,
        (1, 1),
        output_ratio=(0.5, 0.5),
        return_indices=True,
        _random_samples=random_samples,
    )
def sample_inputs_fractional_max_pool3d(op_info, device, dtype, requires_grad, **kwargs):
    """Yield sample inputs for fractional_max_pool3d.

    For every (input shape, kernel size) case and both ``return_indices``
    values, three samples are produced: an int ``output_size``, a tuple
    ``output_size`` and an ``output_ratio``.
    """
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # Each entry: (input_shape, kernel_size)
    cases = (
        ((2, 3, 5, 5, 5), (2, 2, 2)),
        ((1, 2, 6, 5, 4), 2),
        ((1, 2, 5, 6, 5), (2, 3, 2)),
        ((1, 2, 6, 6, 6), (2, 3, 2)),
        ((1, 1, 7, 6, 7), (2, 3, 4)),
        ((1, 1, 4, 5, 4), (2, 2, 1)),
        ((1, 1, 8, 7, 6), (4, 3, 2)),
        ((0, 1, 4, 5, 4), (2, 2, 1)),
    )
    output_specs = (
        {'output_size': 2},                 # a single output size
        {'output_size': (2, 3, 2)},         # a tuple output size
        {'output_ratio': (0.5, 0.5, 0.5)},  # an output ratio
    )

    for input_shape, kernel_size in cases:
        for return_indices in (False, True):
            for output_spec in output_specs:
                yield SampleInput(
                    make_arg(input_shape),
                    kernel_size,
                    **output_spec,
                    return_indices=return_indices,
                )
def sample_inputs_avgpool2d(op_info, device, dtype, requires_grad, **kwargs):
    """Yield sample inputs for avg_pool2d.

    Each case passes all of ``kernel_size, stride, padding, ceil_mode,
    count_include_pad, divisor_override`` positionally. The final sample
    passes only ``kernel_size``.
    """
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # Order: input_shape, kernel_size, stride, padding, ceil_mode, count_include_pad, divisor_override
    cases = (((1, 3, 9, 9), 3, 1, 1, True, False, 2),
             ((1, 3, 9, 9), (4, 4), (2, 3), 1, True, False, 2),
             ((1, 3, 9, 9), (6, 6), (3, 3), (2, 3), True, True, 2),
             ((2, 3, 9, 9), (3, 3), (1, 1), (1, ), True, False, 2),
             ((1, 1, 4, 4), (2, 2), (), (0, ), False, True, -2),
             ((1, 2, 6, 6), (4, 4), (2, 2), (2, ), True, True, None))

    for input_shape, kernel_size, stride, padding, ceil_mode, count_include_pad, divisor_override in cases:
        yield SampleInput(make_arg(input_shape),
                          args=(kernel_size, stride, padding, ceil_mode, count_include_pad, divisor_override))

    # Case with just input_shape and kernel_size.
    # The trailing comma matters: `((3, 3))` is just the tuple `(3, 3)`, which
    # would be unpacked as kernel_size=3, stride=3 instead of kernel_size=(3, 3).
    yield SampleInput(make_arg((1, 3, 9, 9)), args=((3, 3),))
def sample_inputs_avgpool1d(op_info, device, dtype, requires_grad, **kwargs):
    """Yield sample inputs for avg_pool1d.

    ``kernel_size`` is passed positionally; ``stride``, ``padding``,
    ``ceil_mode`` and ``count_include_pad`` are passed as keywords when given.
    """
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # Each entry: (input_shape, kernel_size, kwargs)
    cases: list[tuple[tuple[int, ...], Union[int, tuple[int, ...]], dict]] = [
        ((2, 3, 9), (3,), {}),
        ((1, 3, 9), 3, {'stride': 1, 'padding': 1, 'ceil_mode': True, 'count_include_pad': False}),
        ((1, 3, 9), (6,), {'stride': (3,), 'padding': (2,), 'ceil_mode': True, 'count_include_pad': True}),
        ((2, 3, 9), (3,), {'stride': (1,), 'padding': (1,), 'ceil_mode': False, 'count_include_pad': True}),
        ((0, 3, 9), (6,), {'stride': (3,), 'padding': (2,), 'ceil_mode': False, 'count_include_pad': True}),
        ((1, 2, 9), (7,), {'stride': (3,), 'padding': (2,), 'ceil_mode': False}),
        ((1, 2, 9), (7,), {'stride': (3,), 'padding': (3,), 'ceil_mode': True}),
        ((1, 2, 9), (7,), {'stride': (3,), 'ceil_mode': False}),
        ((1, 2, 9), (7,), {'stride': (3,), 'ceil_mode': True}),
    ]

    for input_shape, kernel_size, pool_kwargs in cases:
        pool_input = make_arg(input_shape)
        yield SampleInput(pool_input, args=(kernel_size,), kwargs=pool_kwargs)
def sample_inputs_avgpool3d(op_info, device, dtype, requires_grad, **kwargs):
    """Yield sample inputs for avg_pool3d.

    ``kernel_size`` is passed positionally; ``stride``, ``padding``,
    ``ceil_mode``, ``count_include_pad`` and ``divisor_override`` are passed
    as keywords when given.
    """
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # Each entry: (input_shape, kernel_size, kwargs)
    cases: list[tuple[tuple[int, ...], Union[int, tuple[int, ...]], dict]] = [
        ((2, 3, 3, 4, 4), (2, 2, 2), {}),
        ((1, 2, 4, 4, 4), 2, {
            'stride': 1, 'padding': 1, 'ceil_mode': True,
            'count_include_pad': False, 'divisor_override': 2}),
        ((1, 2, 5, 5, 5), (2, 3, 4), {
            'stride': (1, 2, 2), 'padding': (0, 1, 2), 'ceil_mode': True,
            'count_include_pad': True, 'divisor_override': 2}),
        ((1, 2, 5, 5, 5), (2, 3, 4), {
            'stride': (1, 2, 2), 'padding': (0, 1, 2), 'ceil_mode': False}),
        ((1, 1, 7, 5, 7), (6, 3, 4), {
            'stride': (2, 3, 2), 'padding': (3, 1, 0), 'ceil_mode': False,
            'count_include_pad': False, 'divisor_override': 2}),
        ((1, 1, 4, 5, 4), (2, 2, 3), {
            'stride': (2, 2, 1), 'padding': 0, 'ceil_mode': False,
            'count_include_pad': True, 'divisor_override': -2}),
        ((1, 1, 6, 5, 6), (4, 5, 6), {
            'stride': (2, 3, 2), 'padding': 2, 'ceil_mode': True,
            'count_include_pad': True, 'divisor_override': None}),
        ((0, 1, 4, 5, 4), (2, 3, 1), {
            'stride': (2, 1, 2), 'padding': 0, 'ceil_mode': False,
            'count_include_pad': True, 'divisor_override': None}),
    ]

    for input_shape, kernel_size, pool_kwargs in cases:
        pool_input = make_arg(input_shape)
        yield SampleInput(pool_input, args=(kernel_size,), kwargs=pool_kwargs)
def error_inputs_avg_pool1d(op_info, device, **kwargs):
    """Yield inputs for avg_pool1d paired with the ``error_regex`` given for each.

    The input tensor is created on ``device`` so that the op is exercised on
    the device under test rather than always on CPU.
    """
    # error inputs when pad is negative
    x = torch.rand([0, 1, 49], dtype=torch.float32, device=device)
    yield ErrorInput(SampleInput(x, kwargs={'kernel_size': 2, 'stride': 50, 'padding': -1}),
                     error_regex='pad must be non-negative')

    # error inputs when pad > kernel_size / 2
    yield ErrorInput(SampleInput(x, kwargs={'kernel_size': 2, 'stride': 50, 'padding': 4}),
                     error_regex='pad should be at most half of effective kernel size')
def error_inputs_avg_pool2d(op_info, device, **kwargs):
    """Yield inputs for avg_pool2d paired with the ``error_regex`` given for each.

    Covers negative padding, padding larger than half the kernel (each with an
    int and a 2-tuple kernel) and a zero ``divisor_override``. Tensors are
    created on ``device`` so that the op is exercised on the device under test
    rather than always on CPU.
    """
    # error inputs when pad is negative
    x = torch.rand([0, 1, 49], dtype=torch.float32, device=device)
    yield ErrorInput(SampleInput(x, kwargs={'kernel_size': 2, 'stride': 50, 'padding': -1}),
                     error_regex='pad must be non-negative')
    # 2-dimensional kernel
    yield ErrorInput(SampleInput(x, kwargs={'kernel_size': (3, 2), 'stride': 50, 'padding': -1}),
                     error_regex='pad must be non-negative')

    # error inputs when pad > kernel_size / 2
    yield ErrorInput(SampleInput(x, kwargs={'kernel_size': 2, 'stride': 50, 'padding': 4}),
                     error_regex='pad should be at most half of effective kernel size')
    # 2-dimensional kernel
    yield ErrorInput(SampleInput(x, kwargs={'kernel_size': (3, 2), 'stride': 50, 'padding': 4}),
                     error_regex='pad should be at most half of effective kernel size')

    # error inputs for zero divisor
    x = torch.zeros(3, 3, 3, device=device)
    yield ErrorInput(SampleInput(x, kwargs={'kernel_size': (2, 2), 'divisor_override': 0}),
                     error_regex='divisor must be not zero')
def error_inputs_avg_pool3d(op_info, device, **kwargs):
    """Yield inputs for avg_pool3d paired with the ``error_regex`` given for each.

    Covers negative padding, padding larger than half the kernel (each with an
    int and a 3-tuple kernel), a zero ``divisor_override`` and a 3D input.
    Tensors are created on ``device`` so that the op is exercised on the
    device under test rather than always on CPU.
    """
    # error inputs when pad is negative
    x = torch.rand([0, 1, 49, 50], dtype=torch.float32, device=device)
    yield ErrorInput(SampleInput(x, kwargs={'kernel_size': 2, 'stride': 50, 'padding': -1}),
                     error_regex='pad must be non-negative')
    # 3-dimensional kernel
    yield ErrorInput(SampleInput(x, kwargs={'kernel_size': (3, 2, 2), 'stride': 50, 'padding': -1}),
                     error_regex='pad must be non-negative')

    # error inputs when pad > kernel_size / 2
    yield ErrorInput(SampleInput(x, kwargs={'kernel_size': 2, 'stride': 50, 'padding': 4}),
                     error_regex='pad should be at most half of effective kernel size')
    # 3-dimensional kernel
    yield ErrorInput(SampleInput(x, kwargs={'kernel_size': (3, 2, 2), 'stride': 50, 'padding': 4}),
                     error_regex='pad should be at most half of effective kernel size')

    # error inputs for zero divisor
    x = torch.zeros(3, 3, 3, 3, device=device)
    yield ErrorInput(SampleInput(x, kwargs={'kernel_size': (2, 2, 2), 'divisor_override': 0}),
                     error_regex='divisor must be not zero')

    # error inputs for invalid input dimension
    x = torch.rand([0, 1, 49], dtype=torch.float32, device=device)
    yield ErrorInput(SampleInput(x, kwargs={'kernel_size': 2, 'stride': 50, 'padding': 0}),
                     error_regex='non-empty 4D or 5D')
def sample_inputs_to(op_info, device, dtype, requires_grad, **kwargs):
    """Yield sample inputs for the ``to.device``, ``to.dtype`` and ``to.other`` overloads.

    Every sample converts a 4D tensor to float64 and passes ``memory_format``
    as a keyword; the two boolean flags are passed positionally in every
    True/False combination.
    """
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
    shape = (S, S, S, S)
    flags = [True, False]
    memory_formats = [torch.preserve_format, torch.channels_last]

    # test_multiple_devices_to_cuda would fail if we use a different device than given
    target_devices = [device]
    if torch.device(device).type == 'cpu' and torch.cuda.is_available():
        target_devices = [torch.device('cpu'), torch.device('cuda:0')]

    # TODO: can't switch `to.device` overload to use positional arguments
    # https://github.com/pytorch/pytorch/issues/84265
    # to.device overload
    for target, nb, cp, mem_f in product(target_devices, flags, flags, memory_formats):
        yield SampleInput(
            make_arg(shape),
            args=(target, torch.float64, nb, cp),
            kwargs={"memory_format": mem_f},
        )

    # to.dtype overload
    for nb, cp, mem_f in product(flags, flags, memory_formats):
        yield SampleInput(
            make_arg(shape),
            args=(torch.float64, nb, cp),
            kwargs={"memory_format": mem_f},
        )

    # to.other overload: `other` is a float64 tensor living on the target device
    for target, nb, cp, mem_f in product(target_devices, flags, flags, memory_formats):
        other = make_arg(shape, dtype=torch.float64, device=target)
        yield SampleInput(
            make_arg(shape),
            args=(other, nb, cp),
            kwargs={"memory_format": mem_f},
        )
def sample_inputs_topk(op_info, device, dtype, requires_grad, **kwargs):
    """Yield sample inputs for topk with ``k`` and up to three more positional args.

    Seven argument variants are generated for a 3D input (k=3) and seven for a
    0-dim input (k=1).
    """
    make_input = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    # (input shape, k, trailing positional args passed after k)
    groups = (
        ((S, M, S), 3, (
            (),
            (1,),
            (-2,),
            (1, True),
            (-2, True),
            (1, True, True),
            (-2, True, True),
        )),
        ((), 1, (
            (),
            (0,),
            (-1,),
            (0, True),
            (-1, True),
            (0, True, True),
            (-1, True, True),
        )),
    )
    for shape, k, trailing_options in groups:
        for trailing in trailing_options:
            yield SampleInput(make_input(shape), k, *trailing)
def sample_inputs_outer(op_info, device, dtype, requires_grad, **kwargs):
    """Yield a single sample: two 1D tensors of length S and M."""
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
    left = make_arg(S)
    right = make_arg(M)
    yield SampleInput(left, right)
def sample_inputs_dist(op_info, device, dtype, requires_grad, **kwargs):
    """Yield ``(x, (y, p))`` samples for every pair of listed shapes and p in (2, 4)."""
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
    sizes = ((S, S, S), (S,), (S, 1, S), (), (S, S))
    for size_x in sizes:
        for size_y in sizes:
            for p in (2, 4):
                x = make_arg(size_x)
                yield SampleInput(x, args=(make_arg(size_y), p))
# TODO: the nondeterminism of the operation is not tested yet, see
# https://github.com/pytorch/pytorch/issues/53352
def sample_inputs_index(op_info, device, dtype, requires_grad, reference=False, **kwargs):
    """Yield SampleInputs for ops called as ``target.op(dim, idx[, source_or_value])``.

    The op family is detected from ``op_info.name``:
      * ``index_add``  -> ``(dim, idx, source)`` plus an ``alpha`` keyword
      * ``index_copy`` -> ``(dim, idx, source)``
      * ``index_fill`` -> ``(dim, idx, value)``, value being a Python scalar or a 0-dim tensor
      * anything else  -> ``(dim, idx)``

    With ``reference=True`` the tensors are all ones and the indices all
    zeros, so every write lands on the same location (exercising atomic adds /
    repeated writes).
    """
    op_name = op_info.name
    is_add = "index_add" in op_name
    is_copy = "index_copy" in op_name
    is_fill = "index_fill" in op_name
    takes_source = is_copy or is_add

    if reference:
        make_arg = partial(torch.ones, device=device, dtype=dtype, requires_grad=requires_grad)
        make_idx = partial(torch.zeros, device=device, dtype=torch.int64)
    else:
        make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
        if takes_source:
            # Indices must be distinct for copy and add to be deterministic.
            make_idx = partial(torch.randperm, device=device, dtype=torch.int64)
        else:
            def make_idx(n):
                return make_tensor((n,), device=device, dtype=torch.int64, low=0, high=n)

    # Extra keyword parameter, only for add.
    if not is_add:
        alphas = (None,)
    elif dtype == torch.bool:
        alphas = (True, False)
    else:
        alphas = (-1, 0, 2)

    # A weird number to catch errors. The scalar exercises
    # `index_fill.int_Scalar`, the 0-dim tensor `index_fill.int_Tensor`.
    values = (make_arg((1,)).item(), make_arg(())) if is_fill else (None,)

    for shape, alpha, value in product([(), (1,), (S, S)], alphas, values):
        target = make_arg(shape)

        # The last dim for 2-D targets, dim 0 otherwise (this covers the scalar case).
        dim = -1 if target.ndim == 2 else 0
        index_len = 1 if target.ndim == 0 else target.shape[dim]
        op_args = [dim, make_idx(index_len)]

        if takes_source:
            op_args.append(make_arg(shape))
        elif is_fill:
            op_args.append(value)

        op_kwargs = {"alpha": alpha} if alpha is not None else {}
        yield SampleInput(target, args=tuple(op_args), kwargs=op_kwargs)
def sample_inputs_index_reduce(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs with args ``(dim, idx, source, reduce)`` and an ``include_self`` keyword.

    The reduction name is read from ``op_info.variant_test_name`` and must be
    one of 'prod', 'mean', 'amin', 'amax'. When gradients are requested for
    'prod', one extra hand-written sample containing zeros is appended.
    """
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
    make_index = partial(make_tensor, device=device, dtype=torch.int64, low=0)

    reduce = op_info.variant_test_name
    assert reduce in ('prod', 'mean', 'amin', 'amax')

    # (self shape, source shape)
    shape_pairs = [((), ()), ((1,), (1,)), ((S, S), (S, M)), ((S, S, S), (S, M, S))]
    for self_shape, src_shape in shape_pairs:
        for include_self in (True, False):
            # Reduce along dim 1 when there is one; dim 0 also covers the scalar case.
            dim = 1 if len(self_shape) >= 2 else 0
            # One index per source slice, each index addressing a slice of self.
            num_indices = src_shape[dim] if len(src_shape) != 0 else 1
            index_bound = self_shape[dim] if len(self_shape) != 0 else 1
            idx = make_index((num_indices,), high=index_bound)
            src = make_arg(src_shape)
            yield SampleInput(make_arg(self_shape),
                              args=(dim, idx, src, reduce),
                              kwargs={'include_self': include_self})

    if not (requires_grad and reduce == 'prod'):
        return

    # Edge cases for backward: check that gradients are propagated correctly
    # for prod when zeros in self/src are reduced. This sample covers
    # (a) 1 zero reduced (from source (self[0, 1]), from self (self[0, 0]))
    # (b) 2 zeros reduced (1 from src and 1 from self (self[1, 0], self[1, 1])
    # (c) no zeros reduced (self[2, 1], self[2, 2])
    # (d) 2 zeros reduced (both from src) is tested in test/test_autograd.py
    #     test_scatter_index_reduce_prod_gradgrad_error as this case is not supported for gradgrad
    self_with_zeros = torch.tensor([[0, 13], [0, 0], [15, 19]],
                                   dtype=dtype, device=device, requires_grad=requires_grad)
    src_with_zeros = torch.tensor([[2, 0], [0, 0], [2, 3], [2, 2]],
                                  dtype=dtype, device=device, requires_grad=requires_grad)
    row_index = torch.tensor([0, 1, 2, 0], dtype=torch.long, device=device)

    yield SampleInput(self_with_zeros,
                      args=(0, row_index, src_with_zeros, reduce),
                      kwargs={'include_self': True})
def sample_inputs__unsafe_masked_index(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs of the form ``(self, mask, indices, fill_value)``.

    For each case one index tensor per dimension is drawn from ``[0, high)``
    and reshaped so the per-dimension indices broadcast against each other.
    Two masks are produced per case: one accepting indices in
    ``[0, size)`` and one accepting indices in ``[1, size - 1)``.
    """
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    def make_idx(count, upper, ndim, axis):
        # 1-D random indices, viewed with size `count` on `axis` and 1 elsewhere.
        view_shape = [1] * ndim
        view_shape[axis] = count
        flat = make_tensor((count,), device=device, dtype=torch.int64, low=0, high=upper)
        return flat.view(view_shape)

    def in_range_mask(indices, shape, margin):
        # AND together, over all dims, `margin <= idx < size - margin`.
        per_dim = [
            torch.logical_and(idx >= margin, idx < shape[i] - margin)
            for i, idx in enumerate(indices)
            if idx is not None
        ]
        return functools.reduce(torch.logical_and, per_dim)

    # (self shape, exclusive upper bound for index values, indices per dim)
    cases = [
        ((S, S), S, M),
        ((S, S), M, S),
        ((S, S, S), S, M),
    ]

    fill_value = make_tensor([], dtype=dtype, device="cpu").item()

    for self_shape, high, idx_size in cases:
        ndim = len(self_shape)
        indices = [make_idx(idx_size, high, ndim, d) for d in range(ndim)]

        for margin in (0, 1):
            mask = in_range_mask(indices, self_shape, margin)
            yield SampleInput(make_arg(self_shape), mask, indices, fill_value)
def sample_inputs__unsafe_masked_index_put_accumulate(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs of the form ``(self, mask, indices, values)``.

    Each dimension gets its own number of indices, and ``values`` has shape
    ``idx_sizes`` (one extent per index tensor). Two masks are produced per
    case: one accepting indices in ``[0, size)`` and one accepting indices in
    ``[1, size - 1)``; both samples of a case share the same indices and values.
    """
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    def make_idx(count, upper, ndim, axis):
        # 1-D random indices, viewed with size `count` on `axis` and 1 elsewhere.
        view_shape = [1] * ndim
        view_shape[axis] = count
        flat = make_tensor((count,), device=device, dtype=torch.int64, low=0, high=upper)
        return flat.view(view_shape)

    def in_range_mask(indices, shape, margin):
        # AND together, over all dims, `margin <= idx < size - margin`.
        per_dim = [
            torch.logical_and(idx >= margin, idx < shape[i] - margin)
            for i, idx in enumerate(indices)
            if idx is not None
        ]
        return functools.reduce(torch.logical_and, per_dim)

    # (self shape, exclusive upper bound for index values, indices per dim)
    cases = [
        ((S, S), S, (M, M)),
        ((S, S), M, (S, S + 1)),
        ((S, S, S), S, (M, M - 1, M + 1)),
    ]

    for self_shape, high, idx_sizes in cases:
        ndim = len(self_shape)
        indices = [make_idx(idx_sizes[d], high, ndim, d) for d in range(ndim)]
        values = make_arg(idx_sizes)

        for margin in (0, 1):
            mask = in_range_mask(indices, self_shape, margin)
            yield SampleInput(make_arg(self_shape), mask, indices, values)
def sample_inputs_mode(op_info, device, dtype, requires_grad, **kwargs):
    """Return a generator of SampleInputs built from (input shape, positional args) pairs."""
    make_arg = partial(make_tensor, dtype=dtype, device=device,
                       requires_grad=requires_grad, low=None, high=None)

    # (input shape, positional arguments passed after the input)
    cases = (
        ((S, S, S), ()),
        ((S, S, S), (1,)),
        ((S, S, S), (1, True)),
        ((), ()),
        ((), (0,)),
        ((), (0, True)),
        # Non-fused mode kernel on CUDA
        ((3000,), ()),
    )

    return (SampleInput(make_arg(shape), *extra_args)
            for shape, extra_args in cases)
# TODO: the nondeterminism of this operation is not tested yet, see
# https://github.com/pytorch/pytorch/issues/53352
def sample_inputs_put(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs with args ``(index, source, accumulate)``.

    Three groups are produced: generic 2-D targets with distinct positive or
    negative indices, scalar / one-element combinations, and empty
    index/source tensors applied to targets of several shapes.
    """
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)
    make_idx = partial(make_tensor, low=0, dtype=torch.int64, device=device, requires_grad=False)
    accumulate_flags = (True, False)

    side = 3

    # Generic inputs: `side` distinct flat positions out of side * side, and
    # their negative-index counterparts.
    positions = torch.randperm(side * side, device=device, dtype=torch.int64)[:side]
    for index in (positions, -positions - 1):
        for acc in accumulate_flags:
            yield SampleInput(input=make_arg((side, side)),
                              args=(index.clone(), make_arg((side,)), acc))

    # Scalar cases. All targets, then all indices, then all sources are
    # created up front before any combination is yielded.
    scalar_sizes = [(), (1,)]
    targets = [make_arg(size) for size in scalar_sizes]
    indices = [make_idx(size, high=1) for size in scalar_sizes]
    sources = [make_arg(size) for size in scalar_sizes]
    for tgt, index, src, acc in product(targets, indices, sources, accumulate_flags):
        yield SampleInput(input=tgt.clone().requires_grad_(requires_grad),
                          args=(index.clone(),
                                src.clone().requires_grad_(requires_grad),
                                acc))

    # Empty cases: empty index and source against several target shapes.
    empty_index = make_idx((0,), high=1)
    empty_source = make_arg((0,))
    targets = [make_arg(size) for size in [(0,), (), (1,), (3, 2)]]
    for tgt, acc in product(targets, accumulate_flags):
        yield SampleInput(input=tgt.clone().requires_grad_(requires_grad),
                          args=(empty_index.clone(),
                                empty_source.clone().requires_grad_(requires_grad),
                                acc))
def sample_inputs_take(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs with a single ``index`` argument.

    Three groups are produced: a generic 2-D source with positive or negative
    indices, scalar / one-element combinations, and an empty index applied
    to sources of several shapes.
    """
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)
    make_idx = partial(make_tensor, low=0, dtype=torch.int64, device=device, requires_grad=False)

    side = 3

    # Generic inputs: take `side` elements out of side * side.
    positions = make_idx((side,), high=(side * side))
    for index in (positions, -positions - 1):
        yield SampleInput(input=make_arg((side, side)), args=(index,))

    # Scalar cases. All sources, then all indices, are created up front.
    scalar_sizes = [(), (1,)]
    sources = [make_arg(size) for size in scalar_sizes]
    indices = [make_idx(size, high=1) for size in scalar_sizes]
    for src, index in product(sources, indices):
        yield SampleInput(input=src.clone().requires_grad_(requires_grad),
                          args=(index.clone(),))

    # Empty cases: each source is created right before its sample is yielded.
    empty_index = make_idx((0,), high=1)
    for size in [(0,), (), (1,), (3, 2)]:
        src = make_arg(size)
        yield SampleInput(input=src.clone().requires_grad_(requires_grad),
                          args=(empty_index.clone(),))
def sample_movedim_moveaxis(op_info, device, dtype, requires_grad, **kwargs):
    """Yield two SampleInputs reversing the dims of a (4, 3, 2, 1) tensor, via positive and negative dims."""
    make_arg = partial(make_tensor, dtype=dtype, device=device, low=None, high=None, requires_grad=requires_grad)

    # (source dims, destination dims), both passed positionally as lists
    dim_pairs = (
        ([0, 1, 2, 3], [3, 2, 1, 0]),
        ([0, -1, -2, -3], [-3, -2, -1, 0]),
    )
    for source, destination in dim_pairs:
        yield SampleInput(make_arg((4, 3, 2, 1)), source, destination)
def reference_movedim_moveaxis(op_info, device, dtype, requires_grad, **kwargs):
    """Yield the regular movedim/moveaxis samples followed by extra reference cases."""
    yield from sample_movedim_moveaxis(op_info, device, dtype, requires_grad, **kwargs)

    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # (shape, source, destination)
    cases = (
        ((), (), ()),                                                # empty inputs
        ((3, 5, 7, 2), -2, 1),                                       # int inputs, negative
        ((3, 5, 7, 2), (-1, 0), (0, -1)),                            # swap bounds
        ((2, 3, 4, 5, 6), (3, -3, 4), (1, 0, -1)),                   # non-sequential, negative
        ((2, 3, 4, 5, 6), (-3, 4, 3, 1), (-3, 4, 3, 1)),             # idempotence, negative
        ((6, 2, 3, 5, 4), (4, 3, 2, 1, 0), (0, 1, 2, 3, 4)),         # reverse, sequential, positive
        ((6, 2, 3, 5, 4), (-3, -2, -4, -5, -1), (2, 1, 3, 4, 0)),    # reverse, non-sequential
        ((6, 2, 3, 5, 4), (4, -2, 2, -4, -5), (-5, 1, 2, -2, -1)),   # reverse, sequential, negative
    )

    yield from (
        SampleInput(make_arg(shape), args=(source, destination))
        for shape, source, destination in cases
    )
def error_movedim_moveaxis(op_info, device, **kwargs):
    """Yield ErrorInputs for invalid source/destination arguments on a (2, 3, 4, 5, 6) float32 tensor."""
    make_arg = partial(make_tensor, device=device, dtype=torch.float32)
    shape = (2, 3, 4, 5, 6)

    # Cases that use ErrorInput's default error type: (source, destination, regex)
    default_type_cases = (
        # source length < destination length
        ((3, -3), (1, 0, -1),
         (r"movedim: Invalid source or destination dims: source "
          r"\(\[3, -3\] dims\) should contain the same number of "
          r"dims as destination \(\[1, 0, -1\] dims\)")),
        # source length > destination length
        ((3, -3, 4), (1, 0),
         (r"movedim: Invalid source or destination dims: source "
          r"\(\[3, -3, 4\] dims\) should contain the same number of "
          r"dims as destination \(\[1, 0\] dims\)")),
        # repeated source dim, with negative indices
        ((0, 4, -5), (1, 0, 2),
         r"movedim: repeated dim in `source` \(\[0, 4, -5\]\)"),
        # repeated destination dim, with negative indices
        ((1, 0, 2), (0, 4, -5),
         r"movedim: repeated dim in `destination` \(\[0, 4, -5\]\)"),
        # repeated dim (both), with negative indices
        ((1, 0, -4), (0, 4, -5),
         r"movedim: repeated dim in `source` \(\[1, 0, -4\]\)"),
    )
    for source, destination, regex in default_type_cases:
        yield ErrorInput(
            SampleInput(make_arg(*shape), args=(source, destination)),
            error_regex=regex,
        )

    # Out-of-bounds dims all raise IndexError with the same message.
    out_of_range = r"Dimension out of range \(expected to be in range of \[-5, 4\], but got -6\)"
    out_of_range_cases = (
        ((0, 1, -6), (1, 4, 2)),  # out of bounds source inputs, with negative indices
        ((1, 4, 2), (0, 1, -6)),  # out of bounds destination inputs, with negative indices
        (-6, 1),                  # out of bounds source input, int
        (3, -6),                  # out of bounds destination input, int
    )
    for source, destination in out_of_range_cases:
        yield ErrorInput(
            SampleInput(make_arg(*shape), args=(source, destination)),
            error_regex=out_of_range,
            error_type=IndexError,
        )
def sample_repeat_tile(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs pairing input shapes with repeat counts.

    For the ``repeat`` ops, combinations with fewer repeat counts than input
    dimensions are skipped.
    """
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    if requires_grad:
        # Tests for variant_consistency_jit, grad, gradgrad
        # are slower. Use smaller bags of `rep_dims` and `shapes`
        # in this case.
        rep_dims = ((), (0, ), (0, 2), (1, 1), (2, 3), (1, 3, 2), (3, 1, 1))
        shapes = ((), (0,), (2,), (3, 2))
    else:
        rep_dims = ((), (0, ), (1, ), (0, 2), (1, 1), (2, 3), (2, 3, 2), (0, 2, 3), (2, 1, 1, 1),)
        shapes = ((), (0,), (2,), (3, 0), (3, 2), (3, 0, 1))

    is_repeat_op = op_info.name in ['repeat', '_refs.repeat']

    for rep_dim in rep_dims:
        for shape in shapes:
            # `torch.repeat` errors for `len(rep_dims) < t.dim()`,
            # so only the valid combinations are emitted for it.
            if not is_repeat_op or len(rep_dim) >= len(shape):
                yield SampleInput(make_arg(shape), rep_dim)
def sample_inputs_narrow_narrow_copy(op_info, device, dtype, requires_grad, *, is_narrow, **kwargs):
    """Yield SampleInputs with positional args ``(dim, start, length)``.

    When ``is_narrow`` is true each case is yielded a second time, on the
    same input tensor, with ``start`` wrapped in a tensor.
    """
    make_arg = partial(make_tensor, dtype=dtype, device=device, low=None, high=None,
                       requires_grad=requires_grad)

    # (shape, dim, start, length)
    cases = (
        ((S, S, S), 1, 2, 2),
        ((S, S, S), -1, 2, 2),
        ((S, S, S), 1, 0, 0),
        ((S, S, S), -1, 0, 0),
        ((S, S, S), 2, 1, 2),
    )

    for shape, dim, start, length in cases:
        tensor = make_arg(shape)
        yield SampleInput(tensor, dim, start, length)
        if not is_narrow:
            continue
        # narrow also accepts the start argument being a Tensor
        yield SampleInput(tensor, dim, torch.tensor(start), length)
def reference_inputs_narrow_narrow_copy(op_info, device, dtype, requires_grad, *, is_narrow, **kwargs):
    """Yield the regular narrow/narrow_copy samples followed by 1-, 2- and 3-dim reference cases.

    As in the regular samples, when ``is_narrow`` is true each case is
    yielded a second time with ``start`` wrapped in a tensor.
    """
    yield from sample_inputs_narrow_narrow_copy(op_info, device, dtype, requires_grad, is_narrow=is_narrow, **kwargs)

    make_arg = partial(make_tensor, dtype=dtype, device=device, low=None, high=None,
                       requires_grad=requires_grad)

    # (shape, dim, start, length)
    cases = (
        # 1-dim
        ((M,), 0, 0, 0),           # 0 elems from the left
        ((M,), -1, -1, 0),         # 0 elems from the right
        ((M,), 0, 5, 3),           # 3 elems from the left
        ((M,), 0, -5, 2),          # 2 elems from the right
        ((M,), -1, 0, M),          # M elems from the left
        ((M,), 0, -M, M),          # M elems from the right

        # 2-dim
        ((M, S), 1, 0, 0),         # dim 1, 0 elems from the left
        ((S, M), -2, -1, 0),       # dim 0, 0 elems from the right
        ((L, S), 1, 2, 3),         # dim 1, 3 elems from the left
        ((L, S), -1, 3, 2),        # dim 1, 2 elems from the left
        ((M, L), 0, 0, M),         # dim 0, M elems from the left
        ((M, L), -1, -L, L),       # dim 1, L elems from the right

        # 3-dim
        ((L, M, S), 2, 0, 0),      # dim 2, 0 elems from the left
        ((M, S, L), -1, -1, 0),    # dim 2, 0 elems from the right
        ((S, L, M), 2, 0, M),      # dim 2, M elems from the left
        ((L, S, M), -1, -M, M),    # dim 2, M elems from the right
        ((S, L, M), 1, 0, 0),      # dim 1, 0 elems from the left
        ((S, L, M), 0, 2, 1),      # dim 0, 1 elem from the left
        ((M, S, M), -1, -5, 4),    # dim 2, 4 elems from the right
    )

    for shape, dim, start, length in cases:
        tensor = make_arg(shape)
        yield SampleInput(tensor, dim, start, length)
        if not is_narrow:
            continue
        # narrow also accepts the start argument being a Tensor
        yield SampleInput(tensor, dim, torch.tensor(start), length)
def error_inputs_narrow_narrow_copy(op_info, device, *, is_narrow, is_ref):
    """Yield ErrorInputs for invalid ``(dim, start, length)`` arguments.

    For the non-ref narrow_copy on a CPU device two of the cases expect a
    different error than every other configuration (the original notes
    attribute this to ``narrow_copy_dense_cpu_out``).
    """
    make_arg = partial(make_tensor, device=device, dtype=torch.float32)
    is_dense_cpu_copy = not is_narrow and not is_ref and torch.device(device).type == 'cpu'

    # 0-dim
    yield ErrorInput(SampleInput(make_arg(()), 0, 0, 1),
                     error_type=RuntimeError,
                     error_regex=r"narrow\(\) cannot be applied to a 0-dim tensor\.")

    # out of bounds dim
    if is_dense_cpu_copy:
        # narrow_copy_dense_cpu_out
        dim_error_type = RuntimeError
        dim_error_regex = r"Expected dim < static_cast<int64_t>\(self_sizes.size\(\)\) to be true, but got false\."
    else:
        dim_error_type = IndexError
        dim_error_regex = r"Dimension out of range \(expected to be in range of \[-3, 2\], but got 3\)"
    yield ErrorInput(SampleInput(make_arg((M, S, L)), 3, 0, 0),
                     error_type=dim_error_type,
                     error_regex=dim_error_regex)

    # out of bounds dim (negative)
    yield ErrorInput(SampleInput(make_arg((L, S, M)), -4, 0, 0),
                     error_type=IndexError,
                     error_regex=r"Dimension out of range \(expected to be in range of \[-3, 2\], but got -4\)")

    # out of bounds start
    yield ErrorInput(SampleInput(make_arg((L, M, S)), 1, M + 1, 0),
                     error_type=IndexError,
                     error_regex=r"start out of range \(expected to be in range of \[-10, 10\], but got 11\)")

    # out of bounds start (negative)
    yield ErrorInput(SampleInput(make_arg((L, M, S)), 1, -M - 1, 0),
                     error_type=IndexError,
                     error_regex=r"start out of range \(expected to be in range of \[-10, 10\], but got -11\)")

    # out of bounds length
    yield ErrorInput(SampleInput(make_arg((S, L, M)), 2, 0, M + 1),
                     error_type=RuntimeError,
                     error_regex=r"start \(0\) \+ length \(11\) exceeds dimension size \(10\)\.")

    # out of bounds length (negative)
    if is_dense_cpu_copy:
        # narrow_copy_dense_cpu_out
        length_error_regex = r"start \(0\) \+ length \(-1\) exceeds dimension size \(10\)\."
    else:
        length_error_regex = r"narrow\(\): length must be non-negative\."
    yield ErrorInput(SampleInput(make_arg((M,)), 0, 0, -1),
                     error_type=RuntimeError,
                     error_regex=length_error_regex)

    if not is_narrow:
        return

    # Test Tensor overload that was added for XLA. Start must be an 0-dim
    # integral Tensor. narrow_copy doesn't have this overload.
    # https://github.com/pytorch/pytorch/issues/31558
    bad_start_regex = r"start must be an 0-dim integral Tensor\."

    # *1-dim* integral Tensor
    yield ErrorInput(SampleInput(make_arg((L, M, S)), 1, make_arg(S, dtype=torch.int), 2),
                     error_type=RuntimeError,
                     error_regex=bad_start_regex)

    # 0-dim *bool* Tensor (bools are not allowed)
    yield ErrorInput(SampleInput(make_arg((L, M, S)), -3, make_arg((), dtype=torch.bool), 3),
                     error_type=RuntimeError,
                     error_regex=bad_start_regex)
def sample_trapezoid(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs ``(y[, x], **kwargs)``; ``x`` is omitted when its shape is None."""
    make_arg = partial(make_tensor, dtype=dtype, device=device, low=None, high=None,
                       requires_grad=requires_grad)

    # (y shape, x shape or None, keyword arguments)
    cases = [
        ((2, 3), (2, 3), {}),
        ((2, 3), (2, 3), {'dim': 1}),
        ((6,), (6,), {}),
        ((6,), None, {}),
        # When 'trapezoid' is called with an empty input, it does not produce an output with requires_grad
        # See Issue #{61619}
        # ((6,0), (6,0), {}),
        ((2, 3), (1, 3), {}),
        ((3, 3), (3, 3), {}),
        ((3, 3), (3, 3), {'dim': -2}),
        ((5,), None, {'dx': 2.0}),
        ((2, 2), None, {'dx': 3.0}),
    ]

    for y_shape, x_shape, op_kwargs in cases:
        tensors = [make_arg(y_shape)]
        if x_shape is not None:
            tensors.append(make_arg(x_shape))
        yield SampleInput(*tensors, **op_kwargs)
def sample_cumulative_trapezoid(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs ``(y[, x], **kwargs)``; ``x`` is omitted when its shape is None."""
    make_arg = partial(make_tensor, device=device, dtype=dtype,
                       requires_grad=requires_grad, low=None, high=None)

    # (y shape, x shape or None, keyword arguments)
    cases = [
        ((2, 3), (2, 3), {}),
        ((2, 3), (2, 3), {'dim': 1}),
        ((6,), (6,), {}),
        ((6,), None, {}),
        # When 'cumulative_trapezoid' is called with an empty input, it does not produce an output with requires_grad
        # See Issue #{61619}
        # ((6,0), (6,0), {}),
        ((2, 3), (1, 3), {}),
        ((3, 3), (3, 3), {}),
        ((3, 3), (3, 3), {'dim': -2}),
        ((5,), None, {'dx': 2.0}),
        ((2, 2), None, {'dx': 3.0}),
    ]

    for y_shape, x_shape, op_kwargs in cases:
        tensors = [make_arg(y_shape)]
        if x_shape is not None:
            tensors.append(make_arg(x_shape))
        yield SampleInput(*tensors, **op_kwargs)
def sample_unsqueeze(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs with a single positional axis for 3-D, 0-dim and one-element inputs."""
    make_arg = partial(make_tensor, dtype=dtype, device=device, low=None, high=None,
                       requires_grad=requires_grad)

    # (shape, axes to try on that shape)
    axes_by_shape = (
        ((3, 4, 5), (0, 1, 3, -1, -3)),
        ((), (0, -1)),
        ((1,), (0, -1)),
    )
    for shape, axes in axes_by_shape:
        for axis in axes:
            yield SampleInput(make_arg(shape), axis)
def sample_inputs_nn_unfold(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs ``(input, kernel_size, dilation, padding, stride)`` plus one kernel-size-only sample."""
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    input_shapes = ((0, 1, 5, 5), (2, 3, 5, 5))
    kernel_sizes = (2, (2, 2), (2, 3))
    # dilation and stride share the same candidate values
    int_or_pair = (1, 2, (1, 2))
    paddings = (0, 1, (1, 2))

    for shape, kernel_size, dilation, padding, stride in product(
            input_shapes, kernel_sizes, int_or_pair, paddings, int_or_pair):
        yield SampleInput(make_arg(shape), kernel_size, dilation, padding, stride)

    # With default args
    yield SampleInput(make_arg((1, 1, 5, 5)), (3, 3))
def sample_inputs_squeeze(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs whose ``args`` are either empty or hold a single dim."""
    make_arg = partial(make_tensor, dtype=dtype, device=device, low=None, high=None,
                       requires_grad=requires_grad)

    # (shape, args tuple passed to the op)
    cases = (
        ((S, 1, S, 1), ()),
        ((1, 1, 1, 1), ()),
        ((1, 1, 1, 1), (0,)),
        ((S, 1, S, 1), (1,)),
        ((S, 1, S, 1), (-1,)),
        ((S, 1, S, 1), (2,)),
        ((S, 1, S, 1), (-2,)),
        ((), (0,)),
    )
    for shape, op_args in cases:
        yield SampleInput(make_arg(shape), args=op_args)
def sample_inputs_squeeze_multiple(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs that pass a tuple of dims as one positional argument."""
    make_arg = partial(make_tensor, dtype=dtype, device=device, low=None, high=None,
                       requires_grad=requires_grad)

    # (shape, dims tuple)
    cases = (
        ((1, 1, 1, 1), ()),
        ((S, 1, S, 1), (1,)),
        ((S, 1, S, 1), (-1,)),
        ((S, 1, S, 1), (1, 3)),
        ((S, 1, S, 1), (1, 2)),
        ((), (0,)),
    )
    for shape, dims in cases:
        yield SampleInput(make_arg(shape), dims)
def _squeeze_ref(x, axis=None):
    """Squeeze ``x`` with NumPy, tolerating inputs that ``np.squeeze`` rejects.

    0-dim inputs are returned unchanged. For a sequence of axes only those
    whose extent is 1 are squeezed; for a single int axis whose extent is not
    1 the input is returned unchanged.
    """
    if x.ndim == 0:
        # NumPy doesn't allow squeezing scalars
        return x

    if isinstance(axis, Sequence):
        # NumPy doesn't allow specifying non-singular dimensions, so drop them
        axis = tuple(filter(lambda d: x.shape[d] == 1, axis))
    elif isinstance(axis, int) and x.shape[axis] != 1:
        return x

    return np.squeeze(x, axis)
def sample_inputs_nn_pad(op_info, device, dtype, requires_grad, mode, **kwargs):
    """Yield SampleInputs with args ``(pad, mode)`` or, for constant mode, ``(pad, mode, value)``.

    Constant mode additionally starts with one sample that passes only the
    padding, and repeats its whole case table for the fill values 1.0 and 2.0.
    """
    assert mode in ('constant', 'reflect', 'replicate', 'circular')

    negative_pad_case = ((1, 3, 4, 4), (-1, 1, -2, 1))

    # All tables hold (input shape, pad) pairs.
    reflect_replicate_cases = (
        ((1, 3), (1, 2)),
        ((1, 3), (0, 1)),
        ((0, 3, 3), (1, 2)),
        ((0, 3, 3), (0, 1)),
        ((1, 3, 3), (1, 2)),
        ((1, 3, 3), (0, 1)),
        ((1, 3, 3), (0, 2, 0, 1)),
        ((0, 3, 3, 3), (0, 2, 0, 1)),
        ((3, 3, 5, 5), (0, 2, 0, 1)),
        ((3, 3, 5, 5), (1, 1, 1, 1, 1, 1)),
        ((1, 3, 3, 3, 3), (1, 1, 1, 1, 1, 1)),
        negative_pad_case,
    )

    # Constant mode: the 2-D shape gets the first three pads, every other
    # shape gets all four, followed by the negative-padding case.
    constant_pads = ((1, 2), (0, 1), (0, 2, 0, 1), (1, 1, 1, 1, 1, 1))
    constant_shapes = ((0, 3, 3), (1, 3, 3), (0, 3, 3, 3), (3, 3, 5, 5), (1, 3, 3, 3, 3))
    constant_cases = (
        *(((1, 3), pad) for pad in constant_pads[:3]),
        *((shape, pad) for shape in constant_shapes for pad in constant_pads),
        negative_pad_case,
    )

    # test_dtypes fails on ASAN for the bool dtype with the full circular table:
    # runtime error: load of value 190, which is not a valid value for type 'bool'
    # Reference: https://github.com/pytorch/pytorch/pull/62814#issuecomment-894156562
    # Reference Issue: https://github.com/pytorch/pytorch/issues/63034
    circular_bool_cases = (
        ((2, 3, 3), (1, 2)),
        ((1, 3, 3), (1, 2)),
    )
    circular_cases = (
        ((0, 3, 3), (1, 2)),
        ((0, 3, 3), (0, 1)),
        ((1, 3, 3), (1, 2)),
        ((1, 3, 3), (0, 1)),
        ((0, 3, 3, 3), (0, 2, 0, 1)),
        ((3, 3, 5, 5), (0, 2, 0, 1)),
        ((1, 3, 3, 3, 3), (1, 1, 1, 1, 1, 1)),
        negative_pad_case,
    )

    if mode in ['reflect', 'replicate']:
        cases = reflect_replicate_cases
    elif mode == 'constant':
        cases = constant_cases
    elif dtype == torch.bool:  # mode == 'circular'
        cases = circular_bool_cases
    else:  # mode == 'circular'
        cases = circular_cases

    make_inp = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    if mode == 'constant':
        # Default args
        yield SampleInput(make_inp((1, 3, 3)), args=((2, 2),))

    if mode not in ['reflect', 'replicate', 'circular']:
        # mode == 'constant': the fill value is passed explicitly
        for pad_value in (1., 2.):
            for shape, pad in cases:
                yield SampleInput(make_inp(shape), args=(pad, mode, pad_value))
        return

    for shape, pad in cases:
        yield SampleInput(make_inp(shape), args=(pad, mode))
def sample_inputs_nn_pad_replicate_negative(op_info, device, dtype, requires_grad, **kwargs):
    """Yield 'replicate'-mode SampleInputs whose paddings contain negative entries."""
    make_inp = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # (input shape, pad)
    shape_pad_pairs = (
        ((5, 3, 4, 4), (-4, 5, 0, 0)),
        ((6, 2, 4, 4), (0, 0, 2, -4)),
        ((5, 6, 4, 4), (5, -4, -4, 3)),
        ((4, 2, 5, 5), (-2, -1, 4, 6)),
        ((2, 6, 5, 5), (8, -1, -1, -3)),
        ((8, 1, 5, 5), (-2, -1, -1, -3)),
    )

    yield from (
        SampleInput(make_inp(shape), args=(pad, 'replicate'))
        for shape, pad in shape_pad_pairs
    )
def sample_inputs_constant_pad_nd(op_info, device, dtype, *args, **kwargs):
    """Yield the constant-mode nn.pad samples rewritten as ``(pad[, value])``.

    The ``mode`` argument is dropped and the fill value, when present, is cast
    to the Python scalar type matching ``dtype``.
    """
    # NOTE: primTorch is more strict about the type of the fill value argument
    # So we must cast it to the correct dtype
    from torch._prims_common import dtype_to_type
    to_scalar = dtype_to_type(dtype)

    def drop_mode_argument(input, pad, mode=None, value=None):
        new_args = (pad,) if value is None else (pad, to_scalar(value))
        return SampleInput(input, args=new_args)

    # Inherit sample inputs from nn.pad, but transform them to fit
    # constant_pad_nd's interface
    for pad_sample in sample_inputs_nn_pad(op_info, device, dtype, *args,
                                           mode='constant', **kwargs):
        yield drop_mode_argument(pad_sample.input, *pad_sample.args, **pad_sample.kwargs)
def sample_inputs_repeat_interleave(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs with integer ``repeats`` (with and without ``dim``) and one with tensor ``repeats``."""
    make_input = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # (input shape, keyword arguments) for the integer-repeats samples
    int_repeat_cases = (
        ((), dict(repeats=2)),
        ((2, 3, 4), dict(repeats=2)),
        ((2, 3, 4), dict(repeats=2, dim=1)),
    )
    for shape, op_kwargs in int_repeat_cases:
        yield SampleInput(make_input(shape), **op_kwargs)

    # Per-element repeats given as a tensor along dim 1
    yield SampleInput(make_input((2, 3, 4)), repeats=torch.arange(3, device=device), dim=1)
def sample_inputs_stft(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for stft covering n_fft, hop_length, center, window and onesided.

    Samples without an explicit ``return_complex`` are only generated for
    complex dtypes; the ``onesided=False`` sample only for non-complex dtypes.
    """
    def mt(shape, **kwargs):
        # Shorthand for make_tensor with this sampler's device/dtype/requires_grad.
        return make_tensor(shape, device=device, dtype=dtype,
                           requires_grad=requires_grad, **kwargs)

    yield SampleInput(mt(100), n_fft=10, return_complex=True)
    yield SampleInput(mt(100), n_fft=10, return_complex=False)
    if dtype.is_complex:
        yield SampleInput(mt(100), n_fft=10)

    for center in [False, True]:
        yield SampleInput(mt(10), n_fft=7, center=center, return_complex=True)
        yield SampleInput(mt((10, 100)), n_fft=16, hop_length=4,
                          center=center, return_complex=True)

    # NOTE(review): the windowed samples below are assumed to sit after the
    # loop, so `center` here is the value left over from its last iteration
    # (True) -- confirm against the upstream file's indentation.
    window = mt(16, low=.5, high=2.0)
    yield SampleInput(
        mt((2, 100)), kwargs=dict(n_fft=16, window=window, return_complex=True, center=center))
    yield SampleInput(
        mt((3, 100)), kwargs=dict(n_fft=16, window=window, return_complex=True, center=center))
    if not dtype.is_complex:
        yield SampleInput(
            mt((10, 100)), n_fft=16, window=window, onesided=False,
            return_complex=True)
def sample_inputs_istft(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for istft covering n_fft, onesided, hop_length, center and window.

    All op arguments are passed through the ``kwargs`` dict of each sample.
    """
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    def mt(shape, **kwargs):
        # For non-complex dtypes a trailing dimension of size 2 is appended
        # (presumably the real/imaginary pair -- confirm).
        real_shape = shape if dtype.is_complex else shape + (2,)
        return make_arg(real_shape, **kwargs)

    yield SampleInput(mt((10, 2)), kwargs=dict(n_fft=10))
    yield SampleInput(mt((6, 3)), kwargs=dict(n_fft=6, onesided=False))
    yield SampleInput(mt((6, 4)), kwargs=dict(n_fft=10, onesided=True))

    for center in [False, True]:
        yield SampleInput(mt((10, 10, 6)), kwargs=dict(n_fft=10, center=center))
        yield SampleInput(mt((1, 9, 10)), kwargs=dict(n_fft=16, hop_length=4, center=center))

    # NOTE(review): the windowed samples below are assumed to sit after the
    # loop, so `center` here is the value left over from its last iteration
    # (True) -- confirm against the upstream file's indentation.
    window = make_arg(10, low=.5, high=2.0)
    yield SampleInput(mt((10, 10, 6)), kwargs=dict(
        n_fft=10, window=window, center=center, return_complex=dtype.is_complex))
    yield SampleInput(mt((10, 10, 10)), kwargs=dict(
        n_fft=10, window=window[:8], win_length=8, center=center, return_complex=True))

    # For complex dtypes only the real part of the window is used here.
    real_window = window if not dtype.is_complex else window.real
    yield SampleInput(mt((10, 5, 6)), kwargs=dict(n_fft=8, window=real_window[:8], center=center))
def sample_inputs_ormqr(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs ``(reflectors, tau, other, left=..., transpose=...)``.

    The reflectors and tau come from ``torch.geqrf`` applied to a random
    ``(*batch, m, n)`` matrix; ``other`` is ``(m, n)`` when ``left`` is true
    and ``(n, m)`` otherwise.
    """
    # Helper wrapping `make_tensor`; values are drawn with low=-1, high=1.
    make_input = partial(make_tensor, dtype=dtype, device=device, low=-1, high=1)

    batch_shapes = [(), (0, ), (2, ), (2, 1)]
    sizes = [5, 2, 0]
    flags = [True, False]

    for batch in batch_shapes:
        for m, n in product(sizes, sizes):
            for left, transpose in product(flags, flags):
                matrix = make_input((*batch, m, n))
                reflectors, tau = torch.geqrf(matrix)
                reflectors.requires_grad_(requires_grad)
                tau.requires_grad_(requires_grad)

                trailing = (m, n) if left else (n, m)
                other = make_input((*batch, *trailing), requires_grad=requires_grad)
                yield SampleInput(reflectors, tau, other, left=left, transpose=transpose)
def sample_inputs_cholesky_solve(op_info, device, dtype, requires_grad=False, **kwargs):
    """Yield the cholesky_inverse samples rearranged for cholesky_solve.

    Each sample's original input matrix becomes the single positional
    argument, and a fresh random tensor of the same shape becomes the input.
    """
    # The base samples are requested without gradients; requires_grad is
    # applied to the tensors here instead.
    base_samples = sample_inputs_linalg_cholesky_inverse(
        op_info, device, dtype, requires_grad=False
    )

    for sample in base_samples:
        factor = sample.input
        sample.input = make_tensor(factor.shape, dtype=dtype, device=device,
                                   requires_grad=requires_grad, low=None, high=None)
        sample.args = (factor.requires_grad_(requires_grad),)
        yield sample
def sample_inputs_lu(op_info, device, dtype, requires_grad=False, **kwargs):
    """Yield SampleInputs with args ``(True, get_infos)`` for wide, square and tall batched matrices."""
    make_arg = partial(make_fullrank_matrices_with_distinct_singular_values,
                       dtype=dtype, device=device, requires_grad=requires_grad)

    # not needed once OpInfo tests support Iterables
    batch_shapes = ((), (3,), (3, 3))
    row_deltas = (-2, -1, 0, +1, +2)

    for batch_shape in batch_shapes:
        for get_infos in (True, False):
            for delta in row_deltas:
                # Row count ranges over S - 2 .. S + 2; column count is always S.
                matrix = make_arg(*batch_shape, S + delta, S)
                yield SampleInput(matrix, args=(True, get_infos))
def sample_inputs_lu_unpack(op_info, device, dtype, requires_grad=False, **kwargs):
    """Yield SampleInputs ``(LU data, pivots)`` obtained by factorizing each linalg.lu sample input."""
    def out_fn(output):
        # Gradient checks only look at the second and third outputs.
        second, third = output[1], output[2]
        return second, third

    lu_samples = sample_inputs_linalg_lu(op_info, device, dtype, requires_grad, **kwargs)
    for lu_sample in lu_samples:
        factors, pivots = torch.linalg.lu_factor(lu_sample.input)
        factors.requires_grad_(requires_grad)
        sample = SampleInput(factors, pivots)
        yield sample.with_metadata(output_process_fn_grad=out_fn)
def sample_inputs_roll(op_info, device, dtype, requires_grad=False, **kwargs):
    """Yield SampleInputs for an empty and an (S, S, S) tensor per argument tuple, then one 0-dim sample."""
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # Each entry is the full `args` tuple handed to the op.
    arg_tuples = (
        (0, 0), (1, 2), (0, 2), (2, 0), (-1, 0), (10000, 1),
        (2,),
        ((1, 2, -1), (0, 1, 2)),
    )
    for op_args in arg_tuples:
        for shape in ((0, 0, 0), (S, S, S)):
            yield SampleInput(make_arg(shape), args=op_args)

    # Scalar tensor
    yield SampleInput(make_arg(()), args=(10, ))
def error_inputs_roll(op_info, device, **kwargs):
    """Yield ErrorInputs for roll: empty shifts, misaligned shifts/dims, and out-of-range dims."""
    make_arg = partial(make_tensor, device=device, dtype=torch.float32)

    # Empty shifts
    yield ErrorInput(SampleInput(make_arg((S,)), ()),
                     error_regex="`shifts` required")

    # Two shifts but a single dim
    yield ErrorInput(SampleInput(make_arg((S, S)), (2, 1), 0),
                     error_regex="shifts and dimensions must align")

    # dim 2 on a 1-D tensor
    yield ErrorInput(SampleInput(make_arg((S, )), 0, 2),
                     error_regex="out of range",
                     error_type=IndexError)

    # dim 0 on a 0-dim tensor
    yield ErrorInput(SampleInput(make_arg(()), 0, 0),
                     error_regex="Dimension specified as 0",
                     error_type=IndexError)
def sample_inputs_rot90(op_info, device, dtype, requires_grad=False, **kwargs):
    """Yield an argument-free sample, then one per (k, dims) pair with k in [-5, 5]."""
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
    shape = (S, S, S)

    yield SampleInput(make_arg(shape))

    dim_pairs = [(0, 1), (1, 2), (1, -1)]
    for k in range(-5, 6):
        for dims in dim_pairs:
            yield SampleInput(make_arg(shape), args=(k, dims))
def error_inputs_rot90(op_info, device, **kwargs):
    """Yield ErrorInputs for rot90: a single rotation dim, a 1-D input, and identical rotation dims."""
    make_arg = partial(make_tensor, device=device, dtype=torch.float32)

    # Only one rotation dim given
    yield ErrorInput(SampleInput(make_arg((S, S)), dims=(0,)),
                     error_regex="expected total rotation dims")

    # Input with a single dimension
    yield ErrorInput(SampleInput(make_arg((S,))),
                     error_regex="expected total dims >= 2")

    # Both rotation dims are the same
    yield ErrorInput(SampleInput(make_arg((S, S)), dims=(1, 1)),
                     error_regex="expected rotation dims to be different")
def sample_inputs_std_var(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs exercising the dim, unbiased, correction and keepdim keywords."""
    make_arg = partial(make_tensor, device=device, dtype=dtype,
                       requires_grad=requires_grad)
    shape_nd = (S, S, S)
    shape_1d = (S,)

    # (input shape, keyword arguments)
    cases = (
        (shape_nd, {}),
        (shape_nd, dict(dim=1)),
        (shape_nd, dict(dim=1, unbiased=True, keepdim=True)),
        (shape_1d, dict(dim=0, unbiased=True, keepdim=True)),
        (shape_1d, dict(dim=0, unbiased=False, keepdim=False)),

        (shape_nd, dict(dim=(1,), correction=1.3)),
        (shape_nd, dict(dim=(1,), correction=S // 2)),
        (shape_nd, dict(dim=None, correction=0, keepdim=True)),
        (shape_nd, dict(dim=None, correction=None)),
        (shape_nd, dict(correction=0, keepdim=True)),
        ((3, 4, 5), dict(dim=-3)),
    )
    for shape, op_kwargs in cases:
        yield SampleInput(make_arg(shape), **op_kwargs)
def sample_inputs_std_var_unbiased(op_info, device, dtype, requires_grad, **kwargs):
    """Yield two samples that pass ``unbiased`` as a positional bool.

    Targets the overload ``var_mean(Tensor self, bool unbiased=True) -> (Tensor, Tensor)``.
    """
    for shape, unbiased in (((S, S), True), ((S,), False)):
        tensor = make_tensor(shape, device=device, dtype=dtype, requires_grad=requires_grad)
        yield SampleInput(tensor, unbiased)
def _generate_correlation_inputs(device, dtype, requires_grad, **kwargs):
    """Lazily yield tensors of shapes (2,), (1, 2), (3, 2) and (2, 3), in that order.

    Extra keyword arguments are accepted and ignored.
    """
    new_tensor = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)
    # map() is lazy, so each tensor is only generated when it is requested.
    yield from map(new_tensor, ((2,), (1, 2), (3, 2), (2, 3)))
def sample_inputs_corrcoef(op_info, device, dtype, requires_grad, **kwargs):
    """Wrap every tensor from ``_generate_correlation_inputs`` in a SampleInput (lazily)."""
    for tensor in _generate_correlation_inputs(device, dtype, requires_grad):
        yield SampleInput(tensor)
def sample_inputs_copysign(op_info, device, dtype, requires_grad, **kwargs):
    """Yield the generic elementwise-binary samples, plus one tensor/negative-scalar sample.

    The extra sample (a length-5 tensor with the Python float ``-3.14``) is
    only produced for floating point dtypes.
    """
    yield from sample_inputs_elementwise_binary(op_info, device, dtype, requires_grad, **kwargs)

    if not dtype.is_floating_point:
        return
    vector = make_tensor(5, dtype=dtype, device=device, requires_grad=requires_grad)
    yield SampleInput(vector, -3.14)
def sample_inputs_cov(op_info, device, dtype, requires_grad, **kwargs):
    """Yield cov samples with and without ``correction``/``fweights``/``aweights``.

    For every base tensor from ``_generate_correlation_inputs`` the bare
    tensor is yielded first. Afterwards, one sample is produced for each
    combination of ``correction`` in ``range(num_observations)``,
    ``fweights`` in (None, int weights) and ``aweights`` in (None, float
    weights), each on a fresh clone of the base tensor.
    """
    for base in _generate_correlation_inputs(device, dtype, requires_grad):
        yield SampleInput(base)

        # Tensors with fewer than two dims use their element count;
        # otherwise the size of dim 1 is used.
        num_observations = base.size(1) if base.ndimension() >= 2 else base.numel()
        fweights = make_tensor((num_observations,), dtype=torch.int, device=device, low=1, high=10)
        aweights = make_tensor((num_observations,), dtype=torch.float, device=device, low=0, high=1,
                               requires_grad=requires_grad)

        for correction in range(num_observations):
            for fw in (None, fweights):
                for aw in (None, aweights):
                    yield SampleInput(base.clone().requires_grad_(requires_grad),
                                      correction=correction, fweights=fw, aweights=aw)
def error_inputs_cov(op_info, device, **kwargs):
    """Yield ErrorInputs for cov: bad ranks, dtypes, element counts and negative weights.

    A single random 1D tensor of length ``S`` is shared as the input of all
    weight-related cases; the offending weights are created lazily per case.
    """
    vector = torch.rand(S, device=device)
    rand = partial(torch.rand, device=device)
    literal = partial(torch.tensor, device=device)

    def failing(regex, tensor, **sample_kwargs):
        return ErrorInput(SampleInput(tensor, **sample_kwargs), error_regex=regex)

    # Rank checks
    yield failing("expected input to have two or fewer dimensions", rand(S, S, S))
    yield failing("expected fweights to have one or fewer dimensions", vector, fweights=rand(S, S))
    yield failing("expected aweights to have one or fewer dimensions", vector, aweights=rand(S, S))
    # Dtype checks
    yield failing("expected fweights to have integral dtype", vector, fweights=rand(S))
    yield failing("expected aweights to have floating point dtype", vector, aweights=literal([1, 1]))
    # Element-count checks
    yield failing("expected fweights to have the same numel", vector, fweights=literal([1]))
    yield failing("expected aweights to have the same numel", vector, aweights=rand(1))
    # Sign checks
    yield failing("fweights cannot be negative", vector, fweights=literal([-1, -2, -3, -4, -5]))
    yield failing("aweights cannot be negative", vector, aweights=literal([-1., -2., -3., -4., -5.]))
def sample_inputs_permute(op_info, device, dtype, requires_grad, **kwargs):
    """Yield permute samples; the permutation is passed as one tuple in ``args``."""
    shape_4d = (1, 2, 3, 4)
    cases = (
        (shape_4d, (0, 2, 3, 1)),
        (shape_4d, (0, -2, -1, 1)),  # negative dims
        ((), ()),                    # 0-dim tensor
        (shape_4d, (2, 1, 3, 0)),
    )
    for shape, permutation in cases:
        tensor = make_tensor(shape, device=device, dtype=dtype, requires_grad=requires_grad)
        yield SampleInput(tensor, args=(permutation,))
def reference_inputs_permute(op, device, dtype, requires_grad, **kwargs):
    """Yield the regular permute samples followed by pre-permuted inputs.

    For each (shape, permutation) case, every reordering of the permutation
    tuple is first applied to a fresh tensor; the resulting view is then the
    sample input and the case's permutation is the argument. This is done
    once for a default tensor and once for a ``noncontiguous=True`` one.
    """
    yield from sample_inputs_permute(op, device, dtype, requires_grad, **kwargs)

    cases = (
        ((), ()),
        ((1,), (0,)),
        ((2, 2), (1, 0)),
        ((2, 2), (0, 1)),
        ((2, 0, 1), (0, 2, 1)),
        ((3, 4, 2), (2, 1, 0)),
        ((3, 4, 2), (1, 0, 2)),
        ((3, 4, 2), (0, 1, 2)),
    )

    # Adds tricky permutations and permutations with noncontiguity
    for shape, permutation in cases:
        for pre_permutation in itertools.permutations(permutation):
            for noncontiguous in (False, True):
                base = make_tensor(shape, device=device, dtype=dtype,
                                   requires_grad=requires_grad, noncontiguous=noncontiguous)
                yield SampleInput(base.permute(pre_permutation), args=(permutation,))
def error_inputs_softshrink(op, device, **kwargs):
    """Yield one ErrorInput: softshrink called with a negative ``lambd`` (-0.5)."""
    tensor = make_tensor((1,), dtype=torch.float, device=device)
    sample = SampleInput(tensor, kwargs={"lambd": -0.5})
    yield ErrorInput(sample, error_regex="lambda must be greater or equal to 0, but found to be -0.5")
def sample_inputs_softshrink(op_info, device, dtype, requires_grad=False, **kwargs):
    """Yield samples with explicit ``lambd`` values, then the generic unary samples.

    The explicit values (0 and 0.5) exercise ``lambd`` beyond its default,
    which is what ``sample_inputs_elementwise_unary`` already covers.
    """
    yield from (
        SampleInput(make_tensor((S, S), device=device, dtype=dtype, requires_grad=requires_grad),
                    kwargs={"lambd": lambd})
        for lambd in (0., 0.5)
    )
    yield from sample_inputs_elementwise_unary(op_info, device, dtype, requires_grad)
def sample_inputs_hardshrink(op_info, device, dtype, requires_grad=False, **kwargs):
    """Yield samples with explicit ``lambd`` values, then the generic unary samples.

    Unlike softshrink, ``lambd`` is allowed to be negative for hardshrink, so
    -0.5 is included next to 0 and 0.5. The default ``lambd`` is what
    ``sample_inputs_elementwise_unary`` already covers.
    """
    yield from (
        SampleInput(make_tensor((S, S), device=device, dtype=dtype, requires_grad=requires_grad),
                    kwargs={"lambd": lambd})
        for lambd in (-0.5, 0., 0.5)
    )
    yield from sample_inputs_elementwise_unary(op_info, device, dtype, requires_grad)
def sample_inputs_hardtanh(op_info, device, dtype, requires_grad=False, **kwargs):
    """Yield samples with explicit ``min_val``/``max_val``, then the generic unary samples.

    The explicit bounds go beyond the defaults, which are what
    ``sample_inputs_elementwise_unary`` already covers.
    """
    bounds = (
        # (min_val, max_val)
        (-0.5, 0.5),
        (0., 0.),
    )
    for min_val, max_val in bounds:
        tensor = make_tensor((S, S), device=device, dtype=dtype, requires_grad=requires_grad)
        yield SampleInput(tensor, kwargs={"min_val": min_val, "max_val": max_val})
    yield from sample_inputs_elementwise_unary(op_info, device, dtype, requires_grad)
def error_inputs_hardtanh(op_info, device, **kwargs):
    """Yield one ErrorInput: hardtanh called with ``min_val`` > ``max_val`` (ValueError expected)."""
    tensor = make_tensor((1,), dtype=torch.float, device=device)
    inverted_bounds = {"min_val": 0.5, "max_val": -0.5}
    yield ErrorInput(SampleInput(tensor, kwargs=inverted_bounds),
                     error_type=ValueError,
                     error_regex="min_val cannot be greater than max_val")
def sample_inputs_einsum(op_info, device, dtype, requires_grad=False, **kwargs):
    """Yield einsum samples of the form ``SampleInput([operands...], equation)``.

    A fixed pool of operands is generated once, up front. Every sample
    receives fresh clones of the operands it uses, with ``requires_grad``
    re-applied to each clone.
    """
    operand_shapes = (
        ("x", (3,)),
        ("y", (4,)),
        ("A", (2, 3,)),
        ("B", (1, 3,)),
        ("C", (1, 2, 3,)),
        ("D", (1, 3, 4,)),
        ("E", (4, 4,)),
        ("H", (3, 3,)),
        ("I", (1, 3, 1,)),
    )
    pool = {
        name: make_tensor(shape, device=device, dtype=dtype, requires_grad=requires_grad)
        for name, shape in operand_shapes
    }

    cases = (
        # Vector operations
        ('i->', ("x",)),                    # sum
        ('i,j->ij', ("x", "y")),            # outer
        # Matrix operations
        ("ij->i", ("A",)),                  # reduce over j
        ("ij,kj->ik", ("A", "B")),          # matmul
        ("ij,Ab->ijAb", ("A", "E")),        # matrix outer product
        # Tensor operations
        ("aij,ajk->aik", ("C", "D")),       # batch matmul
        ("aij,jk->aik", ("D", "E")),        # tensor matrix contraction
        ("ijk,ik->j", ("C", "B")),          # non contiguous
        # Test diagonals
        ('iji->j', ("I",)),                 # non-contiguous trace
        # Test ellipsis
        ("i...->...", ("H",)),
        ('...ik, ...j -> ij', ("C", "x")),
    )
    for equation, names in cases:
        operands = [pool[name].clone().requires_grad_(requires_grad) for name in names]
        yield SampleInput(operands, equation)
def sample_inputs_flip(op_info, device, dtype, requires_grad, **kwargs):
    """Yield flip samples for every (size, dims) combination; ``dims`` is passed by keyword.

    One of the sizes has a zero-length dimension, and one ``dims`` value is
    the empty tuple.
    """
    sizes = ((S, M, S), (S, 0, M))
    dim_choices = ((0, 1, 2), (0,), (0, 2), (-1,), ())
    for size in sizes:
        for dims in dim_choices:
            tensor = make_tensor(size, dtype=dtype, device=device, requires_grad=requires_grad)
            yield SampleInput(tensor, kwargs={"dims": dims})
def sample_inputs_fliplr_flipud(op_info, device, dtype, requires_grad, **kwargs):
    """Lazily yield one argument-free sample per shape (one shape has a zero-length dim)."""
    for shape in ((S, M, S), (S, 0, M)):
        tensor = make_tensor(shape, dtype=dtype, device=device, requires_grad=requires_grad,
                             low=None, high=None)
        yield SampleInput(tensor)
def error_inputs_fliplr(op, device, **kwargs):
    """Yield one ErrorInput: fliplr applied to a 1D tensor."""
    one_d = make_tensor((1,), dtype=torch.float, device=device)
    yield ErrorInput(SampleInput(one_d), error_regex="Input must be >= 2-d.")
def error_inputs_flipud(op, device, **kwargs):
    """Yield one ErrorInput: flipud applied to a 0-dim tensor."""
    zero_d = make_tensor((), dtype=torch.float, device=device)
    yield ErrorInput(SampleInput(zero_d), error_regex="Input must be >= 1-d.")
def sample_inputs_clamp(op_info, device, dtype, requires_grad, **kwargs):
    """Yield clamp samples with tensor bounds passed positionally through ``args``.

    Covers same-shape bounds, broadcast bounds, a single bound, ``None`` for
    either bound, and int32 bounds for type promotion. The input tensor is
    always generated before its bounds.
    """
    shape = (S, M, S)
    make_arg = partial(make_tensor, dtype=dtype, device=device, low=None, high=None,
                       requires_grad=requires_grad)
    same_shape = partial(make_arg, shape)
    trailing_dims = partial(make_arg, shape[1:])
    broadcast_middle = partial(make_arg, (S, 1, S))
    integral = partial(make_tensor, shape, dtype=torch.int32, device=device, low=None, high=None,
                       requires_grad=False)

    # Each entry lists the factories (or None) for the positional bounds.
    bound_factories = (
        (same_shape, same_shape),
        (trailing_dims, trailing_dims),
        (broadcast_middle,),
        (None, same_shape),
        (same_shape, None),
        # test type promotion
        (integral, None),
        (same_shape, integral),
    )
    for factories in bound_factories:
        clamped = make_arg(shape)
        bounds = tuple(None if factory is None else factory() for factory in factories)
        yield SampleInput(clamped, args=bounds)
def reference_inputs_elementwise_ternary(op, device, dtype, requires_grad, *, sample_inputs_func, supports_scalars=False, **kwargs):
    """Yield reference inputs for a three-operand elementwise op.

    Emits, in order: everything from ``sample_inputs_func``; broadcasting and
    noncontiguous tensor triples; Python-scalar / CPU scalar-tensor operands
    (only if ``supports_scalars``); int/float type-promotion triples (only if
    the op supports both ``torch.float`` and ``torch.long`` on ``device``);
    and one NaN-laden triple for floating point and complex dtypes.
    """
    yield from sample_inputs_func(op, device, dtype, requires_grad, **kwargs)

    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
    make_scalar_tensor = partial(make_tensor, (), device='cpu', dtype=dtype, requires_grad=requires_grad)
    supported_dtypes = op.supported_dtypes(device)

    # broadcasting and noncontiguous cases
    shape_triples = (
        ((4, 4), (4, 4), (4, 4)),
        ((4, 4), (1, 4, 4), (4, 4)),
        ((4, 4), (1, 4, 4), (4, 1, 4)),
        ((4, 4, 1), (1, 4, 4), (4, 4)),
        ((4, 1), (1, 4, 4), (1, 4)),
        ((4, 4), (), (4, 4)),
        ((4, 4), (), ()),
        ((), (4, 4), (1, 4, 4)),
    )
    for first_shape, second_shape, third_shape in shape_triples:
        yield SampleInput(make_arg(first_shape), args=(make_arg(second_shape), make_arg(third_shape)))

        strided_first = make_arg(first_shape, noncontiguous=True)
        transposed_second = make_arg(second_shape).transpose(0, -1)
        transposed_third = make_arg(third_shape, noncontiguous=True).transpose(0, -1)
        yield SampleInput(strided_first, args=(transposed_second, transposed_third))

    # scalar cases
    if supports_scalars:
        scalar_cases = [
            ((), 1, 2,),
            ((), 1., 2),
            ((4, 4), 1., 2,),
            ((3, 4), make_scalar_tensor(), make_scalar_tensor()),
        ]
        if torch.complex64 in supported_dtypes:
            scalar_cases.append(((3, 1, 4), complex(1, 2), 3.))

        for shape, second, third in scalar_cases:
            yield SampleInput(make_arg(shape), args=(second, third))

    # type promotion cases
    # int x float
    if torch.float in supported_dtypes and torch.long in supported_dtypes:
        long_scalar = make_arg((), dtype=torch.long)
        float_row = make_arg((1, 4), dtype=torch.float)
        native = make_arg((3, 4))
        for first, second, third in ((long_scalar, float_row, native),
                                     (native, long_scalar, float_row)):
            yield SampleInput(first, args=(second, third))

    # NaN propagation
    if dtype.is_floating_point or dtype.is_complex:
        nan_value = float('nan') if dtype.is_floating_point else complex(float('nan'), float('nan'))

        def vector_with_nans(*positions):
            vector = make_arg((12,))
            for position in positions:
                vector[position] = nan_value
            return vector

        # Index 7 is NaN in both of the first two operands.
        first = vector_with_nans(4, 7)
        second = vector_with_nans(1, 7)
        third = vector_with_nans(9)
        yield SampleInput(first, args=(second, third))
def _clamp_min_numpy(a, min=None):
    """NumPy counterpart of a lower-bound clamp: elementwise ``np.maximum(a, min)``."""
    lower_bound = min
    clamped = np.maximum(a, lower_bound)
    return clamped
def _clamp_max_numpy(a, max=None):
    """NumPy counterpart of an upper-bound clamp: elementwise ``np.minimum(a, max)``."""
    upper_bound = max
    clamped = np.minimum(a, upper_bound)
    return clamped
def _clamp_numpy(a, min=None, max=None):
    """NumPy counterpart of clamp with optional lower and upper bounds.

    With only one bound given, only that bound is applied. With both, the
    lower bound is applied first and the upper bound second, so the upper
    bound wins wherever ``min`` exceeds ``max``.
    """
    if min is None:
        return np.minimum(a, max)
    raised = np.maximum(a, min)
    return raised if max is None else np.minimum(max, raised)
def sample_inputs_cumprod(op_info, device, dtype, requires_grad, **kwargs):
    """Yield cumprod samples: plain tensors, 0-dim/empty tensors, and tensors with zeros.

    The dimension is always passed positionally through ``args``; the final
    sample also passes ``dtype`` by keyword.
    """
    def make_arg(shape):
        # shrink values to be in the interval [-1, +1] for better precision in gradgradcheck
        return make_tensor(shape, dtype=dtype, device=device, low=-1, high=+1, requires_grad=requires_grad)

    def prod_zeros(dim_select):
        """Return an (S, S, S) tensor zeroed along three 1-wide slices of the two selected dims."""
        assert len(dim_select) == 2
        first_dim, second_dim = dim_select
        result = make_arg(3 * (S,))
        # (start along first_dim, start along second_dim) of every zeroed slice
        for first_start, second_start in ((0, 1), (2, 3), (4, 3)):
            result.narrow(first_dim, first_start, 1).narrow(second_dim, second_start, 1).zero_()
        return result

    for dim in range(3):
        yield SampleInput(make_arg((S, S, S)), args=(dim,))

    # Scalar tensors and empty tensor
    for size in ((), (1,), (0,)):
        yield SampleInput(make_arg(size), args=(0,))

    for dim_select in ([0, 1], [0, 2], [1, 2]):
        yield SampleInput(prod_zeros(dim_select), args=(1,))

    # test dtype kwarg
    yield SampleInput(prod_zeros([1, 2]), args=(1,), kwargs={'dtype': dtype})
def sample_inputs_view_as_complex(op_info, device, dtype, requires_grad, **kwargs):
    """Yield a single sample: an ``(S, 2)`` tensor (last dimension of size 2)."""
    pairs = make_tensor((S, 2), dtype=dtype, device=device, requires_grad=requires_grad)
    yield SampleInput(pairs)
def sample_inputs_view_as_real(op_info, device, dtype, requires_grad, **kwargs):
    """Lazily yield two argument-free samples: an ``(S, S)`` tensor and a 0-dim tensor."""
    for size in ((S, S), ()):
        yield SampleInput(make_tensor(size, device=device, dtype=dtype, requires_grad=requires_grad))
def error_inputs_complex(op_info, device, is_ref=False, **kwargs):
    """Yield ErrorInputs for complex(): non-float operand, mismatched dtypes, wrong ``out`` dtype.

    ``is_ref`` selects which wording of the three expected messages is used
    (``torch.float32``-style dtype names versus ``Float``-style names). All
    cases expect a RuntimeError.
    """
    if is_ref:
        error_float = "Expected both inputs to be Half, Float or Double tensors but got torch.float32 and torch.int32"
        error_dtype = "Expected object of scalar type torch.float32 but got scalar type torch.float64 for second argument"
        error_out = "Expected out tensor to have dtype torch.complex128 but got torch.complex64 instead"
    else:
        error_float = "Expected both inputs to be Half, Float or Double tensors but got Float and Int"
        error_dtype = "Expected object of scalar type Float but got scalar type Double for second argument"
        error_out = "Expected object of scalar type ComplexDouble but got scalar type ComplexFloat for argument 'out'"

    def tensor_of(tensor_dtype):
        return make_tensor((M, S), dtype=tensor_dtype, device=device)

    cases = (
        # (dtypes of the two positional operands, dtype of ``out`` or None, regex)
        ((torch.float32, torch.int), None, error_float),
        ((torch.float32, torch.float64), None, error_dtype),
        ((torch.float64, torch.float64), torch.complex64, error_out),
    )
    for operand_dtypes, out_dtype, regex in cases:
        operands = [tensor_of(operand_dtype) for operand_dtype in operand_dtypes]
        extra = {} if out_dtype is None else {"out": tensor_of(out_dtype)}
        yield ErrorInput(SampleInput(*operands, **extra),
                         error_type=RuntimeError, error_regex=regex)
def sample_inputs_logaddexp(op_info, device, dtype, requires_grad, **kwargs):
    """Yield a single sample pairing two independently generated ``(S, S)`` tensors."""
    def square():
        return make_tensor((S, S), device=device, dtype=dtype, requires_grad=requires_grad)

    lhs = square()
    rhs = square()
    yield SampleInput(lhs, rhs)
def sample_inputs_prod(op_info, device, dtype, requires_grad, **kwargs):
    """Yield prod samples, mostly derived from ``sample_inputs_cumprod``.

    Emits, in order: each cumprod sample twice (input only, then unchanged);
    each cumprod sample again with ``keepdim=True`` added; a matrix holding a
    single zero; small and zero-sized tensors reduced over dim 1; a literal
    tensor ending in zeros; and a zero-valued 0-dim tensor in three call forms.
    """
    def make_arg(shape):
        # shrink values to be in the interval [-1, +1] for better precision in gradgradcheck
        return make_tensor(shape, dtype=dtype, device=device, low=-1, high=+1, requires_grad=requires_grad)

    def cloned(tensor):
        return tensor.clone().requires_grad_(requires_grad)

    def prod_single_zero():
        result = make_arg(2 * (S,))
        result[0, 1] = 0
        return result

    for cumprod_sample in sample_inputs_cumprod(op_info, device, dtype, requires_grad):
        # only Tensor, ignore other inputs
        yield SampleInput(cloned(cumprod_sample.input))
        yield cumprod_sample

    # Generates samples with keepdim = True
    for cumprod_sample in sample_inputs_cumprod(op_info, device, dtype, requires_grad):
        cumprod_sample.kwargs['keepdim'] = True
        yield cumprod_sample

    yield SampleInput(prod_single_zero())

    for shape in ((3, 3, 3), (3, 0)):
        yield SampleInput(make_arg(shape), args=(1,))
        yield SampleInput(make_arg(shape), args=(1,), kwargs={'keepdim': True})

    yield SampleInput(torch.tensor([2., 3, 0, 0], dtype=dtype, device=device, requires_grad=requires_grad))

    # test zero scalar tensor
    zero = make_arg(())
    zero.zero_()
    yield SampleInput(cloned(zero))
    yield SampleInput(cloned(zero), args=(0,))
    yield SampleInput(cloned(zero), args=(0,), kwargs={'keepdim': True})
def error_inputs_neg(op_info, device, **kwargs):
    """Yield one ErrorInput: negation applied to a bool tensor."""
    bool_tensor = torch.tensor((False, True), device=device)
    # The message is used as a regex, hence the escaped `-`, `~` and parentheses.
    expected = ("Negation, the `\\-` operator, on a bool tensor is not supported."
                " If you are trying to invert a mask, use the `\\~` or"
                " `logical_not\\(\\)` operator instead.")
    yield ErrorInput(SampleInput(bool_tensor), error_regex=expected)
def sample_inputs_diag(op_info, device, dtype, requires_grad, **kwargs):
    """Yield diag samples: one 1D input, then matrices with various diagonal offsets.

    The three matrices are generated once; every sample gets its own clone
    with ``requires_grad`` re-applied, and the offset (if any) is passed
    positionally.
    """
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad, low=None, high=None)

    yield SampleInput(make_arg(M))

    matrices = (
        make_arg((M, M)),
        make_arg((3, 5)),
        make_arg((5, 3)),
    )
    # NOTE(review): (2,) appears twice, so each matrix yields two identical
    # offset-2 samples; kept as-is to leave the sample sequence unchanged.
    offsets = ((), (2,), (-2,), (1,), (2,))
    for matrix in matrices:
        for offset in offsets:
            yield SampleInput(matrix.clone().requires_grad_(requires_grad), *offset)
def reference_inputs_diagonal_diag_embed(op_info, device, dtype, requires_grad, **kwargs):
    """Yield reference inputs shared by the diagonal and diag_embed style ops.

    First re-yields everything from ``sample_inputs_diagonal_diag_embed``,
    then adds 1D, 2D and 3D inputs combined with ``dim1``/``dim2``/``offset``
    keyword arguments. The 1D inputs are skipped for ops whose name contains
    ``'diagonal'`` because they are error inputs there.
    """
    yield from sample_inputs_diagonal_diag_embed(
        op_info, device, dtype, requires_grad, **kwargs)

    make_arg = partial(
        make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    shapes1d = ((0,), (1,))
    shapes2d = ((L, M),)
    shapes3d = ((L, M, S),)

    # One "no keyword arguments" case. This must be a non-empty sequence:
    # product() over an empty iterable (such as a bare ``{}``) yields nothing,
    # which would silently drop every 1D sample.
    kwargs1d = ({},)

    kwargs2d = (
        # dim1 > dim2 is allowed
        dict(dim1=1, dim2=0),
        # negative dims are allowed
        dict(dim1=-2, dim2=-1),
        # one dim negative and the other nonnegative is allowed
        dict(dim1=-1, dim2=0),
        # out of bounds offset should return an empty tensor in diagonal and
        # offset the diagonal in diag_embed
        dict(offset=100),
    )

    kwargs3d = kwargs2d + (
        # make sure we can use non-sequential dims
        dict(offset=-1, dim1=0, dim2=2),
    )

    samples1d = product(shapes1d, kwargs1d)
    samples2d = product(shapes2d, kwargs2d)
    samples3d = product(shapes3d, kwargs3d)

    is_diagonal_op = 'diagonal' in op_info.name
    # ``sample_kwargs`` (not ``kwargs``) so the function's own **kwargs is not shadowed.
    for shape, sample_kwargs in chain(samples1d, samples2d, samples3d):
        # these are error inputs for diagonal
        if is_diagonal_op and shape in shapes1d:
            continue
        yield SampleInput(input=make_arg(shape), kwargs=sample_kwargs)
def sample_inputs_diagonal_scatter(op_info, device, dtype, requires_grad, **kwargs):
    """Yield diagonal_scatter samples of the form ``SampleInput(input, args=(src, *diag_args))``.

    ``src`` is generated with the size of ``input.diagonal(*diag_args)``, so
    it always matches the diagonal selected by the same arguments.
    """
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    # Shapes for 2D Tensors
    shapes_2d = ((M, M), (3, 5), (5, 3))

    # Shapes for 3D Tensors
    shapes_3d = ((M, M, M),)

    # Every entry is already a tuple of positional diagonal arguments, so no
    # scalar-to-tuple normalization is needed before unpacking.
    args_2d = ((), (2,), (-2,), (1,))
    args_3d = ((1, 1, 2), (2, 0, 1), (-2, 0, 1))

    for input_shape, diag_args in chain(product(shapes_2d, args_2d), product(shapes_3d, args_3d)):
        input_ = make_arg(input_shape)
        # We can programmatically figure out the right shape for src:
        # It should be the same size as input.diagonal(other_args...)
        src_shape = input_.diagonal(*diag_args).size()
        src = make_arg(src_shape)
        yield SampleInput(input_, args=(src, *diag_args))
def sample_inputs_to_sparse(op_info, device, dtype, requires_grad, **kwargs):
    """Yield two ``(S, S)`` samples: one without arguments and one with positional ``1``.

    Both samples attach ``output_process_fn_grad`` metadata that converts the
    output with ``to_dense()``.
    """
    def densify(output):
        return output.to_dense()

    for extra_args in ((), (1,)):
        dense = make_tensor((S, S), device=device, dtype=dtype, requires_grad=requires_grad)
        yield SampleInput(dense, *extra_args).with_metadata(output_process_fn_grad=densify)
def sample_inputs_cross_entropy(op_info, device, dtype, requires_grad, **kwargs):
    """Yield cross-entropy samples with class-index targets and with probability targets.

    Every (input shape, keyword arguments) configuration is emitted first
    with a ``torch.long`` target of class indices and then with a target of
    the input's shape and dtype drawn with low=0, high=1. The ``ignore_index``
    configuration is only emitted with class-index targets.
    """
    batch_size, num_classes = shape = (2, 3)
    reductions = ("mean", "sum", "none")

    configs: list[tuple[tuple[int, ...], dict[str, Any]]] = [
        (shape, {}),
        ((*shape, 1), {}),
        ((*shape, 1, 2), {}),
        ((*shape, 1, 2, 3), {}),
    ]
    configs.extend((shape, dict(reduction=reduction)) for reduction in reductions)
    for reduction in reductions:
        class_weight = make_tensor((num_classes,), device=device, dtype=dtype)
        configs.append((shape, dict(weight=class_weight, reduction=reduction)))
    configs.append((shape, dict(ignore_index=1)))

    for input_shape, sample_kwargs in configs:
        for probabilities_target in (False, True):
            logits = make_tensor(input_shape, device=device, dtype=dtype, requires_grad=requires_grad)

            if probabilities_target:
                # ignore_index is not supported for probabilities target
                if "ignore_index" in sample_kwargs:
                    continue
                target = make_tensor(
                    input_shape,
                    low=0,
                    high=1,
                    device=device,
                    dtype=dtype,
                    requires_grad=requires_grad,
                )
            else:
                target = make_tensor(
                    (batch_size, *input_shape[2:]),
                    low=0,
                    high=num_classes,
                    device=device,
                    dtype=torch.long,
                )
                if "ignore_index" in sample_kwargs:
                    ignored = sample_kwargs["ignore_index"]
                    if torch.all(target == ignored):
                        # make sure at least one item in target is not ignored
                        allowed_classes = sorted(set(range(num_classes)) - {ignored})
                        target[0] = random.sample(allowed_classes, 1)[0]

            yield SampleInput(logits, target, **sample_kwargs)
def sample_inputs_logit(op_info, device, dtype, requires_grad, **kwargs):
    """Yield logit samples drawn from inside ``op_info.domain``.

    For floating point and complex dtypes the sampling interval is shrunk by
    a margin at both ends. Each shape is emitted without arguments and with
    the positional argument ``0.2``.
    """
    low, high = op_info.domain

    # Note: Operator is very sensitive at points near the
    # start and end of domain and leads to NaN for float16
    # if domain_eps is 1e-5.
    if dtype.is_floating_point or dtype.is_complex:
        margin = 3e-2 if dtype == torch.float16 else op_info._domain_eps
        low = low + margin
        high = high - margin

    def in_domain(shape):
        return make_tensor(shape, dtype=dtype, device=device, low=low, high=high,
                           requires_grad=requires_grad)

    for shape in ((S, S, S), ()):
        yield SampleInput(in_domain(shape))
        yield SampleInput(in_domain(shape), 0.2)
def sample_inputs_isin(op_info, device, dtype, requires_grad, **kwargs):
    """Yield two samples with swapped (elements, test_elements) sizes.

    isin has two paths based on the size of elements and test_elements
    (``elements.numel() < 10 * pow(test_elements.numel(), 0.145)`` versus the
    rest); the two size orderings are meant to cover one path each.
    """
    def vector(length):
        return make_tensor((length,), device=device, dtype=dtype, requires_grad=requires_grad)

    for elements_len, test_elements_len in ((L, S), (S, L)):
        elements = vector(elements_len)
        test_elements = vector(test_elements_len)
        yield SampleInput(elements, args=(test_elements,))
def sample_inputs_masked_scatter(op_info, device, dtype, requires_grad, **kwargs):
    """Yield masked_scatter samples with ``args=(mask, source)``.

    Covers a same-shape mask, a broadcast 1D mask, a scalar mask from
    ``bernoulli_scalar()``, and an input that is itself broadcast
    (``broadcasts_input=True``). The source is always ``(S, S)``.
    """
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    def random_mask(shape):
        return torch.randn(shape, device=device) > 0

    def scalar_mask():
        return bernoulli_scalar().to(device)

    cases = (
        # (input shape, mask factory, extra SampleInput keyword arguments)
        ((S, S), partial(random_mask, (S, S)), {}),
        ((S, S), partial(random_mask, (S,)), {}),
        ((S, S), scalar_mask, {}),
        ((S,), partial(random_mask, (S, S)), {"broadcasts_input": True}),
    )
    for input_shape, make_mask, extra in cases:
        # Generation order: input, then mask, then source.
        destination = make_arg(input_shape)
        mask = make_mask()
        source = make_arg((S, S))
        yield SampleInput(destination, args=(mask, source), **extra)
def error_inputs_masked_scatter(op_info, device, **kwargs):
    """Yield ErrorInputs for masked_scatter called with non-boolean masks (float and uint8)."""
    def float_tensor(*shape):
        return make_tensor(*shape, device=device, dtype=torch.float)

    for mask_dtype in (torch.float, torch.uint8):
        destination = float_tensor(1, 3)
        bad_mask = torch.ones(1, 3, device=device, dtype=mask_dtype)
        source = float_tensor(3, 4)
        yield ErrorInput(SampleInput(destination, args=(bad_mask, source)),
                         error_regex=r"masked_scatter_ only supports boolean masks")
def sample_inputs_masked_fill(op_info, device, dtype, requires_grad, **kwargs):
    """Yield masked_fill samples with ``args=(mask, value)``.

    ``value`` is either the Python number ``10`` or a 0-dim tensor. Masks are
    random booleans of various shapes, including ones that broadcast against
    the input and ones the input must broadcast to. On CUDA devices one
    extra sample uses a CPU 0-dim tensor as the value.
    """
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    def random_mask(shape):
        return torch.randn(shape, device=device) > 0

    def sample(input_shape, mask_shape, tensor_value=False, **extra):
        # Generation order: input, then mask, then (optionally) the value tensor.
        filled = make_arg(input_shape)
        mask = random_mask(mask_shape)
        value = make_arg(()) if tensor_value else 10
        return SampleInput(filled, args=(mask, value), **extra)

    yield sample((S, S), (S, S))
    yield sample((S, S), (S, S), tensor_value=True)
    yield sample((S, S), (S,))
    yield sample((), ())
    yield sample((), (), tensor_value=True)
    yield sample((S, S), ())

    yield sample((S,), (S, S), tensor_value=True, broadcasts_input=True)
    yield sample((S,), (S, S), broadcasts_input=True)

    if torch.device(device).type == 'cuda':
        # `self` and `mask` on CUDA but `value` is a CPU scalar tensor.
        filled = make_arg((S, S))
        mask = random_mask((S, S))
        cpu_value = make_tensor((), device="cpu", dtype=dtype)
        yield SampleInput(filled, args=(mask, cpu_value))
def error_inputs_masked_fill(op_info, device, **kwargs):
    """Yield ErrorInputs for masked_fill.

    Covers a value tensor that is not 0-dim, complex values that cannot be
    stored in the input's dtype (scalar and tensor overloads) and, on CUDA
    devices, a CUDA value tensor combined with CPU input and mask.
    """
    make_arg = partial(make_tensor, device=device, dtype=torch.float, requires_grad=False)
    overflow_regex = r"value cannot be converted to type .* without overflow"

    def scalar_mask():
        return make_arg(()) > 0

    # `value` is not a 0-D tensor.
    filled = make_arg((2, 2))
    mask = scalar_mask()
    one_d_value = make_arg((1,))
    yield ErrorInput(SampleInput(filled, args=(mask, one_d_value)),
                     error_regex="only supports a 0-dimensional value tensor, but got tensor with 1 dimension")

    # downcasting complex value (scalar overload)
    filled = make_arg((2, 2))
    mask = scalar_mask()
    yield ErrorInput(SampleInput(filled, args=(mask, 1j)), error_regex=overflow_regex)

    # downcasting complex value (tensor overload)
    long_input = torch.ones(2, dtype=torch.long, device=device)
    mask = scalar_mask()
    complex_value = torch.tensor(1j, device=device)
    yield ErrorInput(SampleInput(long_input, args=(mask, complex_value)), error_regex=overflow_regex)

    if torch.device(device).type == 'cuda':
        # `self` and `mask` on CPU but `value` is a CUDA scalar tensor.
        cpu_input = torch.randn((S, S), device='cpu')
        cpu_mask = torch.randn(S, S, device='cpu') > 0
        cuda_value = torch.randn((), device='cuda')
        yield ErrorInput(SampleInput(cpu_input, args=(cpu_mask, cuda_value)),
                         error_regex=r"to be on same device")
def sample_inputs_masked_select(op_info, device, dtype, requires_grad, **kwargs):
    """Yield masked_select samples of the form ``SampleInput(input, mask)``.

    A mask shape of ``None`` in the table stands for the constant 0-dim
    boolean tensor ``torch.tensor(1, dtype=torch.bool)``; every other mask is
    a random boolean tensor of the listed shape.
    """
    cases = (
        # (input shape, mask shape or None)
        ((M, M), (M, M)),
        ((M, M), (M,)),
        ((M,), (M, M)),
        ((M, 1, M), (M, M)),
        ((), None),
        ((M, M), None),
        ((), (M, M)),
    )
    for input_shape, mask_shape in cases:
        source = make_tensor(input_shape, device=device, dtype=dtype, requires_grad=requires_grad,
                             low=None, high=None)
        if mask_shape is None:
            mask = torch.tensor(1, device=device, dtype=torch.bool)
        else:
            mask = torch.randn(mask_shape, device=device) > 0
        yield SampleInput(source, mask)
def sample_inputs_matrix_exp(op_info, device, dtype, requires_grad, **kwargs):
    """Yield two argument-free samples: an ``(S, S)`` tensor and an ``(S, S, S)`` tensor."""
    for shape in ((S, S), (S, S, S)):
        yield SampleInput(make_tensor(shape, device=device, dtype=dtype, requires_grad=requires_grad))
def sample_inputs_matmul(op_info, device, dtype, requires_grad, is_rmatmul=False, **kwargs):
    """Yield matmul samples over 1D/2D/batched operand shapes, including zero-sized ones.

    Both operands are always generated left-hand side first. When
    ``is_rmatmul`` is true they are handed to SampleInput in swapped order
    (right operand as the input).
    """
    shape_pairs = (
        ((L,), (L,)),
        ((S, M), (M,)),
        ((M,), (M, S)),
        ((S, M), (M, S)),
        ((S, 0), (0, M)),
        ((S, S, M), (M,)),
        ((S, S, M), (M, S)),
        ((S, S, 0), (0, S)),
        ((M,), (S, M, S)),
        ((S, M), (S, M, S)),
        ((0, 0), (S, 0, 0)),
        ((S, S, M, M), (S, S, M, S)),
        ((S, S, M, M), (M,)),
        ((M,), (S, S, M, S)),
        ((S, S, S), (1, S, S)),
    )
    for lhs_shape, rhs_shape in shape_pairs:
        lhs = make_tensor(lhs_shape, dtype=dtype, device=device, low=None, high=None,
                          requires_grad=requires_grad)
        rhs = make_tensor(rhs_shape, dtype=dtype, device=device, low=None, high=None,
                          requires_grad=requires_grad)
        operands = (rhs, lhs) if is_rmatmul else (lhs, rhs)
        yield SampleInput(*operands)
def sample_inputs_meshgrid(op_info: OpInfo, device: torch.device, dtype: torch.dtype,
                           requires_grad: bool,
                           *, variant: str, **kwargs) -> list[SampleInput]:
    """Yield meshgrid samples for the ``'variadic'`` or ``'list'`` calling convention.

    With ``variant='variadic'`` the tensors are spread over SampleInput's
    positional arguments; with ``variant='list'`` they are passed as a
    single list. Every combination of 0-dim and length-3 inputs is emitted
    once with ``indexing='xy'`` and once with ``indexing='ij'``.

    Raises:
        ValueError: if ``variant`` is neither ``'variadic'`` nor ``'list'``.
            Because this is a generator, the error surfaces on first iteration.

    NOTE(review): the return annotation says ``list`` but the function is a
    generator; it is left untouched here to keep the signature unchanged.
    """
    if variant == 'variadic':
        def make_inputs(tensors: list[torch.Tensor]) -> list[torch.Tensor]:
            return tensors
    elif variant == 'list':
        def make_inputs(tensors: list[torch.Tensor]) -> list[list[torch.Tensor]]:
            return [tensors]
    else:
        raise ValueError(
            'Unsupported variant, must be one of {"variadic", "list"}. '
            f'Got "{variant}".')

    SCALAR = torch.Size([])
    VECTOR = torch.Size([3])
    test_cases: list[list[torch.Size]] = [
        [SCALAR],
        [VECTOR],
        [VECTOR, SCALAR],
        [VECTOR, SCALAR, VECTOR],
        [VECTOR, SCALAR, VECTOR, SCALAR],
    ]

    # A tuple (not a set) keeps the sample order identical from run to run;
    # the iteration order of a set of strings depends on hash randomization.
    indexing_modes = ('xy', 'ij')

    for shapes, indexing in itertools.product(test_cases, indexing_modes):
        args = make_inputs(
            [make_tensor(shape, dtype=dtype, device=device, requires_grad=requires_grad)
             for shape in shapes])
        yield SampleInput(*args, indexing=indexing)
def sample_inputs_mvlgamma(op_info, device, dtype, requires_grad, **kwargs):
    """Yield mvlgamma samples for ``p`` in 1..5 on an ``(S, S)`` and a 0-dim tensor.

    The accepted lower bound for the input depends on ``p``: it is
    ``(p - 1) / 2``. That bound, pushed up by a margin, is passed as ``low``
    to ``make_tensor``.
    """
    orders = (1, 2, 3, 4, 5)
    for shape in ((S, S), ()):
        for p in orders:
            lower_bound = (p - 1.) / 2
            if dtype.is_floating_point:
                lower_bound += 2 * torch.finfo(dtype).eps
            else:
                # Round-up minimum value for integral dtypes
                lower_bound += 1
            tensor = make_tensor(shape, device=device, dtype=dtype, requires_grad=requires_grad,
                                 low=lower_bound)
            yield SampleInput(tensor, args=(p,))
def skips_mvlgamma(skip_redundant=False):
    """Return the tuple of DecorateInfo skips shared by the ``mvlgamma`` entries.

    Since ``mvlgamma`` has multiple entries, there are multiple common skips
    for the additional entries; this helper builds them in one place. With
    ``skip_redundant=True`` four whole test classes are skipped as well.
    """
    def skipped(*location, **options):
        return DecorateInfo(unittest.skip("Skipped!"), *location, **options)

    common = (
        # outside domain values are hard error for mvlgamma op.
        skipped('TestUnaryUfuncs', 'test_float_domains'),
        DecorateInfo(unittest.expectedFailure, 'TestUnaryUfuncs',
                     'test_reference_numerics_extremal'),
        skipped('TestUnaryUfuncs', 'test_reference_numerics_large',
                dtypes=(torch.float16, torch.int8)),
        skipped('TestUnaryUfuncs', 'test_reference_numerics_small',
                dtypes=(torch.int8,)),
    )
    if not skip_redundant:
        return common

    # Redundant tests
    redundant = tuple(
        skipped(test_class)
        for test_class in ('TestFwdGradients', 'TestBwdGradients', 'TestJit', 'TestCommon')
    )
    return common + redundant
def make_mvlgamma_opinfo(variant_test_name, domain, skips, sample_kwargs):
    """Build one ``UnaryUfuncInfo`` entry for ``mvlgamma``.

    To test reference numerics against multiple values of argument ``p``,
    multiple OpInfo entries are made, each corresponding to a different
    value of ``p``. The op tests from test_ops.py are run only for ``p=1``
    to avoid redundancy in testing.
    """
    options = dict(
        # The reference is only wired up when ``TEST_SCIPY`` is truthy.
        ref=reference_mvlgamma if TEST_SCIPY else None,
        aliases=('special.multigammaln',),
        variant_test_name=variant_test_name,
        domain=domain,
        decorators=(precisionOverride({torch.float16: 5e-2}),),
        dtypes=all_types_and(torch.half, torch.bfloat16),
        dtypesIfCUDA=all_types_and(torch.float16, torch.bfloat16),
        sample_inputs_func=sample_inputs_mvlgamma,
        supports_forward_ad=True,
        supports_fwgrad_bwgrad=True,
        promotes_int_to_float=True,
        skips=skips,
        sample_kwargs=sample_kwargs,
    )
    return UnaryUfuncInfo('mvlgamma', **options)
def sample_inputs_cumulative_ops(op_info, device, dtype, requires_grad, supports_dtype_kwargs=True, **kwargs):
    """Yield samples for cumulative ops, with the dimension passed positionally.

    When ``supports_dtype_kwargs`` is true, one extra sample also passes
    ``dtype`` by keyword, always equal to the input's dtype.
    """
    def new_tensor(shape):
        return make_tensor(shape, dtype=dtype, device=device, low=None, high=None,
                           requires_grad=requires_grad)

    for shape, dim in (((S, S, S), 0), ((S, S, S), 1), ((), 0)):
        yield SampleInput(new_tensor(shape), dim)

    if not supports_dtype_kwargs:
        return
    # NOTE: if `dtype` is not same as input, then inplace variants fail with
    # `provided dtype must match the dtype of self tensor in cumsum`
    yield SampleInput(new_tensor((S, S, S)), 1, dtype=dtype)
def sample_inputs_unfold(op_info, device, dtype, requires_grad, **kwargs):
    """Yield unfold samples; each passes three positional arguments after the tensor.

    The table starts with a 0-dim tensor, then sweeps all four dims of an
    ``(S, S, S, S)`` tensor for three (second, third)-argument pairs, and
    ends with 1D, 2D and 3D tensors.
    """
    hypercube = (S, S, S, S)

    test_cases = [((), (0, 1, 1))]
    for second, third in ((3, 1), (3, 2), (4, 1)):
        test_cases.extend((hypercube, (dim, second, third)) for dim in range(4))
    test_cases.extend((
        ((M,), (0, 3, 1)),
        ((M,), (0, 3, 2)),
        ((M,), (0, 3, 3)),
        ((1000,), (0, 3, 11)),
        ((1000,), (0, 2, 27)),
        ((10, 10), (0, 1, 2)),
        ((10, 10), (1, 2, 3)),
        ((10, 10), (1, 2, 2)),
        ((S, S, S), (2, 3, 2)),
    ))

    for shape, arguments in test_cases:
        tensor = make_tensor(shape, dtype=dtype, device=device, low=None, high=None,
                             requires_grad=requires_grad)
        yield SampleInput(tensor, *arguments)
def sample_inputs_split(op_info, device, dtype, requires_grad, *, list_args=False, **kwargs):
    """Yield split samples on ``(S, S, S)`` tensors.

    With ``list_args=True`` the first argument is a ``torch.Size`` of three
    section sizes (optionally followed by a dim); otherwise it is a single
    integer (optionally followed by a dim).
    """
    def three_sections(edge):
        # Two equal outer sections; the middle one takes whatever is left of S.
        return torch.Size([edge, S - edge * 2, edge])

    if list_args:
        third = int(S / 3)
        half = int(S / 2)
        argument_sets = (
            (three_sections(third),),
            (three_sections(half), 2),
            (three_sections(half), -2),
        )
    else:
        argument_sets = (
            (2,),
            (S, 1),
        )

    for arguments in argument_sets:
        tensor = make_tensor((S, S, S), device=device, dtype=dtype, requires_grad=requires_grad)
        yield SampleInput(tensor, args=arguments)
def sample_inputs_split_with_sizes(op_info, device, dtype, requires_grad, **kwargs):
    """Yield split_with_sizes samples on ``(S, S, S)`` tensors.

    The section sizes are passed as a ``torch.Size``; one case ends with a
    zero-length section and two cases add an explicit dim (2 and -2).
    """
    third = int(S / 3)
    symmetric = [third, S - third * 2, third]
    zero_tail = [third, S - third, 0]

    argument_specs = (
        # (section sizes, trailing positional arguments)
        (symmetric, ()),
        (zero_tail, ()),
        (symmetric, (2,)),
        (symmetric, (-2,)),
    )
    for sizes, trailing in argument_specs:
        arguments = (torch.Size(sizes), *trailing)
        tensor = make_tensor((S, S, S), device=device, dtype=dtype, requires_grad=requires_grad)
        yield SampleInput(tensor, args=arguments)
def sample_inputs_msort(op_info, device, dtype, requires_grad, **kwargs):
    """Yield two msort samples: a large 1D tensor of unique values and an ``(S, M, S)`` tensor.

    The large tensor is a random permutation of ``L * L * L`` integers cast
    to ``dtype``; ``requires_grad`` is only applied to it for floating point
    dtypes (including float16 and bfloat16).
    """
    def large_1d_unique():
        permutation = torch.randperm(L * L * L, dtype=torch.int64, device=device)
        values = permutation.to(dtype)
        if dtype in floating_types_and(torch.float16, torch.bfloat16):
            values.requires_grad_(requires_grad)
        return values

    # Test case for large tensor.
    yield SampleInput(large_1d_unique())

    regular = make_tensor((S, M, S), dtype=dtype, device=device, low=None, high=None,
                          requires_grad=requires_grad)
    yield SampleInput(regular)
def sample_inputs_lerp(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for lerp.

    Each table row is (input_shape, end_shape, weight, broadcasts_input).
    A tuple ``weight`` is a shape for a weight tensor; anything else is
    passed through as a Python scalar weight.
    """
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    table = [
        ((S, S), (S, S), 0.4, False),          # no broadcast
        ((S, S), (S,), 0.4, False),            # broadcast rhs
        ((), (), 0.4, False),                  # scalar tensor
        ((S, S), (), 0.4, False),              # broadcast rhs scalar-tensor
        ((S, S), (S,), (S, S), False),         # broadcast rhs with weight tensor
        ((S, S), (S, 1), (S,), False),         # broadcast rhs and weight tensor
        ((S,), (S, S), 0.4, True),             # broadcast lhs
        ((), (S, S), 0.4, True),               # scalar broadcast_lhs
        ((S, 1), (S, S), 0.4, True),           # broadcast all
        ((S, 1), (S, S), (S, 1), True),        # tensor broadcast all
        ((S, S), (S, S), (S, S), False),       # no broadcast with weight tensor
        ((S,), (S, S), (S, S), True),          # broadcast lhs with weight tensor
        ((S,), (S, S, S), (S, S), True),       # broadcast lhs and weight tensor
        ((S, S), (S, S, S), (S,), True),       # broadcast lhs and weight tensor variant
    ]

    if dtype.is_complex:
        # Complex scalar weights.
        table += [
            ((S, S), (S, S), 0.4j, False),         # no broadcast
            ((S, S), (S, S), 1.2 + 0.1j, False),
            ((S, S), (S,), 0.4j, False),           # broadcast rhs
            ((S, S), (S, S), 5.4 + 9j, False),     # no broadcast
            ((), (), 0.4j, False),                 # scalar tensor
            ((), (), 6.1 + 0.004j, False),
            ((S, S), (), 0.4j, False),             # broadcast rhs scalar-tensor
            ((S, S), (), 1 + 2j, False),
        ]

    for input_shape, end_shape, weight, broadcasts in table:
        # Tensors are created in input, end, weight order.
        start = make_arg(input_shape)
        end = make_arg(end_shape)
        if isinstance(weight, tuple):
            weight = make_arg(weight)
        sample = SampleInput(start, end, weight)
        if broadcasts:
            sample = sample.with_metadata(broadcasts_input=True)
        yield sample
def sample_inputs_tensordot(self, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for tensordot.

    Each row is (first_shape, second_shape, dims); ``dims`` is forwarded as a
    keyword argument and is either an int or a pair of index lists.
    """
    make_arg = partial(make_tensor, dtype=dtype, device=device,
                       requires_grad=requires_grad, low=-1, high=+2)
    rows = [
        ((2, 2, 2), (2, 2, 2), 2),
        ((2, 2, 1), (2, 1, 2), ([0, 1], [2, 0])),
        ((1, 1, 1), (2, 1, 2), ([0, 1], [2, 0])),
    ]
    for lhs_shape, rhs_shape, dims in rows:
        lhs = make_arg(lhs_shape)
        rhs = make_arg(rhs_shape)
        yield SampleInput(lhs, rhs, dims=dims)
def sample_inputs_kron(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for kron: an (S, S) input with an (M, L) other."""
    make_arg = partial(
        make_tensor, dtype=dtype, device=device, requires_grad=requires_grad, low=None, high=None)
    shape_pairs = [
        ((S, S), (M, L)),
    ]
    for lhs_shape, rhs_shape in shape_pairs:
        lhs = make_arg(lhs_shape)
        rhs = make_arg(rhs_shape)
        yield SampleInput(lhs, rhs)
def sample_inputs_inner(self, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for inner: 1-D with 1-D, and 0-D with 2-D."""
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    # Shapes are given to make_tensor as varargs; no arguments means a 0-D tensor.
    vec_a = make_arg(S)
    vec_b = make_arg(S)
    yield SampleInput(vec_a, vec_b)

    scalar = make_arg()
    matrix = make_arg(S, S)
    yield SampleInput(scalar, matrix)
def sample_inputs_scatter(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for scatter.

    Each case is (input, (dim, index, src)). When ``requires_grad`` is false,
    extra samples with ``reduce='add'`` (and ``reduce='multiply'`` for
    floating-point dtypes) are yielded on a detached clone of the input.
    """
    new_tensor = partial(make_tensor, dtype=dtype, device=device,
                         low=None, high=None, requires_grad=requires_grad)
    new_index = partial(gather_variable, device=device)

    zero_index = torch.tensor(0, dtype=torch.long, device=device)
    # All tensors are materialized eagerly, in this order.
    cases = (
        (new_tensor((M, S)), (0, new_index((S, S), 1, M), new_tensor((S, S)))),
        (new_tensor((M, S)), (0, new_index((S, S), 1, M).to(torch.int32), new_tensor((S, S)))),
        (new_tensor((M, S)), (1, new_index((S, S), 0, S), new_tensor((S, S)))),
        (new_tensor((M, S)), (-1, new_index((S, S), 0, S), new_tensor((S, S)))),
        (new_tensor((M, S)), (0, new_index((M, S // 2), 1, M), new_tensor((M, S // 2)))),
        (new_tensor((M, S)), (1, new_index((M, S // 2), 0, S), new_tensor((M, S // 2)))),
        (new_tensor((M, S)), (-1, new_index((M, S // 2), 0, S), new_tensor((M, S // 2)))),
        (new_tensor(()), (0, zero_index.detach().clone(), new_tensor(()))),
        (new_tensor(()), (0, zero_index.detach().clone(), 2.5)),
    )

    for self_t, scatter_args in cases:
        yield SampleInput(self_t, *scatter_args)

        if requires_grad:
            continue

        yield SampleInput(self_t.detach().clone(), *scatter_args, reduce='add')

        if dtype.is_floating_point:
            yield SampleInput(self_t.detach().clone(), *scatter_args, reduce='multiply')
def sample_inputs_scatter_add(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for scatter_add as (input, dim, index, src)."""
    new_tensor = partial(make_tensor, dtype=dtype, device=device,
                         low=None, high=None, requires_grad=requires_grad)
    new_index = partial(gather_variable, device=device)

    # (dim, index shape, index_dim, max_indices); src has the index's shape.
    specs = (
        (0, (S, S), 1, M),
        (1, (S, S), 0, S),
        (-1, (S, S), 0, S),
        (0, (M, S // 2), 1, M),
        (1, (M, S // 2), 0, S),
        (-1, (M, S // 2), 0, S),
    )
    for dim, index_shape, index_dim, max_indices in specs:
        # Creation order: input, index, src.
        self_t = new_tensor((M, S))
        index = new_index(index_shape, index_dim, max_indices)
        src = new_tensor(index_shape)
        yield SampleInput(self_t, dim, index, src)

    # 0-D input, index and src.
    zero_index = torch.tensor(0, dtype=torch.long, device=device)
    yield SampleInput(new_tensor(()), 0, zero_index.detach().clone(), new_tensor(()))
def sample_inputs_scatter_reduce(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for scatter_reduce.

    The reduction name is taken from ``op_info.variant_test_name`` and passed
    as the last positional argument; ``include_self`` is passed as a kwarg.
    """
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)
    new_index = partial(gather_variable, device=device)

    zero_index = torch.tensor(0, dtype=torch.long, device=device)
    # (input shape, dim, index tensor, src shape); index tensors are built eagerly.
    cases = (
        ((M, S), 0, new_index((S, S), 1, M), (S, S)),
        ((M, S), 1, new_index((S, S), 0, S), (S, S)),
        ((M, S), -1, new_index((S, S), 0, S), (S, S)),
        ((M, S), 0, new_index((M, S // 2), 1, M), (M, S // 2)),
        ((M, S), 1, new_index((M, S // 2), 0, S), (M, S // 2)),
        ((M, S), -1, new_index((M, S // 2), 0, S), (M, S // 2)),
        ((), 0, zero_index.detach().clone(), ()),
    )

    reduce = op_info.variant_test_name
    # NOTE(review): False is listed twice, so every case is sampled twice with
    # include_self=False -- confirm whether this is intentional.
    include_self_options = [False, True, False]
    for case in cases:
        self_shape, dim, index, src_shape = case
        for include_self in include_self_options:
            self_t = make_arg(self_shape)
            src = make_arg(src_shape)
            yield SampleInput(self_t,
                              args=(dim, index, src, reduce),
                              kwargs={'include_self': include_self})

    # Sample inputs to test edge cases for backward
    # Check that gradients are propagated correctly for prod when zeros in self/src are reduced
    if not (requires_grad and reduce == 'prod'):
        return

    # This sample tests gradients for the following cases
    # (a) 1 zero reduced (from src (self[0, 1], self[1, 1]), from self (self[0, 0], self[2, 0]))
    # (b) 2 zeros reduced (1 from src and 1 from self (self[1, 0])
    # (c) no zeros reduced (self([2, 1]))
    # (d) 2 zeros reduced (both from src) is tested in test/test_autograd.py
    #     test_scatter_index_reduce_prod_gradgrad_error as this case is not supported for gradgrad
    self_t = torch.tensor([[0, 13], [0, 17], [0, 19]],
                          dtype=dtype, device=device, requires_grad=requires_grad)
    src = torch.tensor([[0, 1, 2, 3], [0, 4, 0, 1], [2, 3, 5, 6]],
                       dtype=dtype, device=device, requires_grad=requires_grad)
    index = torch.tensor([[1, 1, 0, 0], [0, 0, 1, 1], [0, 0, 0, 1]],
                         dtype=torch.long, device=device)
    yield SampleInput(self_t,
                      args=(1, index, src, reduce),
                      kwargs={'include_self': True})
def sample_inputs_segment_reduce(op_info, device, dtype, requires_grad, *, mode='lengths', **kwargs):
    """Yield SampleInputs for segment_reduce.

    Args:
        mode: ``'lengths'`` passes the segment lengths via the ``lengths``
            kwarg; ``'offsets'`` passes their zero-prefixed cumulative sum
            along ``dim`` via the ``offsets`` kwarg.

    Raises:
        RuntimeError: if ``mode`` is neither ``'lengths'`` nor ``'offsets'``
            (raised when the generator is first advanced).
    """
    # The mode does not change between samples, so validate it once up front.
    if mode not in ('lengths', 'offsets'):
        raise RuntimeError(f"mode must be one of 'offsets' or 'lengths' got '{mode}'.")

    def _tensor(shape, dtype=dtype, low=None, high=None):
        return make_tensor(shape, dtype=dtype, device=device, low=low, high=high, requires_grad=requires_grad)

    test_cases = (
        # inp_shape, dim, lengths, unsafe
        ((S,), 0, [0, 1, 2, 2], False),
        ((S,), 0, [0, 1, 2, 2], True),
        ((S,), 0, [2, 0, 3, 0], False),
        ((S, S), 0, [0, 1, 2, 2], False),
        # test when lengths do not sum to dim size
        ((M, S, S), 0, [1, 2, 0, 6, 0], True),
        # test for higher dimensions
        ((S, S), 1, [[0, 1, 2, 2] for _ in range(S)], False),
        ((S, S), 1, [[2, 0, 3, 0], [0, 1, 2, 2], [3, 0, 2, 0], [1, 1, 1, 2], [0, 1, 2, 2]], False),
        ((S, S, S), 1, [[0, 1, 2, 2] for _ in range(S)], False),
        ((S, S, S), 1, [[2, 0, 3, 0], [0, 1, 2, 2], [3, 0, 2, 0], [1, 1, 1, 2], [0, 1, 2, 2]], False),
    )

    reductions = ["max", "mean", "min", "sum", "prod"]
    for args, reduce, initial in product(test_cases, reductions, [1, 2]):
        inp_shape, dim, lengths, unsafe = args
        lengths_t = torch.tensor(lengths, dtype=torch.long, device=device)
        sample_input_kwargs = {'axis': dim, 'unsafe': unsafe, 'initial': initial}
        if mode == 'lengths':
            sample_input_kwargs['lengths'] = lengths_t
        else:
            # mode == 'offsets': prepend a zero along `dim`, then take the
            # running sum of the lengths to get segment boundaries.
            zeros_shape = list(lengths_t.shape)
            zeros_shape[dim] = 1
            offsets_t = torch.cat((lengths_t.new_zeros(zeros_shape), lengths_t), dim).cumsum_(dim)
            sample_input_kwargs['offsets'] = offsets_t
        yield SampleInput(_tensor(inp_shape),
                          args=(reduce,),
                          kwargs=sample_input_kwargs)
def sample_inputs_ravel(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for ravel: a 3-D, a 0-D and a noncontiguous 3-D tensor."""
    make_arg = partial(make_tensor, dtype=dtype, device=device,
                       low=None, high=None, requires_grad=requires_grad)
    variants = (
        ((S, S, S), {}),
        ((), {}),
        ((S, S, S), {'noncontiguous': True}),
    )
    for shape, extra in variants:
        yield SampleInput(make_arg(shape, **extra))
def sample_inputs_unravel_index(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for unravel_index as (indices tensor, shape tuple)."""
    as_indices = partial(torch.tensor, device=device, dtype=dtype)

    two_rows = [[3, 8, 13], [0, 5, 10]]
    for target_shape in ((4, 5), (4, 2**30), (2**30, 4)):
        yield SampleInput(as_indices(two_rows), target_shape)

    yield SampleInput(as_indices(2), (2, 2))

    # Largest value representable in `dtype`, derived from its byte width and signedness.
    sign_bits = 1 if dtype.is_signed else 0
    max_val = 2**(8 * dtype.itemsize - sign_bits) - 1
    yield SampleInput(as_indices(max_val - 1), (1, max_val))

    yield SampleInput(as_indices([22, 41, 37]), (7, 6))

    yield SampleInput(as_indices(min(1621, max_val)), (6, 7, 8, 9))

    # Empty index tensor.
    yield SampleInput(as_indices([]), (10, 3, 5))

    yield SampleInput(
        as_indices([[1, 0, 1, 2, 3, 4], [1, 6, 1, 3, 2, 0]]),
        (5, 8))

    yield SampleInput(
        as_indices([[1, 0, 1, 2, 3, 4], [1, 6, 1, 3, 2, 0], [1, 3, 1, 0, 9, 5]]),
        (5, 8, 10))

    # 0-D index with an empty target shape.
    yield SampleInput(as_indices(0), ())

    # Flat indices produced by np.intersect1d, paired with the source array shapes.
    first = np.array([[2, 4, 5, 6], [7, 8, 1, 15]])
    second = np.array([[3, 2, 7, 6], [10, 12, 8, 9]])
    _, first_idx, second_idx = np.intersect1d(first, second, assume_unique=True, return_indices=True)
    yield SampleInput(as_indices(first_idx), first.shape)
    yield SampleInput(as_indices(second_idx), second.shape)

    first = np.array([[2, 4, 5, 6, 6], [4, 7, 8, 7, 2]])
    second = np.array([[3, 2, 7, 7], [10, 12, 8, 7]])
    _, first_idx, second_idx = np.intersect1d(first, second, return_indices=True)
    yield SampleInput(as_indices(first_idx), first.shape)
    yield SampleInput(as_indices(second_idx), second.shape)
def sample_inputs_tril_triu(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs shared by tril and triu.

    Each row is (input shape, positional args); the args are either empty or
    hold a single integer.
    """
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)
    rows = [
        ((M, M), ()),
        ((M, M), (2,)),
        ((M, S), ()),
        ((M, S), (-1,)),
        ((M, M), (2,)),
        ((S, M, S), ()),
        ((S, M, S), (2,)),
        ((3, 3, S, S), ()),
    ]
    for shape, extra_args in rows:
        yield SampleInput(make_arg(shape), args=extra_args)
def error_inputs_tril_triu(opinfo, device, **kwargs):
    """Yield ErrorInputs for tril/triu: a 1-D float32 input must be rejected."""
    # error inputs for input.ndim < 2
    one_dim = make_tensor((4,), device=device, dtype=torch.float32)
    yield ErrorInput(
        SampleInput(one_dim),
        error_regex="input tensor must have at least 2 dimensions",
    )
def sample_inputs_trilu_indices(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for tril_indices/triu_indices.

    The first entry of each tuple is used as the sample's input, the rest as
    positional args; ``dtype`` and ``device`` are forwarded as kwargs.
    """
    # (row, col, offset)
    arg_tuples = [
        (0, 0),
        (20, 0),
        (0, 20),
        (20, 21, 0),
        (20, 21, 7),
        (20, 21, -7),
        # Large test cases below are deliberately commented out to speed up CI
        # tests and to avoid OOM error. When modifying implementations of
        # tril_indices and triu_indices, please enable these tests and make sure
        # they pass.
        # (2, 68435455, 3),
        # (5000, 5000),
        # (5000, 5000, 1234),
        # (5000, 5000, -1233),
    ]
    for row, *rest in arg_tuples:
        yield SampleInput(row, args=tuple(rest), kwargs={"dtype": dtype, "device": device})
def sample_inputs_clone_contiguous(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs shared by clone and contiguous: a 3-D and a 0-D tensor."""
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)
    for shape in ((S, M, S), ()):
        yield SampleInput(make_arg(shape))
def reference_inputs_clone_contiguous(op, device, dtype, requires_grad, **kwargs):
    """Yield reference inputs for clone/contiguous.

    Extends the regular samples with transposed, noncontiguous, as_strided and
    channels-last inputs, both with and without an explicit ``memory_format``.
    """
    # NOTE: the default memory format for clone is torch.preserve_format, for contiguous it's torch.contiguous_format
    # This exploits that default to test torch.preserve_format for clone, without causing an error when testing contiguous

    yield from sample_inputs_clone_contiguous(op, device, dtype, requires_grad, **kwargs)

    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    def contiguous_kwargs():
        # Fresh dict per sample so that samples never share a kwargs object.
        return {'memory_format': torch.contiguous_format}

    shapes = (
        (3, 5, 6),
        (1, 1, 3, 5, 6),
        (1, 1, 3, 5, 6, 1, 1),
        (1, 0, 3, 5, 0, 2),
        (1, 0, 3, 5, 0, 0, 1, 1, 2),
        (),
    )
    for shape in shapes:
        # First the four layouts with the default memory format, then the same
        # four layouts with an explicit contiguous_format.
        for explicit_format in (False, True):
            for noncontig in (False, True):
                for transposed in (False, True):
                    t = make_arg(shape, noncontiguous=noncontig)
                    if transposed:
                        t = t.transpose(0, -1)
                    if explicit_format:
                        yield SampleInput(t, kwargs=contiguous_kwargs())
                    else:
                        yield SampleInput(t)

    # shape, strides, offset
    strided_cases = (
        ((5, 6, 2), (1, 1, 7), 2),
        ((5, 5, 4), (1, 1, 7), 2),
        ((5, 5, 2), (4, 5, 7), 3),
        ((5, 5, 2), (5, 5, 7), 3),
        ((5, 5, 2), (5, 5, 5), 3),
        ((9, 5, 2), (0, 1, 7), 3),
    )
    for shape, strides, offset in strided_cases:
        # Views are carved out of a fresh 500-element 1-D buffer each time.
        yield SampleInput(make_arg(500,).as_strided(shape, strides, offset))
        yield SampleInput(make_arg(500,).as_strided(shape, strides, offset), kwargs=contiguous_kwargs())

    # channels last 2D
    yield SampleInput(make_arg((2, 2, 2, 2)), kwargs={'memory_format': torch.channels_last})
    permuted_4d = make_arg((2, 2, 2, 2)).permute(0, 3, 1, 2)
    yield SampleInput(permuted_4d, kwargs={'memory_format': torch.channels_last})

    # channels last 3D
    yield SampleInput(make_arg((2, 2, 2, 2, 2)), kwargs={'memory_format': torch.channels_last_3d})
    permuted_5d = make_arg((2, 2, 2, 2, 2)).permute(0, 4, 1, 2, 3)
    yield SampleInput(permuted_5d, kwargs={'memory_format': torch.channels_last_3d})
def sample_inputs_sum_to_size(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for sum_to_size.

    For each (input shape, output shape) pair the output shape is passed as a
    single tuple argument; for non-empty output shapes it is additionally
    passed as a single list argument and as unpacked integer arguments.
    """
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    # list of tuples (shape, shape) defining the shapes of the input and output tensors
    shape_pairs = (
        ((), ()),
        ((S,), (1,)),
        ((S, S), (1, 1)),
        ((S, S), (1, S)),
        ((S, S), (S, S)),
        ((S, S, S), (S, 1, S)),
    )

    for in_shape, out_shape in shape_pairs:
        yield SampleInput(make_arg(in_shape), args=(out_shape,))
        if out_shape != ():
            # list form, then the sizes spread out as individual arguments
            yield SampleInput(make_arg(in_shape), args=(list(out_shape),))
            yield SampleInput(make_arg(in_shape), args=tuple(out_shape))
def error_inputs_sum_to_size(op_info, device, **kwargs):
    """Yield ErrorInputs for sum_to_size with target sizes the input cannot be summed to."""
    err_msg = "is not expandable to size"
    # (input shape, target size arguments)
    bad_cases = (
        ((M, S, M), (M, M)),
        ((M + 1, S, S, M), (M + 1, 1)),
    )
    for shape, target in bad_cases:
        inp = make_tensor(shape, device=device, dtype=torch.float32)
        yield ErrorInput(SampleInput(inp, args=target), error_regex=err_msg)
def sample_inputs_resize_ops(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for ``resize_`` and ``resize_as_``.

    Raises:
        ValueError: if ``op_info.name`` is neither ``'resize_'`` nor ``'resize_as_'``.
    """
    make_arg = partial(make_tensor, dtype=dtype, device=device)
    # (input shape, target shape)
    shape_pairs = (
        ((S, S, S), (S * S, S)),
        ((), ()),
        ((), (1, 1, 1)),
    )

    for shape, target_shape in shape_pairs:
        # The argument depends on which operator is being sampled.
        op_name = op_info.name
        if op_name == 'resize_as_':
            # resize_as_ takes another tensor (built from the input shape)
            args = (make_arg(shape, requires_grad=False), )
        elif op_name == 'resize_':
            # resize_ takes shape/tuple of ints,
            args = (target_shape, )
        else:
            raise ValueError("sample_inputs_resize_ops is being used with incorrect operator")

        yield SampleInput(make_arg(shape, requires_grad=requires_grad), args=args)
def sample_inputs_view_reshape(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for view/reshape style ops.

    When ``kwargs['tensor_arg']`` is truthy, the second shape is turned into a
    tensor of that shape and cases flagged as unsupported are skipped.
    """
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)
    use_tensor_arg = kwargs.get("tensor_arg")

    cases = (
        # a, b, is_tensor_supported
        ((S, S, S), (S * S, S), True),
        ((S * S, S), (S, S, S), True),
        ((S * S, S), (S, -1, S), False),  # neg index
        ((S * S * 2, S), (S, -1), False),  # neg index
        ((S,), (S,), True),
        ((), (), False),  # empty
        ((), (1,), True),
    )

    for in_shape, target, tensor_ok in cases:
        if use_tensor_arg:
            # skip unsupported cases
            if not tensor_ok:
                continue
            # convert to tensor
            target = make_arg(target, requires_grad=False)

        yield SampleInput(make_arg(in_shape), args=(target,))
def reference_inputs_view_reshape(op, device, dtype, requires_grad, **kwargs):
    """Yield reference inputs for view/reshape style ops.

    Reversible cases are sampled in both directions (a -> b and b -> a);
    cases using -1 are sampled in the forward direction only.
    """
    yield from sample_inputs_view_reshape(op, device, dtype, requires_grad, **kwargs)

    reversible = (
        # a, b, is_tensor_supported
        ((125,), (25, 5), True),
        ((25, 25), (1, 5, 5, 1, 5, 1, 5, 1), True),
        ((16, 32), (2, 4, 1, 4, 4, 1, 4), True),
        ((16, 12), (12, 16), True),
        ((1, 16, 12), (12, 16), True),
        ((1, 5, 1, 5), (25, 1), True),
        ((2, 4, 2), (4, 4), True),
        ((1, 4), (1, 1, 2, 1, 2), True),
        ((3, 5, 7), (7, 5, 3), True),
        ((1,), (), False),  # empty
        ((5, 0, 2, 3), (5, 0, 2, 3), True),
        ((2, 1, 0, 3, 1), (5, 0), True),
        ((1,), (), False),  # empty
        ((4, 5, 6), (4, 5, 6, 1, 1, 1), True),
        ((), (1, 1, 1, 1), False),  # empty
    )

    one_way = (
        ((), (-1,), False),  # neg index, empty
        ((4, 7, 9, 1, 1), (1, 4, 3, -1, 1), False),  # neg index
    )

    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)
    use_tensor_arg = kwargs.get("tensor_arg")

    for shape_a, shape_b, tensor_ok in reversible:
        if not use_tensor_arg:
            yield SampleInput(make_arg(shape_a), args=(shape_b,))
            yield SampleInput(make_arg(shape_b), args=(shape_a,))
            continue

        # skip unsupported cases
        if not tensor_ok:
            continue

        # convert the target to a tensor
        yield SampleInput(make_arg(shape_a), args=(make_arg(shape_b, requires_grad=False),))
        yield SampleInput(make_arg(shape_b), args=(make_arg(shape_a, requires_grad=False),))

    for shape_a, target, tensor_ok in one_way:
        if use_tensor_arg:
            # skip unsupported cases
            if not tensor_ok:
                continue
            # convert to tensor
            target = make_arg(target, requires_grad=False)

        yield SampleInput(make_arg(shape_a), args=(target,))
def error_inputs_view_reshape(op, device, **kwargs):
    """Yield ErrorInputs for view/reshape style ops.

    The expected error regex is picked per case: one for two inferred
    dimensions, one for the ambiguous zero-element reshape, and a generic
    "shape is invalid" message otherwise.
    """
    cases = (
        # a, b, is_tensor_supported
        # Reshape to different numel
        ((2,), (), False),  # empty
        ((1, 3, 0), (), False),  # empty
        ((4, 3), (4, 2), True),
        ((1, 3, 5), (5, 2, 2), True),
        # No valid inference
        ((1, 3, 5), (5, -1, 2), False),  # neg index
        # Two inferred shapes
        ((1, 3, 5), (5, -1, -1), False),  # neg index
        ((1), (0, -1), False),  # neg index
        ((0, 5), (0, -1), False),  # neg index
    )

    make_arg = partial(make_tensor, dtype=torch.float32, device=device, requires_grad=False)
    use_tensor_arg = kwargs.get("tensor_arg")

    for in_shape, target, tensor_ok in cases:
        # skip unsupported cases
        if use_tensor_arg and not tensor_ok:
            continue

        if target == (5, -1, -1):
            error_regex = "only one dimension can be inferred"
        elif in_shape == (0, 5):
            error_regex = (r"cannot reshape tensor of 0 elements into shape "
                           r"\[0, -1\] because the unspecified dimension size "
                           r"-1 can be any value and is ambiguous")
        else:
            # to avoid having issues with a regex
            shape = ', '.join(str(dim) for dim in target)
            # `in_shape` is either a bare int or a tuple of ints.
            if isinstance(in_shape, int):
                size = in_shape
            else:
                size = functools.reduce(operator.mul, in_shape, 1)
            error_regex = rf"shape '\[{shape}\]' is invalid for input of size {size}"

        # convert to tensor
        if use_tensor_arg:
            target = make_arg(target, requires_grad=False)

        yield ErrorInput(SampleInput(make_arg(in_shape), args=(target,)), error_type=Exception,
                         error_regex=error_regex)
def sample_inputs_atleast1d2d3d(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for atleast_1d/2d/3d.

    One sample per shape (4-D down to 0-D), then one sample whose input is a
    list holding a tensor of every shape.
    """
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)
    all_shapes = ((S, S, S, S), (S, S, S), (S, S), (S, ), (),)

    for single_shape in all_shapes:
        yield SampleInput(make_arg(single_shape))

    tensor_list = [make_arg(s) for s in all_shapes]
    yield SampleInput(tensor_list)
def sample_inputs_column_stack(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for column_stack; the input is a list of two tensors."""
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    # Pairs of shapes; a bare int is accepted by make_tensor as a 1-D shape.
    shape_pairs: tuple[tuple, ...] = (
        ((S, 2, 1), (S, 3, 1)),
        (S, (S, 5)),
        ((), (1, S)),
    )

    for first_shape, second_shape in shape_pairs:
        first = make_arg(first_shape)
        second = make_arg(second_shape)
        yield SampleInput([first, second])
def sample_inputs_flatten(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for flatten.

    Every shape is sampled with default arguments; shapes with more than one
    dimension are also sampled with ``start_dim=1, end_dim=-1``.
    """
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    for shape in ((S, S, S), (S, S), (S, ), ()):
        yield SampleInput(make_arg(shape))
        if len(shape) <= 1:
            continue
        yield SampleInput(make_arg(shape), start_dim=1, end_dim=-1)
def reference_inputs_flatten(op, device, dtype, requires_grad, **kwargs):
    """Yield reference inputs for flatten.

    Each (shape, start_dim, end_dim) case is sampled on a contiguous tensor,
    a transposed noncontiguous tensor and a transposed contiguous tensor.
    """
    yield from sample_inputs_flatten(op, device, dtype, requires_grad, **kwargs)

    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    # shape x start_dim x end_dim
    cases = [
        ((5, 4, 0, 1, 3, 7), 1, 3),
        ((5, 4, 0, 1, 3, 7), 4, 5),
        ((5, 4, 1, 1, 3, 7), 2, 3),
        ((), 0, -1),
        ((1,), 0, -1),
        ((3, 7, 5), 1, 2),
        ((4, 5), 1, 1),
        ((1, 5, 5, 1, 5, 1, 5, 1), 0, 2),
        ((1, 5, 5, 1, 5, 1, 5, 1), 3, -1),
        ((1, 5, 5, 1, 5, 7, 5, 1), -2, -1),
        ((2, 4, 2), 0, 1),
        ((4, 2, 2), 1, 2),
        ((0, 3, 4, 5), 1, 3),
    ]

    for shape, start_dim, end_dim in cases:
        dims = (start_dim, end_dim)
        plain = make_arg(shape)
        yield SampleInput(plain, args=dims)
        noncontig_t = make_arg(shape, noncontiguous=True).transpose(0, -1)
        yield SampleInput(noncontig_t, args=dims)
        swapped = make_arg(shape).transpose(0, -1)
        yield SampleInput(swapped, args=dims)
def sample_inputs_unflatten(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for unflatten with positional (dim, sizes) arguments."""
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    # in_shape, dim, sizes
    cases = [
        ((8,), 0, (8,)),
        ((8,), 0, (4, 2)),
        ((8,), -1, (2, 2, 2)),
        ((8,), -1, (-1, 2)),
        ((3, 6, 2), 1, (2, 3)),
        ((3, 6, 2), -2, (2, 3)),
        ((3, 6, 2), -2, (-1, 3)),
        ((3, 2, 12), 2, (3, 2, 2)),
        ((4, 0), 0, (2, 2)),
        ((4, 0), 1, (2, 0, 0, 0)),
    ]
    for case in cases:
        in_shape, dim, sizes = case
        yield SampleInput(make_arg(in_shape), args=(dim, sizes))
def sample_inputs_select(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for select; each case is (shape, (dim, index))."""
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    cube = (S, S, S)
    cases = [
        (cube, (1, 2)),
        (cube, (-1, 2)),
        (cube, (-1, -1)),
        (cube, (1, -1)),
        ((S, S), (-1, 2)),
        ((S,), (0, 2)),
    ]
    for shape, dim_and_index in cases:
        yield SampleInput(make_arg(shape), args=dim_and_index)
def sample_inputs_select_scatter(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for select_scatter as (input, src, dim, index)."""
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    # (input shape, src shape, (dim, index))
    cases = [
        ((S, S, S), (S, S), (1, 2)),
        ((S, S, S), (S, S), (-1, 2)),
        ((S, S, S), (S, S), (-1, -1)),
        ((S, S, S), (S, S), (1, -1)),
        ((S,), (), (0, 2)),
    ]
    for self_shape, src_shape, (dim, index) in cases:
        self_t = make_arg(self_shape)
        src_t = make_arg(src_shape)
        yield SampleInput(self_t, args=(src_t, dim, index))
def sample_inputs_slice_scatter(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for slice_scatter on an (L, L, L) input.

    For each of the three dims, three slices are sampled with positional
    arguments (dim, start, end, step): the full range, the upper half, and
    every second element of the upper half. ``src`` is sized to match.
    """
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    # (src length along dim, start, step); end is always L.
    slice_specs = (
        (L, 0, 1),
        (L // 2, L // 2, 1),
        (L // 4, L // 2, 2),
    )

    for dim in range(3):
        for src_len, start, step in slice_specs:
            src_shape = [L, L, L]
            src_shape[dim] = src_len
            self_t = make_arg((L, L, L))
            src_t = make_arg(tuple(src_shape))
            yield SampleInput(self_t, args=(src_t, dim, start, L, step))
def sample_inputs_expand(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for expand; the target size is passed as one tuple argument."""
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    # (input shape, target size)
    shape_pairs = [
        ((S, 1, 1), (S, S, S)),
        ((S, 1, S), (S, S, S)),
        ((S, 1, S), (-1, S, -1)),
        ((S, 1, S), (-1, S, S)),
        ((S, 1), (S, S, S)),
        ((1,), (S, S, S)),
        ((1, S), (1, 1, S)),
        ((), ()),
        ((), (1, 3, 2)),
    ]
    for shape, target_size in shape_pairs:
        yield SampleInput(make_arg(shape), args=(target_size,))
def sample_inputs_conversion(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for dtype-conversion ops.

    0-D and 2-D inputs are sampled with no kwargs and with
    ``memory_format=torch.contiguous_format``; a final 4-D input uses
    ``torch.channels_last``.
    """
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    for shape in ((), (2, 3)):
        for memory_format in (None, torch.contiguous_format):
            if memory_format:
                extra = {'memory_format': memory_format}
            else:
                extra = {}
            yield SampleInput(make_arg(shape), kwargs=extra)

    yield SampleInput(make_arg((2, 3, 2, 3)), kwargs={'memory_format': torch.channels_last})
def sample_inputs_byte(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for byte conversion.

    Same layout as the generic conversion samples, but tensors are created
    with ``low=0, high=255``.
    """
    make_arg = partial(make_tensor, dtype=dtype, device=device, low=0, high=255, requires_grad=requires_grad)

    for shape in ((), (2, 3)):
        for memory_format in (None, torch.contiguous_format):
            if memory_format:
                extra = {'memory_format': memory_format}
            else:
                extra = {}
            yield SampleInput(make_arg(shape), kwargs=extra)

    yield SampleInput(make_arg((2, 3, 2, 3)), kwargs={'memory_format': torch.channels_last})
def sample_inputs_expand_as(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for expand_as.

    The ``other`` tensor never requires grad; only the input follows
    ``requires_grad``.
    """
    make_arg = partial(make_tensor, dtype=dtype, device=device)

    # (input shape, shape of the tensor to expand as)
    shape_pairs = [
        ((S, 1, 1), (S, S, S)),
        ((), ()),
        ((), (1, 1)),
    ]
    for self_shape, other_shape in shape_pairs:
        self_t = make_arg(self_shape, requires_grad=requires_grad)
        other_t = make_arg(other_shape, requires_grad=False)
        yield SampleInput(self_t, args=(other_t,))
def sample_inputs_where(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for where in the form (input, (condition, other)).

    Each case is (input shape, mask shape, other shape, broadcasts_input).
    """
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    def make_bool_mask(shape):
        # Make sure at least one element is nonzero,
        # except for empty tensor
        mask = make_tensor(shape, dtype=torch.bool, device=device, requires_grad=False)
        count = mask.numel()

        if count == 0:
            return mask

        if count == 1:
            mask.fill_(True)
            return mask

        if mask.sum() == 0:
            # All-False mask: flip one randomly chosen position to True.
            position = tuple(random.randrange(0, extent) for extent in mask.shape)
            mask[position] = True

        return mask

    cases = [
        ((M, M), (M, M), (M, M), False),
        ((M, 1, M), (M, M), (M, M, 1), True),
        ((), (), (), False),
        ((M, 1, M), (), (M, M, 1), True),
        ((), (M, M), (), True),
        ((), 2, (1, 1), True),
    ]
    for self_shape, mask_shape, other_shape, broadcasts_input in cases:
        # Creation order: input, mask, other.
        self_t = make_arg(self_shape)
        mask_t = make_bool_mask(mask_shape)
        other_t = make_arg(other_shape)
        yield SampleInput(self_t,
                          args=(mask_t, other_t),
                          broadcasts_input=broadcasts_input)
# TODO: add reference inputs for where(condition) signature
def reference_inputs_where(op, device, dtype, requires_grad, **kwargs):
    """Yield reference inputs for where.

    Adds noncontiguous, type-promoting, Python-scalar and NaN-carrying
    samples on top of the regular ones. Nothing beyond the noncontiguous
    sample is generated on MPS devices.
    """
    yield from sample_inputs_where(op, device, dtype, requires_grad, **kwargs)

    make_cond = partial(make_tensor, dtype=torch.bool, device=device, requires_grad=requires_grad)
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    # noncontiguous
    cond = make_cond((10, 3), noncontiguous=True)
    lhs = make_arg((10, 1), noncontiguous=True)
    rhs = make_arg((3, 10, 3)).transpose(0, -1)

    # NOTE that the OpInfo for where takes samples of the form a, cond, b
    yield SampleInput(lhs, args=(cond, rhs))

    # MPS does not support float64, which causes issues in the following tests
    if torch.device(device).type == "mps":
        return

    # type promoting
    # FIXME(rec): shouldn't other_dtype be used two lines below?
    other_dtype = torch.double if dtype is not torch.double else torch.long  # noqa: F841
    cond = make_cond((10, 3), noncontiguous=True)
    lhs = make_arg((10, 1), dtype=torch.long)
    rhs = make_arg((10, 1))

    yield SampleInput(lhs, args=(cond, rhs))

    # two python scalars
    cond = make_cond((10, 3), noncontiguous=True)
    lhs = make_arg((1,)).item()
    rhs = make_arg((1,)).item()

    yield SampleInput(lhs, args=(cond, rhs))

    # NaN propagation
    if dtype.is_floating_point or dtype.is_complex:
        real_nan = float('nan')
        # Complex dtypes get NaN in both the real and imaginary part.
        nan = real_nan if dtype.is_floating_point else complex(real_nan, float('nan'))
        cond = make_cond((1, 10, 3))
        lhs = make_arg((10, 3), noncontiguous=True)
        lhs[2, 1] = nan
        rhs = make_arg((1, 3))
        rhs[0, 2] = nan

        yield SampleInput(lhs, args=(cond, rhs))

    # Python scalars type promotion
    # Reuses whichever cond/lhs/rhs were assigned last above.
    for scalar in (0, 0.0, 2j, False):
        yield SampleInput(scalar, args=(cond, rhs))
        yield SampleInput(lhs, args=(cond, scalar))
def error_inputs_where(op_info, device, **kwargs):
    """Yield ErrorInputs for where with operands on mismatched devices.

    All ('cpu', device) assignments for (input, condition, other) are tried;
    only those that actually mix two distinct devices produce a sample.
    """
    shape = (S,)
    err_msg = "Expected all tensors to be on the same device"

    for self_dev, cond_dev, other_dev in product(('cpu', device), repeat=3):
        if len({self_dev, cond_dev, other_dev}) != 2:
            continue
        # Creation order: input, condition, other.
        self_t = make_tensor(shape, device=self_dev, dtype=torch.float32)
        cond_t = make_tensor(shape, dtype=torch.bool, device=cond_dev)
        other_t = make_tensor(shape, device=other_dev, dtype=torch.float32)
        yield ErrorInput(SampleInput(self_t, args=(cond_t, other_t)), error_regex=err_msg)
def sample_inputs_nonzero(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for nonzero.

    For each shape an all-zero tensor and a randomly zero-masked tensor are
    built; every tensor is sampled with ``as_tuple`` False and True.
    """
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    shapes = ((), (S,), (S, S), (S, S, S), (S, 1, S), (S, 0, S))

    base_tensors = []
    for shape in shapes:
        # construct input without any non-zero elements
        base_tensors.append(
            torch.zeros(shape, dtype=dtype, device=device, requires_grad=requires_grad))

        # construct input with mixed zero and non-zero elements
        partly_zero = make_arg(shape).requires_grad_(False)
        zero_mask = make_tensor(shape, dtype=torch.bool, device=device, requires_grad=False)
        partly_zero[zero_mask] = 0
        base_tensors.append(partly_zero)

    for base in base_tensors:
        for as_tuple in (False, True):
            # Each sample gets its own clone carrying the requested requires_grad.
            yield SampleInput(base.clone().requires_grad_(requires_grad),
                              kwargs=dict(as_tuple=as_tuple))
def sample_inputs_nonzero_static(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for nonzero_static.

    For each shape an all-zero tensor and a randomly zero-masked tensor are
    built; every tensor is sampled with each ``size`` in [0, 1, XS, S, M].
    """
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    shapes = ((), (S,), (S, S), (S, S, S), (S, 1, S), (S, 0, S))

    base_tensors = []
    for shape in shapes:
        # construct input without any non-zero elements
        base_tensors.append(
            torch.zeros(shape, dtype=dtype, device=device, requires_grad=requires_grad))

        # construct input with mixed zero and non-zero elements
        partly_zero = make_arg(shape).requires_grad_(False)
        zero_mask = make_tensor(shape, dtype=torch.bool, device=device, requires_grad=False)
        partly_zero[zero_mask] = 0
        base_tensors.append(partly_zero)

    static_sizes = [0, 1, XS, S, M]

    for base in base_tensors:
        for static_size in static_sizes:
            # Each sample gets its own clone carrying the requested requires_grad.
            yield SampleInput(base.clone().requires_grad_(requires_grad),
                              kwargs=dict(size=static_size))
def sample_inputs_chunk(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for chunk on an (S, S, S) input.

    Args are (chunks,) or (chunks, dim).
    """
    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    for chunk_args in ((2,), (S, 1), (S, -1)):
        yield SampleInput(make_arg((S, S, S)), args=chunk_args)
def reference_inputs_chunk(op, device, dtype, requires_grad, **kwargs):
    """Yield reference inputs for chunk: the regular samples plus extra (shape, chunks, dim) cases."""
    yield from sample_inputs_chunk(op, device, dtype, requires_grad, **kwargs)

    make_arg = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    # shape x chunks x dim
    extra_cases = [
        ((13, 9, 11), 17, -1),
        ((13, 9, 11), 11, -1),
        ((13,), 12, -1),
        ((15,), 12, -1),
        ((15,), 7, 0),
        ((15,), 9, 0),
        ((3, 7), 9, 1),
        ((3, 7), 9, 0),
        ((3, 7), 2, 0),
        ((3, 7), 3, 0),
        ((3, 7), 1, 0),
        ((3, 7), 1, 1),
        ((4, 4), 2, 0),
    ]
    for case in extra_cases:
        shape, chunks, dim = case
        yield SampleInput(make_arg(shape), args=(chunks, dim))
def sample_inputs_kthvalue(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for kthvalue: an input tensor plus positional arguments."""
    new_input = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    # Each entry is (input shape, positional arguments passed after the input).
    shape_and_args = (
        ((S, S, S), (2,)),
        ((S, S, S), (2, 1)),
        ((S, S, S), (2, -1)),
        ((S, S, S), (2, 1, True)),
        ((S, S, S), (2, -1, True)),
        ((S,), (2, 0)),
        ((S,), (2, 0, True)),
        ((), (1,)),
        ((), (1, 0)),
        ((), (1, 0, True)),
    )
    for shape, positional in shape_and_args:
        yield SampleInput(new_input(shape), *positional)
def error_inputs_kthvalue(op_info, device, **kwargs):
    """Yield ErrorInputs for kthvalue: an overlapping ``out`` and out-of-range ``k``."""
    # The input tensor is also passed as the first `out` tensor.
    values = make_tensor(10, dtype=torch.float32, device=device)
    index_out = torch.empty((), device=device, dtype=torch.long)
    yield ErrorInput(SampleInput(values, 5, out=(values, index_out)),
                     error_regex="unsupported operation")

    def k_out_of_range_samples():
        # Built lazily, one per iteration, exactly like consecutive yields.
        yield SampleInput(torch.randn(2, 2, device=device), 3, 0)
        yield SampleInput(torch.randn(2, 2, device=device), 3)
        yield SampleInput(torch.tensor(2, device=device), 3)

    for sample in k_out_of_range_samples():
        yield ErrorInput(sample,
                         error_regex="selected number k out of range for dimension")
def sample_inputs_dropout(op_info, device, dtype, requires_grad, *,
                          train=None, valid_input_dim=None, **kwargs):
    """Yield SampleInputs for dropout-style ops.

    ``valid_input_dim``, when given and non-empty, lists the input ranks to
    test (each dimension has size S); otherwise ranks 2, 1 and 0 are used.
    ``train``, when not None, pins the ``training`` flag to that single value.
    """
    new_input = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    if valid_input_dim:
        shapes = tuple((S,) * ndim for ndim in valid_input_dim)
    else:
        shapes = ((S, S), (S,), ())

    # feature_alpha_dropout supports different dtypes depending on `train`,
    # so callers can restrict the flag to one value.
    training_options = [True, False] if train is None else [train]

    for shape, p, training in product(shapes, (0.0, 0.5, 1.0), training_options):
        yield SampleInput(new_input(shape), p=p, training=training)

    # Final sample uses only default arguments; `shape` is whatever the loop
    # above visited last.
    yield SampleInput(new_input(shape))
def sample_inputs_dropout_backward(op_info, device, dtype, requires_grad, **kwargs):
    """Yield (tensor, boolean mask, scale) SampleInputs for the dropout backward op."""
    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
    new_mask = partial(make_tensor, device=device, dtype=torch.bool, requires_grad=False)

    shapes = ((S, S, S, S), (S,), ())
    scales = (0.0, 1.0, 2.0)
    for shape, scale in product(shapes, scales):
        yield SampleInput(new_tensor(shape), new_mask(shape), scale)
def sample_inputs_embedding_bag(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for embedding_bag.

    Each sample is a weight matrix with an index tensor as the single
    positional argument, plus keyword arguments.  Samples are generated for
    every mode, with and without ``per_sample_weights`` (the latter only for
    mode='sum').  Inputs whose analytical gradient is not expected to match
    the numerical one are produced only when ``requires_grad`` is False.
    """
    def float_tensor(shape):
        return make_tensor(shape, device=device, dtype=dtype, requires_grad=requires_grad)

    def index_tensor(shape, *, low, high, noncontiguous=False):
        return make_tensor(shape, device=device, dtype=torch.long, low=low, high=high,
                           noncontiguous=noncontiguous)

    def optional_weights(enabled, indices):
        # A float tensor shaped like `indices`, or None to indicate that all
        # weights should be taken to be 1.
        return float_tensor(indices.shape) if enabled else None

    offsets = torch.tensor([0, 3], device=device, dtype=torch.long)

    for with_weights in (True, False):
        for mode in ('sum', 'mean', 'max'):
            # per_sample_weights is only supported for mode='sum'
            if with_weights and mode != 'sum':
                continue

            # 1-D index tensor, contiguous and then noncontiguous
            for noncontiguous in (False, True):
                indices = index_tensor((S,), low=0, high=M, noncontiguous=noncontiguous)
                psw = optional_weights(with_weights, indices)
                yield SampleInput(float_tensor((M, S)), args=(indices,),
                                  kwargs=dict(offsets=offsets, mode=mode,
                                              per_sample_weights=psw))

            # bag with zero length
            indices = index_tensor((S,), low=0, high=M, noncontiguous=True)
            psw = optional_weights(with_weights, indices)
            yield SampleInput(float_tensor((M, S)), args=(indices,),
                              kwargs=dict(offsets=torch.tensor([0, 0, 3], device=device,
                                                               dtype=torch.long),
                                          mode=mode,
                                          per_sample_weights=psw))

            # 2-D index tensor, contiguous and then noncontiguous
            for noncontiguous in (False, True):
                indices = index_tensor((S, S), low=0, high=M, noncontiguous=noncontiguous)
                psw = optional_weights(with_weights, indices)
                yield SampleInput(float_tensor((M, S)), args=(indices,),
                                  kwargs=dict(mode=mode, per_sample_weights=psw))

            # The gradient vector at `padding_idx` is not updated.
            # Negative padding_idx
            indices = index_tensor((6,), low=0, high=S)
            indices[0] = 4
            indices[4] = 4
            psw = optional_weights(with_weights, indices)
            yield SampleInput(float_tensor((S, S)), args=(indices,),
                              kwargs=dict(padding_idx=-1, offsets=offsets,
                                          mode=mode, per_sample_weights=psw))

            # Positive padding_idx
            indices = index_tensor((3, 3), low=0, high=S)
            indices[0, 0] = 2
            indices[1, 1] = 2
            psw = optional_weights(with_weights, indices)
            yield SampleInput(float_tensor((S, S)), args=(indices,),
                              kwargs=dict(padding_idx=2, mode=mode,
                                          per_sample_weights=psw))

            # include_last_offset
            indices = index_tensor((6,), low=0, high=S)
            weight = float_tensor((S, S))
            closed_offsets = torch.tensor([0, 3, 6], device=device, dtype=torch.long)
            # NOTE(review): these weights are generated but never passed to the
            # sample below; kept so existing behavior is unchanged.
            psw = optional_weights(with_weights, indices)
            yield SampleInput(weight, args=(indices,),
                              kwargs=dict(mode=mode, offsets=closed_offsets,
                                          include_last_offset=True))

            if requires_grad:
                continue

            # Following inputs return different gradient from the numerical gradient.
            # This is expected and relevant tests are present in `test_nn.py`.

            # Due to inplace renorming of weight, the numerical gradient doesn't
            # match the analytical gradient.
            indices = index_tensor((2, 2), low=0, high=S)
            weight = float_tensor((S, S)) * 2
            psw = optional_weights(with_weights, indices)
            yield SampleInput(weight, args=(indices,),
                              kwargs=dict(max_norm=1., mode=mode,
                                          per_sample_weights=psw))

            indices = index_tensor((6,), low=0, high=S)
            weight = float_tensor((S, S)) * 2
            psw = optional_weights(with_weights, indices)
            yield SampleInput(weight, args=(indices,),
                              kwargs=dict(max_norm=1., norm_type=1.0,
                                          mode=mode, offsets=offsets,
                                          per_sample_weights=psw))

            if mode == 'max':
                continue

            # Scale the gradient based on the inverse frequency of a particular index.
            # Note : max mode does not support sparse weights
            indices = index_tensor((2, 2), low=0, high=S)
            indices[0, 0] = 1
            indices[0, 1] = 1
            weight = float_tensor((S, S))
            psw = optional_weights(with_weights, indices)
            yield SampleInput(weight, args=(indices,),
                              kwargs=dict(scale_grad_by_freq=True, mode=mode,
                                          per_sample_weights=psw))

            # gradcheck not implemented for sparse tensors.
            # Note : max mode does not support sparse weights
            indices = index_tensor((6,), low=0, high=S)
            weight = float_tensor((S, S))
            psw = optional_weights(with_weights, indices)
            yield SampleInput(weight, args=(indices,),
                              kwargs=dict(sparse=True, offsets=offsets,
                                          mode=mode, per_sample_weights=psw))

            indices = index_tensor((6,), low=0, high=S)
            indices[0] = 1  # freq more than 1
            indices[1] = 1  # freq more than 1
            indices[3] = 0  # padding_idx
            weight = float_tensor((S, S)) * 2
            psw = optional_weights(with_weights, indices)
            yield SampleInput(weight, args=(indices,),
                              kwargs=dict(sparse=True, scale_grad_by_freq=True, padding_idx=0,
                                          max_norm=1., offsets=offsets,
                                          mode=mode, per_sample_weights=psw))
def sample_inputs_embedding(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for embedding.

    Each sample is a weight matrix with an index tensor as the single
    positional argument.  Keyword-argument variants (padding_idx, max_norm,
    scale_grad_by_freq, sparse) are only produced when ``requires_grad`` is
    False.
    """
    def float_tensor(shape):
        return make_tensor(shape, device=device, dtype=dtype, requires_grad=requires_grad)

    def index_tensor(shape, *, low, high):
        return make_tensor(shape, device=device, dtype=torch.long, low=low, high=high)

    # 0-D, 1-D and 2-D index tensors
    for index_shape in ((), (S,), (S, S)):
        indices = index_tensor(index_shape, low=0, high=M)
        yield SampleInput(float_tensor((M, S)), args=(indices,))

    if requires_grad:
        return

    # Following inputs return different gradient from the numerical gradient.
    # This is expected and relevant tests are present in `test_nn.py`.

    # The gradient vector at `padding_idx` is not updated.
    for pinned_index, padding_idx in ((2, 2), (4, -1)):
        indices = index_tensor((2, 2), low=0, high=S)
        indices[0, 0] = pinned_index
        indices[1, 1] = pinned_index
        yield SampleInput(float_tensor((S, S)), args=(indices,),
                          kwargs=dict(padding_idx=padding_idx))

    # Due to inplace renorming of weight, the numerical gradient doesn't match the
    # analytical gradient.
    for renorm_kwargs in (dict(max_norm=1.), dict(max_norm=1., norm_type=1.0)):
        indices = index_tensor((2, 2), low=0, high=S)
        weight = float_tensor((S, S)) * 2
        yield SampleInput(weight, args=(indices,), kwargs=renorm_kwargs)

    # Scale the gradient based on the inverse frequency of a particular index.
    indices = index_tensor((2, 2), low=0, high=S)
    indices[0, 0] = 1
    indices[0, 1] = 1
    weight = float_tensor((S, S))
    yield SampleInput(weight, args=(indices,), kwargs=dict(scale_grad_by_freq=True))

    # gradcheck not implemented for sparse tensors.
    indices = index_tensor((2, 2), low=0, high=S)
    weight = float_tensor((S, S))
    yield SampleInput(weight, args=(indices,), kwargs=dict(sparse=True))

    indices = index_tensor((3, 3), low=0, high=S)
    indices[0, 0] = 1  # freq more than 1
    indices[0, 1] = 1  # freq more than 1
    indices[1, 0] = 0  # padding_idx
    weight = float_tensor((S, S)) * 2
    yield SampleInput(weight, args=(indices,),
                      kwargs=dict(sparse=True, scale_grad_by_freq=True,
                                  padding_idx=0, max_norm=1.))
def sample_inputs_one_hot(op_info, device, dtype, requires_grad, **kwargs):
    """Return a lazy iterable of SampleInputs for one_hot.

    Every tested shape is combined with ``num_classes`` of -1 and 10.
    """
    def class_indices(shape, num_classes):
        # Values are drawn with low=0 and high=10 when num_classes is -1,
        # otherwise with high=num_classes // 2.
        upper = 10 if num_classes == -1 else num_classes // 2
        return make_tensor(shape, device=device, dtype=dtype, low=0, high=upper,
                           requires_grad=requires_grad)

    combos = itertools.product(((), (S,), (L, M, S)), (-1, 10))
    return (
        SampleInput(class_indices(shape, num_classes),
                    kwargs={'num_classes': num_classes})
        for shape, num_classes in combos
    )
def sample_inputs_loss(op_info, device, dtype, requires_grad, **kwargs):
    """Yield (input, target) SampleInputs shared by several loss functions.

    The ``rhs_requires_grad`` entry of ``kwargs``, when present, overrides
    ``requires_grad`` for the target tensor only.
    """
    rhs_requires_grad = kwargs.get('rhs_requires_grad', requires_grad)
    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # Although most losses also support the reduce and size_average combination
    # instead of reduce, the former is deprecated since 0.4.1 and thus is not tested
    cases = [((), None)]
    cases += [((S,), dict(reduction=name)) for name in ("mean", "sum", "none")]
    cases += [((S, S), None), ((S, S, S), None)]

    for shape, loss_kwargs in cases:
        yield SampleInput(new_tensor(shape),
                          args=(new_tensor(shape, requires_grad=rhs_requires_grad),),
                          kwargs=loss_kwargs)
def sample_inputs_grid_sample(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for grid_sample with 2 and 3 spatial dimensions.

    "bicubic" mode is only combined with the 2-dimensional case.
    """
    # Values are drawn from [-2, 2]: the "useful" range of the grid (second
    # tensor argument) is [-1, 1], so this gives a good mix of in-range and
    # out-of-range coordinates.
    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad,
                         low=-2, high=2)
    batch_size, num_channels = 2, 3

    base_modes = ("bilinear", "nearest")
    padding_modes = ("zeros", "border", "reflection")
    align_corners_options = (False, True)

    for dim in (2, 3):
        spatial = (S,) * dim
        modes = base_modes + ("bicubic",) if dim == 2 else base_modes
        for mode, padding_mode, align_corners in itertools.product(
                modes, padding_modes, align_corners_options):
            yield SampleInput(
                new_tensor((batch_size, num_channels, *spatial)),
                new_tensor((batch_size, *spatial, dim)),
                mode=mode,
                padding_mode=padding_mode,
                align_corners=align_corners,
            )
def reference_inputs_grid_sample(op_info, device, dtype, requires_grad, **kwargs):
    """Yield reference SampleInputs for grid_sample on a 345x456 input.

    The grid comes from ``affine_grid`` applied to a fixed affine matrix; mode,
    padding mode and align_corners are passed positionally.
    """
    batch_size, num_channels, height, width = 2, 3, 345, 456
    input_size = (batch_size, num_channels, height, width)

    # Create an affine transformation matrix
    angle = torch.deg2rad(torch.tensor(45.0))
    cos_a = torch.cos(angle)  # rotation angles
    sin_a = torch.sin(angle)
    scale_1, scale_2 = 1.23, 1.34  # scales
    theta = torch.tensor([[
        [cos_a / scale_1, sin_a, 0.0],
        [-sin_a, cos_a / scale_2, 0.0],
    ]], dtype=dtype, device=device)
    theta = theta.expand(batch_size, 2, 3).contiguous()

    # Deterministic input: a running counter cast through uint8 before the
    # conversion to the requested dtype.
    image = torch.arange(batch_size * num_channels * height * width, device=device)
    image = image.reshape(input_size).to(torch.uint8)
    image = image.to(dtype=dtype)
    image.requires_grad_(requires_grad)

    combos = itertools.product(
        ("bilinear", "nearest", "bicubic"),
        ('zeros', 'border', 'reflection'),
        (False, True),
    )
    for mode, padding_mode, align_corners in combos:
        grid = torch.nn.functional.affine_grid(
            theta, size=input_size, align_corners=align_corners
        )
        yield SampleInput(image, grid, mode, padding_mode, align_corners)
def sample_inputs_grid_sampler_2d(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for grid_sampler_2d with integer mode and padding codes."""
    # Values are drawn from [-2, 2]: the "useful" range of the grid (second
    # tensor argument) is [-1, 1], so this gives a good mix of in-range and
    # out-of-range coordinates.
    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad,
                         low=-2, high=2)
    batch_size, num_channels = 2, 3

    mode_codes = (0, 1, 2)
    padding_codes = (0, 1, 2)
    align_corners_options = (False, True)

    for mode, padding_mode, align_corners in itertools.product(
            mode_codes, padding_codes, align_corners_options):
        yield SampleInput(
            new_tensor((batch_size, num_channels, S, L)),
            new_tensor((batch_size, M + 3, M, 2)),
            mode,
            padding_mode,
            align_corners,
        )
def sample_inputs_cosine_embedding_loss(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for cosine_embedding_loss with a random margin in [-1, 1]."""
    new_input = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    def new_target(input_shape):
        # A 1-D input gets a scalar target; otherwise one label per row.
        if len(input_shape) == 1:
            target_shape = ()
        else:
            target_shape = (input_shape[0],)
        labels = torch.randint(0, 2, target_shape, device=device, dtype=torch.long)
        # Map {0, 1} onto {-1, 1}
        labels = labels * 2 - 1
        return labels.to(dtype=dtype).detach_().requires_grad_(requires_grad)

    for shape, reduction in product(((S, S), (S,)), ('none', 'mean', 'sum')):
        yield SampleInput(
            new_input(shape),
            args=(new_input(shape), new_target(shape)),
            kwargs=dict(reduction=reduction, margin=random.uniform(-1, 1)),
        )
def sample_inputs_ctc_loss(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for ctc_loss with lengths given as tensors or lists."""
    input_length, batch, num_char, target_length = 50, 16, 20, 30

    def new_log_probs(shape):
        raw = make_tensor(shape, device=device, dtype=dtype)
        normalized = raw.log_softmax(2).to(device=device, dtype=dtype)
        return normalized.detach().requires_grad_(requires_grad=requires_grad)

    combos = product(('none', 'mean', 'sum'), (True, False), (list, torch.Tensor))
    for reduction, zero_infinity, lengths_type in combos:
        log_probs = new_log_probs((input_length, batch, num_char))
        targets = torch.randint(1, num_char, (batch, target_length),
                                dtype=torch.long, device=device)
        input_lengths = torch.full((batch,), input_length, dtype=torch.long, device=device)
        target_lengths = torch.randint(10, target_length, (batch,),
                                       dtype=torch.long, device=device)

        # Don't generate int[] lengths for reduction="mean": that results in non
        # composite compliant calls to ctc_loss.IntList, since a tensor must be
        # created from the target lengths.  Creating such a tensor requires the
        # use of pointers to copy data from int[] -> torch.Tensor, e.g. via
        # std::copy.  Similarly symbolic/real tracing with fx will also not work.
        if lengths_type is list and reduction != "mean":
            input_lengths = input_lengths.tolist()
            target_lengths = target_lengths.tolist()

        yield SampleInput(log_probs,
                          args=(targets, input_lengths, target_lengths),
                          kwargs=dict(reduction=reduction, zero_infinity=zero_infinity))
def sample_inputs_nll_loss(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for nll_loss.

    Covers batched, non-batched and 2d inputs for every reduction, with and
    without class weights and ``ignore_index``.
    """
    base_shape = (2, 3)
    num_classes = base_shape[1]
    ignored_class = num_classes // 2

    new_input = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
    # FIXME: Derivative wrt. weight not implemented
    new_weight = partial(make_tensor, num_classes, device=device, dtype=dtype,
                         requires_grad=False)

    def new_target(input_shape, zeros=False):
        # The target drops the class dimension (dim 1) of the input; a 1-D
        # input gets a scalar target.
        if len(input_shape) > 1:
            target_shape = (input_shape[0], *input_shape[2:])
            class_count = input_shape[1]
        else:
            target_shape = ()
            class_count = input_shape[0]
        if zeros:
            return torch.zeros(target_shape, device=device, dtype=torch.long)
        return make_tensor(target_shape, low=0, high=class_count,
                           device=device, dtype=torch.long)

    def sample(inp, target, **loss_kwargs):
        return SampleInput(inp, args=(target,), kwargs=loss_kwargs)

    # Batched, non-batched and 2d
    input_shapes = (base_shape, (num_classes,), base_shape + (2, 2))
    for reduction, shape in product(('none', 'mean', 'sum'), input_shapes):
        yield sample(new_input(shape), new_target(shape), reduction=reduction)
        yield sample(new_input(shape), new_target(shape),
                     weight=new_weight(), reduction=reduction)
        yield sample(new_input(shape), new_target(shape),
                     weight=new_weight(low=0), reduction=reduction)
        yield sample(new_input(shape), new_target(shape),
                     weight=new_weight(high=0), reduction=reduction)

        shared_target = new_target(shape)
        # If "mean", nll returns NaN, so it's not differentiable at those points
        if shared_target.eq(ignored_class).all() and reduction == "mean":
            shared_target.fill_(0)
        # The same target tensor is used by the next two samples.
        yield sample(new_input(shape), shared_target,
                     ignore_index=ignored_class, reduction=reduction)
        yield sample(new_input(shape), shared_target,
                     ignore_index=ignored_class, reduction=reduction, weight=new_weight())

        # Test ignoring all the targets
        # If "mean", nll returns NaN, so it's not differentiable at those points
        if reduction != "mean":
            yield sample(new_input(shape), new_target(shape, zeros=True),
                         ignore_index=0, reduction=reduction)

    # Negative ignore_index
    negative_target = torch.tensor([-1, 2], device=device, dtype=torch.long)
    yield SampleInput(new_input(base_shape), args=(negative_target,),
                      kwargs={'ignore_index': -1})
def sample_inputs_binary_cross_entropy_with_logits(
    op_info, device, dtype, requires_grad, **kwargs
):
    """Yield SampleInputs for binary_cross_entropy_with_logits.

    Covers plain shapes, every reduction, and ``weight`` / ``pos_weight``
    tensors.  All weight tensors are created up front, before the first
    sample is yielded.
    """
    new_tensor = partial(make_tensor, device=device, dtype=dtype)
    new_prob = partial(new_tensor, low=0, high=1)
    reductions = ("mean", "sum", "none")

    # Each entry is (input shape, keyword arguments or None).
    cases = [(shape, None) for shape in ((), (1,), (S,), (S, S), (S, S, S))]
    cases += [((S, S), dict(reduction=reduction)) for reduction in reductions]
    # The third weight shape is the bare int S, not a tuple.
    cases += [
        ((S, S), dict(reduction=reduction, weight=new_tensor(weight_shape)))
        for weight_shape in ((1,), (1, S), S, (S, S))
        for reduction in reductions
    ]
    cases += [
        ((S, S), dict(reduction=reduction, pos_weight=new_tensor((S,), low=0)))
        for reduction in reductions
    ]
    cases += [
        ((S, S), dict(reduction=reduction, weight=new_tensor((S, S)),
                      pos_weight=new_tensor((S,), low=0)))
        for reduction in reductions
    ]

    for shape, loss_kwargs in cases:
        yield SampleInput(
            new_tensor(shape, requires_grad=requires_grad),
            args=(new_prob(shape, requires_grad=requires_grad),),
            kwargs=loss_kwargs,
        )
def sample_inputs_argwhere(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for argwhere: mixed, empty, all-zero and scalar tensors."""
    common = dict(dtype=dtype, device=device, requires_grad=requires_grad)

    yield SampleInput(torch.tensor([1, 0, 2, 0], **common))

    # Fixed pattern of positions that are forced to zero.
    zero_mask = torch.tensor([[0, 1, 0, 1, 0],
                              [1, 1, 1, 1, 0],
                              [0, 0, 0, 1, 0],
                              [1, 0, 1, 1, 0],
                              [1, 0, 0, 1, 0]], dtype=torch.bool, device=device)

    # Contiguous, then noncontiguous
    for noncontiguous in (False, True):
        values = make_tensor((S, S), noncontiguous=noncontiguous, **common)
        values[zero_mask] = 0
        yield SampleInput(values)

    yield SampleInput(make_tensor((S, 0), **common))
    yield SampleInput(torch.zeros((S,), **common))
    yield SampleInput(make_tensor((), **common))
def _generate_sample_shape_reduction():
    """Yield every (shape, reduction) pair for 1-D to 3-D shapes of side S."""
    for shape in ((S,), (S, S), (S, S, S)):
        for reduction in ('none', 'mean', 'sum'):
            yield shape, reduction
def sample_inputs_gaussian_nll_loss(op_info, device, dtype, requires_grad, **kwargs):
    """Yield (input, target, var) SampleInputs for gaussian_nll_loss.

    Target and var each take the input shape, the input shape with a trailing
    1, or the input shape without its last dimension.  Every combination is
    emitted with and without ``full`` and a random ``eps``.
    """
    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
    # Set low slightly above 0 so gradcheck doesn't accidentally dip below 0
    new_var = partial(make_tensor, low=0.1, device=device, dtype=dtype,
                      requires_grad=requires_grad)

    def shape_variants(shape):
        leading = tuple(shape[:-1])
        # Same shape, then the two broadcast forms
        return (shape, (*leading, 1), leading)

    def random_eps():
        return random.uniform(1e-6, 1e-3)

    # Evaluated lazily so the random eps is drawn after the tensors are created.
    extra_kwargs_factories = (
        lambda: {},
        lambda: dict(full=True),
        lambda: dict(eps=random_eps()),
        lambda: dict(full=True, eps=random_eps()),
    )

    for shape, reduction in _generate_sample_shape_reduction():
        for target_shape, var_shape in product(shape_variants(shape), shape_variants(shape)):
            for make_extra in extra_kwargs_factories:
                yield SampleInput(
                    new_tensor(shape),
                    args=(new_tensor(target_shape), new_var(var_shape)),
                    kwargs=dict(make_extra(), reduction=reduction),
                )
def error_inputs_gaussian_nll_loss(op_info, device, **kwargs):
    """Yield ErrorInputs for gaussian_nll_loss: bad reduction, var shape, target shape."""
    new_tensor = partial(make_tensor, device=device, dtype=torch.float32)

    # invalid reduction value
    bad_reduction = SampleInput(new_tensor(10, 2, 3), new_tensor(10, 2, 3),
                                new_tensor((10, 2, 3), low=0), reduction="abc")
    yield ErrorInput(bad_reduction, error_type=ValueError, error_regex="abc is not valid")

    # var is of incorrect shape
    bad_var = SampleInput(new_tensor(10, 2, 3), new_tensor(10, 2, 3),
                          new_tensor((10, 2, 2), low=0))
    yield ErrorInput(bad_var, error_type=ValueError, error_regex="var is of incorrect size")

    # target is of incorrect shape
    bad_target = SampleInput(new_tensor(10, 2, 3), new_tensor(10, 2, 2),
                             new_tensor((10, 2, 3), low=0))
    yield ErrorInput(
        bad_target,
        error_type=RuntimeError,
        error_regex=(r"The size of tensor a \(3\) must match the size of tensor b \(2\) "
                     r"at non-singleton dimension 2"),
    )
def _generate_sample_inputs_nn_loss(op_info, device, dtype, requires_grad, **kwargs):
    """Yield (input, target, {'reduction': ...}) triples of same-shaped tensors."""
    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    for shape, reduction in _generate_sample_shape_reduction():
        inp = new_tensor(shape)
        target = new_tensor(shape)
        yield inp, target, {'reduction': reduction}
def sample_inputs_hinge_embedding_loss(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for hinge_embedding_loss with a random margin in [-9, 9]."""
    triples = _generate_sample_inputs_nn_loss(op_info, device, dtype, requires_grad, **kwargs)
    for inp, target, loss_kwargs in triples:
        # target should contain either 1 or -1 as per docs
        positive = torch.rand_like(target) > 0.5
        target[positive] = 1
        target[positive.logical_not()] = -1
        loss_kwargs['margin'] = random.uniform(-9, 9)
        yield SampleInput(inp, args=(target,), kwargs=loss_kwargs)

    # scalar input and target.
    new_scalar = partial(make_tensor, (), device=device, dtype=dtype,
                         requires_grad=requires_grad)
    yield SampleInput(new_scalar(), args=(new_scalar(),))
def error_inputs_hinge_embedding_loss(op, device, **kwargs):
    """Yield an ErrorInput for hinge_embedding_loss with an invalid reduction."""
    new_tensor = partial(make_tensor, 5, 4, device=device, dtype=torch.float32)

    # invalid reduction value
    bad_reduction = SampleInput(new_tensor(), args=(new_tensor(),),
                                kwargs=dict(reduction='abc'))
    yield ErrorInput(bad_reduction, error_type=ValueError,
                     error_regex='is not a valid value')
def reference_inputs_hinge_embedding_loss(op, device, dtype, requires_grad, **kwargs):
    """Yield the regular hinge_embedding_loss samples plus NaN, inf and broadcast cases."""
    yield from sample_inputs_hinge_embedding_loss(op, device, dtype, requires_grad, **kwargs)

    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    def new_target(shape):
        # target should contain either 1 or -1 as per docs
        target = new_tensor(shape)
        negative = torch.rand_like(target) > 0.5
        target[negative] = -1
        target[~negative] = 1
        return target

    for reduction in ('sum', 'mean', 'none'):
        if dtype.is_floating_point:  # only supports ints and floats
            # NaN propagation (position 2), then Inf handling (position 4)
            for position, special in ((2, float('nan')), (4, float('inf'))):
                inp = new_tensor((10,))
                inp[position] = special
                yield SampleInput(inp, args=(new_target((10,)),),
                                  kwargs={'reduction': reduction})

        # Broadcasting
        inp = new_tensor((5, 5))
        yield SampleInput(inp, args=(new_target((1, 5)),),
                          kwargs={'reduction': reduction})
def sample_inputs_huber_loss(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for huber_loss with a random delta in [1e-3, 9]."""
    triples = _generate_sample_inputs_nn_loss(op_info, device, dtype, requires_grad, **kwargs)
    for inp, target, loss_kwargs in triples:
        loss_kwargs['delta'] = random.uniform(1e-3, 9)
        yield SampleInput(inp, args=(target,), kwargs=loss_kwargs)
def error_inputs_huber_loss(op, device, **kwargs):
    """Yield ErrorInputs for huber_loss: invalid reduction and non-positive delta."""
    new_tensor = partial(make_tensor, 5, 4, device=device, dtype=torch.float32)

    def sample_with(**loss_kwargs):
        return SampleInput(new_tensor(), args=(new_tensor(),), kwargs=loss_kwargs)

    # invalid reduction value
    yield ErrorInput(sample_with(reduction='abc'),
                     error_type=ValueError,
                     error_regex='is not a valid value for reduction')

    # delta <= 0
    for delta in (0, -1):
        yield ErrorInput(
            sample_with(delta=delta),
            error_type=RuntimeError,
            error_regex='huber_loss does not support non-positive values for delta.',
        )
def sample_inputs_poisson_nll_loss(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for poisson_nll_loss.

    Every (shape, reduction) pair is combined with both values of
    ``log_input`` and ``full``; each combination is emitted once without and
    once with a random ``eps``.
    """
    new_tensor = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    for shape, reduction in _generate_sample_shape_reduction():
        for log_input, full in product((True, False), (True, False)):
            input_a = new_tensor(shape)
            input_b = new_tensor(shape)
            # For Poisson NLL Loss, target is assumed to be from a Poisson
            # distribution, which always has positive samples
            target_a = new_tensor(shape, low=0)
            target_b = new_tensor(shape, low=0)

            if not log_input:
                for tensor in (input_a, input_b, target_a, target_b):
                    tensor.abs_()

            yield SampleInput(
                input_a, args=(target_a,),
                kwargs=dict(log_input=log_input, full=full, reduction=reduction),
            )
            yield SampleInput(
                input_b, args=(target_b,),
                kwargs=dict(log_input=log_input, full=full,
                            eps=random.uniform(1e-8, 1e-3),
                            reduction=reduction),
            )

    # test INT_TO_FLOAT promotion
    if dtype.is_complex:
        for other_dtype in (torch.bool, torch.int64):
            yield SampleInput(new_tensor(dtype=dtype), args=(new_tensor(dtype=other_dtype),))
            yield SampleInput(new_tensor(dtype=other_dtype), args=(new_tensor(dtype=dtype),))
def error_inputs_poisson_nll_loss(op_info, device, **kwargs):
    """Yield ErrorInputs for poisson_nll_loss: invalid reduction and mismatched shapes."""
    new_tensor = partial(make_tensor, device=device, dtype=torch.float32)

    # invalid reduction value
    bad_reduction = SampleInput(new_tensor(5, 4), args=(new_tensor(5, 4),),
                                kwargs=dict(reduction='abc'))
    yield ErrorInput(bad_reduction,
                     error_type=ValueError,
                     error_regex='abc is not a valid value for reduction')

    # invalid input shapes
    shape_mismatch = SampleInput(new_tensor(5, 4), args=(new_tensor(5,),))
    yield ErrorInput(
        shape_mismatch,
        error_regex=(r'(Attempting to broadcast a dimension of length|'
                     r'The size of tensor a \(5\) must match the size of tensor b \(4\) '
                     r'at non-singleton dimension 1)'),
    )
def error_inputs_soft_margin_loss(op_info, device, **kwargs):
    """Yield ErrorInputs for soft_margin_loss: invalid reduction and mismatched shapes."""
    new_tensor = partial(make_tensor, device=device, dtype=torch.float32)

    # invalid reduction value
    bad_reduction = SampleInput(new_tensor(5, 4), args=(new_tensor(5, 4),),
                                kwargs=dict(reduction='abc'))
    yield ErrorInput(bad_reduction,
                     error_type=ValueError,
                     error_regex='abc is not a valid value for reduction')

    # invalid input shapes
    shape_mismatch = SampleInput(new_tensor(5, 4), args=(new_tensor(5,),))
    yield ErrorInput(
        shape_mismatch,
        error_regex=(r'(Attempting to broadcast a dimension of length|'
                     r'The size of tensor a \(4\) must match the size of tensor b \(5\) '
                     r'at non-singleton dimension 1)'),
    )
def sample_inputs_triplet_margin_loss(op_info, device, dtype, requires_grad, with_distance=False, **kwargs):
    """Yield SampleInputs of three (S, M) tensors for triplet margin losses.

    When ``with_distance`` is true, a ``torch.nn.PairwiseDistance`` instance is
    added to every sample as ``distance_function``.
    """
    new_tensor = partial(make_tensor, (S, M), device=device, dtype=dtype,
                         requires_grad=requires_grad)

    option_sets = [dict(margin=margin) for margin in (1e-6, 1.0, 10.0)]
    option_sets.append(dict(swap=True))
    option_sets.extend(dict(reduction=reduction) for reduction in ("mean", "sum", "none"))

    for options in option_sets:
        anchor = new_tensor()
        others = (new_tensor(), new_tensor())
        if with_distance:
            options["distance_function"] = torch.nn.PairwiseDistance()
        yield SampleInput(anchor, args=others, kwargs=options)
def error_inputs_triplet_margin_loss(op_info, device, **kwargs):
    """Yield ErrorInputs for triplet_margin_loss.

    Covers an invalid reduction, an invalid margin, mismatched sizes along
    dimension 1, and tensors with differing numbers of dimensions.
    """
    new_tensor = partial(make_tensor, device=device, dtype=torch.float32)

    def size_mismatch(size_a, size_b):
        # Either of the two messages is accepted.
        return (r'(Attempting to broadcast a dimension of length|'
                rf"The size of tensor a \({size_a}\) must match the size of tensor b \({size_b}\) "
                r"at non-singleton dimension 1)")

    def ndim_mismatch(anchor_ndim, positive_ndim, negative_ndim):
        return (r"The anchor, positive, and negative tensors are expected to have "
                rf"the same number of dimensions, but got: anchor {anchor_ndim}D, "
                rf"positive {positive_ndim}D, "
                rf"and negative {negative_ndim}D inputs")

    # (anchor shape, positive shape, negative shape, kwargs, error_type, error_regex)
    specs = (
        # invalid reduction
        ((3, 4), (3, 4), (3, 4), dict(reduction="abc"),
         ValueError, "abc is not a valid value for reduction"),
        # invalid margin
        ((3, 4), (3, 4), (3, 4), dict(margin=-1.0),
         ValueError, "margin must be greater than 0, got -1.0"),
        # shape mismatch
        ((3, 5), (3, 4), (3, 4), {}, RuntimeError, size_mismatch(5, 4)),
        ((3, 4), (3, 5), (3, 4), {}, RuntimeError, size_mismatch(4, 5)),
        ((3, 4), (3, 4), (3, 5), {}, RuntimeError, size_mismatch(4, 5)),
        # different dimensions
        ((3,), (3, 4), (3, 4), {}, RuntimeError, ndim_mismatch(1, 2, 2)),
        ((3, 4), (3,), (3, 4), {}, RuntimeError, ndim_mismatch(2, 1, 2)),
        ((3, 4), (3, 4), (3,), {}, RuntimeError, ndim_mismatch(2, 2, 1)),
    )

    # All tensors are created before the first ErrorInput is yielded.
    prepared = [
        (new_tensor(*anchor), (new_tensor(*positive), new_tensor(*negative)),
         loss_kwargs, error_type, error_regex)
        for anchor, positive, negative, loss_kwargs, error_type, error_regex in specs
    ]

    for anchor, others, loss_kwargs, error_type, error_regex in prepared:
        yield ErrorInput(SampleInput(anchor, args=others, kwargs=loss_kwargs),
                         error_type=error_type, error_regex=error_regex)
def sample_inputs_scaled_mm(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for scaled_mm over three float8 dtype pairings.

    The ``dtype`` argument is not used: the matrices are always float8 and the
    scales are always float.
    """
    new_e4m3 = partial(make_tensor, device=device, dtype=torch.float8_e4m3fn,
                       requires_grad=requires_grad)
    new_e5m2 = partial(make_tensor, device=device, dtype=torch.float8_e5m2,
                       requires_grad=requires_grad)
    new_scale = partial(make_tensor, (1,), device=device, dtype=torch.float,
                        requires_grad=False)
    # These local sizes shadow the module-level M.
    M, N, K = 15, 32, 16

    # (mat1 factory, mat2 factory): two e4m3, e4m3 x e5m2, e5m2 x e4m3
    factory_pairs = (
        (new_e4m3, new_e4m3),
        (new_e4m3, new_e5m2),
        (new_e5m2, new_e4m3),
    )

    samples = []
    for new_mat1, new_mat2 in factory_pairs:
        mat1 = new_mat1((M, K))
        # transpose -> contiguous -> transpose keeps the (K, N) shape but
        # changes the memory layout of mat2
        mat2 = new_mat2((K, N)).t().contiguous().t()
        scale1 = new_scale()
        scale2 = new_scale()
        samples.append(SampleInput(mat1, mat2, scale1, scale2))

    yield from samples
def sample_inputs_scaled_dot_product_attention(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for scaled_dot_product_attention.

    Covers 3-D, 4-D and broadcasting query/key-value shapes with both
    ``is_causal`` values and two dropout probabilities, plus one sample with an
    ``attn_mask``.
    """
    make = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
    batch, seq_q, seq_kv, num_heads, head_dim = 4, 3, 6, 4, 8

    q_4d = (batch, num_heads, seq_q, head_dim)
    kv_4d = (batch, num_heads, seq_kv, head_dim)
    shape_pairs = [
        ((batch, seq_q, head_dim), (batch, seq_kv, head_dim)),
        (q_4d, kv_4d),
        # query without the batch dimension
        ((num_heads, seq_q, head_dim), kv_4d),
    ]

    samples = []
    # The gqa option is not passed to the samples; it only doubles the number
    # of iterations.
    combos = product(shape_pairs, [True, False], [0.0, 0.5], [True, False])
    for (shape_q, shape_kv), is_causal, dropout_p, _enable_gqa in combos:
        samples.append(SampleInput(
            make(shape_q),
            make(shape_kv),
            make(shape_kv),
            is_causal=is_causal,
            dropout_p=dropout_p,
        ))

    # Add non standard shapes
    # FIXME(rec): should diff_v_head_dim be appended to samples?
    # It reuses is_causal / dropout_p from the last loop iteration.
    diff_v_head_dim = SampleInput(  # noqa: F841
        make(q_4d),
        make(kv_4d),
        make((batch, num_heads, seq_kv, head_dim + 8)),
        is_causal=is_causal,
        dropout_p=dropout_p,
    )

    # Add an attn_mask
    samples.append(SampleInput(
        make(q_4d),
        make(kv_4d),
        make(kv_4d),
        attn_mask=make((seq_q, seq_kv)),
        is_causal=False,
        dropout_p=0.0,
    ))

    yield from samples
def sample_inputs_efficient_attention_forward(op_info, device, dtype, requires_grad, **kwargs):
    """Generate SampleInputs for the efficient-attention forward op.

    Produces dense samples over dropout, custom mask type and scale, one
    sample with an explicit ``bias``, and one jagged sample that supplies
    cumulative sequence offsets for queries and keys.
    """
    make = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
    batch, num_heads, head_dim = 4, 4, 8
    seq_q, seq_kv = 11, 32

    shape_q = (batch, num_heads, seq_q, head_dim)
    shape_kv = (batch, num_heads, seq_kv, head_dim)
    # Offset-related keyword arguments, all None for the non-jagged samples.
    no_offsets = dict(cu_seqlens_q=None, cu_seqlens_k=None, max_seqlen_q=None, max_seqlen_k=None)

    samples = []
    # custom_mask_type 1 and 2: UpperLeft, LowerRight.
    # The causal flag only duplicates combinations; it is not passed to the sample.
    for _is_causal, dropout_p, mask_type, scale in product(
        [True, False], [0.0, 0.5], [1, 2], [None, 1.0]
    ):
        query = make(shape_q).transpose(1, 2)
        key = make(shape_kv).transpose(1, 2)
        value = make(shape_kv).transpose(1, 2)
        samples.append(SampleInput(
            query,
            key,
            value,
            bias=None,
            **no_offsets,
            dropout_p=dropout_p,
            custom_mask_type=mask_type,
            compute_log_sumexp=requires_grad,
            scale=scale,
            seqlen_k=None
        ))

    # Add non standard shapes
    # FIXME(rec): should diff_v_head_dim be appended to samples?
    # NOTE: built with the loop's final dropout_p and currently never appended.
    diff_v_head_dim = SampleInput(  # noqa: F841
        make((batch, seq_q, num_heads, head_dim)),
        make((batch, seq_kv, num_heads, head_dim)),
        make((batch, seq_kv, num_heads, head_dim + 8)),
        bias=None,
        **no_offsets,
        dropout_p=dropout_p,
        custom_mask_type=0,  # No Mask
        compute_log_sumexp=requires_grad,
        scale=None,
        seqlen_k=None
    )

    # Add an attn_mask (passed as ``bias``); dropout_p is the loop's final value
    with_bias = SampleInput(
        make((batch, seq_q, num_heads, head_dim)),
        make((batch, seq_kv, num_heads, head_dim)),
        make((batch, seq_kv, num_heads, head_dim)),
        bias=make(batch, num_heads, seq_q, seq_kv),
        **no_offsets,
        dropout_p=dropout_p,
        custom_mask_type=0,  # No Mask
        compute_log_sumexp=requires_grad,
        scale=None,
        seqlen_k=None
    )
    samples.append(with_bias)

    # jagged (with query/keys offsets)
    cu_seqlens_q = torch.arange(0, 32 * 2 + 2, 2, dtype=torch.int32, device=device)
    cu_seqlens_k = torch.arange(-1, 32 * 2 + 1, 2, dtype=torch.int32, device=device)
    # overwrite both ends of the key offsets
    cu_seqlens_k[-1] = 62
    cu_seqlens_k[0] = 0
    jagged = SampleInput(
        make((32, 2, 64)).view(-1, 8, 8).unsqueeze(0),
        make((64, 64)).view(-1, 8, 8).unsqueeze(0),
        make((64, 64)).view(-1, 8, 8).unsqueeze(0),
        bias=None,
        cu_seqlens_q=cu_seqlens_q,
        cu_seqlens_k=cu_seqlens_k,
        max_seqlen_q=2,
        max_seqlen_k=2,
        dropout_p=0.0,
        custom_mask_type=0,  # No Mask
        compute_log_sumexp=requires_grad,
        scale=None,
        seqlen_k=None,
    )
    samples.append(jagged)

    yield from samples
def sample_inputs_flash_attention_forward(op_info, device, dtype, requires_grad, **kwargs):
    """Generate SampleInputs for the flash-attention forward op.

    Every sample uses 4-D query/key/value tensors with dimensions 1 and 2
    transposed, combined over ``is_causal``, ``dropout_p`` and ``scale``.
    """
    make = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
    batch, num_heads, head_dim = 4, 4, 8
    seq_q, seq_kv = 11, 32

    shape_q = (batch, num_heads, seq_q, head_dim)
    shape_kv = (batch, num_heads, seq_kv, head_dim)

    samples = []
    for is_causal, dropout_p, scale in product([True, False], [0.0, 0.5], [None, 1.0]):
        query = make(shape_q).transpose(1, 2)
        key = make(shape_kv).transpose(1, 2)
        value = make(shape_kv).transpose(1, 2)
        samples.append(SampleInput(
            query,
            key,
            value,
            cum_seq_q=None,
            cum_seq_k=None,
            max_q=seq_q,
            max_k=seq_kv,
            dropout_p=dropout_p,
            is_causal=is_causal,
            return_debug_mask=False,
            scale=scale,
        ))

    yield from samples
def sample_inputs_pairwise_distance(op_info, device, dtype, requires_grad, **kwargs):
    """Return a generator of SampleInputs for pairwise_distance.

    Each sample pairs two tensors of the same shape (a vector or a batch of
    two vectors) with optional ``keepdim``, ``p`` or ``eps`` keyword arguments.
    """
    make = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    vector = (3,)
    batched = (2, *vector)

    # (shape, keyword arguments for the op); None means no keyword arguments
    cases = [
        (vector, None),
        (batched, None),
        (vector, dict(keepdim=True)),
        (batched, dict(keepdim=True)),
        (vector, dict(p=5.0)),
        (vector, dict(p=-1.0)),
        (vector, dict(eps=1.0)),
    ]

    return (
        SampleInput(make(case_shape), args=(make(case_shape),), kwargs=case_kwargs)
        for case_shape, case_kwargs in cases
    )
def sample_inputs_pixel_shuffle(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for pixel_shuffle.

    First a (1, 9, 2, 2) input with upscale factors 1 and 3, then inputs that
    have a single zero-sized dimension with upscale factor 1.
    """
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    for factor in (1, 3):
        yield SampleInput(make_arg((1, 9, 2, 2)), upscale_factor=factor)

    zero_sized_shapes = [
        (1, 0, 1, 1),
        (1, 1, 0, 1),
        (1, 1, 1, 0),
    ]
    for empty_shape in zero_sized_shapes:
        yield SampleInput(make_arg(empty_shape), upscale_factor=1)
def sample_inputs_pixel_unshuffle(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for pixel_unshuffle.

    First a (1, 1, 6, 6) input with downscale factors 1 and 3, then inputs
    that have a single zero-sized dimension with downscale factor 1.
    """
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    for factor in (1, 3):
        yield SampleInput(make_arg((1, 1, 6, 6)), downscale_factor=factor)

    zero_sized_shapes = [
        (1, 0, 1, 1),
        (1, 1, 0, 1),
        (1, 1, 1, 0),
    ]
    for empty_shape in zero_sized_shapes:
        yield SampleInput(make_arg(empty_shape), downscale_factor=1)
def sample_inputs_channel_shuffle(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for channel_shuffle as (input tensor, groups) pairs."""
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # (input shape, number of groups); in each case the size of dimension 1
    # is a multiple of the group count
    cases = (
        ((1, 4, 10, 10), 2),
        ((2, 6, 8, 8), 3),
        ((2, 8, 5, 5), 4),
    )
    for input_shape, num_groups in cases:
        yield SampleInput(make_arg(input_shape), args=(num_groups,))
def sample_inputs_binary_cross_entropy(op_info, device, dtype, requires_grad, logits=False, **kwargs):
    """Yield SampleInputs for binary cross entropy, optionally the logits variant.

    With ``logits=False`` the input is drawn from ``[1e-6, 1)``; with
    ``logits=True`` it is drawn with ``make_tensor``'s default range and extra
    samples with a ``pos_weight`` are added.  The target is always drawn from
    ``[1e-6, 1)``.
    """
    make = partial(make_tensor, device=device, dtype=dtype)
    # The lower bound has to stay above the 'eps' used by
    # gradcheck.py::gradgradcheck(); otherwise the perturbation can push a
    # value below zero, which triggers a device-side hardware assertion.
    make_prob = partial(make, low=1e-6, high=1)

    reductions = ("mean", "sum", "none")

    # (shape, keyword arguments); weight tensors are created here, up front
    cases = [(shape, None) for shape in ((), (1,), (S,), (S, S), (S, S, S))]
    cases += [((S, S), dict(reduction=reduction)) for reduction in reductions]
    cases += [((S, S), dict(reduction=reduction, weight=make((S, S)))) for reduction in reductions]
    if logits:
        cases += [
            ((S, S), dict(reduction=reduction, pos_weight=make((S,), low=0)))
            for reduction in reductions
        ]

    make_input = make if logits else make_prob
    for shape, sample_kwargs in cases:
        input_tensor = make_input(shape, requires_grad=requires_grad)
        target = make_prob(shape, requires_grad=requires_grad)
        yield SampleInput(input_tensor, args=(target,), kwargs=sample_kwargs)
def sample_inputs_allclose(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for allclose.

    For every combination of shape, ``rtol`` and ``atol`` two samples are
    produced: one whose second tensor is the first shifted by ``atol``, and
    one made of two independently generated tensors.
    """
    # The 1-D shape is written as a real one-element tuple; the previous
    # spelling ``(S)`` was just the integer ``S``.
    sample_shapes = [(), (S,), (S, S, S)]
    atols = [1e-2, 1e-16]
    rtols = [1e-1, 0.5]
    for s, rtol, atol in product(sample_shapes, rtols, atols):
        # close sample
        t = make_tensor(s, device=device, dtype=dtype, requires_grad=requires_grad)
        # detach so that ``close`` is a leaf tensor with the requested requires_grad
        close = (t + atol).detach().requires_grad_(requires_grad)
        yield SampleInput(t, close, rtol=rtol, atol=atol)

        # random sample
        a = make_tensor(s, device=device, dtype=dtype, requires_grad=requires_grad)
        b = make_tensor(s, device=device, dtype=dtype, requires_grad=requires_grad)
        yield SampleInput(a, b, rtol=rtol, atol=atol)
def sample_inputs_l1_loss(op_info, device, dtype, requires_grad, **kwargs):
    """Yield the generic loss samples, plus mixed complex/double scalar pairs
    when ``dtype`` is complex."""
    yield from sample_inputs_loss(op_info, device, dtype, requires_grad, **kwargs)

    if not dtype.is_complex:
        return

    # test COMPLEX_TO_FLOAT promotion
    make_scalar = partial(make_tensor, (), device=device, requires_grad=requires_grad)
    # complex input with a double target, then the other way round
    yield SampleInput(make_scalar(dtype=dtype), args=(make_scalar(dtype=torch.double),))
    yield SampleInput(make_scalar(dtype=torch.double), args=(make_scalar(dtype=dtype),))
def error_inputs_l1_loss(op_info, device, **kwargs):
    """Yield ErrorInputs for l1_loss: a bad ``reduction`` string and
    input/target shapes that do not match."""
    make = partial(make_tensor, device=device, dtype=torch.float32)

    # invalid reduction value
    bad_reduction = SampleInput(make(5, 4), args=(make(5, 4),), kwargs={'reduction': 'abc'})
    yield ErrorInput(
        bad_reduction,
        error_type=ValueError,
        error_regex='abc is not a valid value for reduction',
    )

    # invalid input shapes; either of the two messages is accepted
    shape_mismatch_regex = (
        r'(Attempting to broadcast a dimension of length|'
        r'The size of tensor a \(4\) must match the '
        r'size of tensor b \(5\) at non-singleton '
        r'dimension 1)'
    )
    mismatched = SampleInput(make(5, 4), args=(make(5,),))
    yield ErrorInput(mismatched, error_regex=shape_mismatch_regex)
def sample_inputs_smooth_l1_loss(op_info, device, dtype, requires_grad, **kwargs):
    """Yield the generic loss samples followed by two samples that set ``beta``."""
    yield from sample_inputs_loss(op_info, device, dtype, requires_grad, **kwargs)

    make = partial(make_tensor, (S, S), device=device, dtype=dtype, requires_grad=requires_grad)

    # Input lies in [0, 2) and target in [-2, 0), so their absolute difference
    # stays below beta=5 and the smooth branch of the loss is always taken.
    nonneg_input = make(low=0, high=2)
    nonpos_target = make(low=-2, high=0)
    yield SampleInput(nonneg_input, args=(nonpos_target,), kwargs=dict(beta=5))

    # beta=0 with unrestricted input and target
    plain_input = make()
    plain_target = make()
    yield SampleInput(plain_input, args=(plain_target,), kwargs=dict(beta=0))
def sample_inputs_kl_div(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for kl_div over shapes, reductions and ``log_target``.

    The input is always in log space (output of ``log_softmax``); the target
    is in log space when ``log_target`` is True and a ``softmax`` output
    otherwise.
    """
    # kl_div works with inputs in [0, 1] (aka the pdf of a probability measure)
    # Then log [0, 1] = (-inf, 0], so this is the log space
    make_arg = partial(make_tensor, low=0., device=device, dtype=dtype, requires_grad=requires_grad)

    def _normalized_with(normalize):
        # Build a factory that normalizes a fresh tensor along the last dim.
        def factory(shape):
            out = normalize(make_arg(shape), -1)
            out.requires_grad_(requires_grad)
            return out
        return factory

    make_log = _normalized_with(torch.nn.functional.log_softmax)
    make_prob = _normalized_with(torch.nn.functional.softmax)

    shapes = ((2,), (2, 3))
    reductions = ("none", "mean", "batchmean", "sum")
    for shape, reduction, log_target in product(shapes, reductions, (True, False)):
        log_input = make_log(shape)
        if log_target:
            target = make_log(shape)
        else:
            target = make_prob(shape)
        yield SampleInput(log_input, args=(target,), kwargs=dict(reduction=reduction, log_target=log_target))
def sample_inputs_pdist(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for pdist: 2-D inputs of varying size with the
    default ``p``, then (S, S) inputs with several explicit ``p`` values."""
    make_input = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    # every (rows, cols) combination drawn from {1, S}
    for n, m in itertools.product((1, S), repeat=2):
        yield SampleInput(make_input((n, m)))

    for p in (0.0, 1.0, 2.0, 10.0, float("inf")):
        yield SampleInput(make_input((S, S)), kwargs=dict(p=p))
def reference_pdist(input, p=2):
    """Reference pairwise distance built on ``scipy.spatial.distance.pdist``.

    ``p == 0`` uses the Hamming metric multiplied by the number of columns,
    ``p == inf`` uses the maximum absolute difference, and any other ``p``
    uses the Minkowski metric.  The result is cast to ``input.dtype``.
    """
    pairwise = scipy.spatial.distance.pdist

    if p == 0:
        # scipy's hamming metric is multiplied by the column count here
        distances = pairwise(input, "hamming") * input.shape[1]
        return distances.astype(input.dtype)

    if p == float("inf"):
        def max_abs_difference(x, y):
            return np.abs(x - y).max()

        return pairwise(input, max_abs_difference).astype(input.dtype)

    return pairwise(input, "minkowski", p=p).astype(input.dtype)
def sample_inputs_diagflat(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for diagflat: 0-D, 1-D and 2-D inputs, then 1-D
    inputs with a positive and a negative ``offset``."""
    make_input = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    for shape in ((), (2,), (2, 2)):
        yield SampleInput(make_input(shape))

    for offset in (1, -1):
        yield SampleInput(make_input((2,)), offset=offset)
def sample_inputs_max_unpool(op_info, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for max_unpool{1,2,3}d derived from max_pool samples.

    Each usable max_pool sample is run through the matching pooling function;
    the pooled output and its indices become the unpool input and argument.
    """
    # unpool op name -> (number of pooled dimensions, matching pooling function)
    unpool_table = {
        'nn.functional.max_unpool1d': (1, torch.nn.functional.max_pool1d),
        'nn.functional.max_unpool2d': (2, torch.nn.functional.max_pool2d),
        'nn.functional.max_unpool3d': (3, torch.nn.functional.max_pool3d),
    }
    pool_dim, pool_method = unpool_table[op_info.name]

    # Shallow copy of the op info renamed to the pooling op, which is what is
    # handed to sample_inputs_max_pool.
    pool_op_info = copy.copy(op_info)
    pool_op_info.name = f'nn.functional.{pool_method.__name__}'

    for sample in sample_inputs_max_pool(pool_op_info, device, dtype, requires_grad, **kwargs):
        pool_kwargs = sample.kwargs

        # shapes (C, ...) do not work as of now,
        # see https://github.com/pytorch/pytorch/issues/68337
        # TODO: remove once the issue is resolved
        if sample.input.dim() != pool_dim + 2:
            continue

        # No dilation > 1 for max_unpool,
        # see https://github.com/pytorch/pytorch/issues/68420
        if pool_kwargs['dilation'] != 1:
            continue

        # Can't unpool without indices
        if not pool_kwargs['return_indices']:
            continue

        pooled, indices = pool_method(sample.input, **pool_kwargs)
        # arg has to be a leaf
        arg = pooled.detach().requires_grad_(requires_grad)

        unpool_kwargs = {key: pool_kwargs[key] for key in ('kernel_size', 'stride', 'padding')}
        # output_size could be None, but it is given explicitly to make up
        # for the information lost in pooling by the floor/ceil operation
        # used to compute the shapes
        unpool_kwargs['output_size'] = sample.input.size()

        yield SampleInput(arg, args=(indices,), kwargs=unpool_kwargs)
def sample_inputs_max_unpool_grad(op_info, device, dtype, requires_grad, **kwargs):
    """Yield only those max_unpool samples whose index tensor has no duplicates.

    The max_unpool samples come from max_pool, where one element of the
    pooling input may end up at several positions of the pooled output.
    Gradcheck then fails, because finite differences perturb output elements
    one at a time rather than perturbing together all elements that share
    the same source index.  Two fixes are possible:
      1. compute one perturbation and apply it to every element of the same
         equivalence class, or
      2. keep only samples whose equivalence classes are singletons, i.e.
         whose index tensor contains unique indices only.
    Option 2 is the simpler one and is what is implemented here.
    """
    for sample in sample_inputs_max_unpool(op_info, device, dtype, requires_grad, **kwargs):
        indices = sample.args[0]
        has_repeated_index = indices.unique().numel() != indices.numel()
        if has_repeated_index:
            continue
        yield sample
def sample_inputs_multi_head_attention_forward(opinfo, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs for multi_head_attention_forward.

    Iterates over batched/unbatched inputs, packed or separate projection
    weights, attention-mask kinds, embedding sizes, sequence lengths, head
    counts and dropout values.  A single configuration is used when
    ``requires_grad`` is set.
    """
    make_input = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)

    bsz = 2
    if requires_grad:
        # backward tests would take too long to complete, causing the job timeout.
        is_batcheds, use_separate_proj_weights = (True,), (False,)
        mask_types, emb_sizes = ("2d",), (2,)
        src_lens, tgt_lens = (XS,), (XS,)
        heads, dropouts = (2,), (0.5,)
    else:
        is_batcheds, use_separate_proj_weights = (False, True), (False, True)
        mask_types, emb_sizes = (None, "2d", "3d"), (2, 4)
        src_lens, tgt_lens = (XS,), (XS, S)
        heads, dropouts = (1, 2), (0.0, 0.5)

    configurations = itertools.product(
        is_batcheds, use_separate_proj_weights, mask_types, emb_sizes, src_lens, tgt_lens, heads, dropouts
    )
    for is_batched, use_separate_proj_weight, mask_type, emb_size, src_len, tgt_len, num_heads, dropout_p in configurations:
        # attention mask: none, 2-D, or 3-D with a leading (batch * heads) dim
        if mask_type == "2d":
            attn_mask = make_input(src_len, tgt_len)
        elif mask_type == "3d":
            attn_mask = make_input((bsz if is_batched else 1) * num_heads, src_len, tgt_len)
        else:
            attn_mask = None

        # batched inputs carry the batch size as their middle dimension
        batch_dims = (bsz,) if is_batched else ()
        q = make_input(src_len, *batch_dims, emb_size)
        k = make_input(tgt_len, *batch_dims, emb_size)
        v = make_input(tgt_len, *batch_dims, emb_size)

        # either three separate projection weights or one packed weight
        in_proj_weight = q_proj_weight = k_proj_weight = v_proj_weight = None
        if use_separate_proj_weight:
            q_proj_weight = make_input(emb_size, emb_size)
            k_proj_weight = make_input(emb_size, emb_size)
            v_proj_weight = make_input(emb_size, emb_size)
        else:
            in_proj_weight = make_input(emb_size * 3, emb_size)

        bias_k = make_input(emb_size)
        bias_v = make_input(emb_size)
        in_proj_bias = make_input(emb_size * 3)
        out_proj_weight = make_input(emb_size, emb_size)
        out_proj_bias = make_input(emb_size)

        sample_args = (
            k, v, emb_size, num_heads, in_proj_weight,
            in_proj_bias, bias_k, bias_v, False,
            dropout_p, out_proj_weight, out_proj_bias
        )
        sample_kwargs = {
            "q_proj_weight" : q_proj_weight,
            "k_proj_weight" : k_proj_weight,
            "v_proj_weight" : v_proj_weight,
            "attn_mask" : attn_mask,
            # training mode is switched on exactly when dropout is applied
            "training" : dropout_p > 0.0,
            "use_separate_proj_weight" : use_separate_proj_weight
        }

        yield SampleInput(q, args=sample_args, kwargs=sample_kwargs)
# Includes some values such that N * N won't be a multiple of 4,
# which should ensure we test the vectorized and non-vectorized
# kernel code paths.
# NOTE(review): the comment above presumably describes ``foreach_num_tensors``
# rather than ``NUM_SIZE0_TENSORS`` - confirm.

# Number of tensors requested from sample_inputs_foreach when building
# zero-size foreach samples.
NUM_SIZE0_TENSORS = 10000
# Default tensor-list lengths for foreach samples when the caller does not
# pass ``num_input_tensors``; a different set is used under TEST_WITH_SLOW.
foreach_num_tensors = [20, 23] if not TEST_WITH_SLOW else [23, 30, 300]
# Options popped from the sample function's kwargs (with these defaults) and
# forwarded to sample_inputs_foreach.
_foreach_inputs_default_kwargs = {"noncontiguous": False, "same_size": False, "low": None, "high": None}
class ForeachRightmostArgType(enum.Enum):
    """Kind of value supplied as the rightmost argument of a foreach sample."""

    # Values are spelled out; they match what ``enum.auto()`` assigns.
    TensorList = 1
    ScalarList = 2
    Scalar = 3
    Tensor = 4
class ForeachSampleInput(SampleInput):
    """SampleInput that also carries reference arguments and a fastpath flag."""

    # For TensorList <op> Scalar/Tensor the reference result is computed by
    # first rewriting the call as TensorList <op> ScalarList/TensorList and
    # then as several Tensor <op> Scalar/Tensor calls.
    # ref_args holds the arguments after that first rewrite.
    ref_args: Any
    disable_fastpath: bool

    def __init__(self, *args, disable_fastpath=False, ref_args=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.disable_fastpath = disable_fastpath
        # Fall back to the regular args when no (or empty) ref_args are given.
        self.ref_args = ref_args if ref_args else self.args
class foreach_inputs_sample_func:
    """Callable that generates ``ForeachSampleInput`` objects for a foreach op.

    An instance is configured with the op's ``arity`` and with the kinds of
    rightmost argument the op accepts besides a tensor list.  Calling the
    instance with ``(opinfo, device, dtype, requires_grad, **kwargs)`` yields
    samples whose input and leading arguments come from
    ``sample_inputs_foreach`` and whose rightmost argument is one of the
    supported kinds.
    """

    def __init__(
        self,
        arity: int,
        rightmost_supports_scalar: bool,
        rightmost_supports_scalarlist: bool,
        rightmost_supports_tensor: bool = False,
    ) -> None:
        self.arity = arity
        self._set_rightmost_arg_types(
            rightmost_supports_scalar, rightmost_supports_scalarlist, rightmost_supports_tensor,
        )
        # Values tried for ``intersperse_empty_tensors`` in ``__call__``.
        self._intersperse_empty = (True, False)

    def _set_rightmost_arg_types(
        self,
        rightmost_supports_scalar: bool,
        rightmost_supports_scalarlist: bool,
        rightmost_supports_tensor: bool,
    ) -> None:
        """Record the rightmost-argument kinds to sample.

        A tensor list is always included; the other kinds are added only for
        ops with more than one argument.
        """
        self._rightmost_arg_types = [ForeachRightmostArgType.TensorList]
        if self.arity > 1:
            if rightmost_supports_scalar:
                self._rightmost_arg_types.append(ForeachRightmostArgType.Scalar)
            if rightmost_supports_scalarlist:
                self._rightmost_arg_types.append(ForeachRightmostArgType.ScalarList)
            if rightmost_supports_tensor:
                self._rightmost_arg_types.append(ForeachRightmostArgType.Tensor)

    def _sample_rightmost_arg(
        self,
        opinfo,
        rightmost_arg_type,
        device,
        dtype,
        num_tensors,
        allow_higher_dtype_scalars,
        **_foreach_inputs_kwargs,
    ):
        """Return a list of candidate rightmost arguments of the given kind.

        Raises:
            AssertionError: if ``rightmost_arg_type`` is not a known kind.
        """
        if rightmost_arg_type == ForeachRightmostArgType.TensorList:
            return [sample_inputs_foreach(None, device, dtype, num_tensors, **_foreach_inputs_kwargs)]
        if rightmost_arg_type == ForeachRightmostArgType.Tensor:
            # a single 0-dim tensor
            return [make_tensor(
                (), device=device, dtype=dtype,
                noncontiguous=_foreach_inputs_kwargs["noncontiguous"],
                requires_grad=_foreach_inputs_kwargs.get("requires_grad", False),
            )]
        # _foreach_pow with half/bfloat16 only gets the scalars 1 and 2
        should_use_simpler_scalars = opinfo.name == "_foreach_pow" and dtype in (torch.float16, torch.bfloat16)

        def sample_float():
            s = random.random()
            if should_use_simpler_scalars:
                return 1.0 if s > 0.5 else 2.0
            else:
                return 1.0 - s

        high = 2 if should_use_simpler_scalars else 9
        if rightmost_arg_type == ForeachRightmostArgType.ScalarList:
            scalarlist_list = []
            scalarlist_list.append([random.randint(0, high) + 1 for _ in range(num_tensors)])

            if allow_higher_dtype_scalars or dtype.is_floating_point:
                scalarlist_list.append([sample_float() for _ in range(num_tensors)])
            if allow_higher_dtype_scalars or dtype.is_complex:
                scalarlist_list.append([complex(sample_float(), sample_float()) for _ in range(num_tensors)])
            # lists mixing Python scalar types, padded with 3.0 up to num_tensors
            scalarlist_list.append([1, 2.0, 3.0 + 4.5j] + [3.0 for _ in range(num_tensors - 3)])
            scalarlist_list.append([True, 1, 2.0, 3.0 + 4.5j] + [3.0 for _ in range(num_tensors - 4)])
            return scalarlist_list
        if rightmost_arg_type == ForeachRightmostArgType.Scalar:
            scalars = []
            scalars.append(random.randint(1, high + 1))
            if allow_higher_dtype_scalars or dtype.is_floating_point:
                scalars.append(sample_float())
            if allow_higher_dtype_scalars or dtype.is_complex:
                scalars.append(complex(sample_float(), sample_float()))
            scalars.append(True)
            return scalars
        raise AssertionError(f"Invalid rightmost_arg_type of {rightmost_arg_type}")

    def _should_disable_fastpath(self, opinfo, rightmost_arg, rightmost_arg_type, dtype):
        """Compute the ``disable_fastpath`` flag stored on a sample.

        Returns ``None`` when the rightmost argument is a single tensor, and a
        boolean in every other case.

        Raises:
            AssertionError: for an unknown rightmost kind or scalar type.
        """
        if self.arity == 1:
            if "foreach_abs" in opinfo.name and dtype in complex_types():
                return True
            # unary
            if opinfo.ref in (torch.abs, torch.neg):
                return False
            if opinfo.ref_inplace in (torch.Tensor.zero_,):
                return False
            return dtype in integral_types_and(torch.bool)
        if self.arity < 2 or rightmost_arg_type == ForeachRightmostArgType.Tensor:
            return None
        if "foreach_pow" in opinfo.name and dtype in integral_types_and(torch.bool):
            return True
        if any(
            foreach_name in opinfo.name
            for foreach_name in ("foreach_clamp_max", "foreach_clamp_min", "foreach_maximum", "foreach_minimum")
        ) and dtype in integral_types_and(torch.bool):
            return True
        if rightmost_arg_type == ForeachRightmostArgType.TensorList:
            disable_fastpath = "foreach_div" in opinfo.name and dtype in integral_types_and(torch.bool)
            if "foreach_add" in opinfo.name and dtype == torch.bool:
                disable_fastpath = True
            return disable_fastpath
        elif rightmost_arg_type == ForeachRightmostArgType.Scalar:
            disable_fastpath = "foreach_div" in opinfo.name and dtype in integral_types_and(torch.bool)
            # bool must be tested before int because bool is a subclass of int
            if isinstance(rightmost_arg, bool):
                disable_fastpath |= dtype == torch.bool
                if opinfo.ref in (torch.add, torch.mul):
                    disable_fastpath = False
            elif isinstance(rightmost_arg, int):
                disable_fastpath |= dtype == torch.bool
            elif isinstance(rightmost_arg, float):
                disable_fastpath |= dtype in integral_types_and(torch.bool)
            elif isinstance(rightmost_arg, complex):
                disable_fastpath |= dtype not in complex_types()
            else:
                raise AssertionError(f"Invalid scalar of type {rightmost_arg_type} - {rightmost_arg}")
            return disable_fastpath
        elif rightmost_arg_type == ForeachRightmostArgType.ScalarList:
            disable_fastpath = opinfo.ref == torch.div and dtype in integral_types_and(torch.bool)
            elmt_t = type(rightmost_arg[0])
            has_same_type = all(isinstance(v, elmt_t) for v in rightmost_arg)
            if not has_same_type:
                # mixed-type scalar lists: the flag depends only on the dtype
                return dtype not in complex_types()
            if isinstance(rightmost_arg[0], bool):
                if ("foreach_add" in opinfo.name or "foreach_mul" in opinfo.name) and dtype == torch.bool:
                    disable_fastpath = False
            elif isinstance(rightmost_arg[0], int):
                disable_fastpath |= dtype == torch.bool
            elif isinstance(rightmost_arg[0], float):
                disable_fastpath |= dtype in integral_types_and(torch.bool)
            elif isinstance(rightmost_arg[0], complex):
                disable_fastpath |= dtype not in complex_types()
            else:
                raise AssertionError(f"Invalid scalarlist of {rightmost_arg}")
            return disable_fastpath
        else:
            raise AssertionError(f"Invalid rightmost_arg_type of {rightmost_arg_type}")

    def _sample_kwargs(self, opinfo, rightmost_arg, rightmost_arg_type, dtype):
        """Build the keyword arguments for one sample.

        Adds ``alpha`` for tensor-list samples of ops that support it, and
        ``disable_fastpath`` for ops with more than one argument.
        """
        kwargs = {}
        if rightmost_arg_type == ForeachRightmostArgType.TensorList and opinfo.supports_alpha_param:
            if dtype in integral_types_and(torch.bool):
                kwargs["alpha"] = 3
            elif dtype.is_complex:
                kwargs["alpha"] = complex(3, 3)
            else:
                kwargs["alpha"] = 3.14
        if self.arity > 1:
            kwargs["disable_fastpath"] = self._should_disable_fastpath(opinfo, rightmost_arg, rightmost_arg_type, dtype)
        return kwargs

    def sample_zero_size_tensor_inputs(self, opinfo, device, dtype, requires_grad, **kwargs):
        """Yield samples built with ``zero_size=True`` and ``NUM_SIZE0_TENSORS`` tensors.

        One sample is produced per configured rightmost kind, but the
        rightmost argument itself is always a tensor list.
        """
        assert "num_input_tensors" not in kwargs
        _foreach_inputs_kwargs = {k: kwargs.pop(k, v) for k, v in _foreach_inputs_default_kwargs.items()}
        _foreach_inputs_kwargs["requires_grad"] = requires_grad
        allow_higher_dtype_scalars = kwargs.pop("allow_higher_dtype_scalars", False)
        for _rightmost_arg_type in self._rightmost_arg_types:
            zero_size_foreach_inputs_kwargs = copy.deepcopy(_foreach_inputs_kwargs)
            zero_size_foreach_inputs_kwargs["zero_size"] = True
            input = sample_inputs_foreach(None, device, dtype, NUM_SIZE0_TENSORS, **zero_size_foreach_inputs_kwargs)
            if self.arity > 1:
                args = [
                    sample_inputs_foreach(None, device, dtype, NUM_SIZE0_TENSORS, **zero_size_foreach_inputs_kwargs)
                    for _ in range(self.arity - 2)
                ]
                args.append(
                    self._sample_rightmost_arg(
                        opinfo,
                        ForeachRightmostArgType.TensorList,
                        device,
                        dtype,
                        NUM_SIZE0_TENSORS,
                        allow_higher_dtype_scalars=allow_higher_dtype_scalars,
                        **zero_size_foreach_inputs_kwargs,
                    )[0])
                kwargs = self._sample_kwargs(
                    opinfo, args[-1], ForeachRightmostArgType.TensorList, dtype)
            else:
                args = []
                kwargs = {}
                if opinfo.ref in (torch.abs, torch.neg):
                    kwargs["disable_fastpath"] = False
                else:
                    kwargs["disable_fastpath"] = dtype in integral_types_and(torch.bool)
            yield ForeachSampleInput(input, *args, **kwargs)

    def __call__(self, opinfo, device, dtype, requires_grad, **kwargs):
        """Yield ``ForeachSampleInput`` objects for the op.

        Recognized kwargs: ``num_input_tensors`` (a list of tensor-list
        lengths, defaulting to ``foreach_num_tensors``), the keys of
        ``_foreach_inputs_default_kwargs`` and ``allow_higher_dtype_scalars``.
        """
        num_input_tensors_specified = "num_input_tensors" in kwargs
        num_input_tensors = kwargs.pop("num_input_tensors") if num_input_tensors_specified else foreach_num_tensors
        assert isinstance(num_input_tensors, list)
        _foreach_inputs_kwargs = {k: kwargs.pop(k, v) for k, v in _foreach_inputs_default_kwargs.items()}
        _foreach_inputs_kwargs["requires_grad"] = requires_grad
        _foreach_inputs_kwargs["zero_size"] = False
        allow_higher_dtype_scalars = kwargs.pop("allow_higher_dtype_scalars", False)

        # add empty tensor interspersion to test fully fixing #100701
        for num_tensors, rightmost_arg_type, intersperse_empty_tensors in itertools.product(
                num_input_tensors, self._rightmost_arg_types, self._intersperse_empty):
            if intersperse_empty_tensors and (num_tensors != max(num_input_tensors) or str(device) == 'cpu'):
                # generate interspersed empty tensors for only 1 N on non-cpu device to lessen redundancy
                continue
            _foreach_inputs_kwargs["intersperse_empty_tensors"] = intersperse_empty_tensors
            input = sample_inputs_foreach(
                None, device, dtype, num_tensors, **_foreach_inputs_kwargs)
            args = []
            if self.arity > 1:
                args = [
                    sample_inputs_foreach(
                        None, device, dtype, num_tensors, **_foreach_inputs_kwargs)
                    for _ in range(self.arity - 2)
                ]
                rightmost_arg_list = self._sample_rightmost_arg(
                    opinfo, rightmost_arg_type, device, dtype, num_tensors, allow_higher_dtype_scalars,
                    **_foreach_inputs_kwargs)
                for rightmost_arg in rightmost_arg_list:
                    args.append(rightmost_arg)
                    kwargs = self._sample_kwargs(opinfo, rightmost_arg, rightmost_arg_type, dtype)
                    if rightmost_arg_type in (ForeachRightmostArgType.Scalar, ForeachRightmostArgType.Tensor):
                        # expand the single scalar/tensor into one entry per tensor
                        ref_args = args[:-1] + [[args[-1] for _ in range(num_tensors)]]
                    else:
                        # Take a snapshot: ``args`` is popped after the yield,
                        # so sharing the list would strip the rightmost
                        # argument from the sample's ``ref_args`` as soon as
                        # the generator is advanced.
                        ref_args = list(args)
                    sample = ForeachSampleInput(input, *args, ref_args=ref_args, **kwargs)
                    yield sample
                    args.pop()
            else:
                yield ForeachSampleInput(
                    input,
                    *args,
                    disable_fastpath=self._should_disable_fastpath(opinfo, None, None, dtype),
                )
class foreach_max_sample_func(foreach_inputs_sample_func):
    """Sample function variant that never intersperses empty tensors, yields
    no zero-size samples and always reports ``disable_fastpath`` as False."""

    def __init__(
        self,
        arity: int,
        rightmost_supports_scalar: bool,
        rightmost_supports_scalarlist: bool,
        rightmost_supports_tensor: bool = False,
    ) -> None:
        super().__init__(
            arity=arity,
            rightmost_supports_scalar=rightmost_supports_scalar,
            rightmost_supports_scalarlist=rightmost_supports_scalarlist,
            rightmost_supports_tensor=rightmost_supports_tensor,
        )
        # only the "no empty tensors" option is sampled
        self._intersperse_empty = (False,)

    def sample_zero_size_tensor_inputs(self, opinfo, device, dtype, requires_grad, **kwargs):
        # no zero-size samples for this op family
        return []

    def _should_disable_fastpath(self, opinfo, rightmost_arg, rightmost_arg_type, dtype):
        # independent of op, argument kind and dtype
        return False
class foreach_norm_sample_func(foreach_inputs_sample_func):
    """Sample function for foreach norm ops.

    Samples carry an ``ord`` keyword and, in ``__call__``, an output ``dtype``
    keyword.  ``disable_fastpath`` is False only for ``ord`` in
    ``(1, 2, inf)`` combined with a floating dtype (including half and
    bfloat16).
    """

    def sample_zero_size_tensor_inputs(self, opinfo, device, dtype, requires_grad, **kwargs):
        """Yield one zero-size sample per supported ``ord`` value."""
        assert "num_input_tensors" not in kwargs
        _foreach_inputs_kwargs = {k: kwargs.pop(k, v) for k, v in _foreach_inputs_default_kwargs.items()}
        _foreach_inputs_kwargs["requires_grad"] = requires_grad
        for ord in (0, 1, 2, -1, -2, float('inf'), float('-inf')):
            input = sample_inputs_foreach(None, device, dtype, NUM_SIZE0_TENSORS, zero_size=True, **_foreach_inputs_kwargs)
            disable_fastpath = True
            if ord in (1, 2, float('inf')) and dtype in floating_types_and(torch.half, torch.bfloat16):
                disable_fastpath = False
            yield ForeachSampleInput(input, ord=ord, disable_fastpath=disable_fastpath)

    def __call__(self, opinfo, device, dtype, requires_grad, **kwargs):
        """Yield norm samples over tensor counts, ``ord`` values, output dtypes
        and empty-tensor interspersion, then NaN-propagation samples when
        ``requires_grad`` is False."""
        num_input_tensors = kwargs.pop("num_input_tensors", foreach_num_tensors)
        assert isinstance(num_input_tensors, list)
        _foreach_inputs_kwargs = {k: kwargs.pop(k, v) for k, v in _foreach_inputs_default_kwargs.items()}
        _foreach_inputs_kwargs["requires_grad"] = requires_grad
        # popped so it is removed from kwargs; the value itself is unused here
        _allow_higher_dtype_scalars = kwargs.pop("allow_higher_dtype_scalars", False)

        # Output dtypes: the default (None) plus one explicit wide dtype.
        # The conditional expression is parenthesized on purpose: without the
        # parentheses it binds looser than ``+`` and ``None`` would only be
        # offered for complex dtypes.
        out_dtypes = (None,) + ((torch.complex128,) if dtype in complex_types() else (torch.float64,))

        for num_tensors, ord, out_dtype, intersperse_empty_tensors in product(
            num_input_tensors,
            (0, 1, 2, -1, -2, float('inf'), float('-inf')),
            out_dtypes,
            (True, False),
        ):
            # inf norm and negative norms on empty tensors is not supported by our reference func vector norm:
            # linalg.vector_norm cannot compute the inf norm on an empty tensor because the operation does not have an identity
            if (ord in [float('inf'), float('-inf')] or ord < 0) and intersperse_empty_tensors:
                continue

            _foreach_inputs_kwargs["intersperse_empty_tensors"] = intersperse_empty_tensors
            input = sample_inputs_foreach(None, device, dtype, num_tensors, zero_size=False, **_foreach_inputs_kwargs)
            disable_fastpath = True
            if ord in (1, 2, float('inf')) and dtype in floating_types_and(torch.half, torch.bfloat16):
                disable_fastpath = False
            yield ForeachSampleInput(input, ord=ord, disable_fastpath=disable_fastpath, dtype=out_dtype)

        # Also test nan propagation with a single tensor, but skip autograd testing
        # NOTE(review): ``ord`` below is whatever value the loop above ended
        # on; confirm that reusing it (rather than iterating over the ord
        # values again) is intended.
        if not requires_grad:
            nan_inputs = [
                [float('nan')],
                [float('nan'), 1.0],
                [1.0, float('nan')],
                [1.0, 2.0, 3.0, float('nan'), float('nan'), 7.0, float('nan'), float('nan'), -1.5, 6.0],
                [7.0, 3.0, float('nan'), float('nan'), -1.5, 6.0],
                [3.0, float('nan'), float('nan'), -1.5, 6.0],
            ]
            for input in nan_inputs:
                x = torch.tensor(input, device=device)
                disable_fastpath = True
                if ord in (1, 2, float('inf')) and dtype in floating_types_and(torch.half, torch.bfloat16):
                    disable_fastpath = False
                yield ForeachSampleInput([x], ord=ord, disable_fastpath=disable_fastpath)
class foreach_pointwise_sample_func(foreach_inputs_sample_func):
    """Sample function for foreach pointwise ops that take three tensor lists,
    or two tensor lists plus ``scalars`` / ``value`` keyword arguments."""

    def __init__(
        self,
        arity: int = 3,
        rightmost_supports_scalar: bool = False,
        rightmost_supports_scalarlist: bool = False,
    ):
        super().__init__(
            arity=arity,
            rightmost_supports_scalar=rightmost_supports_scalar,
            rightmost_supports_scalarlist=rightmost_supports_scalarlist,
        )

    def _should_disable_fastpath(self, opinfo, rightmost_arg, rightmost_arg_type, dtype):
        # only integral/bool dtypes of addcmul report a disabled fastpath
        if dtype not in integral_types_and(torch.bool):
            return False
        return opinfo.ref in (torch.addcmul,)

    def sample_zero_size_tensor_inputs(self, opinfo, device, dtype, requires_grad, **kwargs):
        """Yield a single sample made of three zero-size tensor lists."""
        assert "num_input_tensors" not in kwargs
        _foreach_inputs_kwargs = {
            name: kwargs.pop(name, default)
            for name, default in _foreach_inputs_default_kwargs.items()
        }
        _foreach_inputs_kwargs["requires_grad"] = requires_grad

        # zero_size tensor
        def zero_size_list():
            return sample_inputs_foreach(
                None, device, dtype, NUM_SIZE0_TENSORS, zero_size=True, **_foreach_inputs_kwargs)

        input = zero_size_list()
        tensor_lists = [zero_size_list() for _ in range(2)]

        # a caller-supplied "scalars" entry is not forwarded to the sample
        kwargs.pop("scalars", None)
        kwargs.update(self._sample_kwargs(opinfo, tensor_lists[-1], ForeachRightmostArgType.TensorList, dtype))
        yield ForeachSampleInput(input, *tensor_lists, **kwargs)

    def __call__(self, opinfo, device, dtype, requires_grad, **kwargs):
        """Yield pointwise samples over tensor counts, rightmost-argument
        kinds and empty-tensor interspersion."""
        if "num_input_tensors" in kwargs:
            num_input_tensors = kwargs.pop("num_input_tensors")
        else:
            num_input_tensors = foreach_num_tensors
        assert isinstance(num_input_tensors, list)

        _foreach_inputs_kwargs = {
            name: kwargs.pop(name, default)
            for name, default in _foreach_inputs_default_kwargs.items()
        }
        _foreach_inputs_kwargs["requires_grad"] = requires_grad
        allow_higher_dtype_scalars = kwargs.pop("allow_higher_dtype_scalars", False)

        for num_tensors, rightmost_arg_type, intersperse_empty_tensors in itertools.product(
                num_input_tensors, self._rightmost_arg_types, (True, False)):
            _foreach_inputs_kwargs["intersperse_empty_tensors"] = intersperse_empty_tensors
            rightmost_is_tensorlist = rightmost_arg_type == ForeachRightmostArgType.TensorList

            input = sample_inputs_foreach(None, device, dtype, num_tensors, zero_size=False, **_foreach_inputs_kwargs)
            # The rightmost tensor list, if any, is appended per sample below,
            # so only one list is created up front in that case.
            num_leading_lists = 1 if rightmost_is_tensorlist else 2
            args = [
                sample_inputs_foreach(None, device, dtype, num_tensors, zero_size=False, **_foreach_inputs_kwargs)
                for _ in range(num_leading_lists)
            ]

            # higher-dtype scalars are not sampled together with interspersed empty tensors
            higher_dtype_ok = False if intersperse_empty_tensors else allow_higher_dtype_scalars
            rightmost_arg_list = self._sample_rightmost_arg(
                opinfo,
                rightmost_arg_type,
                device,
                dtype,
                num_tensors,
                zero_size=False,
                allow_higher_dtype_scalars=higher_dtype_ok,
                **_foreach_inputs_kwargs,
            )

            for rightmost_arg in rightmost_arg_list:
                kwargs = {}
                if rightmost_is_tensorlist:
                    args.append(rightmost_arg)
                elif rightmost_arg_type in [ForeachRightmostArgType.Tensor, ForeachRightmostArgType.ScalarList]:
                    kwargs["scalars"] = rightmost_arg
                else:
                    kwargs["value"] = rightmost_arg
                kwargs.update(self._sample_kwargs(opinfo, rightmost_arg, rightmost_arg_type, dtype))
                assert len(args) == 2, f"{len(args)=}"
                yield ForeachSampleInput(input, *args, **kwargs)
                if rightmost_is_tensorlist:
                    # drop the per-sample tensor list again
                    args.pop()
# TestMeta test names, paired as (in-place variant, out-of-place variant).
_FOREACH_UNARY_META_TESTS = (
    ("test_dispatch_meta_inplace", "test_dispatch_meta_outplace"),
    ("test_dispatch_symbolic_meta_inplace", "test_dispatch_symbolic_meta_outplace"),
    ("test_meta_inplace", "test_meta_outplace"),
)


def _foreach_unary_meta_xfails(inplace_dtypes, outplace_dtypes=None):
    """Build ``unittest.expectedFailure`` DecorateInfos for the TestMeta tests.

    One entry is produced per in-place test using ``inplace_dtypes``.  When
    ``outplace_dtypes`` is given, the matching out-of-place test is emitted
    right after each in-place one.  Returns a tuple.
    """
    infos = []
    for inplace_test, outplace_test in _FOREACH_UNARY_META_TESTS:
        infos.append(
            DecorateInfo(unittest.expectedFailure, "TestMeta", inplace_test, dtypes=inplace_dtypes))
        if outplace_dtypes is not None:
            infos.append(
                DecorateInfo(unittest.expectedFailure, "TestMeta", outplace_test, dtypes=outplace_dtypes))
    return tuple(infos)


def _foreach_unary_cuda_complex128_autodiff_xfail():
    """Build the expectedFailure for TestForeach.test_autodiff on CUDA with complex128."""
    return DecorateInfo(
        unittest.expectedFailure,
        "TestForeach",
        "test_autodiff",
        device_type="cuda",
        dtypes=(torch.complex128,),
    )


# OpInfo entries for the unary foreach ops.  Each entry keeps the same keyword
# arguments and the same decorators, in the same order, as a fully spelled-out table.
foreach_unary_op_db: list[OpInfo] = [
    ForeachFuncInfo(
        "exp",
        sample_inputs_func=foreach_inputs_sample_func(1, False, False),
        dtypesIfHpu=custom_types(torch.float32),
        backward_requires_result=True,
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        decorators=_foreach_unary_meta_xfails(integral_types_and(torch.bool)),
    ),
    ForeachFuncInfo(
        "acos",
        sample_inputs_func=foreach_inputs_sample_func(1, False, False),
        dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        decorators=_foreach_unary_meta_xfails(integral_types_and(torch.bool)),
    ),
    ForeachFuncInfo(
        "asin",
        sample_inputs_func=foreach_inputs_sample_func(1, False, False),
        dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        decorators=_foreach_unary_meta_xfails(integral_types_and(torch.bool)),
    ),
    ForeachFuncInfo(
        "atan",
        sample_inputs_func=foreach_inputs_sample_func(1, False, False),
        dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        decorators=_foreach_unary_meta_xfails(integral_types_and(torch.bool)),
    ),
    ForeachFuncInfo(
        "cos",
        sample_inputs_func=foreach_inputs_sample_func(1, False, False),
        dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        decorators=_foreach_unary_meta_xfails(integral_types_and(torch.bool)),
    ),
    ForeachFuncInfo(
        "cosh",
        sample_inputs_func=foreach_inputs_sample_func(1, False, False),
        dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        decorators=_foreach_unary_meta_xfails(integral_types_and(torch.bool)),
    ),
    ForeachFuncInfo(
        "log",
        sample_inputs_func=foreach_inputs_sample_func(1, False, False),
        dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        decorators=_foreach_unary_meta_xfails(integral_types_and(torch.bool)),
    ),
    ForeachFuncInfo(
        "log10",
        sample_inputs_func=foreach_inputs_sample_func(1, False, False),
        dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        decorators=_foreach_unary_meta_xfails(integral_types_and(torch.bool)),
    ),
    ForeachFuncInfo(
        "log2",
        sample_inputs_func=foreach_inputs_sample_func(1, False, False),
        dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        decorators=_foreach_unary_meta_xfails(integral_types_and(torch.bool)),
    ),
    ForeachFuncInfo(
        "tan",
        sample_inputs_func=foreach_inputs_sample_func(1, False, False),
        dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
        backward_requires_result=True,
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        # due to https://github.com/pytorch/pytorch/pull/102427 enabling jiterator for complex
        # NOTE(review): presumably this explains the complex64 tolerance override below -- confirm.
        decorators=(
            *_foreach_unary_meta_xfails(integral_types_and(torch.bool)),
            DecorateInfo(
                toleranceOverride({torch.complex64: tol(atol=3e-04, rtol=2e-05)}),
                "TestForeach",
                "test_parity",
                device_type="cuda",
            ),
        ),
    ),
    ForeachFuncInfo(
        "tanh",
        sample_inputs_func=foreach_inputs_sample_func(1, False, False),
        dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
        backward_requires_result=True,
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        decorators=(
            *_foreach_unary_meta_xfails(integral_types_and(torch.bool)),
            DecorateInfo(
                toleranceOverride({torch.complex64: tol(atol=5e-03, rtol=1e-04)}),
                "TestForeach",
                "test_parity",
                device_type="cuda",
            ),
        ),
    ),
    ForeachFuncInfo(
        "sin",
        sample_inputs_func=foreach_inputs_sample_func(1, False, False),
        dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        decorators=_foreach_unary_meta_xfails(integral_types_and(torch.bool)),
    ),
    ForeachFuncInfo(
        "sinh",
        sample_inputs_func=foreach_inputs_sample_func(1, False, False),
        dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        decorators=_foreach_unary_meta_xfails(integral_types_and(torch.bool)),
    ),
    ForeachFuncInfo(
        "neg",
        sample_inputs_func=foreach_inputs_sample_func(1, False, False),
        dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        decorators=(
            *_foreach_unary_meta_xfails((torch.bool,), (torch.bool,)),
            DecorateInfo(
                unittest.expectedFailure,
                "TestForeach",
                "test_unary_op_tensors_on_different_devices",
                device_type="cuda",
                dtypes=(torch.bool,),
            ),
        ),
    ),
    ForeachFuncInfo(
        "sqrt",
        sample_inputs_func=foreach_inputs_sample_func(1, False, False),
        dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        backward_requires_result=True,
        decorators=_foreach_unary_meta_xfails(integral_types_and(torch.bool)),
    ),
    ForeachFuncInfo(
        "rsqrt",
        sample_inputs_func=foreach_inputs_sample_func(1, False, False),
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        backward_requires_result=True,
        decorators=_foreach_unary_meta_xfails(integral_types_and(torch.bool)),
    ),
    ForeachFuncInfo(
        "ceil",
        sample_inputs_func=foreach_inputs_sample_func(1, False, False),
        dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32, torch.int8),
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        decorators=(
            *_foreach_unary_meta_xfails(
                complex_types_and(torch.bool),
                complex_types_and(torch.bool),
            ),
            _foreach_unary_cuda_complex128_autodiff_xfail(),
        ),
    ),
    ForeachFuncInfo(
        "erf",
        sample_inputs_func=foreach_inputs_sample_func(1, False, False),
        dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        decorators=(
            *_foreach_unary_meta_xfails(
                integral_types_and(torch.bool) + complex_types(),
                complex_types(),
            ),
            _foreach_unary_cuda_complex128_autodiff_xfail(),
        ),
    ),
    ForeachFuncInfo(
        "erfc",
        sample_inputs_func=foreach_inputs_sample_func(1, False, False),
        dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        decorators=(
            *_foreach_unary_meta_xfails(
                integral_types_and(torch.bool) + complex_types(),
                complex_types(),
            ),
            _foreach_unary_cuda_complex128_autodiff_xfail(),
        ),
    ),
    ForeachFuncInfo(
        "expm1",
        sample_inputs_func=foreach_inputs_sample_func(1, False, False),
        dtypesIfHpu=custom_types(torch.float32),
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        backward_requires_result=True,
        decorators=_foreach_unary_meta_xfails(integral_types_and(torch.bool)),
    ),
    ForeachFuncInfo(
        "floor",
        sample_inputs_func=foreach_inputs_sample_func(1, False, False),
        dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        decorators=(
            *_foreach_unary_meta_xfails(
                complex_types_and(torch.bool),
                complex_types_and(torch.bool),
            ),
            _foreach_unary_cuda_complex128_autodiff_xfail(),
        ),
    ),
    ForeachFuncInfo(
        "log1p",
        sample_inputs_func=foreach_inputs_sample_func(1, False, False),
        dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        decorators=_foreach_unary_meta_xfails(integral_types_and(torch.bool)),
    ),
    ForeachFuncInfo(
        "round",
        sample_inputs_func=foreach_inputs_sample_func(1, False, False),
        dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        decorators=(
            *_foreach_unary_meta_xfails(
                complex_types_and(torch.bool),
                complex_types_and(torch.bool),
            ),
            _foreach_unary_cuda_complex128_autodiff_xfail(),
        ),
    ),
    ForeachFuncInfo(
        "frac",
        sample_inputs_func=foreach_inputs_sample_func(1, False, False),
        dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        decorators=(
            *_foreach_unary_meta_xfails(
                integral_types_and(torch.bool) + complex_types(),
                integral_types_and(torch.bool) + complex_types(),
            ),
            _foreach_unary_cuda_complex128_autodiff_xfail(),
        ),
    ),
    ForeachFuncInfo(
        "reciprocal",
        sample_inputs_func=foreach_inputs_sample_func(1, False, False),
        dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        backward_requires_result=True,
        decorators=_foreach_unary_meta_xfails(integral_types_and(torch.bool)),
    ),
    ForeachFuncInfo(
        "sigmoid",
        sample_inputs_func=foreach_inputs_sample_func(1, False, False),
        dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.float16),
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        backward_requires_result=True,
        decorators=_foreach_unary_meta_xfails(integral_types_and(torch.bool)),
    ),
    ForeachFuncInfo(
        "trunc",
        sample_inputs_func=foreach_inputs_sample_func(1, False, False),
        dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32, torch.int8),
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        decorators=(
            *_foreach_unary_meta_xfails(
                complex_types_and(torch.bool),
                complex_types_and(torch.bool),
            ),
            _foreach_unary_cuda_complex128_autodiff_xfail(),
        ),
    ),
    ForeachFuncInfo(
        "abs",
        sample_inputs_func=foreach_inputs_sample_func(1, False, False),
        dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        supports_fwgrad_bwgrad=True,
        decorators=(
            DecorateInfo(
                unittest.expectedFailure,
                "TestMeta",
                "test_dispatch_symbolic_meta_inplace",
                dtypes=complex_types(),
            ),
            DecorateInfo(
                unittest.expectedFailure,
                "TestMeta",
                "test_dispatch_meta_inplace",
                dtypes=complex_types(),
            ),
            # CPU-only expected failures for bool inputs.
            *(
                DecorateInfo(
                    unittest.expectedFailure,
                    "TestMeta",
                    cpu_bool_test,
                    device_type="cpu",
                    dtypes=(torch.bool,),
                )
                for cpu_bool_test in (
                    "test_dispatch_meta_outplace",
                    "test_dispatch_symbolic_meta_inplace",
                    "test_dispatch_symbolic_meta_outplace",
                    "test_meta_inplace",
                    "test_meta_outplace",
                    "test_dispatch_meta_inplace",
                )
            ),
            DecorateInfo(unittest.expectedFailure, "TestMeta", "test_meta_inplace", dtypes=complex_types()),
        ),
    ),
    ForeachFuncInfo(
        "zero",
        sample_inputs_func=foreach_inputs_sample_func(1, False, False),
        dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        supports_out=False,
    ),
    ForeachFuncInfo(
        "sign",
        sample_inputs_func=foreach_inputs_sample_func(1, False, False),
        dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        decorators=(
            *_foreach_unary_meta_xfails(complex_types(), complex_types()),
            _foreach_unary_cuda_complex128_autodiff_xfail(),
        ),
    ),
    ForeachFuncInfo(
        "lgamma",
        sample_inputs_func=foreach_inputs_sample_func(1, False, False),
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        decorators=(
            DecorateInfo(
                unittest.skip("In-place lgamma not supported for integral tensors"),
                "TestMeta",
                "test_dispatch_symbolic_meta_inplace",
                dtypes=integral_types_and(torch.bool),
            ),
            # DecorateInfo(unittest.skip("In-place lgamma not supported for integral tensors"), "TestMeta",
            #              "test_dispatch_meta_inplace", dtypes=integral_types_and(torch.bool)),
            DecorateInfo(
                unittest.skip("In-place lgamma not supported for integral tensors"),
                "TestMeta",
                "test_meta_inplace",
                dtypes=integral_types_and(torch.bool),
            ),
            *_foreach_unary_meta_xfails(
                complex_types() + integral_types_and(torch.bool),
                complex_types(),
            ),
            _foreach_unary_cuda_complex128_autodiff_xfail(),
        ),
    ),
]
foreach_binary_op_db: list[OpInfo] = [
ForeachFuncInfo(
"add",
sample_inputs_func=foreach_inputs_sample_func(2, True, True, True),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.float16, torch.int32),
supports_alpha_param=True,
supports_autograd=True,
supports_inplace_autograd=True,
supports_forward_ad=True,
decorators=(
# These tests fail with aten._local_scalar_dense not being implemented.
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_meta_outplace"),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_meta_inplace",
dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.float16)),
# Samples have complex types and inplace only works if the dtype is complex.
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_meta_inplace",
dtypes=(torch.bool,)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_inplace",
dtypes=(torch.bool,)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_inplace_all_strides",
dtypes=integral_types() + complex_types_and(torch.bool, torch.bfloat16, torch.float16, torch.float64)),
),
),
ForeachFuncInfo(
"sub",
sample_inputs_func=foreach_inputs_sample_func(2, True, True),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
supports_alpha_param=True,
supports_autograd=True,
supports_inplace_autograd=True,
supports_forward_ad=True,
decorators=(
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_meta_inplace"),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_inplace"),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_meta_inplace"),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_meta_outplace"),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_outplace"),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_meta_outplace"),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_inplace_all_strides"),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_outplace_all_strides"),
DecorateInfo(unittest.skip("consistently fails internally and causes other tests to appear flaky"),
"TestForeach", "test_parity", dtypes=(torch.complex128,),
active_if=lambda kwargs: IS_FBCODE and not kwargs["noncontiguous"]),
),
),
ForeachFuncInfo(
"mul",
sample_inputs_func=foreach_inputs_sample_func(2, True, True, True),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
supports_autograd=True,
supports_inplace_autograd=True,
supports_forward_ad=True,
decorators=(
# Samples have complex types and inplace only works if the dtype is complex.
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_meta_inplace",
dtypes=(torch.bool,)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_inplace",
dtypes=(torch.bool,)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_meta_inplace", dtypes=(torch.bool,)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_inplace_all_strides",
dtypes=(torch.bool,)),
DecorateInfo(unittest.skip("consistently fails internally and causes other tests to appear flaky"),
"TestForeach", "test_parity", dtypes=(torch.complex128,),
active_if=lambda kwargs: IS_FBCODE and not kwargs["noncontiguous"]),
),
),
ForeachFuncInfo(
"div",
sample_inputs_func=foreach_inputs_sample_func(2, True, True, True),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.float16, torch.int32, torch.int8),
supports_autograd=True,
supports_inplace_autograd=True,
supports_forward_ad=True,
decorators=(
# Samples have complex types and inplace only works if the dtype is complex.
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_meta_inplace",
dtypes=integral_types_and(torch.bool)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_inplace",
dtypes=integral_types_and(torch.bool)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_meta_inplace",
dtypes=integral_types_and(torch.bool)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_inplace_all_strides",
dtypes=integral_types_and(torch.bool)),
),
),
ForeachFuncInfo(
"clamp_min",
sample_inputs_func=foreach_inputs_sample_func(2, True, True),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.float16, torch.int64, torch.int32, torch.int8, torch.bool),
supports_autograd=True,
supports_inplace_autograd=True,
supports_forward_ad=True,
decorators=(
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_meta_inplace",
dtypes=complex_types_and(torch.bool)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_inplace",
dtypes=complex_types_and(torch.bool)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_meta_inplace",
dtypes=complex_types_and(torch.bool)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_meta_outplace",
dtypes=complex_types_and(torch.bool)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_outplace",
dtypes=complex_types_and(torch.bool)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_meta_outplace",
dtypes=complex_types_and(torch.bool)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_inplace_all_strides",
dtypes=complex_types_and(torch.bool)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_outplace_all_strides",
dtypes=complex_types_and(torch.bool)),
DecorateInfo(
unittest.expectedFailure,
"TestForeach",
"test_autodiff",
device_type="cuda",
dtypes=(torch.complex128,),
),
DecorateInfo(
unittest.expectedFailure,
"TestForeach",
"test_binary_op_scalar_with_overlapping_tensors",
dtypes=complex_types(),
),
),
),
ForeachFuncInfo(
"clamp_max",
sample_inputs_func=foreach_inputs_sample_func(2, True, True),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.float16, torch.int64, torch.int32, torch.int8, torch.bool),
supports_autograd=True,
supports_inplace_autograd=True,
supports_forward_ad=True,
decorators=(
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_meta_inplace",
dtypes=complex_types_and(torch.bool)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_inplace",
dtypes=complex_types_and(torch.bool)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_meta_inplace",
dtypes=complex_types_and(torch.bool)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_meta_outplace",
dtypes=complex_types_and(torch.bool)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_outplace",
dtypes=complex_types_and(torch.bool)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_meta_outplace",
dtypes=complex_types_and(torch.bool)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_inplace_all_strides",
dtypes=complex_types_and(torch.bool)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_outplace_all_strides",
dtypes=complex_types_and(torch.bool)),
DecorateInfo(
unittest.expectedFailure,
"TestForeach",
"test_autodiff",
device_type="cuda",
dtypes=(torch.complex128,),
),
DecorateInfo(
unittest.expectedFailure,
"TestForeach",
"test_binary_op_scalar_with_overlapping_tensors",
dtypes=complex_types(),
),
),
),
# note(crcrpar): forward ad not implemented.
ForeachFuncInfo(
"minimum",
sample_inputs_func=foreach_inputs_sample_func(2, True, True),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32),
supports_autograd=True,
supports_inplace_autograd=False,
supports_forward_ad=False,
decorators=(
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_meta_inplace",
dtypes=complex_types_and(torch.bool)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_inplace",
dtypes=complex_types_and(torch.bool)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_meta_inplace",
dtypes=complex_types_and(torch.bool)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_meta_outplace",
dtypes=complex_types_and(torch.bool)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_outplace",
dtypes=complex_types_and(torch.bool)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_meta_outplace",
dtypes=complex_types_and(torch.bool)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_inplace_all_strides",
dtypes=complex_types_and(torch.bool)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_outplace_all_strides",
dtypes=complex_types_and(torch.bool)),
DecorateInfo(
unittest.expectedFailure,
"TestForeach",
"test_autodiff",
device_type="cuda",
dtypes=(torch.complex128,),
),
DecorateInfo(
unittest.expectedFailure,
"TestForeach",
"test_binary_op_scalar_with_overlapping_tensors",
dtypes=complex_types(),
),
),
),
# note(crcrpar): forward ad not implemented.
ForeachFuncInfo(
"maximum",
sample_inputs_func=foreach_inputs_sample_func(2, True, True),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32),
supports_autograd=True,
supports_forward_ad=False,
supports_inplace_autograd=False,
decorators=(
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_meta_inplace",
dtypes=complex_types_and(torch.bool)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_inplace",
dtypes=complex_types_and(torch.bool)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_meta_inplace",
dtypes=complex_types_and(torch.bool)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_meta_outplace",
dtypes=complex_types_and(torch.bool)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_outplace",
dtypes=complex_types_and(torch.bool)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_meta_outplace",
dtypes=complex_types_and(torch.bool)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_inplace_all_strides",
dtypes=complex_types_and(torch.bool)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_outplace_all_strides",
dtypes=complex_types_and(torch.bool)),
DecorateInfo(
unittest.expectedFailure,
"TestForeach",
"test_autodiff",
device_type="cuda",
dtypes=(torch.complex128,),
),
DecorateInfo(
unittest.expectedFailure,
"TestForeach",
"test_binary_op_scalar_with_overlapping_tensors",
dtypes=complex_types(),
),
),
),
ForeachFuncInfo(
"pow",
supports_alpha_param=False,
supports_scalar_self_arg=True,
sample_inputs_func=foreach_inputs_sample_func(2, True, True),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.float16, torch.int32, torch.int8, torch.bool),
supports_autograd=True,
supports_inplace_autograd=True,
supports_forward_ad=True,
decorators=(
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_meta_inplace", dtypes=(torch.bool,)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_inplace",
dtypes=(torch.bool,)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_meta_inplace", dtypes=(torch.bool,)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_meta_outplace", dtypes=(torch.bool,)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_meta_outplace",
dtypes=(torch.bool,)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_outplace",
dtypes=(torch.bool,)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_meta_outplace",
dtypes=(torch.bool,)),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_outplace",
dtypes=(torch.bool,),),
DecorateInfo(unittest.skip("flaky"), "TestForeach", "test_parity", device_type="cpu", dtypes=(torch.complex64,)),
DecorateInfo(
unittest.skip("failed starting on ROCm 6.2"),
"TestForeach",
"test_parity",
device_type="cuda",
dtypes=(torch.complex64,),
active_if=TEST_WITH_ROCM),
DecorateInfo(
unittest.expectedFailure,
"TestForeach",
"test_binary_op_with_scalar_self_support",
device_type="cuda",
dtypes=(torch.bool,),
active_if=lambda kwargs: kwargs["is_fastpath"],
),
),
backward_requires_result=True,
),
ForeachFuncInfo(
"copy",
sample_inputs_func=foreach_inputs_sample_func(2, False, False),
supports_out=False,
supports_forward_ad=False,
supports_autograd=False,
supports_inplace_autograd=False,
)
]
# Foreach pointwise op database: ForeachFuncInfo entries for "addcmul" and "addcdiv".
# Both entries draw their samples from foreach_pointwise_sample_func(4, True, True) and
# list the TestMeta variants (with the dtypes they apply to) that are marked as
# expected failures.
foreach_pointwise_op_db: list[ForeachFuncInfo] = [
    ForeachFuncInfo(
        "addcmul",
        sample_inputs_func=foreach_pointwise_sample_func(4, True, True),
        dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        decorators=(
            # Out-of-place TestMeta variants: expected to fail for bool.
            DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_meta_outplace", dtypes=(torch.bool,)),
            DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_outplace",
                         dtypes=(torch.bool,)),
            DecorateInfo(unittest.expectedFailure, "TestMeta", "test_meta_outplace", dtypes=(torch.bool,)),
            DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_outplace_all_strides",
                         dtypes=(torch.bool,)),
            # Samples have complex types and inplace only works if the dtype is complex.
            DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_meta_inplace", dtypes=(torch.bool,)),
            DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_inplace",
                         dtypes=(torch.bool,)),
            DecorateInfo(unittest.expectedFailure, "TestMeta", "test_meta_inplace", dtypes=(torch.bool,)),
            # NOTE: the in-place all-strides variant is listed for a wider dtype set
            # (integral + complex + bool) than the other in-place variants above.
            DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_inplace_all_strides",
                         dtypes=integral_types() + complex_types_and(torch.bool)),
        ),
    ),
    ForeachFuncInfo(
        "addcdiv",
        sample_inputs_func=foreach_pointwise_sample_func(4, True, True),
        dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        decorators=(
            # Samples have complex types and inplace only works if the dtype is complex.
            DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_meta_inplace",
                         dtypes=integral_types_and(torch.bool)),
            DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_inplace",
                         dtypes=integral_types_and(torch.bool)),
            DecorateInfo(unittest.expectedFailure, "TestMeta", "test_meta_inplace",
                         dtypes=integral_types_and(torch.bool)),
            # The all-strides variants (here and below) additionally cover complex dtypes.
            DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_inplace_all_strides",
                         dtypes=integral_types() + complex_types_and(torch.bool)),
            # fails with div_cpu is not implemented with ComplexHalf
            DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_meta_outplace",
                         dtypes=integral_types_and(torch.bool)),
            DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_outplace",
                         dtypes=integral_types_and(torch.bool)),
            DecorateInfo(unittest.expectedFailure, "TestMeta", "test_meta_outplace",
                         dtypes=integral_types_and(torch.bool)),
            DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_outplace_all_strides",
                         dtypes=integral_types() + complex_types_and(torch.bool)),
        ),
    ),
]
# Foreach reduction op database: ForeachFuncInfo entries for "max" and "norm".
# Each entry names its sample-input generator and the TestForeach / TestMeta
# variants that are marked as expected failures.
foreach_reduce_op_db: list[ForeachFuncInfo] = [
    ForeachFuncInfo(
        "max",
        sample_inputs_func=foreach_max_sample_func(1, False, False),
        dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32),
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        decorators=(
            # no complex support for ordering ops like max
            # (every decorator below targets complex128 / complex64 only)
            DecorateInfo(
                unittest.expectedFailure,
                "TestForeach",
                "test_autodiff",
                dtypes=(torch.complex128, torch.complex64),
            ),
            DecorateInfo(
                unittest.expectedFailure,
                "TestForeach",
                "test_foreach_reduce_large_input",
                dtypes=(torch.complex128, torch.complex64),
            ),
            DecorateInfo(
                unittest.expectedFailure,
                "TestMeta",
                "test_dispatch_symbolic_meta_outplace",
                dtypes=(torch.complex128, torch.complex64),
            ),
            DecorateInfo(
                unittest.expectedFailure,
                "TestMeta",
                "test_meta_outplace",
                dtypes=(torch.complex128, torch.complex64),
            ),
            DecorateInfo(
                unittest.expectedFailure,
                "TestMeta",
                "test_dispatch_meta_outplace",
                dtypes=(torch.complex128, torch.complex64),
            ),
        ),
    ),
    ForeachFuncInfo(
        "norm",
        sample_inputs_func=foreach_norm_sample_func(1, False, False),
        dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        decorators=(
            # The in-place TestMeta variants carry no dtype/device restriction here,
            # while the out-of-place variants are restricted to integral dtypes and bool.
            DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_meta_inplace"),
            DecorateInfo(
                unittest.expectedFailure,
                "TestMeta",
                "test_dispatch_meta_outplace",
                dtypes=integral_types_and(torch.bool),
            ),
            DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_inplace"),
            DecorateInfo(
                unittest.expectedFailure,
                "TestMeta",
                "test_dispatch_symbolic_meta_outplace",
                dtypes=integral_types_and(torch.bool),
            ),
            DecorateInfo(unittest.expectedFailure, "TestMeta", "test_meta_inplace"),
            DecorateInfo(
                unittest.expectedFailure,
                "TestMeta",
                "test_meta_outplace",
                dtypes=integral_types_and(torch.bool),
            ),
            DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_inplace_all_strides"),
            # Large-input reduction: expected failure on CUDA for integral dtypes and bool.
            DecorateInfo(
                unittest.expectedFailure,
                "TestForeach",
                "test_foreach_reduce_large_input",
                device_type="cuda",
                dtypes=integral_types_and(torch.bool),
            ),
        ),
    ),
]
# Remaining foreach ops: currently a single ForeachFuncInfo entry for "lerp".
# All eight TestMeta variants listed below are marked as expected failures for
# integral dtypes and bool.
foreach_other_op_db: list[ForeachFuncInfo] = [
    ForeachFuncInfo(
        "lerp",
        sample_inputs_func=foreach_inputs_sample_func(3, True, True),
        dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
        supports_autograd=True,
        supports_inplace_autograd=True,
        supports_forward_ad=True,
        decorators=(
            DecorateInfo(
                unittest.expectedFailure,
                "TestMeta",
                "test_dispatch_meta_inplace",
                dtypes=integral_types_and(torch.bool),
            ),
            DecorateInfo(
                unittest.expectedFailure,
                "TestMeta",
                "test_dispatch_meta_outplace",
                dtypes=integral_types_and(torch.bool),
            ),
            DecorateInfo(
                unittest.expectedFailure,
                "TestMeta",
                "test_dispatch_symbolic_meta_outplace",
                dtypes=integral_types_and(torch.bool),
            ),
            DecorateInfo(
                unittest.expectedFailure,
                "TestMeta",
                "test_dispatch_symbolic_meta_inplace",
                dtypes=integral_types_and(torch.bool),
            ),
            DecorateInfo(
                unittest.expectedFailure,
                "TestMeta",
                "test_meta_inplace",
                dtypes=integral_types_and(torch.bool),
            ),
            DecorateInfo(
                unittest.expectedFailure,
                "TestMeta",
                "test_meta_outplace",
                dtypes=integral_types_and(torch.bool),
            ),
            DecorateInfo(
                unittest.expectedFailure,
                "TestMeta",
                "test_dispatch_symbolic_meta_inplace_all_strides",
                dtypes=integral_types_and(torch.bool),
            ),
            DecorateInfo(
                unittest.expectedFailure,
                "TestMeta",
                "test_dispatch_symbolic_meta_outplace_all_strides",
                dtypes=integral_types_and(torch.bool),
            ),
        ),
    ),
]
def reference_sign(x):
    """NumPy reference for ``sign`` that also accepts boolean arrays.

    Non-bool inputs are forwarded to ``np.sign`` unchanged. Bool inputs are
    evaluated through a ``uint8`` loop and the result is cast back to bool.
    """
    if x.dtype != np.bool_:
        return np.sign(x)
    # `np.sign` doesn't support `bool`:
    # >>> np.sign(True)
    # ufunc 'sign' did not contain a loop
    # with signature matching types dtype('bool') -> dtype('bool')
    sign_as_uint8 = np.sign(x, dtype=np.uint8)
    return sign_as_uint8.astype(np.bool_)
def reference_sgn(x):
    """NumPy reference for ``torch.sgn``.

    NumPy doesn't have an equivalent to `torch.sgn` when the dtype is complex:
    for complex inputs, `np.sign` returns sign(x.real) + 0j if x.real != 0 else
    sign(x.imag) + 0j, while `torch.sgn` returns 0 if abs(input) == 0 else
    input / abs(input). Non-complex inputs are delegated to `reference_sign`.
    """
    is_complex = x.dtype in [np.complex64, np.complex128]
    if not is_complex:
        return reference_sign(x)

    unit = x / np.abs(x)

    if unit.ndim != 0:
        # Array case: overwrite the entries where x == 0 (0 / 0 above) with zero.
        unit[x == 0] = complex(0, 0)
        return unit

    # Scalar (0-d) case: the result can't be assigned into,
    # so build a fresh zero of the input dtype when x == 0.
    if (x == 0):
        return np.array(complex(0, 0), dtype=x.dtype)
    return unit
def reference_sigmoid(x):
    """NumPy/SciPy reference for sigmoid.

    Real inputs use ``scipy.special.expit``; complex inputs, which `expit` does
    not support, are computed directly as ``1 / (1 + exp(-x))``.
    """
    if x.dtype not in [np.complex64, np.complex128]:
        return scipy.special.expit(x)
    # 'scipy.special.expit' not supported for the input types
    denominator = 1 + np.exp(-x)
    return (1 / denominator)
def reference_logsigmoid(x):
    """NumPy reference for log-sigmoid.

    Selects ``x - log1p(exp(x))`` where ``x < 0`` and ``-log1p(exp(-x))``
    elsewhere. Both expressions are evaluated for every element before
    `np.where` picks one.
    """
    for_negative = x - np.log1p(np.exp(x))
    for_non_negative = -np.log1p(np.exp(-x))
    return np.where(x < 0, for_negative, for_non_negative)
def reference_hardsigmoid(x):
    """NumPy reference for hardsigmoid: ``x / 6 + 0.5`` limited to ``[0, 1]``.

    The result is cast back to the dtype of ``x``.
    """
    shifted = x / 6 + 0.5
    # Lower bound first (no upper bound yet) ...
    floored = np.clip(shifted, 0, None)
    # ... then cap everything above 1 at 1.
    capped = np.where(floored > 1, 1, floored)
    return capped.astype(x.dtype)
def reference_lgamma(x):
    """SciPy-based reference for ``lgamma``.

    scipy.special.gammaln returns `-inf` when input is `-inf`, while PyTorch,
    C and C++ all return `inf` when input is `-inf`. Reference:
    https://en.cppreference.com/w/cpp/numeric/math/lgamma
    https://en.cppreference.com/w/c/numeric/math/lgamma
    To handle that discrepancy, `-inf` entries of floating inputs are replaced
    with `inf` before calling `gammaln`, so they map to `inf` as expected.
    """
    if x.dtype.kind == 'f':
        positive_inf = np.array(float('inf'), dtype=x.dtype)
        x = np.where(x == float('-inf'), positive_inf, x)

    result = scipy.special.gammaln(x)

    # `scipy.special.gammaln` returns output of float32 when input is float16,
    # while `torch.lgamma` preserves `float16`. But due to smaller range of float16,
    # Pytorch version outputs `inf` while SciPy returns finite values.
    # Casting back to float16 accounts for both.
    return result.astype(np.float16) if x.dtype == np.float16 else result
def reference_mvlgamma(x, d):
    """SciPy-based reference for ``mvlgamma`` via ``scipy.special.multigammaln``.

    For float16 inputs the result is cast to float16; otherwise SciPy's result
    is returned as-is.
    """
    result = scipy.special.multigammaln(x, d)
    if x.dtype == np.float16:
        result = result.astype(np.float16)
    return result
def reference_softplus(input, beta=1, threshold=20):
    """NumPy reference for softplus.

    Elements with ``input * beta <= threshold`` become
    ``log(1 + exp(beta * input)) / beta``; all other elements are passed
    through unchanged. The input array itself is not modified.
    """
    result = input.copy()
    below_threshold = input * beta <= threshold
    selected = input[below_threshold]
    result[below_threshold] = np.log(1 + np.exp(beta * selected)) / beta
    return result
def reference_gelu(X, *, approximate='none'):
    """NumPy/SciPy reference for GELU.

    With ``approximate='tanh'`` the tanh-based approximation is used; any other
    value selects the exact form ``X * Phi(X)``, with ``Phi`` taken from
    ``stats.norm.cdf``.
    """
    if approximate != 'tanh':
        return X * stats.norm.cdf(X)

    sqrt_2_over_pi = math.sqrt(2 / math.pi)
    inner = sqrt_2_over_pi * (X + 0.044715 * np.power(X, 3.0))
    return 0.5 * X * (1.0 + np.tanh(inner))
def reference_one_hot(a: npt.NDArray, num_classes: int = -1) -> npt.NDArray:
    """NumPy reference for one-hot encoding.

    Args:
        a: array of class indices.
        num_classes: width of the encoding; ``-1`` derives it as ``max(a) + 1``.

    Returns:
        Array of shape ``(*a.shape, num_classes)`` with the dtype of ``a``.
    """
    if num_classes == -1:
        num_classes = int(np.amax(a) + 1)

    # Flat position of each "1": start of the element's row plus its class index.
    row_starts = np.arange(0, a.size, dtype=np.int64) * num_classes
    flat_positions = a.reshape(-1) + row_starts

    encoded = np.zeros((a.size, num_classes), dtype=a.dtype)
    np.put(encoded, flat_positions, 1)
    return encoded.reshape(*a.shape, -1)
def reference_mse_loss(input, target, reduction="mean"):
    """NumPy reference for MSE loss.

    Returns the mean of the squared error for ``reduction="mean"``, its sum for
    ``reduction="sum"``, and the unreduced squared error for any other value.
    """
    squared_error = (input - target) ** 2
    if reduction == "mean":
        return np.mean(squared_error)
    if reduction == "sum":
        return np.sum(squared_error)
    # reduction == "none"
    return squared_error
def reference_layer_norm(inp: npt.NDArray, normalized_shape: tuple[int], weight=None, bias=None, eps=1e-5):
    """NumPy reference for layer norm.

    Delegates to `reference_native_layer_norm` and returns only the first
    element of its result (the normalized output).
    """
    normalized, _mean, _rstd = reference_native_layer_norm(inp, normalized_shape, weight, bias, eps)
    return normalized
def reference_native_layer_norm(inp: npt.NDArray, normalized_shape: tuple[int], weight, bias, eps):
    """NumPy reference for native layer norm.

    Args:
        inp: input array; its trailing dimensions are flattened into one
            feature axis of size ``prod(normalized_shape)``.
        normalized_shape: shape of the trailing dimensions being normalized.
        weight: optional scale, flattened and multiplied along the feature axis.
        bias: optional shift, flattened and added along the feature axis.
        eps: value added to the (biased) variance before the square root.

    Returns:
        ``(output, mean, rstd)``: the output has the shape of ``inp``; the two
        statistics have the leading dimensions of ``inp`` followed by ones.
    """
    n_features = np.prod(normalized_shape)
    rows = inp.reshape(-1, n_features)  # type: ignore[call-overload]

    mean = rows.mean(axis=-1, keepdims=True)
    var = rows.var(axis=-1, ddof=0, keepdims=True)
    std = np.sqrt(var + eps)

    out = (rows - mean) / std
    # Scale is applied before shift, each only when provided.
    if weight is not None:
        out = out * weight.reshape(-1)
    if bias is not None:
        out = out + bias.reshape(-1)

    n_normalized = len(normalized_shape)
    n_leading = inp.ndim - n_normalized
    stat_shape = inp.shape[:n_leading] + (1,) * n_normalized
    rstd = 1.0 / std
    return out.reshape(*inp.shape), mean.reshape(stat_shape), rstd.reshape(stat_shape)
def reference_rms_norm(inp: npt.NDArray, normalized_shape: tuple[int], weight=None, eps=None):
    """NumPy reference for RMS norm.

    Args:
        inp: input array; its trailing dimensions are flattened into one
            feature axis of size ``prod(normalized_shape)``.
        normalized_shape: shape of the trailing dimensions being normalized.
        weight: optional scale, flattened and multiplied along the feature axis.
        eps: value added to the mean square before the square root. ``None``
            selects ``torch.finfo(...).eps`` of the torch dtype matching ``inp``.

    Returns:
        Array with the shape of ``inp``.
    """
    if eps is None:
        eps = torch.finfo(numpy_to_torch_dtype(inp.dtype)).eps

    n_features = np.prod(normalized_shape)
    rows = inp.reshape(-1, n_features)  # type: ignore[call-overload]

    mean_square = (rows**2).mean(axis=-1, keepdims=True)
    out = rows / np.sqrt(mean_square + eps)
    if weight is not None:
        out = out * weight.reshape(-1)
    return out.reshape(*inp.shape)
def reference_group_norm(inp: npt.NDArray, num_groups: int, weight=None, bias=None, eps=1e-5):
    """NumPy reference for group norm.

    Args:
        inp: input array; dimension 0 is kept, and the rest is split into
            ``num_groups`` groups that are normalized independently.
        num_groups: number of groups.
        weight: optional per-channel scale vector.
        bias: optional per-channel shift vector.
        eps: value added to the (biased) variance before the square root.

    Returns:
        Array with the shape of ``inp``.
    """
    grouped = inp
    # Inputs with zero elements skip the regrouping reshape.
    if np.prod(inp.shape) != 0:
        grouped = inp.reshape((inp.shape[0], num_groups, -1))

    mean = grouped.mean(axis=-1, keepdims=True)
    var = grouped.var(axis=-1, ddof=0, keepdims=True)
    out = ((grouped - mean) / np.sqrt(var + eps)).reshape(inp.shape)

    # Axes to insert around a per-channel vector so it lines up with dimension 1.
    broadcast_axes = [0] + [idx + 2 for idx in range(inp.ndim - 2)]
    needs_expand = len(out.shape) > 2

    if weight is not None:
        # weight is a vector of length equal to the channel
        if needs_expand:
            weight = np.expand_dims(weight, broadcast_axes)
        out = out * weight
    if bias is not None:
        # bias is a vector of length equal to the channel
        if needs_expand:
            bias = np.expand_dims(bias, broadcast_axes)
        out = out + bias
    return out
# using a custom reference function since numpy only has a string side arg (instead of right and side) and doesn't
# have an out_int32 arg. Additionally, numpy doesn't support searchsorted with ND arrays, so this splits those into
# stacked 1D cases
def reference_searchsorted(sorted_sequence, boundary, out_int32=False, right=False, side='left', sorter=None):
    """NumPy reference for ``searchsorted`` with torch-style arguments.

    Args:
        sorted_sequence: 1D or ND array; for ND input each innermost row is
            searched independently.
        boundary: values to locate.
        out_int32: cast the resulting indices to ``int32`` when true.
        right: when true, behaves like ``side='right'``.
        side: ``'right'`` selects the right side; anything else means left.
        sorter: optional index array forwarded to ``np.searchsorted``.

    Returns:
        Index array; for ND input it has the shape of ``boundary``.
    """
    side = 'right' if (right or side == 'right') else 'left'

    def _maybe_int32(indices):
        # Apply the optional out_int32 cast.
        return indices.astype(np.int32) if out_int32 else indices

    if len(sorted_sequence.shape) == 1:
        found = np.searchsorted(sorted_sequence, boundary, side=side, sorter=sorter)
        return _maybe_int32(found)

    if sorted_sequence.shape[0] == 0:
        # Leading dimension is empty: search the flattened arrays directly.
        if sorter is not None:
            sorter = sorter.flatten()
        found = np.searchsorted(sorted_sequence.flatten(), boundary.flatten(), side=side, sorter=sorter)
        return _maybe_int32(found).reshape(boundary.shape)

    # numpy searchsorted only supports 1D inputs so we split up ND inputs
    out_shape = boundary.shape
    n_rows = np.prod(sorted_sequence.shape[:-1])
    sequence_rows = sorted_sequence.reshape(n_rows, -1)
    boundary_rows = boundary.reshape(n_rows, -1)
    # Without a sorter every row is paired with None.
    sorter_rows = itertools.repeat(None) if sorter is None else sorter.reshape(n_rows, -1)

    per_row = [
        _maybe_int32(np.searchsorted(seq_row, bnd_row, side=side, sorter=sort_row))
        for seq_row, bnd_row, sort_row in zip(sequence_rows, boundary_rows, sorter_rows)
    ]
    return np.stack(per_row).reshape(out_shape)
def loss_reference_reduction_wrapper(fn):
    """Wrap an unreduced loss reference so it accepts a ``reduction`` keyword.

    The returned callable forwards ``input``, ``target`` and any extra keyword
    arguments to ``fn``, then applies ``np.mean`` for ``reduction="mean"``,
    ``np.sum`` for ``reduction="sum"``, and no reduction for any other value.

    Raises:
        RuntimeError: if ``size_average`` or ``reduce`` is passed as anything
            other than ``None``.
    """
    def wrapper(input, target, *, size_average=None, reduce=None, reduction="mean", **other_kwargs):
        uses_deprecated_kwargs = not (size_average is None and reduce is None)
        if uses_deprecated_kwargs:
            raise RuntimeError(
                "The keyword arguments 'size_average' and 'reduce' are deprecated and not supported by this wrapper"
            )

        unreduced = fn(input, target, **other_kwargs)
        if reduction == "mean":
            return np.mean(unreduced)
        if reduction == "sum":
            return np.sum(unreduced)
        # reduction == "none"
        return unreduced

    return wrapper
@loss_reference_reduction_wrapper
def reference_smooth_l1_loss(input, target, beta=1.0):
    """NumPy reference for the unreduced smooth L1 loss.

    Elements with ``|input - target| >= beta`` get ``|diff| - 0.5 * beta``;
    the remaining elements get ``diff ** 2 / (2 * beta)``. The result has the
    shape and dtype of ``input``. The reduction is applied by the decorator.
    """
    delta = input - target
    magnitude = np.abs(delta)
    linear_region = magnitude >= beta
    quadratic_region = ~linear_region

    result = np.empty_like(input)
    result[linear_region] = magnitude[linear_region] - 0.5 * beta
    result[quadratic_region] = delta[quadratic_region] ** 2 / (2 * beta)
    return result
def reference_std_var(f):
    """Forwards unbiased/correction kwargs as NumPy's equivalent ddof"""
    numpy_reduction = reference_reduction_numpy(f)

    @wraps(numpy_reduction)
    def wrapper(x: npt.NDArray, *args, **kwargs):
        has_unbiased = 'unbiased' in kwargs
        has_correction = 'correction' in kwargs
        # The two spellings are mutually exclusive.
        assert not (has_unbiased and has_correction)

        if has_unbiased:
            # A boolean `unbiased` becomes ddof 1 or 0.
            kwargs['ddof'] = int(kwargs.pop('unbiased'))
        elif has_correction:
            kwargs['ddof'] = kwargs.pop('correction')

        return numpy_reduction(x, *args, **kwargs)

    return wrapper
def generate_std_var_kwargs(t: torch.Tensor, **kwargs):
    """Generates unbiased/correction kwargs for std/var operators"""
    for unbiased in (True, False):
        yield ((), {'unbiased': unbiased})

    # Currently, calling std with correction is only enabled when
    # both dim and keepdim are provided.
    if not ('dim' in kwargs and 'keepdim' in kwargs):
        return

    for correction in (0, 1):
        yield ((), {'correction': correction})

    # Product of the sizes of the dimension(s) selected by `dim`.
    reduced_numel = torch.tensor(t.shape)[kwargs.get('dim')].prod()
    yield ((), {'correction': reduced_numel // 2})
def error_inputs_mean(op_info, device, is_ref=False, **kwargs):
    """Yield error inputs for ``mean`` that hit its dtype-inference checks.

    Two cases are produced: an int64 input tensor, and a float32 input combined
    with ``dtype=torch.int64``. When ``is_ref`` is true the expected message
    names the dtype as ``torch.int64``; otherwise it names it as ``Long``.
    """
    prefix = r"mean\(\): could not infer output dtype. "
    got = r"Got: torch.int64" if is_ref else r"Got: Long"

    # Case 1: integer input tensor.
    int_input = make_tensor((3, 4, 5), dtype=torch.int64, device=device)
    yield ErrorInput(
        SampleInput(int_input, []),
        error_regex=prefix + r"Input dtype must be either a floating point or complex dtype. " + got,
    )

    # Case 2: floating input with an integer `dtype` keyword.
    float_input = make_tensor((3, 4, 5), dtype=torch.float32, device=device)
    yield ErrorInput(
        SampleInput(float_input, [], dtype=torch.int64),
        error_regex=prefix + r"Optional dtype must be either a floating point or complex dtype. " + got,
    )
# numpy implementation of torch.flatten
# unfortunately there's no np.flatten. we figure out the desired shape and call np.reshape
def reference_flatten(input, start_dim=0, end_dim=-1):
    """NumPy implementation of ``torch.flatten``.

    Computes the flattened shape and applies it with ``np.reshape``.

    Args:
        input: array to flatten.
        start_dim: first dimension to flatten (may be negative).
        end_dim: last dimension to flatten, inclusive (may be negative).

    Returns:
        ``input`` reshaped so that dimensions ``start_dim..end_dim`` are merged
        into one. A 0-d input yields an array of shape ``(1,)``.

    Raises:
        IndexError: if ``start_dim`` or ``end_dim`` is out of range.
        RuntimeError: if ``start_dim`` comes after ``end_dim``.
    """
    in_shape = input.shape
    in_rank = len(in_shape)

    # A 0-d input accepts exactly -1 and 0 as dimensions, so report that range
    # instead of the impossible [0, -1] that the generic formula would produce.
    min_dim, max_dim = (-1, 0) if in_rank == 0 else (-in_rank, in_rank - 1)
    for d in (start_dim, end_dim):
        if not min_dim <= d <= max_dim:
            raise IndexError(
                f"Dimension out of range (expected to be in range of [{min_dim}, {max_dim}], but got {d})"
            )

    if in_rank == 0:
        # Nothing to merge: a 0-d input always flattens to a single element.
        return np.reshape(input, (1,))

    # Wrap negative dimensions.
    if end_dim < 0:
        end_dim += in_rank
    if start_dim < 0:
        start_dim += in_rank
    if end_dim < start_dim:
        raise RuntimeError("flatten() has invalid args: start_dim cannot come after end_dim")

    merged_size = math.prod(in_shape[start_dim:end_dim + 1])
    out_shape = in_shape[:start_dim] + (merged_size,) + in_shape[end_dim + 1:]
    return np.reshape(input, out_shape)
def sample_inputs_alias_copy(op_info, device, dtype, requires_grad, **kwargs):
    """Yield two SampleInputs: a 1-D tensor of shape ``(S,)``, then a 0-d tensor.

    Both tensors are built with ``make_tensor`` using the given ``dtype``,
    ``device`` and ``requires_grad``. ``op_info`` and ``**kwargs`` are unused.
    """
    for shape in ((S,), ()):
        sample_tensor = make_tensor(
            shape,
            dtype=dtype,
            device=device,
            requires_grad=requires_grad,
        )
        yield SampleInput(sample_tensor)
# Operator database (sorted alphabetically)
op_db: list[OpInfo] = [
UnaryUfuncInfo('abs',
aliases=('absolute', ),
ref=np.abs,
dtypes=all_types_and_complex_and(torch.half, torch.bfloat16, torch.chalf),
dtypesIfCUDA=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
skips=(
DecorateInfo(unittest.skip("In-place abs not supported for complex tensors"), 'TestBwdGradients',
'test_inplace_grad', dtypes=(torch.cdouble,)),
DecorateInfo(unittest.skip("In-place abs not supported for complex tensors"), 'TestBwdGradients',
'test_inplace_gradgrad', dtypes=(torch.cdouble,)),
DecorateInfo(unittest.skip("In-place abs not supported for complex tensors"), 'TestFwdGradients',
'test_inplace_forward_mode_AD', dtypes=(torch.cdouble,)),
DecorateInfo(unittest.skip("In-place abs not supported for complex tensors"), "TestSparseUnaryUfuncs",
"test_inplace", dtypes=(torch.cdouble, torch.cfloat, torch.chalf)),
# Reference: https://github.com/pytorch/pytorch/issues/49224
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_small',
dtypes=[torch.int8], active_if=TEST_WITH_ASAN),
# TODO: Fix test_out_arg_all_dtypes as torch.empty_like(expected_output) where expected_output=op(input)
# We can break the logic of the loop over all possible types but it is OK.
# https://github.com/pytorch/pytorch/blob/master/test/test_unary_ufuncs.py#L440-L449
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_out_arg_all_dtypes',
dtypes=[torch.cfloat, torch.cdouble]),
DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_meta_inplace',
dtypes=(torch.cdouble, torch.cfloat, torch.chalf)),
DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_dispatch_meta_inplace',
dtypes=(torch.cdouble, torch.cfloat, torch.chalf)),
DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_dispatch_symbolic_meta_inplace',
dtypes=(torch.cdouble, torch.cfloat, torch.chalf)),
DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_dispatch_symbolic_meta_inplace_all_strides',
dtypes=(torch.cdouble, torch.cfloat, torch.chalf)),
),
supports_fwgrad_bwgrad=True,
assert_autodiffed=True,
supports_sparse=True,
supports_sparse_csr=True,
supports_sparse_csc=True,
supports_sparse_bsr=True,
supports_sparse_bsc=True,
supports_forward_ad=True),
# NOTE: CPU complex acos produces incorrect outputs (https://github.com/pytorch/pytorch/issues/42952)
UnaryUfuncInfo('acos',
aliases=('arccos', ),
ref=np.arccos,
domain=(-1, 1),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
dtypesIfCUDA=all_types_and_complex_and(torch.chalf, torch.bool, torch.half, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
assert_autodiffed=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
promotes_int_to_float=True,
decorators=(precisionOverride({torch.float16: 1e-2,
torch.bfloat16: 1e-1,
torch.complex64: 1e-2}),),
skips=(
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_normal',
device_type='cuda', dtypes=[torch.cdouble], active_if=IS_WINDOWS),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
device_type='cuda', dtypes=[torch.cdouble], active_if=IS_WINDOWS),
# Failing with wrong imaginary sign on at least some Windows jobs
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_small',
device_type='cuda', dtypes=[torch.cdouble],
active_if=IS_WINDOWS),
# Failing with wrong imaginary sign on at least some Windows jobs
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large',
device_type='cuda', dtypes=[torch.cdouble],
active_if=IS_WINDOWS),
DecorateInfo(unittest.skip("Skipped!"), 'TestBwdGradients', 'test_fn_grad',
dtypes=[torch.cdouble], active_if=IS_WINDOWS),
DecorateInfo(unittest.skip("Skipped!"), 'TestBwdGradients', 'test_method_grad',
dtypes=[torch.cdouble], active_if=IS_WINDOWS),
DecorateInfo(unittest.skip("Skipped!"), 'TestBwdGradients', 'test_inplace_grad',
dtypes=[torch.cdouble], active_if=IS_WINDOWS),
DecorateInfo(unittest.skip("Skipped!"), 'TestFwdGradients', 'test_forward_mode_AD',
dtypes=[torch.cdouble], active_if=IS_WINDOWS),
DecorateInfo(unittest.skip("Skipped!"), 'TestFwdGradients', 'test_inplace_forward_mode_AD',
dtypes=[torch.cdouble], active_if=IS_WINDOWS),)),
# NOTE: the derivative for inplace acosh is not implemented
UnaryUfuncInfo('acosh',
aliases=('arccosh', ),
ref=np.arccosh,
domain=(1, None),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
dtypesIfCUDA=all_types_and_complex_and(torch.chalf, torch.bool, torch.half, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
decorators=(precisionOverride({torch.bfloat16: 5e-2}),),
supports_inplace_autograd=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
promotes_int_to_float=True,
skips=(
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_normal',
device_type='cuda', dtypes=[torch.cdouble], active_if=IS_WINDOWS),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
device_type='cuda', dtypes=[torch.cdouble], active_if=IS_WINDOWS),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble]),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble]),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
device_type='cuda', dtypes=[torch.cdouble],
active_if=IS_WINDOWS),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large',
device_type='cuda', dtypes=[torch.cdouble],
active_if=IS_WINDOWS),
# Failing with wrong imaginary sign on at least some Windows jobs
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_small',
device_type='cuda', dtypes=[torch.cdouble],
active_if=IS_WINDOWS),
),
# acosh is not defined at x < 1 (real)
reference_numerics_filter=NumericsFilter(
condition=lambda x: (x < 1 if not x.is_complex() else torch.zeros_like(x, dtype=torch.bool)),
safe_val=2)),
BinaryUfuncInfo('add',
# NumPy has no builtin reference for the alpha kwarg, but it is easy enough to emulate
ref=lambda input, other, *, alpha=1: np.add(input, other) if alpha == 1 \
else np.add(input, np.multiply(alpha, other)),
dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16,
torch.float16, torch.chalf),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32),
assert_autodiffed=True,
sample_inputs_func=sample_inputs_add_sub,
supports_fwgrad_bwgrad=True,
supports_forward_ad=True,
supports_two_python_scalars=True,
decorators=(
DecorateInfo(
toleranceOverride({torch.chalf: tol(atol=1e-2, rtol=0)}),
'TestBinaryUfuncs', 'test_reference_numerics'),
),
skips=(
# boolean alpha not handled properly
DecorateInfo(unittest.expectedFailure,
'TestNNCOpInfo',
'test_nnc_correctness',
dtypes=(torch.bool,)),
DecorateInfo(unittest.skip("Skipped!"),
'TestCommon',
'test_numpy_refs',
dtypes=(torch.complex128,)),
DecorateInfo(unittest.skip("Skipped!"),
'TestBinaryUfuncs',
'test_reference_numerics_extremal_values',
dtypes=(torch.complex64, torch.complex128)),
)),
OpInfo('item',
op=lambda inp, *args, **kwargs: wrapper_set_seed(torch.Tensor.item, inp, *args, **kwargs),
ref=np.ndarray.item,
method_variant=None,
dtypes=all_types_and_complex_and(torch.bfloat16, torch.float16, torch.chalf, torch.bool),
dtypesIfHpu=custom_types(torch.float32),
supports_out=False,
supports_autograd=False,
error_inputs_func=error_inputs_item,
sample_inputs_func=sample_inputs_item,
skips=(
# Error testing item function variant
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit',
dtypes=(torch.float32, torch.complex64)),
# FX failed to normalize op - add the op to the op_skip list.
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
# RuntimeError: Composite compliance check failed with the above error.
DecorateInfo(unittest.expectedFailure, 'TestCompositeCompliance', 'test_operator'),
# Booleans mismatch: AssertionError: False is not true
DecorateInfo(unittest.expectedFailure, 'TestFakeTensor', 'test_fake_autocast'),
# Booleans mismatch: AssertionError: False is not true
DecorateInfo(unittest.expectedFailure, 'TestFakeTensor', 'test_fake'),
)),
OpInfo('arange',
dtypes=all_types_and(torch.bfloat16, torch.float16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32, torch.int8),
supports_out=True,
supports_autograd=False,
is_factory_function=True,
error_inputs_func=error_inputs_arange,
sample_inputs_func=sample_inputs_arange,
skips=(
# https://github.com/pytorch/pytorch/issues/81774
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
# Tests that assume input is a tensor or sequence of tensors
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_variant_consistency_eager'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_conj_view'),
# Lazy tensor failures
DecorateInfo(unittest.expectedFailure, 'TestLazyOpInfo', 'test_dispatched_to_lazy'),
DecorateInfo(unittest.skip("Skipped!"), 'TestLazyOpInfo', 'test_correctness'),
DecorateInfo(unittest.skip("Skipped!"), 'TestLazyOpInfo', 'test_correctness_with_reusing_ir'),
# Exception raised from analyzeImpl at ../torch/csrc/jit/ir/alias_analysis.cpp:608
# We don't have an op for aten::arange but it isn't a special case.
# Argument types: bool, bool, bool, int, int, Device, boo
DecorateInfo(unittest.expectedFailure, 'TestNNCOpInfo', 'test_nnc_correctness'),
# Captured graph does not contain aten::arange (succeeds on complex!)
# g: graph():
# %25 : Long(1, strides=[1], requires_grad=0, device=cpu) = prim::Constant[value={1}]()
# return (%25)
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit', dtypes=(torch.float32,)),
# UserWarning not triggered : Resized a non-empty tensor but did not warn about it.
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning'),
)),
OpInfo('cauchy',
op=lambda inp, *args, **kwargs: wrapper_set_seed(torch.Tensor.cauchy_, inp, *args, **kwargs),
inplace_variant=torch.Tensor.cauchy_,
dtypes=floating_types_and(torch.float16, torch.bfloat16),
supports_out=False,
supports_autograd=False,
allow_cow_input_materialize_forward=[0],
sample_inputs_func=sample_inputs_cauchy,
error_inputs_func=error_inputs_cauchy,
skips=(
# Tests that assume input tensor has a meaningful effect on output tensor
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_variant_consistency_eager'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
# AssertionError: JIT Test does not execute any logic
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
# AssertionError: Tensor-likes are not close!
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'),
# FX failed to normalize op - add the op to the op_skip list.
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
# vmap: calling random operator not supported
DecorateInfo(unittest.skip("Test expects tensor input"), "TestVmapOperatorsOpInfo", "test_vmap_exhaustive"),
DecorateInfo(unittest.skip("Test expects tensor input"), "TestVmapOperatorsOpInfo", "test_op_has_batch_rule"),
DecorateInfo(unittest.skip("make_traced() doesn't set seed properly!"), 'TestCommon', 'test_python_ref_executor'),
DecorateInfo(unittest.expectedFailure, 'TestDecomp', 'test_quick'),
)),
OpInfo('exponential',
op=lambda inp, *args, **kwargs: wrapper_set_seed(torch.Tensor.exponential_, inp, *args, **kwargs),
inplace_variant=torch.Tensor.exponential_,
dtypes=floating_types_and(torch.float16, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
supports_out=False,
supports_autograd=False,
allow_cow_input_materialize_forward=[0],
sample_inputs_func=sample_inputs_exponential,
error_inputs_func=error_inputs_exponential,
skips=(
# Tests that assume input tensor has a meaningful effect on output tensor
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_variant_consistency_eager'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
# AssertionError: JIT Test does not execute any logic
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
# AssertionError: Tensor-likes are not close!
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'),
# FX failed to normalize op - add the op to the op_skip list.
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
# vmap: calling random operator not supported
DecorateInfo(unittest.expectedFailure, "TestVmapOperatorsOpInfo", "test_vmap_exhaustive"),
DecorateInfo(unittest.expectedFailure, "TestVmapOperatorsOpInfo", "test_op_has_batch_rule"),
DecorateInfo(unittest.expectedFailure, 'TestDecomp', 'test_quick'),
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'),
)),
OpInfo('geometric',
op=lambda inp, *args, **kwargs: wrapper_set_seed(torch.Tensor.geometric_, inp, *args, **kwargs),
inplace_variant=torch.Tensor.geometric_,
dtypes=floating_types_and(torch.float16, torch.bfloat16, torch.int8, torch.int16, torch.int32, torch.int64, torch.uint8),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
supports_out=False,
supports_autograd=False,
allow_cow_input_materialize_forward=[0],
sample_inputs_func=sample_inputs_geometric,
error_inputs_func=error_inputs_geometric,
skips=(
# Tests that assume input tensor has a meaningful effect on output tensor
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_variant_consistency_eager'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
# AssertionError: JIT Test does not execute any logic
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
# AssertionError: Tensor-likes are not close!
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'),
# FX failed to normalize op - add the op to the op_skip list.
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
# vmap: calling random operator not supported
DecorateInfo(unittest.skip("Test expects tensor input"), "TestVmapOperatorsOpInfo", "test_vmap_exhaustive"),
DecorateInfo(unittest.skip("Test expects tensor input"), "TestVmapOperatorsOpInfo", "test_op_has_batch_rule"),
DecorateInfo(unittest.expectedFailure, 'TestDecomp', 'test_quick'),
)),
OpInfo('log_normal',
op=lambda inp, *args, **kwargs: wrapper_set_seed(torch.Tensor.log_normal_, inp, *args, **kwargs),
inplace_variant=torch.Tensor.log_normal_,
dtypes=floating_types_and(torch.float16, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
supports_out=False,
supports_autograd=False,
allow_cow_input_materialize_forward=[0],
sample_inputs_func=sample_inputs_log_normal,
error_inputs_func=error_inputs_log_normal,
skips=(
# Tests that assume input tensor has a meaningful effect on output tensor
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_variant_consistency_eager'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
# AssertionError: JIT Test does not execute any logic
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
# AssertionError: Tensor-likes are not close!
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'),
# FX failed to normalize op - add the op to the op_skip list.
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
# vmap: calling random operator not supported
DecorateInfo(unittest.skip("Test expects tensor input"), "TestVmapOperatorsOpInfo", "test_vmap_exhaustive"),
DecorateInfo(unittest.skip("Test expects tensor input"), "TestVmapOperatorsOpInfo", "test_op_has_batch_rule"),
DecorateInfo(unittest.expectedFailure, 'TestDecomp', 'test_quick'),
)),
OpInfo('normal',
variant_test_name='in_place',
op=lambda inp, *args, **kwargs: wrapper_set_seed(torch.Tensor.normal_, inp, *args, **kwargs),
inplace_variant=torch.Tensor.normal_,
dtypes=floating_and_complex_types_and(torch.float16, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
supports_out=False,
supports_autograd=False,
allow_cow_input_materialize_forward=[0],
sample_inputs_func=sample_inputs_normal,
error_inputs_func=error_inputs_normal,
skips=(
# Tests that assume input is a tensor or sequence of tensors
DecorateInfo(unittest.skip("Test expects tensor input"), "TestCommon", "test_noncontiguous_samples"),
# Tests that assume input tensor has a meaningful effect on output tensor
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_variant_consistency_eager'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_conj_view'),
DecorateInfo(unittest.expectedFailure, 'TestDecomp', 'test_quick'),
# AssertionError: JIT Test does not execute any logic
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
# AssertionError: Tensor-likes are not close!
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'),
# FX failed to normalize op - add the op to the op_skip list.
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
# vmap: calling random operator not supported
DecorateInfo(unittest.skip("Test expects tensor input"), "TestVmapOperatorsOpInfo", "test_vmap_exhaustive"),
DecorateInfo(unittest.skip("Test expects tensor input"), "TestVmapOperatorsOpInfo", "test_op_has_batch_rule"),
)),
OpInfo('uniform',
op=lambda inp, *args, **kwargs: wrapper_set_seed(torch.Tensor.uniform_, inp, *args, **kwargs),
method_variant=None,
inplace_variant=torch.Tensor.uniform_,
dtypes=floating_and_complex_types_and(torch.bfloat16, torch.float16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
supports_out=False,
supports_autograd=False,
is_factory_function=False,
allow_cow_input_materialize_forward=[0],
sample_inputs_func=sample_inputs_uniform,
error_inputs_func=error_inputs_uniform,
skips=(
# FX failed to normalize op - add the op to the op_skip list.
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
# Tests that assume input tensor has a meaningful effect on output tensor
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_variant_consistency_eager'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_conj_view'),
# AssertionError: JIT Test does not execute any logic
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
# aten.uniform was not decomposed
DecorateInfo(unittest.expectedFailure, 'TestDecomp', 'test_quick'),
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'),
)),
BinaryUfuncInfo('clamp_max',
ref=_clamp_max_numpy,
dtypes=all_types_and(torch.bool, torch.float16, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32, torch.int8),
supports_forward_ad=True,
supports_rhs_python_scalar=False,
supports_fwgrad_bwgrad=True,
rhs_make_tensor_kwargs=dict(exclude_zero=False),
skips=(
# RuntimeError: "max_elementwise_cuda" not implemented for 'ComplexFloat'
DecorateInfo(unittest.expectedFailure,
'TestBinaryUfuncs',
'test_type_promotion',
device_type='cuda'),
# dispatch to lazy test failed
DecorateInfo(unittest.expectedFailure, 'TestLazyOpInfo', 'test_dispatched_to_lazy'),
# test error disabled since rhs non-tensor python scalar is supported
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_errors'),
)),
BinaryUfuncInfo('clamp_min',
ref=_clamp_min_numpy,
dtypes=all_types_and(torch.bool, torch.float16, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32, torch.int8),
supports_forward_ad=True,
supports_rhs_python_scalar=False,
supports_fwgrad_bwgrad=True,
rhs_make_tensor_kwargs=dict(exclude_zero=False),
skips=(
# RuntimeError: "min_elementwise_cuda" not implemented for 'ComplexFloat'
DecorateInfo(unittest.expectedFailure,
'TestBinaryUfuncs',
'test_type_promotion',
device_type='cuda'),
# dispatch to lazy test failed
DecorateInfo(unittest.expectedFailure, 'TestLazyOpInfo', 'test_dispatched_to_lazy'),
# test error disabled since rhs non-tensor python scalar is supported
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_errors'),
)),
BinaryUfuncInfo('mul',
aliases=('multiply',),
dtypes=all_types_and_complex_and(torch.chalf, torch.float16, torch.bfloat16, torch.bool),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
assert_autodiffed=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_two_python_scalars=True,
error_inputs_sparse_func=error_inputs_sparse_mul,
sample_inputs_sparse_coo_func=partial(sample_inputs_sparse_mul, layout=torch.sparse_coo),
sample_inputs_sparse_csr_func=partial(sample_inputs_sparse_mul, layout=torch.sparse_csr),
sample_inputs_sparse_csc_func=partial(sample_inputs_sparse_mul, layout=torch.sparse_csc),
sample_inputs_sparse_bsr_func=partial(sample_inputs_sparse_mul, layout=torch.sparse_bsr),
sample_inputs_sparse_bsc_func=partial(sample_inputs_sparse_mul, layout=torch.sparse_bsc)),
BinaryUfuncInfo('sub',
# NumPy has no builtin reference for the alpha kwarg, but it is easy enough to emulate
ref=lambda input, other, *, alpha=1: np.subtract(input, np.multiply(alpha, other)),
aliases=('subtract',),
dtypes=all_types_and_complex_and(torch.bfloat16, torch.float16, torch.chalf),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
assert_autodiffed=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_add_sub,
supports_two_python_scalars=True,
decorators=(
DecorateInfo(
toleranceOverride({torch.float16: tol(atol=1e-2, rtol=0),
torch.bfloat16: tol(atol=1e-5, rtol=5e-3),
torch.complex32: tol(atol=1e-5, rtol=1e-3)}),
'TestBinaryUfuncs', 'test_reference_numerics'),
DecorateInfo(
toleranceOverride({torch.chalf: tol(atol=1e-2, rtol=0)}),
'TestCommon', 'test_complex_half_reference_testing', device_type='cpu'),
DecorateInfo(
toleranceOverride({torch.chalf: tol(atol=5e-3, rtol=0)}),
'TestDecomp', 'test_comprehensive', device_type='cpu'),
DecorateInfo(
toleranceOverride({torch.chalf: tol(atol=5e-3, rtol=0)}),
'TestDecomp', 'test_quick', device_type='cpu'),
),
skips=(
DecorateInfo(unittest.skip("Skipped!"),
'TestBinaryUfuncs',
'test_reference_numerics',
dtypes=(torch.uint8,)),
DecorateInfo(unittest.skip("Skipped!"),
'TestBinaryUfuncs',
'test_reference_numerics_small_values',
dtypes=(torch.uint8,)),
)),
OpInfo('addmm',
# This addmm OpInfo is for when alpha and beta are not both equal to 1.
# alpha=beta=1 is tested in the following opinfo, because that special case will
# trigger addmm being decomposed by a jit pass.
dtypes=all_types_and_complex_and(torch.float16, torch.bfloat16),
dtypesIfROCM=floating_and_complex_types_and(torch.float16, torch.bfloat16),
dtypesIfCUDA=floating_and_complex_types_and(torch.float16, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
assert_autodiffed=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
sample_inputs_func=sample_inputs_addmm,
skips=(
# Issue with conj and torch dispatch, see https://github.com/pytorch/pytorch/issues/82479
DecorateInfo(
unittest.skip("Skipped!"),
'TestSchemaCheckModeOpInfo',
'test_schema_correctness',
dtypes=(torch.complex64, torch.complex128)),
DecorateInfo(toleranceOverride({torch.float16: tol(atol=1e-3, rtol=2e-3)}),
"TestConsistency", "test_output_grad_match", device_type="mps"),
)),
OpInfo('addmm',
# When alpha=beta=1 as compile-time constants, JIT will decompose addmm into mm and add.
variant_test_name='decomposed',
dtypes=all_types_and_complex_and(torch.float16, torch.bfloat16),
dtypesIfCUDA=floating_and_complex_types_and(torch.float16, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
assert_autodiffed=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
autodiff_nonfusible_nodes=['aten::add', 'aten::mm'],
sample_inputs_func=partial(sample_inputs_addmm, alpha=1, beta=1),
skips=(
# Issue with conj and torch dispatch, see https://github.com/pytorch/pytorch/issues/82479
DecorateInfo(
unittest.skip("Skipped!"),
'TestSchemaCheckModeOpInfo',
'test_schema_correctness',
dtypes=(torch.complex64, torch.complex128)),
# https://github.com/pytorch/pytorch/issues/71784
DecorateInfo(unittest.skip('Skipped!'), 'TestNNCOpInfo', 'test_nnc_correctness',
device_type='cpu', dtypes=(torch.float16,)),
)),
OpInfo('addmv',
dtypes=all_types_and_complex_and(torch.bfloat16, torch.float16),
dtypesIfCUDA=floating_types_and(torch.float16, torch.complex64, torch.complex128,
torch.bfloat16),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
decorators=[
DecorateInfo(
toleranceOverride({torch.half: tol(atol=1e-5, rtol=3e-3)}),
'TestInductorOpInfo', 'test_comprehensive', device_type='cpu'),
DecorateInfo(toleranceOverride({torch.float32: tol(atol=2e-5, rtol=3e-6)}),
"TestConsistency", "test_output_match", device_type="mps"),
DecorateInfo(toleranceOverride({torch.float32: tol(atol=2e-5, rtol=3e-6)}),
"TestConsistency", "test_output_grad_match", device_type="mps"),
],
sample_inputs_func=sample_inputs_addmv),
OpInfo('addbmm',
ref=lambda M, batch1, batch2, beta=1, alpha=1: np.add(np.multiply(np.asarray(beta, dtype=M.dtype), M),
np.multiply(np.asarray(alpha, dtype=batch1.dtype),
np.sum(np.matmul(batch1, batch2), axis=0))),
dtypes=all_types_and_complex_and(torch.bfloat16, torch.float16),
dtypesIfCUDA=floating_and_complex_types_and(torch.float16,
*[torch.bfloat16]
if SM53OrLater or TEST_WITH_ROCM else []),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
# Runs very slowly on slow gradcheck - alternatively reduce input sizes
gradcheck_fast_mode=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
decorators=[
DecorateInfo(
toleranceOverride({torch.float32: tol(atol=1.3e-05, rtol=1.3e-05),
torch.complex64: tol(atol=1e-05, rtol=1.2e-03)}),
'TestCommon', 'test_numpy_refs'),
# MPS has slightly worse precision. Is this acceptable?
DecorateInfo(
toleranceOverride({torch.float32: tol(atol=1.3e-04, rtol=1.3e-04),
torch.complex64: tol(atol=1e-05, rtol=1.2e-03)}),
'TestCommon', 'test_numpy_ref_mps'),
DecorateInfo(
toleranceOverride({torch.float32: tol(atol=1e-5, rtol=1e-5),
torch.bfloat16: tol(atol=2e-1, rtol=6e-1)}),
'TestConsistency',
'test_output_match',
),
DecorateInfo(
toleranceOverride({torch.float32: tol(atol=1.5e-05, rtol=1e-05)}),
'TestCommon', 'test_out'),
DecorateInfo(
toleranceOverride({torch.half: tol(atol=6e-3, rtol=1e-2)}),
'TestInductorOpInfo', 'test_comprehensive', device_type='cpu'),
],
skips=(
# NVIDIA only assures that bfloat16 is supported by bmm if SM >= 5.3
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_dtypes', device_type='cuda', active_if=not SM53OrLater),
# addbmm does not correctly warn when resizing out= inputs
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning'),
# https://github.com/pytorch/pytorch/issues/55907
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_variant_consistency_eager'),
),
sample_inputs_func=sample_inputs_addbmm),
OpInfo('baddbmm',
dtypes=all_types_and_complex_and(torch.bfloat16, torch.float16),
dtypesIfCUDA=floating_types_and(torch.float16, torch.complex64, torch.complex128,
torch.bfloat16),
backward_dtypesIfCUDA=floating_types_and(torch.float16,
*[torch.bfloat16] if SM53OrLater or TEST_WITH_ROCM else [],
torch.complex64, torch.complex128),
# Runs very slowly on slow gradcheck - alternatively reduce input sizes
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
gradcheck_fast_mode=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
decorators=[
DecorateInfo(
toleranceOverride({torch.complex64: tol(atol=1e-05, rtol=1.2e-03)}),
'TestCommon', 'test_variant_consistency_eager', device_type='cuda'),
DecorateInfo(
toleranceOverride({torch.complex64: tol(atol=1e-05, rtol=1.2e-03)}),
'TestMathBits', 'test_conj_view', device_type='cuda'),
],
sample_inputs_func=sample_inputs_baddbmm,
skips=(
# Issue with conj and torch dispatch, see https://github.com/pytorch/pytorch/issues/82479
DecorateInfo(
unittest.skip("Skipped!"),
'TestSchemaCheckModeOpInfo',
'test_schema_correctness',
dtypes=(torch.complex64, torch.complex128)),
)),
OpInfo('dot',
dtypes=all_types_and_complex_and(torch.float16, torch.bfloat16),
dtypesIfCUDA=floating_and_complex_types_and(torch.float16, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
assert_autodiffed=True,
sample_inputs_func=sample_inputs_dot_vdot,
error_inputs_func=error_inputs_dot_vdot,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
skips=(
# Issue with conj and torch dispatch, see https://github.com/pytorch/pytorch/issues/82479
DecorateInfo(
unittest.skip("Skipped!"),
'TestSchemaCheckModeOpInfo',
'test_schema_correctness',
dtypes=(torch.complex64, torch.complex128)),
)),
OpInfo('vdot',
dtypes=all_types_and_complex_and(torch.float16, torch.bfloat16),
dtypesIfCUDA=floating_and_complex_types_and(torch.float16, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
sample_inputs_func=sample_inputs_dot_vdot,
error_inputs_func=error_inputs_dot_vdot,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
skips=(
# Issue with conj and torch dispatch, see https://github.com/pytorch/pytorch/issues/82479
DecorateInfo(
unittest.skip("Skipped!"),
'TestSchemaCheckModeOpInfo',
'test_schema_correctness',
dtypes=(torch.complex64, torch.complex128)),
)),
OpInfo('bmm',
dtypes=all_types_and_complex_and(torch.float16, torch.bfloat16),
dtypesIfCUDA=floating_and_complex_types_and(torch.float16,
*[torch.bfloat16]
if SM53OrLater or TEST_WITH_ROCM else []),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
assert_autodiffed=True,
assert_jit_shape_analysis=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
skips=(
# NVIDIA only assures that bfloat16 is supported by bmm if SM >= 5.3
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_dtypes', device_type='cuda', active_if=not SM53OrLater),
DecorateInfo(toleranceOverride({torch.float32: tol(atol=1e-5, rtol=1e-5)}),
"TestCommon", "test_out"),
# Fast math on MacOS-13?
DecorateInfo(
toleranceOverride({torch.float32: tol(atol=2e-5, rtol=5e-6)}),
'TestConsistency',
'test_output_match',
active_if=lambda _: MACOS_VERSION < 14.0,
device_type='mps',
dtypes=(torch.float32,)),
),
sample_inputs_func=sample_inputs_bmm),
OpInfo('mv',
dtypes=all_types_and_complex_and(torch.float16, torch.bfloat16),
dtypesIfCUDA=floating_and_complex_types_and(torch.float16, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
assert_autodiffed=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_mv),
# Registry entry for 'addr'.
OpInfo(
    'addr',
    sample_inputs_func=sample_inputs_addr,
    dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.float16),
    gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
    skips=(
        # The eager variant-consistency test is skipped for the same dtype set the op declares.
        # Reference: https://github.com/pytorch/pytorch/issues/50747
        DecorateInfo(
            unittest.skip("Skipped!"),
            'TestCommon',
            'test_variant_consistency_eager',
            dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.float16),
        ),
    ),
),
# Registry entry for 'addcmul'; reference inputs go through the elementwise-ternary helper.
OpInfo(
    'addcmul',
    sample_inputs_func=sample_inputs_addcmul_addcdiv,
    reference_inputs_func=partial(
        reference_inputs_elementwise_ternary,
        sample_inputs_func=reference_inputs_addcmul_addcdiv,
    ),
    dtypes=all_types_and_complex_and(torch.float16, torch.bfloat16),
    dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
    assert_autodiffed=True,
    skips=(
        # TODO: update sample inputs with for_inplace_variant kwarg to support this test
        DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_variant_consistency_eager'),
    ),
),
# Registry entry for 'addcdiv'; restricted to floating and complex dtypes.
OpInfo(
    'addcdiv',
    sample_inputs_func=sample_inputs_addcmul_addcdiv,
    reference_inputs_func=partial(
        reference_inputs_elementwise_ternary,
        sample_inputs_func=reference_inputs_addcmul_addcdiv,
    ),
    dtypes=floating_and_complex_types_and(torch.float16, torch.bfloat16),
    dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
    skips=(
        # TODO: update sample inputs with for_inplace_variant kwarg to support this test
        DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_variant_consistency_eager'),
    ),
),
# Registry entry for 'asin' (alias 'arcsin'), compared against np.arcsin on the domain (-1, 1).
UnaryUfuncInfo(
    'asin',
    ref=np.arcsin,
    aliases=('arcsin', ),
    domain=(-1, 1),
    dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
    dtypesIfCUDA=all_types_and_complex_and(torch.chalf, torch.bool, torch.half, torch.bfloat16),
    dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
    promotes_int_to_float=True,
    assert_autodiffed=True,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    # All five sparse-support flags are enabled.
    supports_sparse=True,
    supports_sparse_csr=True,
    supports_sparse_csc=True,
    supports_sparse_bsr=True,
    supports_sparse_bsc=True,
    decorators=[
        DecorateInfo(toleranceOverride({torch.float16: tol(atol=1e-05, rtol=1e-03)}),
                     'TestUnaryUfuncs', device_type='cuda'),
        DecorateInfo(toleranceOverride({torch.float32: tol(atol=8e-5, rtol=4e-5)}),
                     'TestInductorOpInfo', 'test_comprehensive', device_type='cuda'),
        DecorateInfo(toleranceOverride({torch.complex64: tol(atol=5e-05, rtol=2e-05)}),
                     'TestUnaryUfuncs', 'test_reference_numerics_extremal', device_type='cpu'),
        precisionOverride({torch.bfloat16: 1e-2}),
    ],
    skips=(
        # cdouble reference-numerics tests are skipped on CUDA when running on Windows.
        DecorateInfo(
            unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
            device_type='cuda', dtypes=[torch.cdouble], active_if=IS_WINDOWS),
        DecorateInfo(
            unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large',
            device_type='cuda', dtypes=[torch.cdouble], active_if=IS_WINDOWS),
        DecorateInfo(
            unittest.skip("Skipped! sparse backward not supported"),
            'TestSparseUnaryUfuncs', 'test_sparse_fn_grad'),
    ),
),
# Registry entry for 'asinh' (alias 'arcsinh'), compared against np.arcsinh.
# NOTE: derivative for inplace asinh is not implemented
UnaryUfuncInfo(
    'asinh',
    ref=np.arcsinh,
    aliases=('arcsinh', ),
    dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
    dtypesIfCUDA=all_types_and_complex_and(torch.chalf, torch.bool, torch.half, torch.bfloat16),
    dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
    promotes_int_to_float=True,
    supports_inplace_autograd=False,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    # All five sparse-support flags are enabled.
    supports_sparse=True,
    supports_sparse_csr=True,
    supports_sparse_csc=True,
    supports_sparse_bsr=True,
    supports_sparse_bsc=True,
    decorators=(precisionOverride({torch.bfloat16: 5e-2}),),
    skips=(
        # Complex reference-numerics tests are skipped on CPU.
        DecorateInfo(
            unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
            device_type='cpu', dtypes=[torch.cfloat, torch.cdouble]),
        DecorateInfo(
            unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large',
            device_type='cpu', dtypes=[torch.cfloat, torch.cdouble]),
        DecorateInfo(
            unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_small',
            device_type='cpu', dtypes=[torch.cfloat, torch.cdouble]),
        DecorateInfo(
            unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_normal',
            device_type='cpu', dtypes=[torch.cfloat, torch.cdouble]),
        # cdouble reference-numerics tests are skipped on CUDA when running on Windows.
        DecorateInfo(
            unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
            device_type='cuda', dtypes=[torch.cdouble], active_if=IS_WINDOWS),
        DecorateInfo(
            unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large',
            device_type='cuda', dtypes=[torch.cdouble], active_if=IS_WINDOWS),
        DecorateInfo(
            unittest.skip("Skipped! sparse backward not supported"),
            'TestSparseUnaryUfuncs', 'test_sparse_fn_grad'),
    ),
),
# Registry entry for 'atan' (alias 'arctan'), compared against np.arctan.
UnaryUfuncInfo(
    'atan',
    ref=np.arctan,
    aliases=('arctan', ),
    dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
    dtypesIfCUDA=all_types_and_complex_and(torch.chalf, torch.bool, torch.half, torch.bfloat16),
    dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
    promotes_int_to_float=True,
    assert_autodiffed=True,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    # All five sparse-support flags are enabled.
    supports_sparse=True,
    supports_sparse_csr=True,
    supports_sparse_csc=True,
    supports_sparse_bsr=True,
    supports_sparse_bsc=True,
    decorators=(precisionOverride({torch.bfloat16: 1e-2}),),
    skips=(
        # Complex reference-numerics tests are skipped on macOS and Windows.
        DecorateInfo(
            unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
            dtypes=[torch.cfloat, torch.cdouble], active_if=(IS_MACOS or IS_WINDOWS)),
        DecorateInfo(
            unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large',
            dtypes=[torch.cfloat, torch.cdouble], active_if=(IS_MACOS or IS_WINDOWS)),
        DecorateInfo(
            unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_small',
            dtypes=[torch.cfloat, torch.cdouble], active_if=(IS_MACOS or IS_WINDOWS)),
        DecorateInfo(
            unittest.skip("Skipped! sparse backward not supported"),
            'TestSparseUnaryUfuncs', 'test_sparse_fn_grad'),
    ),
),
# Registry entry for 'atan2' (alias 'arctan2'); a Python scalar on the right-hand side is not supported.
BinaryUfuncInfo(
    'atan2',
    aliases=('arctan2',),
    dtypes=all_types_and(torch.bool, torch.bfloat16, torch.half),
    dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
    promotes_int_to_float=True,
    supports_rhs_python_scalar=False,
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
    skips=(
        # Incorrectly attempts to use a scalar for the second argument
        DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_jit_alias_remapping'),
    ),
),
# Registry entry for 'atanh' (alias 'arctanh'), compared against np.arctanh on the domain (-1, 1).
UnaryUfuncInfo(
    'atanh',
    ref=np.arctanh,
    aliases=('arctanh', ),
    domain=(-1, 1),
    dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
    dtypesIfCUDA=all_types_and_complex_and(torch.chalf, torch.bool, torch.half, torch.bfloat16),
    dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
    promotes_int_to_float=True,
    supports_inplace_autograd=False,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    # All five sparse-support flags are enabled.
    supports_sparse=True,
    supports_sparse_csr=True,
    supports_sparse_csc=True,
    supports_sparse_bsr=True,
    supports_sparse_bsc=True,
    decorators=[
        precisionOverride({torch.bfloat16: 1e-2}),
        DecorateInfo(toleranceOverride({torch.float32: tol(atol=9e-3, rtol=8e-5)}),
                     "TestInductorOpInfo", "test_comprehensive", device_type="cuda"),
        DecorateInfo(toleranceOverride({torch.float16: tol(atol=1e-3, rtol=2e-3)}),
                     "TestConsistency", "test_output_grad_match", device_type="mps"),
    ],
    skips=(
        # Complex reference-numerics tests are skipped on CPU.
        DecorateInfo(
            unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_small',
            device_type='cpu', dtypes=[torch.cfloat, torch.cdouble]),
        DecorateInfo(
            unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
            device_type='cpu', dtypes=[torch.cfloat, torch.cdouble]),
        DecorateInfo(
            unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large',
            device_type='cpu', dtypes=[torch.cfloat, torch.cdouble]),
        # CUDA skips below only apply on Windows; the "large" one covers cfloat only.
        DecorateInfo(
            unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
            device_type='cuda', dtypes=[torch.cfloat, torch.cdouble], active_if=IS_WINDOWS),
        DecorateInfo(
            unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large',
            device_type='cuda', dtypes=[torch.cfloat], active_if=IS_WINDOWS),
        DecorateInfo(
            unittest.skip("Skipped! sparse backward not supported"),
            'TestSparseUnaryUfuncs', 'test_sparse_fn_grad'),
    ),
),
# Registry entry for 'allclose', compared against np.allclose; autograd and out= are disabled.
OpInfo(
    'allclose',
    ref=np.allclose,
    sample_inputs_func=sample_inputs_allclose,
    dtypes=floating_and_complex_types_and(torch.float16, torch.bfloat16),
    supports_out=False,
    supports_autograd=False,
    supports_forward_ad=False,
    skips=(
        DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit'),
        DecorateInfo(unittest.skip("Skipped!"), 'TestNNCOpInfo', 'test_nnc_correctness'),
        DecorateInfo(unittest.skip("Skipped!"), 'TestCudaFuserOpInfo'),
    ),
),
# Registry entry for 'broadcast_to', compared against np.broadcast_to.
OpInfo(
    'broadcast_to',
    ref=np.broadcast_to,
    sample_inputs_func=sample_inputs_broadcast_to,
    dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
    supports_out=False,
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
    # See https://github.com/pytorch/pytorch/pull/78358
    check_batched_forward_grad=False,
),
# Registry entry for 'broadcast_shapes'; autograd, scripting and out= are all disabled.
OpInfo(
    'broadcast_shapes',
    op=torch.broadcast_shapes,
    # The NumPy reference is only used when the installed NumPy is at least 1.20.0.
    ref=np.broadcast_shapes if np.lib.NumpyVersion(np.__version__) >= '1.20.0' else None,
    sample_inputs_func=sample_inputs_broadcast_shapes,
    dtypes=_dispatch_dtypes((torch.float32,)),
    supports_out=False,
    supports_scripting=False,
    supports_autograd=False,
    supports_gradgrad=False,
    assert_autodiffed=False,
    skips=(
        # https://github.com/pytorch/pytorch/issues/64997
        DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
        # skip dtype tests since broadcast_shape is not device dependent.
        # having dtypes limited to torch.float32 would cause test_dtypes to report unexpected success
        DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_dtypes'),
        # skip these tests since we have non tensor input
        DecorateInfo(unittest.skip('Skipped!'), "TestCommon", "test_noncontiguous_samples"),
        DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_variant_consistency_eager'),
        DecorateInfo(unittest.skip('Skipped!'), 'TestJit', 'test_variant_consistency_jit'),
    ),
),
# Registry entry for 'broadcast_tensors', compared against np.broadcast_arrays.
OpInfo(
    'broadcast_tensors',
    ref=np.broadcast_arrays,
    sample_inputs_func=sample_inputs_broadcast_tensors,
    reference_inputs_func=reference_inputs_broadcast_tensors,
    dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
    dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
    supports_out=False,
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
    # See https://github.com/pytorch/pytorch/pull/78358
    check_batched_forward_grad=False,
    skips=(
        # https://github.com/pytorch/pytorch/issues/64997
        DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
        # JIT does not support variadic tensors.
        # RuntimeError: input->type()->kind() == TypeKind::OptionalType
        # INTERNAL ASSERT FAILED at "../torch/csrc/jit/passes/utils/check_alias_annotation.cpp":252,
        # please report a bug to PyTorch.
        DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit',
                     dtypes=[torch.float32]),
    ),
),
# Registry entry for 'block_diag'.
OpInfo(
    'block_diag',
    sample_inputs_func=sample_inputs_block_diag,
    dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf),
    supports_out=False,
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
    # Default batching rule in core doesn't work for ops with TensorList args
    check_batched_forward_grad=False,
    skips=(
        # https://github.com/pytorch/pytorch/issues/64997
        DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
        # JIT does not support variadic tensors.
        # RuntimeError: input->type()->kind() == TypeKind::OptionalType
        # INTERNAL ASSERT FAILED at "../torch/csrc/jit/passes/utils/check_alias_annotation.cpp":252,
        # please report a bug to PyTorch.
        DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit',
                     dtypes=[torch.float32]),
    ),
),
# Registry entry for 'bitwise_not', compared against np.bitwise_not; operator.invert is its operator variant.
UnaryUfuncInfo(
    'bitwise_not',
    ref=np.bitwise_not,
    operator_variant=operator.invert,
    dtypes=integral_types_and(torch.bool),
    dtypesIfHpu=custom_types(torch.bool),
    supports_autograd=False,
),
# Registry entry for 'bitwise_left_shift'; mapped to the `<<` / `<<=` operators.
BinaryUfuncInfo(
    'bitwise_left_shift',
    op=torch.bitwise_left_shift,
    operator_variant=operator.lshift,
    inplace_operator_variant=operator.ilshift,
    dtypes=integral_types(),
    dtypesIfCUDA=integral_types(),
    dtypesIfHpu=custom_types(torch.int32, torch.int8, torch.bool),
    # Right-hand-side tensors are generated with low=0.
    rhs_make_tensor_kwargs=dict(low=0),
    supports_one_python_scalar=True,
    supports_autograd=False,
    skips=(
        DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs', 'test_type_promotion'),
        # https://github.com/pytorch/pytorch/issues/70904
        DecorateInfo(unittest.skip("Some inputs produce undefined outputs"), 'TestCommon', 'test_compare_cpu'),
    ),
),
# Registry entry for 'bitwise_right_shift'; mapped to the `>>` / `>>=` operators.
BinaryUfuncInfo(
    'bitwise_right_shift',
    op=torch.bitwise_right_shift,
    operator_variant=operator.rshift,
    inplace_operator_variant=operator.irshift,
    dtypes=integral_types(),
    dtypesIfCUDA=integral_types(),
    dtypesIfHpu=custom_types(torch.int32, torch.int8, torch.bool),
    # Right-hand-side tensors are generated with low=0.
    rhs_make_tensor_kwargs=dict(low=0),
    supports_one_python_scalar=True,
    supports_autograd=False,
    skips=(
        DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs', 'test_type_promotion'),
        # https://github.com/pytorch/pytorch/issues/70904
        DecorateInfo(unittest.skip("Some inputs produce undefined outputs"), 'TestCommon', 'test_compare_cpu'),
    ),
),
# Registry entry for 'combinations'.
OpInfo(
    'combinations',
    op=torch.combinations,
    sample_inputs_func=sample_inputs_combinations,
    dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
    supports_out=False,
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
    # See https://github.com/pytorch/pytorch/pull/78358
    check_batched_forward_grad=False,
),
# Registry entry for 'cartesian_prod'.
OpInfo(
    'cartesian_prod',
    op=torch.cartesian_prod,
    sample_inputs_func=sample_inputs_cartesian_prod,
    dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
    supports_out=False,
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
    # See https://github.com/pytorch/pytorch/pull/78358
    check_batched_forward_grad=False,
    skips=(
        DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
        DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_conj_view'),
        DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
        # RuntimeError: input->type()->kind() == TypeKind::OptionalType
        # INTERNAL ASSERT FAILED at "../torch/csrc/jit/passes/utils/check_alias_annotation.cpp":270
        DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit',
                     dtypes=(torch.float32,)),
    ),
),
# Registry entry for 'cdist'; floating dtypes only, with gradgrad and out= disabled.
OpInfo(
    'cdist',
    sample_inputs_func=sample_inputs_cdist,
    dtypes=floating_types(),
    assert_autodiffed=False,
    supports_gradgrad=False,
    supports_out=False,
),
# Registry entry for 'ceil', compared against np.ceil.
UnaryUfuncInfo(
    'ceil',
    ref=np.ceil,
    dtypes=all_types_and(torch.half, torch.bfloat16),
    dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32, torch.int8),
    assert_autodiffed=True,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    # All five sparse-support flags are enabled.
    supports_sparse=True,
    supports_sparse_csr=True,
    supports_sparse_csc=True,
    supports_sparse_bsr=True,
    supports_sparse_bsc=True,
    skips=(
        # Expected NNC failure for every integral dtype except uint8.
        DecorateInfo(
            unittest.expectedFailure, 'TestNNCOpInfo', 'test_nnc_correctness',
            dtypes=tuple(t for t in integral_types() if t != torch.uint8)),
    ),
),
# Registry entry for 'cholesky'; gradcheck goes through the Hermitian-input wrapper.
OpInfo(
    'cholesky',
    sample_inputs_func=sample_inputs_linalg_cholesky,
    dtypes=floating_and_complex_types(),
    gradcheck_wrapper=gradcheck_wrapper_hermitian_input,
    decorators=[skipCUDAIfNoMagma, skipCPUIfNoLapack],
),
# Registry entry for 'cholesky_inverse'.
OpInfo(
    'cholesky_inverse',
    sample_inputs_func=sample_inputs_linalg_cholesky_inverse,
    dtypes=floating_and_complex_types(),
    backward_dtypes=floating_and_complex_types(),
    gradcheck_wrapper=gradcheck_wrapper_triangular_input_real_positive_diagonal,
    # https://github.com/pytorch/pytorch/issues/80411
    gradcheck_fast_mode=True,
    check_batched_gradgrad=True,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    decorators=[
        skipCUDAIfNoMagma,
        skipCPUIfNoLapack,
        # The same float32 tolerance is applied on CPU for both test classes below.
        DecorateInfo(toleranceOverride({torch.float32: tol(atol=5e-03, rtol=1e-04)}),
                     'TestCommon', device_type='cpu'),
        DecorateInfo(toleranceOverride({torch.float32: tol(atol=5e-03, rtol=1e-04)}),
                     'TestEagerFusionOpInfo', device_type='cpu'),
    ],
    skips=(
        # Strides are not the same! Original strides were ((4, 2, 1),) and strides are now ((4, 1, 2),)
        DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out'),
    ),
),
# Registry entry for 'cholesky_solve'.
OpInfo(
    'cholesky_solve',
    op=torch.cholesky_solve,
    sample_inputs_func=sample_inputs_cholesky_solve,
    dtypes=floating_and_complex_types(),
    # Forwards to the triangular-input gradcheck wrapper with idx fixed to 1.
    gradcheck_wrapper=lambda *args, **kwargs: gradcheck_wrapper_triangular_input(*args, idx=1, **kwargs),
    check_batched_gradgrad=False,
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
    decorators=[skipCUDAIfNoMagma, skipCPUIfNoLapack],
),
# Registry entry for 'chunk'.
OpInfo(
    'chunk',
    sample_inputs_func=sample_inputs_chunk,
    reference_inputs_func=reference_inputs_chunk,
    dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.float16, torch.chalf),
    dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32),
    supports_out=False,
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
),
# Registry entry for 'unsafe_chunk'; uses the same sample and reference input functions as 'chunk'.
OpInfo(
    'unsafe_chunk',
    sample_inputs_func=sample_inputs_chunk,
    reference_inputs_func=reference_inputs_chunk,
    dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.float16, torch.chalf),
    supports_out=False,
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
    check_batched_forward_grad=False,
),
# Registry entry for 'clone', compared against np.copy.
OpInfo(
    'clone',
    ref=np.copy,
    sample_inputs_func=sample_inputs_clone_contiguous,
    reference_inputs_func=reference_inputs_clone_contiguous,
    dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.float16, torch.chalf),
    dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32, torch.int8, torch.bool),
    supports_out=False,
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
    skips=(
        # TypeError: _copy_dispatcher() got an unexpected keyword argument 'memory_format'
        # (NumPy reference needs to be extended with memory_format)
        DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_numpy_ref'),
        DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_numpy_ref_mps'),
    ),
),
# Registry entry for 'contiguous'; the op is a lambda that calls the Tensor method.
OpInfo(
    'contiguous',
    op=lambda x, *args, **kwargs: x.contiguous(*args, **kwargs),
    sample_inputs_func=sample_inputs_clone_contiguous,
    reference_inputs_func=reference_inputs_clone_contiguous,
    dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.float16, torch.chalf),
    autodiff_fusible_nodes=['aten::contiguous'],
    assert_jit_shape_analysis=True,
    supports_out=False,
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
    skips=(
        DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
    ),
),
# Registry entry for 'sum_to_size'; the op is a lambda that calls the Tensor method.
OpInfo(
    'sum_to_size',
    op=lambda x, *args, **kwargs: x.sum_to_size(*args, **kwargs),
    sample_inputs_func=sample_inputs_sum_to_size,
    error_inputs_func=error_inputs_sum_to_size,
    dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
    supports_out=False,
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
    skips=(
        # lambda impl
        DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
        DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit',
                     dtypes=(torch.float,)),
    ),
),
# Registry entry for 'clamp' (alias 'clip'), compared against the _clamp_numpy reference.
OpInfo(
    'clamp',
    ref=_clamp_numpy,
    aliases=('clip',),
    sample_inputs_func=sample_inputs_clamp,
    reference_inputs_func=partial(
        reference_inputs_elementwise_ternary,
        sample_inputs_func=sample_inputs_clamp,
    ),
    dtypes=all_types_and(torch.bfloat16, torch.half),
    dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32, torch.int8, torch.bool),
    assert_autodiffed=True,
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
    skips=(
        # NNC appear to not handle boolean clamp
        DecorateInfo(unittest.expectedFailure, 'TestNNCOpInfo', 'test_nnc_correctness',
                     dtypes=(torch.bool,)),
        # MPS does not support float64, while numpy does internal computations in float64.
        # See https://github.com/pytorch/pytorch/blob/3c1cf03fde145bdbe1f5ffb81765d076c10b4c04/test/test_ops.py#L260-L264
        DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_numpy_ref_mps'),
    ),
),
# Registry entry for 'positive', compared against np.positive.
UnaryUfuncInfo(
    'positive',
    ref=np.positive,
    dtypes=all_types_and_complex_and(torch.half, torch.bfloat16, torch.chalf),
    supports_out=False,
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
    # All five sparse-support flags are enabled.
    supports_sparse=True,
    supports_sparse_csr=True,
    supports_sparse_csc=True,
    supports_sparse_bsr=True,
    supports_sparse_bsc=True,
),
# Registry entry for 'conj', compared against np.conj.
UnaryUfuncInfo(
    'conj',
    ref=np.conj,
    dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.half, torch.chalf),
    dtypesIfHpu=custom_types(torch.float32, torch.int32),
    supports_out=False,
    supports_sparse=True,
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
    # See https://github.com/pytorch/pytorch/pull/78358
    check_batched_forward_grad=False,
),
# Registry entry for 'conj_physical', compared against np.conj.
UnaryUfuncInfo(
    'conj_physical',
    ref=np.conj,
    decomp_aten_name='_conj_physical',
    dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.half, torch.chalf),
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
    # All five sparse-support flags are enabled.
    supports_sparse=True,
    supports_sparse_csr=True,
    supports_sparse_csc=True,
    supports_sparse_bsr=True,
    supports_sparse_bsc=True,
    skips=(
        # RuntimeError: inputSet && outputSet
        # INTERNAL ASSERT FAILED at "../torch/csrc/jit/passes/utils/check_alias_annotation.cpp":118,
        # please report a bug to PyTorch.
        DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit',
                     dtypes=(torch.float32, )),
        DecorateInfo(
            unittest.skip("Skipped! conj_physical_ not implemented for sparse"),
            'TestSparseUnaryUfuncs', 'test_inplace'),
    ),
),
# Registry entry for 'resolve_conj'; samples come from sample_inputs_view_as_real.
OpInfo(
    'resolve_conj',
    sample_inputs_func=sample_inputs_view_as_real,
    dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
    dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
    supports_out=False,
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
),
# Registry entry for 'resolve_neg'; samples come from sample_inputs_view_as_real.
OpInfo(
    'resolve_neg',
    sample_inputs_func=sample_inputs_view_as_real,
    dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf),
    dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
    supports_out=False,
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
),
# Registry entry for 'view_as_real'; complex dtypes only, conjugated samples are not tested.
OpInfo(
    'view_as_real',
    sample_inputs_func=sample_inputs_view_as_real,
    dtypes=complex_types(),
    test_conjugated_samples=False,
    supports_out=False,
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
),
# Registry entry for 'view_as_complex'; floating dtypes plus half, neg-view testing disabled.
OpInfo(
    'view_as_complex',
    sample_inputs_func=sample_inputs_view_as_complex,
    dtypes=floating_types_and(torch.half),
    test_neg_view=False,
    supports_out=False,
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
    skips=(
        # RuntimeError: Tensor must have a last dimension with stride 1
        DecorateInfo(unittest.expectedFailure, "TestCommon", "test_noncontiguous_samples"),
        # RuntimeError: "eq_cpu" not implemented for 'ComplexHalf'
        DecorateInfo(unittest.skip("Skipped!"), 'TestNNCOpInfo', 'test_nnc_correctness',
                     dtypes=(torch.half,)),
        # RuntimeError: view size is not compatible with input tensor's size and stride
        DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_outplace_all_strides"),
    ),
),
# Registry entry for 'complex'; a Python scalar on the right-hand side is not supported.
BinaryUfuncInfo(
    'complex',
    error_inputs_func=error_inputs_complex,
    dtypes=floating_types_and(torch.half),
    supports_rhs_python_scalar=False,
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
    skips=(
        # Tests don't account for complex's type promotion semantics
        DecorateInfo(unittest.expectedFailure, 'TestBinaryUfuncs', 'test_type_promotion'),
        DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_out', device_type='mps'),
        DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_binary_ufuncs_mixed_dtype'),
    ),
),
# Registry entry for 'copysign'.
BinaryUfuncInfo(
    'copysign',
    sample_inputs_func=sample_inputs_copysign,
    dtypes=all_types_and(torch.bool, torch.half, torch.bfloat16),
    dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
    promotes_int_to_float=True,
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
    # https://github.com/pytorch/pytorch/issues/80411
    gradcheck_fast_mode=True,
),
# Registry entry for 'corrcoef'.
OpInfo(
    'corrcoef',
    sample_inputs_func=sample_inputs_corrcoef,
    dtypes=all_types_and_complex_and(torch.half, torch.bfloat16),
    supports_out=False,
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
    # See https://github.com/pytorch/pytorch/pull/78358
    check_batched_forward_grad=False,
    skips=(
        # Issue with conj and torch dispatch, see https://github.com/pytorch/pytorch/issues/82479
        DecorateInfo(unittest.skip("Skipped!"), 'TestSchemaCheckModeOpInfo', 'test_schema_correctness',
                     dtypes=(torch.complex64, torch.complex128)),
    ),
),
# Registry entry for 'cos', compared against np.cos; handles_large_floats is turned off.
UnaryUfuncInfo(
    'cos',
    ref=np.cos,
    dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
    dtypesIfCUDA=all_types_and_complex_and(torch.chalf, torch.bool, torch.half, torch.bfloat16),
    dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
    promotes_int_to_float=True,
    handles_large_floats=False,
    assert_autodiffed=True,
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
    decorators=(precisionOverride({torch.bfloat16: 1e-2}),),
    skips=(
        DecorateInfo(
            unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large',
            dtypes=(torch.cfloat, torch.cdouble,), device_type='cpu', active_if=IS_WINDOWS),
        # This fails on CUDA but passes on ROCm
        DecorateInfo(
            unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large',
            dtypes=(torch.cdouble,), device_type='cuda'),
        DecorateInfo(
            unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
            dtypes=[torch.cfloat, torch.cdouble], active_if=IS_WINDOWS),
        DecorateInfo(
            unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
            device_type='cpu', dtypes=[torch.cfloat, torch.cdouble], active_if=IS_MACOS),
        # AssertionError: Tensor-likes are not close!
        # Greatest absolute difference: nan at index (700,) (up to 1e-05 allowed)
        # Greatest relative difference: nan at index (700,) (up to 0.001 allowed)
        DecorateInfo(
            unittest.expectedFailure, 'TestUnaryUfuncs', 'test_reference_numerics_large',
            device_type='cuda', dtypes=(torch.chalf,), active_if=IS_WINDOWS),
    ),
),
# Registry entry for 'cosh'; the reference is np.cosh wrapped by np_unary_ufunc_integer_promotion_wrapper.
UnaryUfuncInfo(
    'cosh',
    ref=np_unary_ufunc_integer_promotion_wrapper(np.cosh),
    dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
    dtypesIfCUDA=all_types_and_complex_and(torch.chalf, torch.bool, torch.half, torch.bfloat16),
    dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
    promotes_int_to_float=True,
    assert_autodiffed=True,
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
    skips=(
        # Reference: https://github.com/pytorch/pytorch/issues/48641
        DecorateInfo(
            unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large',
            device_type='cpu', dtypes=[torch.int8]),
        DecorateInfo(
            unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large',
            dtypes=[torch.cdouble]),
        DecorateInfo(
            unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
            dtypes=[torch.cfloat, torch.cdouble], active_if=IS_WINDOWS),
        DecorateInfo(
            unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large',
            dtypes=[torch.cfloat, torch.cdouble], active_if=IS_WINDOWS),
        DecorateInfo(
            unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
            device_type='cpu', dtypes=[torch.cfloat, torch.cdouble], active_if=IS_MACOS),
        DecorateInfo(
            unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large',
            device_type='cpu', dtypes=[torch.cfloat, torch.cdouble], active_if=IS_MACOS),
        # AssertionError: Tensor-likes are not close!
        # Greatest absolute difference: nan at index (6000,) (up to 1e-05 allowed)
        # Greatest relative difference: nan at index (6000,) (up to 0.001 allowed)
        DecorateInfo(
            unittest.expectedFailure, 'TestUnaryUfuncs', 'test_reference_numerics_large',
            device_type='cuda', dtypes=(torch.chalf,), active_if=IS_WINDOWS),
    ),
),
# Registry entry for 'cov'. Note that two tolerance overrides are listed under `skips`.
OpInfo(
    'cov',
    sample_inputs_func=sample_inputs_cov,
    error_inputs_func=error_inputs_cov,
    dtypes=all_types_and_complex_and(torch.half, torch.bfloat16),
    supports_out=False,
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
    skips=(
        # Issue with conj and torch dispatch, see https://github.com/pytorch/pytorch/issues/82479
        DecorateInfo(unittest.skip("Skipped!"), 'TestSchemaCheckModeOpInfo', 'test_schema_correctness',
                     dtypes=(torch.complex64, torch.complex128)),
        # Float did not match double
        DecorateInfo(unittest.expectedFailure, 'TestBwdGradients', 'test_fn_grad'),
        # Jacobian mismatch
        DecorateInfo(unittest.expectedFailure, 'TestBwdGradients', 'test_fn_gradgrad'),
        DecorateInfo(unittest.expectedFailure, 'TestFwdGradients', 'test_forward_mode_AD'),
        DecorateInfo(unittest.skip("Barely fails"), 'TestFwdGradients', 'test_fn_fwgrad_bwgrad'),
        # JIT test not working for tensor kwargs (https://github.com/pytorch/pytorch/issues/58507)
        # RuntimeError:
        # undefined value tensor:
        # File "<string>", line 3
        # def the_method(i0):
        # return torch.cov(i0, correction=0, fweights=None, aweights=tensor([0.0518, 0.4681], dtype=torch.float32, requires_grad=True)) # noqa: B950
        # ~~~~~~ <--- HERE
        DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
        DecorateInfo(
            toleranceOverride({torch.float16: tol(atol=8e-3, rtol=1.4e-3)}),
            "TestInductorOpInfo", "test_comprehensive", device_type="cpu"),
        DecorateInfo(
            toleranceOverride({torch.float32: tol(atol=3e-4, rtol=1e-4)}),
            "TestConsistency", "test_output_grad_match", device_type="mps"),
    ),
),
# Registry entry for 'cross'; out= is explicitly enabled.
OpInfo(
    'cross',
    sample_inputs_func=sample_inputs_cross,
    dtypes=all_types_and_complex_and(torch.half, torch.bfloat16),
    dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32),
    supports_out=True,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
),
# Registry entry for 'cumsum'.
OpInfo(
    'cumsum',
    sample_inputs_func=sample_inputs_cumulative_ops,
    dtypes=all_types_and_complex_and(torch.half, torch.bfloat16),
    dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32),
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
    skips=(
        # cumsum does not handle correctly out= dtypes
        DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out'),
    ),
),
# Registry entry for 'cumprod'.
OpInfo(
    'cumprod',
    sample_inputs_func=sample_inputs_cumprod,
    dtypes=all_types_and_complex_and(torch.float16, torch.bfloat16),
    dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
    # gradgradcheck fails in fast_mode=True: #56275
    gradcheck_fast_mode=False,
    skips=(
        # cumprod does not handle correctly out= dtypes
        DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out'),
    ),
),
# Registry entry for 'cummax'; the shared cumulative sampler is used with dtype kwargs disabled.
OpInfo(
    'cummax',
    sample_inputs_func=partial(sample_inputs_cumulative_ops, supports_dtype_kwargs=False),
    dtypes=all_types_and(torch.bool, torch.half, torch.bfloat16),
    gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
    skips=(),
),
# Registry entry for 'cummin'; the shared cumulative sampler is used with dtype kwargs disabled.
OpInfo(
    'cummin',
    sample_inputs_func=partial(sample_inputs_cumulative_ops, supports_dtype_kwargs=False),
    dtypes=all_types_and(torch.bool, torch.half, torch.bfloat16),
    gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
    skips=(),
),
# Registry entry for 'deg2rad', compared against np.radians.
UnaryUfuncInfo(
    'deg2rad',
    ref=np.radians,
    dtypes=all_types_and(torch.bool, torch.half, torch.bfloat16),
    promotes_int_to_float=True,
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
    # All five sparse-support flags are enabled.
    supports_sparse=True,
    supports_sparse_csr=True,
    supports_sparse_csc=True,
    supports_sparse_bsr=True,
    supports_sparse_bsc=True,
    decorators=(
        precisionOverride({torch.bfloat16: 7e-1, torch.float16: 7e-1}),
    ),
),
# Registry entry for 'diff'.
OpInfo(
    'diff',
    op=torch.diff,
    # np.diff has np._NoValue as default values for prepend and append, compare_with_reference breaks if prepend/append
    # are set as None when converting to numpy
    ref=lambda input, n=1, dim=-1, prepend=np._NoValue, append=np._NoValue: np.diff(
        input,
        n,
        dim,
        np._NoValue if prepend is None else prepend,
        np._NoValue if append is None else append,
    ),
    sample_inputs_func=sample_inputs_diff,
    error_inputs_func=error_inputs_diff,
    dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
    # Runs very slowly on slow gradcheck - alternatively reduce input sizes
    gradcheck_fast_mode=True,
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
    # See https://github.com/pytorch/pytorch/pull/78358
    check_batched_forward_grad=False,
    skips=(),
),
# Registry entry for 'div' (alias 'divide'), variant 'no_rounding_mode'.
BinaryUfuncInfo(
    'div',
    variant_test_name='no_rounding_mode',
    aliases=('divide',),
    dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
    dtypesIfCUDA=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf),
    dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32, torch.int8),
    # Right-hand-side tensors are generated with exclude_zero=True.
    rhs_make_tensor_kwargs=dict(exclude_zero=True),
    promotes_int_to_float=True,
    supports_two_python_scalars=True,
    assert_autodiffed=True,
    supports_fwgrad_bwgrad=True,
    supports_forward_ad=True,
    # Runs very slowly on slow gradcheck - alternatively reduce input sizes
    gradcheck_fast_mode=True,
),
# BinaryUfuncInfo entry for 'div' (alias 'divide'), variant 'trunc_rounding'.
# sample_kwargs returns a pair of dicts, both carrying rounding_mode='trunc'.
BinaryUfuncInfo('div',
aliases=('divide',),
variant_test_name='trunc_rounding',
dtypes=all_types_and(torch.half, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32, torch.int8),
sample_kwargs=lambda device, dtype, input:
({"rounding_mode": "trunc"}, {"rounding_mode": "trunc"}),
# https://github.com/pytorch/pytorch/issues/80411
gradcheck_fast_mode=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_two_python_scalars=True,
assert_autodiffed=True,
# exclude_zero=True is passed for the rhs tensor (presumably to keep zeros out of the divisor).
rhs_make_tensor_kwargs=dict(exclude_zero=True),
decorators=(
# See https://github.com/pytorch/pytorch/issues/111126
DecorateInfo(unittest.expectedFailure, 'TestBinaryUfuncs', 'test_type_promotion'),
),
skips=(
# RuntimeError: MALFORMED INPUT: Unhandled node kind (in computeValue): aten::div
DecorateInfo(unittest.expectedFailure, 'TestNNCOpInfo', 'test_working'),
# FIXME:
# torch.autograd.gradcheck.GradcheckError: Jacobian mismatch for
# output 0 with respect to input 1,
# numerical:tensor(-17746.9307, dtype=torch.float64)
# analytical:tensor(0., dtype=torch.float64)
# The skip below is limited to CPU and float64.
DecorateInfo(unittest.skip("Skipped!"), 'TestBwdGradients',
'test_fn_grad', device_type='cpu',
dtypes=(torch.float64,)),
)),
# BinaryUfuncInfo entry for 'div' (alias 'divide'), variant 'floor_rounding'.
# sample_kwargs returns a pair of dicts, both carrying rounding_mode='floor'.
BinaryUfuncInfo('div',
aliases=('divide',),
variant_test_name='floor_rounding',
dtypes=all_types_and(torch.half, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32, torch.int8),
sample_kwargs=lambda device, dtype, input:
({"rounding_mode": "floor"}, {"rounding_mode": "floor"}),
# https://github.com/pytorch/pytorch/issues/80411
gradcheck_fast_mode=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_two_python_scalars=True,
assert_autodiffed=True,
# exclude_zero=True is passed for the rhs tensor (presumably to keep zeros out of the divisor).
rhs_make_tensor_kwargs=dict(exclude_zero=True),
decorators=(
# See https://github.com/pytorch/pytorch/issues/111126
DecorateInfo(unittest.expectedFailure, 'TestBinaryUfuncs', 'test_type_promotion'),
),
skips=(
# RuntimeError: MALFORMED INPUT: Unhandled node kind (in computeValue): aten::div
DecorateInfo(unittest.expectedFailure, 'TestNNCOpInfo', 'test_working'),
# FIXME:
# torch.autograd.gradcheck.GradcheckError: Jacobian mismatch for
# output 0 with respect to input 1,
# numerical:tensor(-17746.9307, dtype=torch.float64)
# analytical:tensor(0., dtype=torch.float64)
# The skip below is limited to CPU and float64.
DecorateInfo(unittest.skip("Skipped!"), 'TestBwdGradients',
'test_fn_grad',
dtypes=(torch.float64,),
device_type='cpu'),
# Active only on the 'mps' device for float16 when MACOS_VERSION is below 14.0.
DecorateInfo(unittest.skip("Broken on MacOS13"),
'TestConsistency',
'test_output_match',
device_type='mps',
dtypes=(torch.float16,),
active_if=lambda _: MACOS_VERSION < 14.0),
)),
# BinaryUfuncInfo entry for 'true_divide'.
BinaryUfuncInfo('true_divide',
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
# The CUDA dtype list additionally includes torch.chalf.
dtypesIfCUDA=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf),
supports_forward_ad=True,
promotes_int_to_float=True,
supports_fwgrad_bwgrad=True,
supports_two_python_scalars=True,
# exclude_zero=True is passed for the rhs tensor (presumably to keep zeros out of the divisor).
rhs_make_tensor_kwargs=dict(exclude_zero=True)),
# OpInfo entry for 'equal'. Autograd and tracing support are both disabled.
OpInfo('equal',
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32, torch.int8, torch.bool),
# Reference: elementwise comparison reduced with .all().
ref=lambda input, other: (input == other).all(),
sample_inputs_func=sample_inputs_equal,
supports_autograd=False,
supports_tracing=False,
# No skips are registered for this entry.
skips=(
)),
# UnaryUfuncInfo entry for 'exp'.
# The reference is np.exp wrapped by np_unary_ufunc_integer_promotion_wrapper.
UnaryUfuncInfo('exp',
ref=np_unary_ufunc_integer_promotion_wrapper(np.exp),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
# The CUDA dtype list additionally includes torch.chalf.
dtypesIfCUDA=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
skips=(
# Reference: https://github.com/pytorch/pytorch/issues/48010
# Active only when IS_WINDOWS is true, on CPU, for cfloat/cdouble.
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble], active_if=IS_WINDOWS),
),
assert_autodiffed=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
promotes_int_to_float=True),
# OpInfo entry for 'expand'. The op is exercised through the Tensor method self.expand(shape).
OpInfo('expand',
op=lambda self, shape: self.expand(shape),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32),
sample_inputs_func=sample_inputs_expand,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
assert_jit_shape_analysis=True,
supports_out=False,
skips=(
# Marked as an expected failure (no reason is recorded in this entry).
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
)),
# OpInfo entry for 'expand_as'. The op is exercised through the Tensor method self.expand_as(other).
OpInfo('expand_as',
op=lambda self, other: self.expand_as(other),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32, torch.int8, torch.bool),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_expand_as,
supports_out=False,
skips=(
# Marked as an expected failure (no reason is recorded in this entry).
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),),
),
# OpInfo entry for 'expand_copy'. It uses sample_inputs_expand for its samples and sets supports_out=True.
OpInfo('expand_copy',
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
sample_inputs_func=sample_inputs_expand,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
assert_jit_shape_analysis=True,
supports_out=True,
skips=(
# Expected failure restricted to float32 (no reason is recorded in this entry).
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit', dtypes=(torch.float32,)),
)),
# OpInfo entry for 'diag', with np.diag as the reference implementation.
OpInfo('diag',
ref=np.diag,
dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.float16),
# The CUDA dtype list additionally includes torch.chalf.
dtypesIfCUDA=all_types_and_complex_and(torch.chalf, torch.bool, torch.half, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
check_batched_forward_grad=False,
sample_inputs_func=sample_inputs_diag,
error_inputs_func=error_inputs_diag),
# OpInfo entry for 'diag_embed'.
# Sample, reference and error inputs all come from the *_diagonal_diag_embed helper functions.
OpInfo('diag_embed',
dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.float16, torch.chalf),
supports_out=False,
# Runs very slowly on slow gradcheck - alternatively reduce input sizes
gradcheck_fast_mode=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_diagonal_diag_embed,
reference_inputs_func=reference_inputs_diagonal_diag_embed,
error_inputs_func=error_inputs_diagonal_diag_embed),
# OpInfo entry for 'diagonal'; the aten backward name is given explicitly as 'diagonal_backward'.
# Sample, reference and error inputs all come from the *_diagonal_diag_embed helper functions.
OpInfo('diagonal',
aten_backward_name='diagonal_backward',
dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.float16, torch.chalf),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_diagonal_diag_embed,
reference_inputs_func=reference_inputs_diagonal_diag_embed,
error_inputs_func=error_inputs_diagonal_diag_embed),
# OpInfo entry for 'diagonal_copy'.
# Sample, reference and error inputs all come from the *_diagonal_diag_embed helper functions.
OpInfo('diagonal_copy',
dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.float16, torch.chalf),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_diagonal_diag_embed,
reference_inputs_func=reference_inputs_diagonal_diag_embed,
error_inputs_func=error_inputs_diagonal_diag_embed),
# OpInfo entry for 'diagonal_scatter'. It declares no out= variant and supports forward/backward AD.
OpInfo('diagonal_scatter',
dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.float16),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_diagonal_scatter),
# OpInfo entry for 'alias_copy'. It sets supports_out=True and supports forward/backward AD.
OpInfo('alias_copy',
dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.float16, torch.chalf),
sample_inputs_func=sample_inputs_alias_copy,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_out=True),
# BinaryUfuncInfo entry for 'eq', with np.equal as the reference implementation.
# Declared as always returning bool, with autograd disabled.
BinaryUfuncInfo('eq',
ref=np.equal,
dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.float16, torch.chalf),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32, torch.int8, torch.bool),
always_returns_bool=True,
supports_autograd=False,
sample_inputs_func=sample_inputs_comparison_ops,
# No skips are registered for this entry.
skips=(
)),
# BinaryUfuncInfo entry for 'fmax' (op=torch.fmax). Python scalars on the rhs are declared unsupported.
BinaryUfuncInfo('fmax',
op=torch.fmax,
dtypes=all_types_and(torch.float16, torch.bfloat16, torch.bool),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32, torch.int8, torch.bool),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_rhs_python_scalar=False,
skips=(
# RuntimeError: "max_elementwise_cuda" not implemented for 'ComplexFloat'
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs', 'test_type_promotion'),
)),
# BinaryUfuncInfo entry for 'fmin' (op=torch.fmin). Python scalars on the rhs are declared unsupported.
BinaryUfuncInfo('fmin',
op=torch.fmin,
dtypes=all_types_and(torch.float16, torch.bfloat16, torch.bool),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32, torch.int8, torch.bool),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_rhs_python_scalar=False,
skips=(
# RuntimeError: "min_elementwise_cuda" not implemented for 'ComplexFloat'
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs', 'test_type_promotion'),
)),
# BinaryUfuncInfo entry for 'fmod', with np.fmod as the reference implementation.
BinaryUfuncInfo('fmod',
ref=np.fmod,
dtypes=all_types_and(torch.float16, torch.bfloat16),
# NOTE(review): dtypesIfCUDA is spelled out but lists the same dtypes as `dtypes` above.
dtypesIfCUDA=all_types_and(torch.float16, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32),
# https://github.com/pytorch/pytorch/issues/80411
gradcheck_fast_mode=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
assert_autodiffed=None,
# exclude_zero=True is passed for the rhs tensor (presumably to keep zeros out of the divisor).
rhs_make_tensor_kwargs={'exclude_zero': True},
decorators=(
# The first three skips apply to bfloat16 only; the fourth to uint8 only.
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs',
'test_contig_vs_every_other',
dtypes=(torch.bfloat16,)),
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs',
'test_non_contig',
dtypes=(torch.bfloat16,)),
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs',
'test_reference_numerics',
dtypes=(torch.bfloat16,)),
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs',
'test_reference_numerics_small_values',
dtypes=(torch.uint8,)),
# FIXME:
# torch.autograd.gradcheck.GradcheckError: Jacobian mismatch for
# output 0 with respect to input 1,
# numerical:tensor(101.6283, dtype=torch.float64)
# analytical:tensor(-18.3575, dtype=torch.float64)
DecorateInfo(unittest.skip("Skipped!"), 'TestBwdGradients',
'test_fn_grad',
dtypes=(torch.float64,),
device_type='cpu'),
)),
# BinaryUfuncInfo entry for 'remainder', with np.remainder as the reference implementation.
# The Python operator variants are operator.mod and operator.imod.
BinaryUfuncInfo('remainder',
ref=np.remainder,
dtypes=all_types_and(torch.float16, torch.bfloat16),
# NOTE(review): dtypesIfCUDA is spelled out but lists the same dtypes as `dtypes` above.
dtypesIfCUDA=all_types_and(torch.float16, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32, torch.int8, torch.bool),
# https://github.com/pytorch/pytorch/issues/80411
gradcheck_fast_mode=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
assert_autodiffed=None,
operator_variant=operator.mod,
inplace_operator_variant=operator.imod,
supports_one_python_scalar=True,
# exclude_zero=True is passed for the rhs tensor (presumably to keep zeros out of the divisor).
rhs_make_tensor_kwargs={'exclude_zero': True},
decorators=(
# The first three skips apply to bfloat16 only; the fourth to uint8 only.
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs',
'test_contig_vs_every_other',
dtypes=(torch.bfloat16,)),
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs',
'test_non_contig',
dtypes=(torch.bfloat16,)),
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs',
'test_reference_numerics',
dtypes=(torch.bfloat16,)),
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs',
'test_reference_numerics_small_values',
dtypes=(torch.uint8,)),
DecorateInfo(unittest.skip("Skipped!"), 'TestNNCOpInfo',
'test_nnc_correctness',
dtypes=(torch.bfloat16,)),
# Fails on XLA
# False is not true : Tensors failed to compare as equal!
# Attempted to compare equality of tensors with different dtypes
# No test name is given, so this is keyed on the 'TestOpInfo' class, device 'xla' and dtype long only.
DecorateInfo(unittest.skip("Skipped!"), 'TestOpInfo', device_type='xla', dtypes=(torch.long,)),
# FIXME:
# torch.autograd.gradcheck.GradcheckError: Jacobian mismatch for
# output 0 with respect to input 1,
# numerical:tensor(102.4676, dtype=torch.float64)
# analytical:tensor(-17.5182, dtype=torch.float64)
DecorateInfo(unittest.skip("Skipped!"), 'TestBwdGradients',
'test_fn_grad', device_type='cpu',
dtypes=(torch.float64,)),
# Tolerance override (not a skip): float16 on CUDA for TestInductorOpInfo.test_comprehensive.
DecorateInfo(
toleranceOverride({
torch.float16: tol(atol=5e-4, rtol=3e-3),
}),
"TestInductorOpInfo",
"test_comprehensive",
device_type="cuda"
),
# Active only on the 'mps' device for float16 when MACOS_VERSION is below 14.0.
DecorateInfo(unittest.skip("Broken on MacOS13"),
'TestConsistency',
'test_output_match',
device_type='mps',
dtypes=(torch.float16,),
active_if=lambda _: MACOS_VERSION < 14.0),
)),
# UnaryUfuncInfo entry for 'frac'.
UnaryUfuncInfo('frac',
# Reference: the first element of the pair returned by np.modf.
ref=lambda x: np.modf(x)[0],
dtypes=floating_types_and(torch.bfloat16, torch.float16),
dtypesIfCUDA=floating_types_and(torch.float16, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
assert_autodiffed=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# All five supports_sparse* flags are enabled.
supports_sparse=True,
supports_sparse_csr=True,
supports_sparse_csc=True,
supports_sparse_bsr=True,
supports_sparse_bsc=True,
skips=(
# Skipped for the four listed floating dtypes (no reason is recorded in this entry).
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
dtypes=(torch.bfloat16, torch.float16, torch.float32, torch.float64)),
# 76047 (presumably an issue number - TODO confirm)
DecorateInfo(unittest.expectedFailure, 'TestNNCOpInfo', 'test_nnc_correctness',
dtypes=(torch.bfloat16, torch.float32, torch.float64)),
)),
# OpInfo entry for 'stft'.
# All three batched-gradient checks are disabled and there is no out= variant.
OpInfo('stft',
decorators=[
skipCPUIfNoFFT,
DecorateInfo(unittest.skip("Skipped! stft does not match the native function"),
'TestJit', 'test_variant_consistency_jit'),
],
dtypes=floating_and_complex_types(),
sample_inputs_func=sample_inputs_stft,
# Runs very slowly on slow gradcheck - alternatively reduce input sizes
gradcheck_fast_mode=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
check_batched_forward_grad=False,
check_batched_grad=False,
check_batched_gradgrad=False,
supports_out=False,
# Gradcheck uses the shared GRADCHECK_NONDET_TOL tolerance.
gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
),
# OpInfo entry for 'istft', registered for complex dtypes only.
# All three batched-gradient checks are disabled and there is no out= variant.
OpInfo('istft',
dtypes=complex_types(),
sample_inputs_func=sample_inputs_istft,
# Runs very slowly on slow gradcheck - alternatively reduce input sizes
gradcheck_fast_mode=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
check_batched_forward_grad=False,
check_batched_grad=False,
check_batched_gradgrad=False,
supports_out=False,
decorators=(
DecorateInfo(unittest.skip("Skipped! istft does not match the native function"),
'TestJit', 'test_variant_consistency_jit'),
),
skips=(
# NOTE(review): skipCPUIfNoFFT is passed through `skips` here, whereas the 'stft' entry
# passes it through `decorators` - confirm the two are handled equivalently.
skipCPUIfNoFFT,
# gradcheck fails on ROCm (gh-68429)
# grad is computed improperly (probably for weights tensor)
DecorateInfo(unittest.expectedFailure, 'TestBwdGradients', 'test_fn_grad'),
# Pre-existing condition (calls .item); needs to be fixed
DecorateInfo(unittest.expectedFailure, 'TestCompositeCompliance', 'test_backward'),
)),
# UnaryUfuncInfo entry for 'floor', with np.floor as the reference implementation.
UnaryUfuncInfo('floor',
ref=np.floor,
dtypes=all_types_and(torch.half, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32, torch.int8),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
skips=(
# Expected failure for every integral dtype except uint8.
DecorateInfo(unittest.expectedFailure,
'TestNNCOpInfo',
'test_nnc_correctness',
dtypes=tuple(t for t in integral_types() if t != torch.uint8)),
),
# All five supports_sparse* flags are enabled.
supports_sparse=True,
supports_sparse_csr=True,
supports_sparse_csc=True,
supports_sparse_bsr=True,
supports_sparse_bsc=True,
assert_autodiffed=True),
# OpInfo entry for 'flip' (op=torch.flip). It declares no out= variant and supports forward/backward AD.
OpInfo('flip',
op=torch.flip,
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32, torch.int8, torch.bool),
sample_inputs_func=sample_inputs_flip,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_out=False),
# OpInfo entry for 'fliplr' (op=torch.fliplr).
# It uses sample_inputs_fliplr_flipud for samples and its own error_inputs_fliplr.
OpInfo('fliplr',
op=torch.fliplr,
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
sample_inputs_func=sample_inputs_fliplr_flipud,
error_inputs_func=error_inputs_fliplr,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_out=False),
# OpInfo entry for 'flipud' (op=torch.flipud).
# It uses sample_inputs_fliplr_flipud for samples and its own error_inputs_flipud.
OpInfo('flipud',
op=torch.flipud,
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
sample_inputs_func=sample_inputs_fliplr_flipud,
error_inputs_func=error_inputs_flipud,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_out=False),
# OpInfo entry for 'sparse.sampled_addmm', registered for floating and complex dtypes.
# Most generic OpInfo tests are skipped; the comment above each skip records the observed failure.
OpInfo('sparse.sampled_addmm',
dtypes=floating_and_complex_types(),
supports_autograd=True,
sample_inputs_func=sample_inputs_sparse_sampled_addmm,
decorators=[
# Skipped on CUDA unless the CUDA version is >= (11, 3) or the ROCm version is >= (5, 2).
skipCUDAIf(not ((_get_torch_cuda_version() >= (11, 3))
or (_get_torch_rocm_version() >= (5, 2))),
"cusparseSDDMM was added in 11.2.1"),
skipCPUIfNoMklSparse, ],
skips=(
# NotImplementedError: Tensors of type SparseCsrTensorImpl do not have is_contiguous
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_noncontiguous_samples'),
# RuntimeError: Sparse CSR tensors do not have strides.
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_out'),
DecorateInfo(unittest.skip("Skipped!"), 'TestTags', 'test_tags'),
# RuntimeError: sampled_addmm: Expected result to have sparse csr layout, but got Strided
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_out_warning'),
# RuntimeError: Sparse CSR tensors do not have strides
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_variant_consistency_eager'),
# RuntimeError: Sparse CSR tensors do not have strides
DecorateInfo(unittest.skip("Skipped!"), 'TestCompositeCompliance', 'test_operator'),
# RuntimeError: Sparse CSR tensors do not have strides
DecorateInfo(unittest.skip("Skipped!"), 'TestCompositeCompliance', 'test_backward'),
# RuntimeError: Sparse CSR tensors do not have strides
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_conj_view'),
# RuntimeError: Sparse CSR tensors do not have strides
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_neg_conj_view'),
# RuntimeError: Sparse CSR tensors do not have strides
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_neg_view'),
# This skip was previously registered twice, once per failure reason; a single entry covers both.
# RuntimeError: Sparse CSR tensors do not have strides
# RuntimeError: unsupported memory format option Preserve
DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit'),
# RuntimeError: sparse_mask does not support automatic differentiation for outputs with complex dtype
# RuntimeError: Sparse CSR tensors do not have strides
DecorateInfo(unittest.skip("Skipped!"), 'TestFwdGradients', 'test_fn_fwgrad_bwgrad'),
# ValueError: Sparse output is not supported at gradcheck yet. Please call to_dense(masked_grad=...) ...
DecorateInfo(unittest.skip("Skipped!"), 'TestBwdGradients', 'test_fn_grad'),
# RuntimeError: sparse_mask does not support automatic differentiation for outputs with complex dtype.
# RuntimeError: Sparse CSR tensors do not have is_contiguous
DecorateInfo(unittest.skip("Skipped!"), 'TestBwdGradients', 'test_fn_gradgrad'),
# ValueError: Sparse output is not supported at gradcheck yet. Please call to_dense(masked_grad=...) ...
DecorateInfo(unittest.skip("Skipped!"), 'TestFwdGradients', 'test_forward_mode_AD'),
# NotImplementedError: Could not run 'aten::sparse_sampled_addmm' with arguments from the 'SparseCsrMeta' backend.
DecorateInfo(unittest.skip("Skipped!"), 'TestMeta', 'test_dispatch_meta_outplace'),
DecorateInfo(unittest.skip("Skipped!"), 'TestMeta', 'test_dispatch_symbolic_meta_outplace'),
DecorateInfo(unittest.skip("Skipped!"), 'TestMeta', 'test_meta_outplace'),
DecorateInfo(unittest.skip("Skipped!"), 'TestMeta', 'test_dispatch_symbolic_meta_outplace_all_strides'),
DecorateInfo(unittest.skip("Skipped!"), 'TestFakeTensor', 'test_fake_crossref_backward_no_amp'),
)),
# OpInfo entry for 'sparse.mm', variant 'reduce'. It is restricted to CPU via the onlyCPU decorator.
# Gradgrad, forward AD and out= support are all declared False.
# Most generic OpInfo tests are skipped; the comment above each skip records the observed failure.
OpInfo('sparse.mm',
dtypes=floating_types_and(torch.bfloat16, torch.float16),
variant_test_name='reduce',
supports_autograd=True,
supports_out=False,
supports_gradgrad=False,
supports_forward_ad=False,
sample_inputs_func=sample_inputs_sparse_mm_reduce,
decorators=[onlyCPU],
skips=(
# NotImplementedError: Tensors of type SparseCsrTensorImpl do not have is_contiguous
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_noncontiguous_samples'),
# RuntimeError: Sparse CSR tensors do not have strides.
DecorateInfo(unittest.skip("Skipped!"), 'TestTags', 'test_tags'),
# RuntimeError: Sparse CSR tensors do not have strides
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_variant_consistency_eager'),
# RuntimeError: Sparse CSR tensors do not have strides
DecorateInfo(unittest.skip("Skipped!"), 'TestCompositeCompliance', 'test_operator'),
# RuntimeError: Sparse CSR tensors do not have strides
DecorateInfo(unittest.skip("Skipped!"), 'TestCompositeCompliance', 'test_backward'),
# RuntimeError: Sparse CSR tensors do not have strides
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_conj_view'),
# RuntimeError: Sparse CSR tensors do not have strides
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_neg_conj_view'),
# RuntimeError: Sparse CSR tensors do not have strides
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_neg_view'),
# This skip was previously registered twice, once per failure reason; a single entry covers both.
# RuntimeError: Sparse CSR tensors do not have strides
# RuntimeError: unsupported memory format option Preserve
DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit'),
# ValueError: Sparse output is not supported at gradcheck yet. Please call to_dense(masked_grad=...) ...
DecorateInfo(unittest.skip("Skipped!"), 'TestFwdGradients', 'test_fn_fwgrad_bwgrad'),
# RuntimeError: Sparse CSR tensors do not have is_contiguous
DecorateInfo(unittest.skip("Skipped!"), 'TestBwdGradients', 'test_fn_grad'),
# ValueError: Sparse output is not supported at gradcheck yet. Please call to_dense(masked_grad=...) ...
DecorateInfo(unittest.skip("Skipped!"), 'TestBwdGradients', 'test_fn_gradgrad'),
# RuntimeError: Sparse CSR tensors do not have strides
DecorateInfo(unittest.skip("Skipped!"), 'TestFwdGradients', 'test_forward_mode_AD'),
# ValueError: Sparse output is not supported at gradcheck yet. Please call to_dense(masked_grad=...) ...
DecorateInfo(unittest.skip("Skipped!"), 'TestBwdGradients', 'test_fn_fail_gradgrad'),
# NotImplementedError: Could not run 'aten::_sparse_mm_reduce_impl' with arguments from the 'SparseCsrMeta' backend
DecorateInfo(unittest.skip("Skipped!"), 'TestMeta', 'test_dispatch_meta_outplace'),
DecorateInfo(unittest.skip("Skipped!"), 'TestMeta', 'test_dispatch_symbolic_meta_outplace'),
DecorateInfo(unittest.skip("Skipped!"), 'TestMeta', 'test_meta_outplace'),
)),
# UnaryUfuncInfo entry for 'i0' (alias 'special.i0').
UnaryUfuncInfo('i0',
# The reference is scipy.special.i0 (wrapped for integer promotion) when TEST_SCIPY is true, otherwise None.
ref=np_unary_ufunc_integer_promotion_wrapper(
scipy.special.i0) if TEST_SCIPY else None,
aliases=('special.i0',),
# Loosened precision for the two 16-bit floating dtypes.
decorators=(precisionOverride({torch.bfloat16: 3e-1,
torch.float16: 5e-1}),),
dtypes=all_types_and(torch.bool, torch.half, torch.bfloat16),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
promotes_int_to_float=True,
sample_inputs_func=sample_inputs_i0_i1,
skips=(
# Skipped for int8 only (no reason is recorded in this entry).
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large',
dtypes=(torch.int8,)),
)),
# BinaryUfuncInfo entry for 'floor_divide', with _floor_divide_np as the reference. Autograd is disabled.
BinaryUfuncInfo('floor_divide',
ref=_floor_divide_np,
dtypes=all_types_and(torch.half, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32, torch.int8, torch.bool),
supports_autograd=False,
# exclude_zero=True is passed for the rhs tensor (presumably to keep zeros out of the divisor).
rhs_make_tensor_kwargs=dict(exclude_zero=True),
supports_two_python_scalars=True,
skips=(
# AssertionError: Results of original model and exported/imported version of model differed
DecorateInfo(unittest.skip('Skipped!'), 'TestJit', 'test_variant_consistency_jit'),
# bfloat16 floor_divide compared with a float32 reference works inconsistently
# No test name is given, so this is keyed on the 'TestBinaryUfuncs' class and bfloat16 only.
DecorateInfo(unittest.skip('Skipped!'), 'TestBinaryUfuncs',
dtypes=(torch.bfloat16,)),
# int8 floor divide has different results for -128 // -1 vs. NumPy
DecorateInfo(unittest.skip('Skipped!'), 'TestBinaryUfuncs', 'test_reference_numerics_small_values',
dtypes=(torch.int8,)),
# The following test fails on some jobs
DecorateInfo(unittest.skip('Skipped!'), 'TestBinaryUfuncs', 'test_reference_numerics_extremal_values',
dtypes=(torch.float16,)),
# Tolerance override (not a skip) for float16 in test_reference_numerics.
DecorateInfo(toleranceOverride({torch.float16: tol(atol=1e-3, rtol=5e-3)}),
'TestBinaryUfuncs', 'test_reference_numerics'),
)),
# UnaryUfuncInfo entry for 'frexp' (op=torch.frexp), with np.frexp as the reference implementation.
UnaryUfuncInfo('frexp',
op=torch.frexp,
ref=np.frexp,
dtypes=floating_types_and(torch.half, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
# skip testing torch.frexp as it is not supported by ROCm platform yet
# NOTE(review): the decorators list below is empty, so no ROCm skip is actually applied here;
# the comment above looks stale - confirm and remove.
decorators=[],
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
skips=(
# skips the tests below as torch.frexp returns a tuple-like (mantissa, exponent) as outputs,
# while these tests currently require the output to be a single tensor.
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_batch_vs_slicing'),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_contig_vs_every_other'),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_contig_vs_transposed'),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_non_contig_expand'),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_variant_consistency'),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_out_arg_all_dtypes'),
# skips test_reference_numerics due to an error in Windows CI.
# np.frexp returns the exponent as np.intc dtype on the Windows platform,
# and np.intc does not have a corresponding torch dtype
# The three skips below are active only when IS_WINDOWS is true.
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_small',
active_if=IS_WINDOWS),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large',
active_if=IS_WINDOWS),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
active_if=IS_WINDOWS),
)),
# UnaryUfuncInfo entry for 'log1p' (alias 'special.log1p'), with np.log1p as the reference implementation.
UnaryUfuncInfo('log1p',
ref=np.log1p,
aliases=('special.log1p',),
# Domain is declared as (-1, None), i.e. no upper bound is given.
domain=(-1, None),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
# Loosened precision (1e-1) for bfloat16.
decorators=(precisionOverride({torch.bfloat16: 1e-1}),),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# All five supports_sparse* flags are enabled.
supports_sparse=True,
supports_sparse_csr=True,
supports_sparse_csc=True,
supports_sparse_bsr=True,
supports_sparse_bsc=True,
assert_autodiffed=True,
promotes_int_to_float=True),
# BinaryUfuncInfo entry for 'ge' (alias 'greater_equal'), with np.greater_equal as the reference.
# Declared as always returning bool, with autograd disabled.
BinaryUfuncInfo('ge',
ref=np.greater_equal,
aliases=('greater_equal',),
dtypes=all_types_and(torch.bool, torch.bfloat16, torch.float16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32),
always_returns_bool=True,
supports_autograd=False,
# No skips are registered for this entry.
skips=(
)),
# OpInfo entry for 'geqrf', registered for floating and complex dtypes with autograd disabled.
# Sample inputs are shared via sample_inputs_linalg_qr_geqrf.
OpInfo('geqrf',
dtypes=floating_and_complex_types(),
sample_inputs_func=sample_inputs_linalg_qr_geqrf,
decorators=[skipCUDAIfNoMagmaAndNoCusolver, skipCPUIfNoLapack],
supports_autograd=False,
skips=(
# FIXME: geqrf can't forward with complex inputs that require grad
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_dtypes'),
# Strides are not the same!
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out'),
)),
# BinaryUfuncInfo entry for 'gt' (alias 'greater'), with np.greater as the reference.
# Declared as always returning bool, with autograd disabled.
BinaryUfuncInfo('gt',
ref=np.greater,
aliases=('greater',),
dtypes=all_types_and(torch.bool, torch.bfloat16, torch.float16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32, torch.int8),
always_returns_bool=True,
supports_autograd=False,
# No skips are registered for this entry.
skips=(
)),
# UnaryUfuncInfo entry for 'imag', with np.imag as the reference implementation.
# Registered for complex dtypes plus torch.chalf; there is no out= variant.
UnaryUfuncInfo('imag',
ref=np.imag,
dtypes=complex_types_and(torch.chalf),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# See https://github.com/pytorch/pytorch/issues/66357
# RuntimeError: view_as_real doesn't work on unresolved conjugated tensors.
check_batched_forward_grad=False,
skips=(
# Skip since real and imag don't have out variants.
DecorateInfo(unittest.expectedFailure, 'TestUnaryUfuncs', 'test_out_arg_all_dtypes'),
)),
# OpInfo entry for 'gradient'.
# dtypes are floating and complex types plus int8/int16/int32/int64, bfloat16 and half.
OpInfo('gradient',
dtypes=floating_and_complex_types_and(torch.int8, torch.int16,
torch.int32, torch.int64,
torch.bfloat16, torch.half),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# See https://github.com/pytorch/pytorch/pull/78358
check_batched_forward_grad=False,
skips=(
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
# following tests give a runtime error with undefined value tensor
# see discussion : https://github.com/pytorch/pytorch/issues/56660
# RuntimeError:
# Arguments for call are not valid.
DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit', dtypes=(torch.float32, torch.complex64)), # noqa: B950
DecorateInfo(unittest.skip("Skipped!"), 'TestNNCOpInfo', 'test_nnc_correctness'),
# No test name is given, so this is keyed on the 'TestCudaFuserOpInfo' class only.
DecorateInfo(unittest.skip("Skipped!"), 'TestCudaFuserOpInfo'),
),
supports_inplace_autograd=False,
sample_inputs_func=sample_inputs_gradient,
error_inputs_func=error_inputs_gradient),
# OpInfo entry for 'isin'. Autograd is disabled.
OpInfo('isin',
dtypes=all_types_and(torch.bfloat16, torch.half),
supports_autograd=False,
sample_inputs_func=sample_inputs_isin),
# OpInfo entry for 'kthvalue', with dedicated sample-input and error-input generators.
OpInfo('kthvalue',
dtypes=all_types_and(torch.bfloat16, torch.float16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_kthvalue,
error_inputs_func=error_inputs_kthvalue),
# BinaryUfuncInfo entry for 'le' (alias 'less_equal'), with np.less_equal as the reference.
# Declared as always returning bool, with autograd disabled.
BinaryUfuncInfo('le',
ref=np.less_equal,
aliases=('less_equal',),
dtypes=all_types_and(torch.bool, torch.bfloat16, torch.float16),
always_returns_bool=True,
supports_autograd=False,
# No skips are registered for this entry.
skips=(
)),
# OpInfo entry for 'linspace' (default variant).
# Flagged as a factory function, with out= support and autograd disabled.
OpInfo('linspace',
dtypes=all_types_and_complex_and(torch.bfloat16, torch.float16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32),
is_factory_function=True,
supports_out=True,
supports_autograd=False,
error_inputs_func=error_inputs_linspace,
sample_inputs_func=sample_inputs_linspace,
skips=(
# FX failed to normalize op - add the op to the op_skip list.
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
# Tests that assume input is a tensor or sequence of tensors
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_variant_consistency_eager'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_conj_view'),
# Same failure as arange: cannot find linspace in captured graph
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit', dtypes=(torch.float32,)),
# UserWarning not triggered : Resized a non-empty tensor but did not warn about it.
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning'),
# UserWarning: CUDA caching allocator reports a memory leak not verified by the driver API
# in __main__.TestJitCUDA.test_variant_consistency_jit_logspace_cuda_complex64!
# Caching allocator allocated memory was 0 and is now reported as 307200 on device 0.
# CUDA driver allocated memory was 1254555648 and is now 1242955776.
# NOTE(review): the quoted warning names a logspace test; presumably copied from that entry - confirm.
DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit',
dtypes=(torch.cfloat,), device_type="cuda"),
)),
# OpInfo entry for 'linspace', variant 'tensor_overload'.
# Same flags as the default variant but samples come from sample_inputs_linspace_tensor_overload.
OpInfo('linspace',
dtypes=all_types_and_complex_and(torch.bfloat16, torch.float16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32),
is_factory_function=True,
supports_out=True,
supports_autograd=False,
error_inputs_func=error_inputs_linspace,
sample_inputs_func=sample_inputs_linspace_tensor_overload,
variant_test_name="tensor_overload",
skips=(
# FX failed to normalize op - add the op to the op_skip list.
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
# TypeError: 'int' object is not subscriptable
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_variant_consistency_eager'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_conj_view'),
# Same failure as arange: cannot find linspace in captured graph
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit', dtypes=(torch.float32,)),
# UserWarning not triggered : Resized a non-empty tensor but did not warn about it.
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning'),
# UserWarning: CUDA caching allocator reports a memory leak not verified by the driver API
# in __main__.TestJitCUDA.test_variant_consistency_jit_logspace_cuda_complex64!
# Caching allocator allocated memory was 0 and is now reported as 307200 on device 0.
# CUDA driver allocated memory was 1254555648 and is now 1242955776.
# NOTE(review): the quoted warning names a logspace test; presumably copied from that entry - confirm.
DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit',
dtypes=(torch.cfloat,), device_type="cuda"),
)),
# OpInfo entry for 'logspace' (default variant).
# Flagged as a factory function, with out= support and autograd disabled.
OpInfo('logspace',
dtypes=all_types_and_complex_and(torch.half, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
is_factory_function=True,
supports_out=True,
supports_autograd=False,
# Error inputs reuse error_inputs_linspace.
error_inputs_func=error_inputs_linspace,
sample_inputs_func=sample_inputs_logspace,
skips=(
# FX failed to normalize op - add the op to the op_skip list.
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
# Tests that assume input is a tensor or sequence of tensors
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_variant_consistency_eager'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_conj_view'),
# Same failure as arange: cannot find linspace in captured graph
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit', dtypes=(torch.float32,)),
# UserWarning not triggered : Resized a non-empty tensor but did not warn about it.
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning'),
# Off-by-one issue when casting floats to ints
# Both expected failures below are limited to CUDA and int16/int32/int64.
DecorateInfo(unittest.expectedFailure, 'TestDecomp', 'test_quick',
dtypes=(torch.int16, torch.int32, torch.int64), device_type="cuda"),
DecorateInfo(unittest.expectedFailure, 'TestDecomp', 'test_comprehensive',
dtypes=(torch.int16, torch.int32, torch.int64), device_type="cuda"),
# UserWarning: CUDA caching allocator reports a memory leak not verified by the driver API
# in __main__.TestJitCUDA.test_variant_consistency_jit_logspace_cuda_complex64!
# Caching allocator allocated memory was 0 and is now reported as 307200 on device 0.
# CUDA driver allocated memory was 1254555648 and is now 1242955776.
DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit',
dtypes=(torch.cfloat,), device_type="cuda"),
)),
# OpInfo for the `tensor_overload` variant of `logspace`: same dtypes and flags as
# the default `logspace` entry, but samples come from
# sample_inputs_logspace_tensor_overload.
OpInfo('logspace',
dtypes=all_types_and_complex_and(torch.half, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
is_factory_function=True,
supports_out=True,
supports_autograd=False,
error_inputs_func=error_inputs_linspace,
sample_inputs_func=sample_inputs_logspace_tensor_overload,
variant_test_name="tensor_overload",
skips=(
# FX failed to normalize op - add the op to the op_skip list.
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
# TypeError: 'int' object is not subscriptable
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_variant_consistency_eager'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_conj_view'),
# Same failure as arange: cannot find linspace in captured graph
# NOTE(review): "linspace" looks copy-pasted from the linspace entries; presumably
# the op missing from the captured graph here is logspace -- confirm.
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit', dtypes=(torch.float32,)),
# UserWarning not triggered : Resized a non-empty tensor but did not warn about it.
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning'),
# Off-by-one issue when casting floats to ints
# (both TestDecomp expected failures below are limited to CUDA and int16/int32/int64)
DecorateInfo(unittest.expectedFailure, 'TestDecomp', 'test_quick',
dtypes=(torch.int16, torch.int32, torch.int64), device_type="cuda"),
DecorateInfo(unittest.expectedFailure, 'TestDecomp', 'test_comprehensive',
dtypes=(torch.int16, torch.int32, torch.int64), device_type="cuda"),
# UserWarning: CUDA caching allocator reports a memory leak not verified by the driver API
# in __main__.TestJitCUDA.test_variant_consistency_jit_logspace_cuda_complex64!
# Caching allocator allocated memory was 0 and is now reported as 307200 on device 0.
# CUDA driver allocated memory was 1254555648 and is now 1242955776.
DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit',
dtypes=(torch.cfloat,), device_type="cuda"),
)),
# UnaryUfuncInfo for `log`, compared against np.log on the domain (0, None).
# The CUDA dtype lists (forward and backward) additionally include torch.chalf.
UnaryUfuncInfo('log',
ref=np.log,
domain=(0, None),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
dtypesIfCUDA=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf),
backward_dtypesIfCUDA=floating_and_complex_types_and(torch.half, torch.bfloat16, torch.chalf),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
assert_autodiffed=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
promotes_int_to_float=True,
decorators=(precisionOverride({torch.bfloat16: 5e-2}),),
skips=(
# Extremal-value reference test is skipped only for complex dtypes on CPU under Windows.
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble],
active_if=IS_WINDOWS),
),
# log(z)->-inf for |z|->0
# The filter condition selects inputs with |x| < 0.1; safe_val is 1.
reference_numerics_filter=NumericsFilter(condition=lambda x: torch.abs(x) < 0.1, safe_val=1)),
# UnaryUfuncInfo for `log10`, compared against np.log10 on the domain (0, None).
UnaryUfuncInfo('log10',
ref=np.log10,
domain=(0, None),
decorators=(precisionOverride({torch.bfloat16: 5e-2}),),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
assert_autodiffed=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
promotes_int_to_float=True,
skips=(
# Extremal-value reference test is skipped only for complex dtypes on CPU under Windows.
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble],
active_if=IS_WINDOWS),
),
# log10(z)->-inf for |z|->0
# The filter condition selects inputs with |x| < 0.1; safe_val is 1.
reference_numerics_filter=NumericsFilter(condition=lambda x: torch.abs(x) < 0.1, safe_val=1)),
# UnaryUfuncInfo for `log2`, compared against np.log2 on the domain (0, None).
# The bfloat16 precision override (1e-1) is looser than the 5e-2 used by the
# `log` and `log10` entries.
UnaryUfuncInfo('log2',
ref=np.log2,
domain=(0, None),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
assert_autodiffed=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
promotes_int_to_float=True,
decorators=(precisionOverride({torch.bfloat16: 1e-1}),),
skips=(
# Unlike the `log`/`log10` entries, this skip carries no device_type or active_if
# restriction: it applies to complex dtypes everywhere.
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
dtypes=[torch.cfloat, torch.cdouble]),
),
# log2(z)->-inf for |z|->0
# The filter condition selects inputs with |x| < 0.1; safe_val is 1.
reference_numerics_filter=NumericsFilter(condition=lambda x: torch.abs(x) < 0.1, safe_val=1)),
# BinaryUfuncInfo for `ldexp`. No NumPy `ref` is supplied. Forward-mode AD and
# forward-over-backward are enabled, in-place autograd is disabled, and a Python
# scalar on the right-hand side is not supported.
BinaryUfuncInfo('ldexp',
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
# Runs very slowly on slow gradcheck - alternatively reduce input sizes
gradcheck_fast_mode=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_inplace_autograd=False,
promotes_int_to_float=True,
supports_out=True,
supports_rhs_python_scalar=False,
skips=(
# RuntimeError: mul(): functions with out=... arguments don't support
# automatic differentiation, but one of the arguments requires grad
# https://github.com/pytorch/pytorch/issues/68966
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_variant_consistency_eager'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_conj_view'),
),
decorators=[
# complex64 tolerance override for every TestCommon test on CPU
# (no test name is given, only the test class).
DecorateInfo(
toleranceOverride({
torch.complex64: tol(atol=1e-05, rtol=1e-05)
}),
'TestCommon', device_type='cpu',
),
], ),
# BinaryUfuncInfo for `logaddexp`. The default dtype list includes complex types,
# while the CUDA dtype list is floating-point only.
BinaryUfuncInfo('logaddexp',
dtypes=floating_and_complex_types_and(torch.bfloat16, torch.float16),
dtypesIfCUDA=floating_types_and(torch.bfloat16, torch.float16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_rhs_python_scalar=False,
skips=(
# TODO: FIXME: RuntimeError: not implemented for 'ComplexFloat'
DecorateInfo(unittest.expectedFailure, 'TestBinaryUfuncs', 'test_type_promotion', device_type='cuda'),
)),
# OpInfo for `logaddexp2` (registered as a plain OpInfo, not a BinaryUfuncInfo).
# Floating-point dtypes only; samples come from sample_inputs_logaddexp.
OpInfo('logaddexp2',
dtypes=floating_types_and(torch.bfloat16, torch.half),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_logaddexp),
# UnaryUfuncInfo for `logical_not`, compared against np.logical_not; autograd is
# disabled. Variant-consistency tests are skipped for every non-bool dtype because
# the function and in-place variants return different dtypes (see the comment in
# `skips`).
UnaryUfuncInfo('logical_not',
ref=np.logical_not,
decorators=(precisionOverride({torch.bfloat16: 7e-1,
torch.float16: 5e-1}),),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int8, torch.bool),
supports_autograd=False,
skips=(
# The function variant always returns BoolTensor
# while the inplace variant preserves the input dtype.
# >>> t = torch.randn(3)
# >>> torch.logical_not(t)
# tensor([False, False, False])
# >>> torch.logical_not(t).dtype
# torch.bool
# >>> t.logical_not_().dtype
# torch.float32
# Note: torch.bool is not in the skipped dtype lists below.
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_variant_consistency',
dtypes=all_types_and_complex_and(torch.half, torch.bfloat16)),
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_variant_consistency_eager',
dtypes=all_types_and_complex_and(torch.half, torch.bfloat16)),
)),
# BinaryUfuncInfo for `lt` (alias `less`), compared against np.less. Declared as
# always returning bool, with autograd disabled. `skips` is currently empty.
BinaryUfuncInfo('lt',
ref=np.less,
aliases=('less',),
dtypes=all_types_and(torch.bool, torch.bfloat16, torch.float16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int8, torch.int32),
always_returns_bool=True,
supports_autograd=False,
skips=(
)),
# OpInfo for `lu_unpack`, bound explicitly to torch.lu_unpack. Floating and
# complex dtypes; samples come from sample_inputs_lu_unpack.
OpInfo('lu_unpack',
op=torch.lu_unpack,
dtypes=floating_and_complex_types(),
# Runs very slowly on slow gradcheck - alternatively reduce input sizes
gradcheck_fast_mode=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# Note: a bare decorator (not a DecorateInfo) is passed through `skips` here.
skips=(skipCPUIfNoLapack,),
sample_inputs_func=sample_inputs_lu_unpack),
# OpInfo for `lu`, bound explicitly to torch.lu. Floating and complex dtypes;
# decorated with skipCUDAIfNoMagmaAndNoCusolver and skipCPUIfNoLapack.
OpInfo('lu',
op=torch.lu,
dtypes=floating_and_complex_types(),
# Runs very slowly on slow gradcheck - alternatively reduce input sizes
gradcheck_fast_mode=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# https://github.com/pytorch/pytorch/issues/66357
check_batched_forward_grad=False,
sample_inputs_func=sample_inputs_lu,
decorators=[skipCUDAIfNoMagmaAndNoCusolver, skipCPUIfNoLapack],
skips=(
# we skip jit tests because `lu` is a torch function
# RuntimeError:
# 'Tensor (inferred)' object has no attribute or method 'lu'.:
# File "<string>", line 3
# def the_method(i0):
# return i0.lu(True, True)
# ~~~~~ <--- HERE
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
# RuntimeError not raised: Expected RuntimeError when calling with input.device=cpu and out.device=cuda
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out'),
# UserWarning not triggered : Resized a non-empty tensor but did not warn about it.
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning'),
)),
# OpInfo for `lu_solve`, bound explicitly to torch.lu_solve. Floating and complex
# dtypes; decorated with skipCPUIfNoLapack and skipCUDAIfNoMagmaAndNoCusolver.
OpInfo('lu_solve',
op=torch.lu_solve,
dtypes=floating_and_complex_types(),
supports_forward_ad=True,
# See https://github.com/pytorch/pytorch/issues/66357
check_batched_forward_grad=False,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_lu_solve,
skips=(
# The three skips below apply only to float32 on the 'mps' device type.
# NOTE(review): no reason is recorded for them in this file.
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_out',
device_type='mps', dtypes=[torch.float32]),
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_variant_consistency_eager',
device_type='mps', dtypes=[torch.float32]),
DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit',
device_type='mps', dtypes=[torch.float32]),
DecorateInfo(unittest.skip("Tests different backward paths"),
"TestCommon", "test_floating_inputs_are_differentiable"),),
decorators=[skipCPUIfNoLapack, skipCUDAIfNoMagmaAndNoCusolver]),
# OpInfo for `masked_fill`. The dtype list includes torch.chalf; `out=` is not
# supported and the batched forward-grad check is disabled.
OpInfo('masked_fill',
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int8, torch.bool, torch.int32),
sample_inputs_func=sample_inputs_masked_fill,
error_inputs_func=error_inputs_masked_fill,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
check_batched_forward_grad=False,
supports_out=False),
# OpInfo for `masked_scatter`. `out=` is not supported and the batched
# forward-grad check is disabled (see the linked issue).
OpInfo('masked_scatter',
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int8, torch.bool, torch.int32),
sample_inputs_func=sample_inputs_masked_scatter,
error_inputs_func=error_inputs_masked_scatter,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# https://github.com/pytorch/pytorch/issues/66357
check_batched_forward_grad=False,
supports_out=False,
skips=(
# Compiler issue on ROCm. Regression started in ROCm 6.4.
# (skip is active only when TEST_WITH_ROCM is set, and only for torch.bool)
DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_non_standard_bool_values',
dtypes=[torch.bool], active_if=TEST_WITH_ROCM),
)),
# OpInfo for `masked_select`, with sample and error inputs from
# sample_inputs_masked_select / error_inputs_masked_select.
OpInfo('masked_select',
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_masked_select,
error_inputs_func=error_inputs_masked_select,
skips=(
# Compiler issue on ROCm. Might need to skip until ROCm5.5
# (skip is active only when TEST_WITH_ROCM is set, and only for torch.bool)
DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_non_standard_bool_values',
dtypes=[torch.bool], active_if=TEST_WITH_ROCM),
)),
# OpInfo for `matrix_exp` (alias `linalg.matrix_exp`). Batched grad, gradgrad and
# forward-grad checks are all disabled; `out=` is not supported.
OpInfo('matrix_exp',
dtypes=floating_and_complex_types_and(torch.float16, torch.bfloat16),
aliases=('linalg.matrix_exp',),
sample_inputs_func=sample_inputs_matrix_exp,
# Needs to construct a 2nx2n matrix by copy_ ing into it
check_batched_grad=False,
check_batched_gradgrad=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# https://github.com/pytorch/pytorch/issues/66357
check_batched_forward_grad=False,
skips=(
# mexp does not support bf16 and fp16
# NOTE(review): the skip below lists only torch.half on CPU, not bfloat16.
DecorateInfo(unittest.skip('Skipped!'), 'TestInductorOpInfo', 'test_comprehensive',
dtypes=[torch.half], device_type="cpu"),
),
supports_out=False,
),
# OpInfo for `matmul` (alias `linalg.matmul`). Samples come from
# sample_inputs_matmul with is_rmatmul=False. On CUDA, torch.bfloat16 is listed
# only when SM53OrLater or TEST_WITH_ROCM is true.
OpInfo('matmul',
aliases=('linalg.matmul',),
dtypes=all_types_and_complex_and(torch.float16, torch.bfloat16),
# The conditional expression is evaluated first and then unpacked by `*`,
# so bfloat16 is appended only when the condition holds.
dtypesIfCUDA=floating_and_complex_types_and(torch.float16,
*[torch.bfloat16]
if SM53OrLater or TEST_WITH_ROCM else []),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
assert_autodiffed=True,
assert_jit_shape_analysis=True,
# Runs very slowly on slow gradcheck - alternatively reduce input sizes
gradcheck_fast_mode=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
check_batched_forward_grad=False,
sample_inputs_func=partial(sample_inputs_matmul, is_rmatmul=False),
decorators=[
# NVIDIA only assures that bfloat16 is supported by bmm if SM >= 5.3
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_dtypes', device_type='cuda', active_if=not SM53OrLater),
# ROCm intermittently fails the test with standard atol/rtol
DecorateInfo(toleranceOverride({torch.float32: tol(atol=1e-4, rtol=0)}),
'TestCommon', 'test_noncontiguous_samples', device_type='cuda',
active_if=TEST_WITH_ROCM),
DecorateInfo(toleranceOverride({torch.float32: tol(atol=1e-4, rtol=0)}),
'TestCommon', 'test_out', device_type='cuda',
active_if=TEST_WITH_ROCM),
# mv for the sample with shapes (S, S, M, M), (M,) has some variance in the
# backward on CPU
DecorateInfo(toleranceOverride({torch.float32: tol(atol=0, rtol=1e-5)}),
'TestCommon', 'test_noncontiguous_samples',
device_type='cpu'),
# float32/complex64 tolerance override for TestDecomp.test_comprehensive on CUDA.
DecorateInfo(
toleranceOverride({
torch.float32: tol(atol=1e-5, rtol=1e-5),
torch.complex64: tol(atol=1e-5, rtol=1e-5),
}),
"TestDecomp", "test_comprehensive", device_type="cuda",
),
],
skips=(
# Strides are not the same!
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out'),
# https://github.com/pytorch/pytorch/issues/67470
DecorateInfo(unittest.skip("67470!"),
'TestCommon', 'test_noncontiguous_samples',
device_type='cpu', dtypes=(torch.long,)),
# AssertionError: False is not true : Tensors failed to compare as equal!
DecorateInfo(unittest.skip("Skipped!"), 'TestOpInfo',
device_type='xla', dtypes=(torch.long,)),
# https://github.com/pytorch/pytorch/issues/71774
DecorateInfo(unittest.skip('Skipped!'), 'TestNNCOpInfo', 'test_nnc_correctness',
device_type='cpu', dtypes=(torch.long,)),
)),
# OpInfo for the `reduction_with_dim` variant of `max`; samples come from
# sample_inputs_max_min_reduction_with_dim. `skips` is currently empty.
OpInfo('max',
variant_test_name='reduction_with_dim',
dtypes=all_types_and(torch.float16, torch.bfloat16, torch.bool),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32),
sample_inputs_func=sample_inputs_max_min_reduction_with_dim,
supports_fwgrad_bwgrad=True,
skips=(
),
supports_forward_ad=True),
# OpInfo for the `reduction_no_dim` variant of `max`; samples come from
# sample_inputs_max_min_reduction_no_dim. `skips` is currently empty.
OpInfo('max',
variant_test_name='reduction_no_dim',
dtypes=all_types_and(torch.float16, torch.bfloat16, torch.bool),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32),
supports_out=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_max_min_reduction_no_dim,
skips=(
)),
# OpInfo for `median`. Uses the generic sample_inputs_reduction with
# supports_multiple_dims=False; `out=` is declared unsupported (see TODO).
OpInfo('median',
dtypes=all_types_and(torch.bfloat16, torch.float16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32),
# TODO: some signatures of median do support out
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
error_inputs_func=error_inputs_median,
sample_inputs_func=partial(sample_inputs_reduction, supports_multiple_dims=False)),
# OpInfo for `nanmedian`. Uses the generic sample_inputs_reduction with
# supports_multiple_dims=False; `out=` is declared unsupported (see TODO).
# Unlike the `median` entry, no error_inputs_func or dtypesIfHpu is given.
OpInfo('nanmedian',
dtypes=all_types_and(torch.bfloat16, torch.float16),
# TODO: some signatures of nanmedian do support out
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=partial(sample_inputs_reduction, supports_multiple_dims=False)),
# OpInfo for `var_mean` (default variant); samples come from sample_inputs_std_var.
OpInfo('var_mean',
dtypes=floating_and_complex_types_and(torch.half, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
sample_inputs_func=sample_inputs_std_var,
# TODO: some signatures of var_mean do support out
supports_out=False,
supports_forward_ad=True,
check_batched_forward_grad=False,
supports_fwgrad_bwgrad=True,
decorators=(
# CUDA-only tolerance overrides: float64 for TestDecomp, float16 for TestInductorOpInfo.
DecorateInfo(toleranceOverride({torch.float64: tol(atol=2e-7, rtol=2e-7)}),
"TestDecomp", "test_comprehensive", device_type="cuda"),
DecorateInfo(toleranceOverride({torch.float16: tol(atol=1e-3, rtol=2e-3)}),
"TestInductorOpInfo", "test_comprehensive", device_type="cuda"),
)),
# OpInfo for the `unbiased` variant of `var_mean`; samples come from
# sample_inputs_std_var_unbiased.
OpInfo('var_mean',
variant_test_name='unbiased',
dtypes=floating_and_complex_types_and(torch.half, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
sample_inputs_func=sample_inputs_std_var_unbiased,
# TODO: some signatures of var_mean do support out
supports_out=False,
supports_forward_ad=True,
check_batched_forward_grad=False,
supports_fwgrad_bwgrad=True,
decorators=(
# CUDA-only tolerance overrides: float64 for TestDecomp, float16 for TestInductorOpInfo.
DecorateInfo(toleranceOverride({torch.float64: tol(atol=2e-7, rtol=2e-7)}),
"TestDecomp", "test_comprehensive", device_type="cuda"),
DecorateInfo(toleranceOverride({torch.float16: tol(atol=1e-3, rtol=2e-3)}),
"TestInductorOpInfo", "test_comprehensive", device_type="cuda"),
)),
# OpInfo for `std_mean` (default variant); samples come from sample_inputs_std_var.
OpInfo('std_mean',
dtypes=floating_and_complex_types_and(torch.half, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
sample_inputs_func=sample_inputs_std_var,
# TODO: some signatures of std_mean do support out
supports_out=False,
supports_forward_ad=True,
check_batched_forward_grad=False,
supports_fwgrad_bwgrad=True,
decorators=(
# CUDA-only float64 tolerance override for TestDecomp.test_comprehensive.
DecorateInfo(toleranceOverride({torch.float64: tol(atol=2e-7, rtol=2e-7)}),
"TestDecomp", "test_comprehensive", device_type="cuda"),
)),
# OpInfo for the `unbiased` variant of `std_mean`; samples come from
# sample_inputs_std_var_unbiased.
OpInfo('std_mean',
variant_test_name='unbiased',
dtypes=floating_and_complex_types_and(torch.half, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
sample_inputs_func=sample_inputs_std_var_unbiased,
# TODO: some signatures of std_mean do support out
supports_out=False,
supports_forward_ad=True,
check_batched_forward_grad=False,
supports_fwgrad_bwgrad=True,
decorators=(
# The same float16/float64 tolerances are applied on CUDA to both
# TestDecomp.test_comprehensive and TestInductorOpInfo.test_comprehensive.
DecorateInfo(
toleranceOverride({
torch.float16: tol(atol=4e-5, rtol=9e-3),
torch.float64: tol(atol=2e-7, rtol=2e-7),
}),
"TestDecomp",
"test_comprehensive",
device_type="cuda"
),
DecorateInfo(
toleranceOverride({
torch.float16: tol(atol=4e-5, rtol=9e-3),
torch.float64: tol(atol=2e-7, rtol=2e-7),
}),
"TestInductorOpInfo",
"test_comprehensive",
device_type="cuda"
),
)),
# OpInfo for the `variadic_tensors` variant of `meshgrid`, compared against
# np.meshgrid. Samples come from sample_inputs_meshgrid with variant='variadic'.
OpInfo('meshgrid',
variant_test_name='variadic_tensors',
ref=np.meshgrid,
dtypes=all_types_and_complex_and(torch.bfloat16, torch.bool, torch.float16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
sample_inputs_func=partial(sample_inputs_meshgrid, variant='variadic'),
skips=[
# JIT does not support variadic tensors.
# RuntimeError: input->type()->kind() == TypeKind::OptionalType
# INTERNAL ASSERT FAILED at "../torch/csrc/jit/passes/utils/check_alias_annotation.cpp":252,
# please report a bug to PyTorch.
DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit'),
# meshgrid is defined in torch.functional to take a
# variadic list of tensors. Variadic parameters are not
# compatible with the normalize operator tests.
DecorateInfo(unittest.skip("Skipped!"), 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
# Skip operator schema test because this is a functional and not an operator
DecorateInfo(unittest.skip("Skipped!"), 'TestOperatorSignatures', 'test_get_torch_func_signature_exhaustive'),
],
supports_out=False,
supports_fwgrad_bwgrad=True,
supports_forward_ad=True,
# See https://github.com/pytorch/pytorch/pull/78358
check_batched_forward_grad=False,),
# OpInfo for the `list_of_tensors` variant of `meshgrid`. Samples come from
# sample_inputs_meshgrid with variant='list'; no NumPy `ref` is given (see below).
OpInfo('meshgrid',
variant_test_name='list_of_tensors',
# Unlike the variant above, we do not use np.meshgrid as a
# ref since it does not officially support list of numpy
# arrays.
dtypes=all_types_and_complex_and(torch.bfloat16, torch.bool, torch.float16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
sample_inputs_func=partial(sample_inputs_meshgrid, variant='list'),
skips=[
# meshgrid is defined in torch.functional to take a
# variadic list of tensors. Variadic parameters are not
# compatible with the normalize operator tests.
# (marked expectedFailure here, whereas the variadic_tensors variant uses skip)
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
],
assert_autodiffed=True,
supports_out=False,
autodiff_nonfusible_nodes=[],
supports_fwgrad_bwgrad=True,
supports_forward_ad=True,
# See https://github.com/pytorch/pytorch/pull/78358
check_batched_forward_grad=False,),
# OpInfo for the `reduction_with_dim` variant of `min`; samples come from
# sample_inputs_max_min_reduction_with_dim. `skips` is currently empty.
OpInfo('min',
variant_test_name='reduction_with_dim',
dtypes=all_types_and(torch.float16, torch.bfloat16, torch.bool),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32),
sample_inputs_func=sample_inputs_max_min_reduction_with_dim,
supports_fwgrad_bwgrad=True,
supports_forward_ad=True,
skips=(
)),
# OpInfo for the `reduction_no_dim` variant of `min`; samples come from
# sample_inputs_max_min_reduction_no_dim. `skips` is currently empty.
# NOTE(review): unlike the matching `max` entry, no dtypesIfHpu is given here --
# confirm whether that is intentional.
OpInfo('min',
variant_test_name='reduction_no_dim',
dtypes=all_types_and(torch.float16, torch.bfloat16, torch.bool),
supports_out=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_max_min_reduction_no_dim,
skips=(
)),
# OpInfo for `quantile`: floating dtypes only; samples come from
# sample_inputs_reduction_quantile. Batched forward-grad checking is disabled.
OpInfo('quantile',
dtypes=floating_types(),
sample_inputs_func=sample_inputs_reduction_quantile,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# See https://github.com/pytorch/pytorch/issues/66357
# Relies on copy_ to broadcast, but the forward AD path calls broadcast_to which
# does not have a batching rule in core
check_batched_forward_grad=False),
# OpInfo for `nanquantile`: floating dtypes only; shares
# sample_inputs_reduction_quantile with `quantile`. Batched forward-grad checking
# is disabled.
OpInfo('nanquantile',
dtypes=floating_types(),
sample_inputs_func=sample_inputs_reduction_quantile,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# See https://github.com/pytorch/pytorch/issues/66357
# Relies on copy_ to broadcast, but the forward AD path calls broadcast_to which
# does not have a batching rule in core
check_batched_forward_grad=False),
# BinaryUfuncInfo for the `binary` variant of `max` (alias `maximum`), compared
# against np.maximum. A Python scalar on the right-hand side is not supported.
BinaryUfuncInfo(
'max',
aliases=('maximum',),
variant_test_name='binary',
dtypes=all_types_and(torch.float16, torch.bfloat16, torch.bool),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
assert_autodiffed=True,
ref=np.maximum,
supports_rhs_python_scalar=False,
skips=(
# Incorrectly attempts to use a scalar for the second argument
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_jit_alias_remapping'),
# TODO: FIXME: RuntimeError: "max_elementwise_cuda" not implemented for 'ComplexFloat'
DecorateInfo(unittest.expectedFailure, 'TestBinaryUfuncs', 'test_type_promotion', device_type='cuda'),
)),
# BinaryUfuncInfo for `maximum`, compared against np.maximum. A Python scalar on
# the right-hand side is not supported.
BinaryUfuncInfo(
'maximum',
dtypes=all_types_and(torch.float16, torch.bfloat16, torch.bool),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
ref=np.maximum,
supports_rhs_python_scalar=False,
skips=(
# TODO: FIXME: RuntimeError: "max_elementwise_cuda" not implemented for 'ComplexFloat'
DecorateInfo(unittest.expectedFailure, 'TestBinaryUfuncs', 'test_type_promotion', device_type='cuda'),
)),
# BinaryUfuncInfo for the `binary` variant of `min` (alias `minimum`), compared
# against np.minimum. A Python scalar on the right-hand side is not supported.
BinaryUfuncInfo(
'min',
aliases=('minimum',),
variant_test_name='binary',
dtypes=all_types_and(torch.float16, torch.bfloat16, torch.bool),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
assert_autodiffed=True,
ref=np.minimum,
supports_rhs_python_scalar=False,
skips=(
# Incorrectly attempts to use a scalar for the second argument
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_jit_alias_remapping'),
# TODO: FIXME: RuntimeError: "min_elementwise_cuda" not implemented for 'ComplexFloat'
DecorateInfo(unittest.expectedFailure,
'TestBinaryUfuncs',
'test_type_promotion',
device_type='cuda'),
)),
# BinaryUfuncInfo for `minimum`, compared against np.minimum. A Python scalar on
# the right-hand side is not supported.
BinaryUfuncInfo(
'minimum',
dtypes=all_types_and(torch.float16, torch.bfloat16, torch.bool),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
ref=np.minimum,
supports_rhs_python_scalar=False,
skips=(
# TODO: FIXME: RuntimeError: "min_elementwise_cuda" not implemented for 'ComplexFloat'
DecorateInfo(unittest.expectedFailure,
'TestBinaryUfuncs',
'test_type_promotion',
device_type='cuda'),
),
),
# BinaryUfuncInfo for `logical_and`, compared against np.logical_and. Declared as
# always returning bool, with autograd and RHS Python scalars disabled.
BinaryUfuncInfo('logical_and',
ref=np.logical_and,
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32, torch.int8, torch.bool),
supports_autograd=False,
always_returns_bool=True,
supports_rhs_python_scalar=False),
# BinaryUfuncInfo for `logical_or`, compared against np.logical_or. Declared as
# always returning bool, with autograd and RHS Python scalars disabled.
BinaryUfuncInfo('logical_or',
ref=np.logical_or,
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int8, torch.bool),
supports_autograd=False,
always_returns_bool=True,
supports_rhs_python_scalar=False),
# BinaryUfuncInfo for `logical_xor`, compared against np.logical_xor. Declared as
# always returning bool, with autograd and RHS Python scalars disabled.
# `skips` is currently empty.
BinaryUfuncInfo('logical_xor',
ref=np.logical_xor,
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int8, torch.bool),
supports_autograd=False,
always_returns_bool=True,
supports_rhs_python_scalar=False,
skips=(
)),
# BinaryUfuncInfo for `bitwise_and`, compared against np.bitwise_and. Integral and
# bool dtypes only; the operator variants are operator.and_ / operator.iand.
BinaryUfuncInfo('bitwise_and',
ref=np.bitwise_and,
dtypes=integral_types_and(torch.bool),
dtypesIfHpu=custom_types(torch.bool),
operator_variant=operator.and_,
inplace_operator_variant=operator.iand,
supports_autograd=False,
supports_one_python_scalar=True,
skips=(
# RuntimeError: "bitwise_and_cuda" not implemented for 'Half'
DecorateInfo(unittest.expectedFailure, 'TestBinaryUfuncs',
'test_type_promotion', device_type='cuda'),
)),
# BinaryUfuncInfo for `bitwise_or`, compared against np.bitwise_or. Integral and
# bool dtypes only; the operator variants are operator.or_ / operator.ior.
BinaryUfuncInfo('bitwise_or',
ref=np.bitwise_or,
dtypes=integral_types_and(torch.bool),
dtypesIfHpu=custom_types(torch.bool),
operator_variant=operator.or_,
inplace_operator_variant=operator.ior,
supports_autograd=False,
supports_one_python_scalar=True,
skips=(
# TODO: FIXME: RuntimeError: "bitwise_or_cuda" not implemented for 'Half'
DecorateInfo(unittest.expectedFailure,
'TestBinaryUfuncs',
'test_type_promotion',
device_type='cuda'),
)),
# BinaryUfuncInfo for `bitwise_xor`, compared against np.bitwise_xor. Integral and
# bool dtypes only; the operator variants are operator.xor / operator.ixor.
BinaryUfuncInfo('bitwise_xor',
ref=np.bitwise_xor,
dtypes=integral_types_and(torch.bool),
dtypesIfHpu=custom_types(torch.bool),
operator_variant=operator.xor,
inplace_operator_variant=operator.ixor,
supports_autograd=False,
supports_one_python_scalar=True,
skips=(
# TODO: FIXME: RuntimeError: "bitwise_xor_cuda" not implemented for 'Half'
DecorateInfo(unittest.expectedFailure,
'TestBinaryUfuncs',
'test_type_promotion',
device_type='cuda'),
)),
# BinaryUfuncInfo for `heaviside`. The reference wraps np.heaviside and converts
# the result to np.int64 when both inputs are int64. Autograd and RHS Python
# scalars are disabled.
BinaryUfuncInfo('heaviside',
ref=lambda a, b: (
# necessary because np.heaviside incorrectly returns float64 when passed args of dtype int64
np.int64(np.heaviside(a, b)) if a.dtype == np.int64 and b.dtype == np.int64 else np.heaviside(a, b)
),
dtypes=all_types_and(torch.bool, torch.float16, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32),
supports_autograd=False,
supports_rhs_python_scalar=False,
skips=(
# RuntimeError: heaviside is not yet implemented for tensors with different dtypes.
DecorateInfo(unittest.expectedFailure,
'TestBinaryUfuncs',
'test_type_promotion'),
DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_binary_ufuncs_mixed_dtype'),
# PyTorch's heaviside does not appear to propagate NaNs
DecorateInfo(unittest.skip("Skipped!"),
'TestBinaryUfuncs',
'test_reference_numerics_extremal_values'),
)),
# BinaryUfuncInfo for `lcm`, compared against np.lcm. Integral dtypes only
# (integral_types_and() with no extra dtypes); autograd and RHS Python scalars
# are disabled.
BinaryUfuncInfo('lcm',
ref=np.lcm,
dtypes=integral_types_and(),
supports_autograd=False,
supports_rhs_python_scalar=False),
# BinaryUfuncInfo for `gcd`, compared against np.gcd. Integral dtypes only
# (integral_types_and() with no extra dtypes); autograd and RHS Python scalars
# are disabled.
BinaryUfuncInfo('gcd',
ref=np.gcd,
dtypes=integral_types_and(),
supports_autograd=False,
supports_rhs_python_scalar=False,
skips=(
# Expected failure limited to torch.int8.
# NOTE(review): no reason is recorded for it in this file.
DecorateInfo(unittest.expectedFailure,
'TestBinaryUfuncs',
'test_reference_numerics_small_values',
dtypes=(torch.int8,)),)),
# BinaryUfuncInfo for `isclose`, compared against np.isclose. Autograd, `out=` and
# RHS Python scalars are all disabled; sample and error inputs come from
# sample_inputs_isclose / error_inputs_isclose.
BinaryUfuncInfo('isclose',
ref=np.isclose,
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
sample_inputs_func=sample_inputs_isclose,
error_inputs_func=error_inputs_isclose,
supports_autograd=False,
supports_out=False,
supports_rhs_python_scalar=False,
skips=(
# Expected failure limited to torch.complex128.
# NOTE(review): no reason is recorded for it in this file.
DecorateInfo(unittest.expectedFailure,
'TestCommon',
'test_numpy_refs', dtypes=(torch.complex128,)),
# RuntimeError: Short did not match Int
DecorateInfo(unittest.expectedFailure,
'TestBinaryUfuncs',
'test_type_promotion'),
DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_binary_ufuncs_mixed_dtype'),
DecorateInfo(unittest.skip("Skipped!"),
'TestBinaryUfuncs',
'test_reference_numerics_extremal_values'),
)),
# `softmax` supports different dtypes depending on whether the `dtype` argument
# is passed. Hence two OpInfo entries: one with `dtype` and one without.
# https://github.com/pytorch/pytorch/issues/68752
# This is the entry without `dtype`: floating-point dtypes only.
OpInfo('softmax',
aliases=('special.softmax', 'nn.functional.softmax',),
aten_name='softmax',
aten_backward_name='_softmax_backward_data',
dtypes=floating_types_and(torch.half, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
sample_inputs_func=sample_inputs_softmax_variant,
assert_jit_shape_analysis=True,
assert_autodiffed=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_out=True),
# OpInfo for the `with_dtype` variant of `softmax`: samples are generated by
# sample_inputs_softmax_variant with with_dtype=True, and the dtype list covers
# all types plus complex, bool, float16 and bfloat16.
OpInfo('softmax',
aliases=('special.softmax', 'nn.functional.softmax',),
variant_test_name="with_dtype",
aten_name='softmax',
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
sample_inputs_func=partial(sample_inputs_softmax_variant, with_dtype=True),
assert_autodiffed=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_out=True),
# OpInfo for the ATen op `_softmax_backward_data`, bound directly to
# torch.ops.aten._softmax_backward_data. Floating-point dtypes only; `out=` is
# not supported.
OpInfo(
'_softmax_backward_data',
op=torch.ops.aten._softmax_backward_data,
aten_name='_softmax_backward_data',
dtypes=floating_types_and(torch.bfloat16, torch.float16),
sample_inputs_func=sample_inputs_softmax_backward_data,
assert_autodiffed=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_out=False,
skips=(
# Expected failure limited to float32.
# NOTE(review): no reason is recorded for it in this file.
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit', dtypes=(torch.float32,)),
),
),
# `softmin` supports different dtypes depending on whether the `dtype` argument
# is passed. Hence two OpInfo entries: one with `dtype` and one without.
# https://github.com/pytorch/pytorch/issues/68752
# This is the entry without `dtype`: floating-point dtypes only.
OpInfo('nn.functional.softmin',
aten_name='softmin',
dtypes=floating_types_and(torch.half, torch.bfloat16),
sample_inputs_func=sample_inputs_softmax_variant,
assert_jit_shape_analysis=False,
assert_autodiffed=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_out=False),
# OpInfo for the `with_dtype` variant of `nn.functional.softmin`: samples are
# generated by sample_inputs_softmax_variant with with_dtype=True.
# Note: torch.bool is not in this dtype list, unlike `softmax` with_dtype.
OpInfo('nn.functional.softmin',
variant_test_name="with_dtype",
aten_name='softmin',
dtypes=all_types_and_complex_and(torch.float16, torch.bfloat16),
sample_inputs_func=partial(sample_inputs_softmax_variant, with_dtype=True),
assert_autodiffed=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_out=False),
# OpInfo for `nn.functional.cross_entropy`. Floating-point dtypes only; `out=` is
# not supported. Samples come from sample_inputs_cross_entropy.
OpInfo(
"nn.functional.cross_entropy",
dtypes=floating_types_and(torch.float16, torch.bfloat16),
sample_inputs_func=sample_inputs_cross_entropy,
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
decorators=(
# float32 tolerance override for TestJit.test_variant_consistency_jit on CPU.
DecorateInfo(
toleranceOverride({torch.float32: tol(atol=3e-3, rtol=1e-3)}),
"TestJit",
"test_variant_consistency_jit",
device_type="cpu",
),
),
skips=(
# AssertionError: False is not true : Scalars failed to compare as equal! 0 != 1536
# test_ops.TestJitCUDA.test_variant_consistency_jit_nn_functional_cross_entropy_cuda_float32 leaked
# 1536 bytes CUDA memory on device 0
DecorateInfo(
unittest.expectedFailure,
"TestJit",
"test_variant_consistency_jit",
device_type="cuda",
),
# No test class or test name is given, so this skip covers every test for
# torch.half on the 'mps' device type.
DecorateInfo(unittest.skip("FP16 cross_entropy cases have not been enabled on MPS yet"),
dtypes=(torch.half,), device_type="mps"),
)
),
OpInfo('nn.functional.normalize',
dtypes=floating_and_complex_types_and(torch.half, torch.bfloat16),
sample_inputs_func=sample_inputs_normalize,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True),
OpInfo('aminmax',
ref=lambda x, dim=None, keepdim=False: (np.amin(x, axis=dim, keepdims=keepdim), np.amax(x, axis=dim, keepdims=keepdim)),
dtypes=all_types_and(torch.bool, torch.float16, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32, torch.int8),
decorators=(onlyNativeDeviceTypes,),
supports_autograd=False,
sample_inputs_func=sample_inputs_aminmax,
error_inputs_func=error_inputs_aminmax_amax_amin),
OpInfo('as_strided',
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32, torch.int8, torch.bool),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# vmap does not support inplace views
check_inplace_batched_forward_grad=False,
sample_inputs_func=sample_inputs_as_strided,
skips=(
# Note: This xfail is fine -- it's inherent to how as_strided works
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_noncontiguous_samples'),
# AssertionError: False is not true : Scalars failed to compare as equal!
DecorateInfo(unittest.skip("Errors when storage_offset is included"),
'TestCommon', 'test_variant_consistency_eager'),
# Not close
DecorateInfo(unittest.skip("Errors when storage_offset is included"),
'TestCommon', 'test_complex_half_reference_testing'),
# Not close
DecorateInfo(unittest.skip("Errors when storage_offset is included"), 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.skip("Errors when storage_offset is included"), 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.skip("Numerous errors"), 'TestFwdGradients'),
DecorateInfo(unittest.skip("Numerous errors"), 'TestBwdGradients'),
)),
OpInfo('as_strided',
variant_test_name='partial_views',
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.int32, torch.int8, torch.bool),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# vmap does not support inplace views
check_inplace_batched_forward_grad=False,
sample_inputs_func=sample_inputs_as_strided_partial_views,
skips=(
# Note: This xfail is fine -- it's inherent to how as_strided works
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_noncontiguous_samples'),
# These fail because the test changes the input's in-memory layout
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_complex_half_reference_testing'),
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_variant_consistency_eager'),
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_compare_cpu'),
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
DecorateInfo(unittest.expectedFailure, 'TestFwdGradients', 'test_fn_fwgrad_bwgrad',
dtypes=(torch.complex64, torch.complex128)),
DecorateInfo(unittest.expectedFailure, 'TestFwdGradients', 'test_forward_mode_AD'),
DecorateInfo(unittest.expectedFailure, 'TestFwdGradients', 'test_inplace_forward_mode_AD'),
DecorateInfo(unittest.expectedFailure, 'TestBwdGradients', 'test_inplace_grad'),
DecorateInfo(unittest.expectedFailure, 'TestBwdGradients', 'test_inplace_gradgrad'),
DecorateInfo(unittest.expectedFailure, 'TestProxyTensorOpInfo',
'test_make_fx_symbolic_exhaustive_inplace'),
DecorateInfo(unittest.expectedFailure, 'TestNNCOpInfo', 'test_nnc_correctness'),
# Fail but are also flaky
DecorateInfo(unittest.skip("Test changes in memory layout"), 'TestMathBits'),
DecorateInfo(unittest.skip("Modifies input strides and storage_offset"), 'TestCommon',
'test_non_standard_bool_values'),
# RuntimeError: setStorage: sizes [2, 2], strides [1, 2], storage offset 10, and itemsize 2 requiring a
# storage size of 28 are out of bounds for storage of size 20
DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_meta_inplace'),
DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_dispatch_meta_inplace'),
DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_dispatch_symbolic_meta_inplace'),
DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_dispatch_symbolic_meta_inplace_all_strides'),
)),
OpInfo('as_strided_copy',
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf),
supports_out=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# vmap does not support inplace views
check_inplace_batched_forward_grad=False,
sample_inputs_func=sample_inputs_as_strided,
skips=(
# Note: This xfail is fine -- it's inherent to how as_strided works
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_noncontiguous_samples'),
# AssertionError: False is not true : Scalars failed to compare as equal!
DecorateInfo(unittest.skip("Errors when storage_offset is included"),
'TestCommon', 'test_variant_consistency_eager'),
# Not close
DecorateInfo(unittest.skip("Errors when storage_offset is included"),
'TestCommon', 'test_complex_half_reference_testing'),
# Not close
DecorateInfo(unittest.skip("Errors when storage_offset is included"), 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.skip("Errors when storage_offset is included"), 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.skip("Numerous errors"), 'TestFwdGradients'),
DecorateInfo(unittest.skip("Numerous errors"), 'TestBwdGradients'),
DecorateInfo(unittest.expectedFailure, 'TestDTensorOps', 'test_dtensor_op_db'),
)),
OpInfo('as_strided_scatter',
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# vmap does not support inplace views
check_inplace_batched_forward_grad=False,
sample_inputs_func=sample_inputs_as_strided_scatter,
error_inputs_func=error_inputs_as_strided_scatter,
skips=(
DecorateInfo(unittest.skip('Works for int64, fails for everything else'), 'TestCommon', 'test_noncontiguous_samples'), # noqa: B950
DecorateInfo(unittest.skip('Fails in most cases, passes on LAZY for some reason'), 'TestCommon', 'test_variant_consistency_eager'), # noqa: B950
DecorateInfo(unittest.skip('Fails on cuda + rocm'), 'TestCommon', 'test_complex_half_reference_testing'),
DecorateInfo(unittest.expectedFailure, 'TestBwdGradients', 'test_fn_grad'),
DecorateInfo(unittest.expectedFailure, 'TestFwdGradients', 'test_forward_mode_AD'),
DecorateInfo(unittest.skip('Passes on complex128 and float64 only'), 'TestFwdGradients', 'test_fn_fwgrad_bwgrad'),
# AssertionError: Tensor-likes are not close! (new_empty_strided.default)
DecorateInfo(unittest.skip("Expected: new_empty_strided is not comparable"), 'TestDecomp', 'test_comprehensive'),)),
OpInfo('native_layer_norm',
aten_name='native_layer_norm',
ref=reference_native_layer_norm,
dtypes=floating_types_and(torch.half, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
supports_out=False,
assert_jit_shape_analysis=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_native_layer_norm,
error_inputs_func=error_inputs_native_layer_norm,
skips=(
# IndexError: tuple index out of range
DecorateInfo(unittest.skip('Skipped!'), 'TestFwdGradients', 'test_forward_mode_AD'),
# Tests fail when weight=None and bias is defined
# https://github.com/pytorch/pytorch/issues/79705
DecorateInfo(unittest.expectedFailure, 'TestBwdGradients', 'test_fn_gradgrad'),
# JIT test also tries to compute double backward, which fails
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
DecorateInfo(unittest.skip("Unsupported on MPS for now"), 'TestCommon', 'test_numpy_ref_mps'),
DecorateInfo(toleranceOverride({torch.float32: tol(atol=2e-03, rtol=5e-03)}),
"TestDecomp", "test_comprehensive", device_type="cpu"),
)),
OpInfo('native_batch_norm',
aten_name='native_batch_norm',
dtypes=floating_types_and(torch.float16, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
assert_jit_shape_analysis=True,
allow_cow_input_materialize_forward=[3, 4],
allow_cow_input_materialize_backward=[3, 4],
sample_inputs_func=sample_inputs_native_batch_norm,
skips=(
# NotImplementedError: Could not run
# 'aten::native_batch_norm.out' with arguments from the 'CPU' backend.
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning', device_type="cpu"),
# RuntimeError: out_invstd.dim() == 1 && out_invstd.is_contiguous() && out_invstd.sizes()[0]
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out', device_type="cuda"),
# Problem with _get_numerical_jacobian
# IndexError: tuple index out of range
DecorateInfo(unittest.skip("Skipped!"), 'TestFwdGradients', 'test_forward_mode_AD'),
# RuntimeError: deepEquals(input.iValue, deepCopiedInput) INTERNAL ASSERT FAILED
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
# https://github.com/pytorch/pytorch/issues/85960
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_compare_cpu'),
# AssertionError: Booleans mismatch: True is not False
DecorateInfo(unittest.skip("Skipped!"), 'TestFakeTensor', 'test_fake_autocast'),
DecorateInfo(unittest.skip("Skipped!"), 'TestFakeTensor', 'test_fake'),
DecorateInfo(toleranceOverride({torch.float32: tol(atol=5e-5, rtol=5e-5)}),
"TestCompositeCompliance", "test_forward_ad"),
)
),
OpInfo('_native_batch_norm_legit',
aten_name='_native_batch_norm_legit',
dtypes=floating_types_and(torch.float16, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
assert_jit_shape_analysis=True,
allow_cow_input_materialize_forward=[3, 4],
allow_cow_input_materialize_backward=[3, 4],
sample_inputs_func=sample_inputs__native_batch_norm_legit,
skips=(
# NotImplementedError: Could not run
# 'aten::native_batch_norm.out' with arguments from the 'CPU' backend.
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning', device_type="cpu"),
# RuntimeError: out_invstd.dim() == 1 && out_invstd.is_contiguous() && out_invstd.sizes()[0]
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out', device_type="cuda"),
# Problem with _get_numerical_jacobian
# IndexError: tuple index out of range
DecorateInfo(unittest.skip("Skipped!"), 'TestFwdGradients', 'test_forward_mode_AD'),
# RuntimeError: deepEquals(input.iValue, deepCopiedInput) INTERNAL ASSERT FAILED
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
# https://github.com/pytorch/pytorch/issues/85960
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_compare_cpu'),
DecorateInfo(toleranceOverride({torch.float32: tol(atol=5e-5, rtol=5e-5)}),
"TestCompositeCompliance", "test_forward_ad"),
)
),
# OpInfo entry for `_batch_norm_with_update`. The op is reached through
# `torch.ops.aten` (see `op=` below) rather than a Python-level function.
OpInfo('_batch_norm_with_update',
op=torch.ops.aten._batch_norm_with_update,
aten_name='_batch_norm_with_update',
dtypes=floating_types_and(torch.float16, torch.bfloat16),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
assert_jit_shape_analysis=True,
allow_cow_input_materialize_forward=[3, 4],
allow_cow_input_materialize_backward=[3, 4],
sample_inputs_func=sample_inputs__batch_norm_with_update,
skips=(
# NotImplementedError: Could not run
# 'aten::native_batch_norm.out' with arguments from the 'CPU' backend.
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning', device_type="cpu"),
# RuntimeError: out_invstd.dim() == 1 && out_invstd.is_contiguous() && out_invstd.sizes()[0]
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out', device_type="cuda"),
# Problem with _get_numerical_jacobian
# IndexError: tuple index out of range
DecorateInfo(unittest.skip("Skipped!"), 'TestFwdGradients', 'test_forward_mode_AD'),
# RuntimeError: deepEquals(input.iValue, deepCopiedInput) INTERNAL ASSERT FAILED
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
# Relaxed float32 tolerance for the composite-compliance forward-AD test.
DecorateInfo(toleranceOverride({torch.float32: tol(atol=5e-5, rtol=5e-5)}),
"TestCompositeCompliance", "test_forward_ad"),
# _batch_norm_with_update expects contiguous inputs for cudnn and miopen
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_noncontiguous_samples', device_type="cuda"),
DecorateInfo(unittest.expectedFailure,
'TestMeta', 'test_dispatch_symbolic_meta_outplace_all_strides', device_type="cuda"),
# _batch_norm_with_update does not have python bindings
DecorateInfo(unittest.skip("Skipped!"), 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
# aten out variants do not accept out= kwarg, only python out variants
# NOTE(review): these two xfails carry no device_type, so they appear to cover
# the cpu-only `test_out_warning` and cuda-only `test_out` xfails listed at the
# top of this tuple -- TODO confirm whether the device-specific ones are still needed.
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out'),
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning'),
)
),
OpInfo('nn.functional.cosine_similarity',
aten_name="cosine_similarity",
dtypes=floating_types_and(torch.half, torch.bfloat16),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
decorators=[
DecorateInfo(
toleranceOverride({torch.float16: tol(atol=1.3e-5, rtol=2e-2)}),
"TestInductorOpInfo",
"test_comprehensive",
device_type="cuda"
),
],
sample_inputs_func=sample_inputs_cosine_similarity),
OpInfo('nn.functional.adaptive_avg_pool1d',
dtypes=floating_types_and(torch.half, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.float16),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
error_inputs_func=error_inputs_adaptive_avg_pool1d,
sample_inputs_func=sample_inputs_adaptive_avg_pool1d),
OpInfo('nn.functional.adaptive_avg_pool2d',
dtypes=floating_types_and(torch.half, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.float16),
decorators=(
# RuntimeError:
# adaptive_avg_pool2d(Tensor input, int[2] output_size) -> (Tensor):
# Expected a value of type 'List[int]' for argument 'output_size' but
# instead found type 'Tuple[NoneType, int]'. :
# File "<string>", line 3
# def the_method(i0):
# return torch.nn.functional.adaptive_avg_pool2d(i0, (None, 7))
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
error_inputs_func=error_inputs_adaptive_avg_pool2d,
sample_inputs_func=sample_inputs_adaptive_avg_pool2d),
OpInfo('nn.functional.adaptive_avg_pool3d',
dtypes=floating_types_and(torch.half, torch.bfloat16),
dtypesIfCUDA=floating_types_and(torch.half, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.float16),
decorators=(
# RuntimeError:
# adaptive_avg_pool3d(Tensor input, int[3] output_size) -> (Tensor):
# Expected a value of type 'List[int]' for argument 'output_size' but
# instead found type 'Tuple[NoneType, NoneType, NoneType]'. :
# File "<string>", line 3
#
# def the_method(i0):
# return torch.nn.functional.adaptive_avg_pool3d(i0, (None, None, None))
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
#
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
),
# Runs very slowly on slow gradcheck - alternatively reduce input sizes
gradcheck_fast_mode=True,
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
error_inputs_func=error_inputs_adaptive_avg_pool3d,
sample_inputs_func=sample_inputs_adaptive_avg_pool3d),
OpInfo('nn.functional.adaptive_max_pool1d',
dtypes=floating_types_and(torch.half, torch.bfloat16),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# got: Batching rule not implemented for aten::flatten.using_ints
check_batched_forward_grad=False,
gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
error_inputs_func=error_inputs_adaptive_max_pool1d,
sample_inputs_func=sample_inputs_adaptive_max_pool1d),
OpInfo('nn.functional.adaptive_max_pool2d',
dtypes=floating_types_and(torch.half, torch.bfloat16),
decorators=(
# RuntimeError:
# adaptive_max_pool2d(Tensor input, int[2] output_size) -> (Tensor):
# Expected a value of type 'List[int]' for argument 'output_size' but
# instead found type 'Tuple[NoneType, int]'. :
# File "<string>", line 3
# def the_method(i0):
# return torch.nn.functional.adaptive_max_pool2d(i0, (None, 7))
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# got: Batching rule not implemented for aten::flatten.using_ints
check_batched_forward_grad=False,
gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
error_inputs_func=error_inputs_adaptive_max_pool2d,
sample_inputs_func=sample_inputs_adaptive_max_pool2d),
OpInfo('nn.functional.adaptive_max_pool3d',
dtypes=floating_types_and(torch.bfloat16, torch.half),
decorators=(
# RuntimeError:
# adaptive_max_pool3d(Tensor input, int[3] output_size) -> (Tensor):
# Expected a value of type 'List[int]' for argument 'output_size' but
# instead found type 'Tuple[NoneType, NoneType, NoneType]'. :
# File "<string>", line 3
#
# def the_method(i0):
# return torch.nn.functional.adaptive_max_pool3d(i0, (None, None, None))
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
#
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# got: Batching rule not implemented for aten::flatten.using_ints
check_batched_forward_grad=False,
gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
error_inputs_func=error_inputs_adaptive_max_pool3d,
sample_inputs_func=sample_inputs_adaptive_max_pool3d),
OpInfo('nn.functional.avg_pool1d',
aten_name='avg_pool1d',
supports_autograd=True,
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
dtypes=floating_types_and(torch.int64, torch.float16, torch.bfloat16),
dtypesIfCUDA=floating_types_and(torch.float16, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.float16),
gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
error_inputs_func=error_inputs_avg_pool1d,
sample_inputs_func=sample_inputs_avgpool1d),
OpInfo('nn.functional.avg_pool3d',
aten_name='avg_pool3d',
supports_autograd=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
dtypes=floating_types_and(torch.int64),
dtypesIfCUDA=floating_types_and(torch.float16, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.float16),
gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
error_inputs_func=error_inputs_avg_pool3d,
sample_inputs_func=sample_inputs_avgpool3d,
skips=(
# AssertionError: Tensor-likes are not close!
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out', device_type='cpu'),
)),
OpInfo(
"nn.functional.binary_cross_entropy_with_logits",
aten_name="binary_cross_entropy_with_logits",
supports_autograd=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_out=False,
dtypes=floating_types_and(torch.half, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.float16),
gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
sample_inputs_func=sample_inputs_binary_cross_entropy_with_logits,
skips=(
DecorateInfo(
unittest.skip("Skipped!"),
'TestJit',
'test_variant_consistency_jit',
dtypes=(torch.float32,)
),
DecorateInfo(toleranceOverride({torch.float32: tol(atol=2e-5, rtol=3e-6)}),
"TestConsistency", "test_output_match", device_type="mps"),
),
),
UnaryUfuncInfo(
'nn.functional.relu',
aten_name="relu",
ref=lambda a: np.where(a <= 0, 0, a),
supports_autograd=True,
supports_sparse=True,
supports_sparse_csr=True,
supports_sparse_csc=True,
supports_sparse_bsr=True,
supports_sparse_bsc=True,
dtypes=all_types_and(torch.half, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16, torch.float16),
sample_inputs_func=sample_inputs_nn_activation_relu,
supports_out=False,
supports_fwgrad_bwgrad=True,
supports_forward_ad=True),
OpInfo('nn.functional.conv_transpose1d',
# `ref` for this function is backward of
# corresponding `conv*d`
ref=partial(conv_transpose_ref, fn=torch.nn.functional.conv_transpose1d),
aten_name='conv_transpose1d',
aliases=('conv_transpose1d',),
dtypes=floating_and_complex_types_and(torch.int64, torch.float16, torch.bfloat16),
dtypesIfCUDA=floating_and_complex_types_and(torch.float16, torch.chalf,
torch.bfloat16),
sample_inputs_func=sample_inputs_conv_transpose1d,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
assert_jit_shape_analysis=True,
gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
decorators=(
DecorateInfo(
toleranceOverride({torch.float32: tol(atol=1e-04, rtol=1.3e-06), }),
'TestCommon', 'test_variant_consistency_eager', device_type='cuda'),
DecorateInfo(
toleranceOverride({torch.chalf: tol(atol=5e-2, rtol=5e-2), }),
'TestCommon', 'test_complex_half_reference_testing'),
DecorateInfo(
toleranceOverride({torch.float: tol(atol=1.5e-5, rtol=1.5e-5), }),
'TestCommon', 'test_numpy_ref_mps'),
DecorateInfo(
toleranceOverride({torch.half: tol(atol=1e-3, rtol=5e-3), }),
'TestInductorOpInfo', 'test_comprehensive', device_type='cpu'),
),
skips=(
# Reason for Skip: https://github.com/pytorch/pytorch/pull/79694#issuecomment-1186949486
DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit',
dtypes=(torch.complex64,)),
# RuntimeError: UNSUPPORTED DTYPE: complex
DecorateInfo(unittest.expectedFailure, 'TestNNCOpInfo', 'test_nnc_correctness',
dtypes=(torch.complex64, torch.complex128)),
# RuntimeError: !lhs.isAliasOf(rhs)INTERNAL ASSERT FAILED at
# "../torch/csrc/jit/passes/utils/check_alias_annotation.cpp":104, please report a bug to PyTorch.
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit',
dtypes=(torch.float,)),
# RuntimeError: "slow_conv2d_cpu_grad_input" not implemented for 'Long'
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_numpy_ref',
dtypes=(torch.int64,)),
),
supports_out=False,),
OpInfo('nn.functional.conv_transpose2d',
aten_name='conv_transpose2d',
aliases=('conv_transpose2d',),
# `ref` for this function is backward of
# corresponding `conv*d`
ref=partial(conv_transpose_ref, fn=torch.nn.functional.conv_transpose2d),
dtypes=floating_and_complex_types_and(torch.int64, torch.float16, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
dtypesIfCUDA=floating_and_complex_types_and(torch.float16, torch.chalf,
torch.bfloat16),
sample_inputs_func=sample_inputs_conv_transpose2d,
# Runs very slowly on slow-gradcheck for complex.
gradcheck_fast_mode=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
assert_jit_shape_analysis=True,
gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
decorators=[
DecorateInfo(
toleranceOverride({torch.float32: tol(atol=1e-04, rtol=1.3e-06), }),
'TestCommon', 'test_variant_consistency_eager', device_type='cuda'),
DecorateInfo(
toleranceOverride({torch.float32: tol(atol=2e-05, rtol=5e-05), }),
'TestCommon', 'test_noncontiguous_samples', device_type='cuda'),
DecorateInfo(
toleranceOverride({torch.chalf: tol(atol=8e-2, rtol=8e-2), }),
'TestCommon', 'test_complex_half_reference_testing'),
DecorateInfo(
toleranceOverride({torch.half: tol(atol=1e-3, rtol=4e-3), }),
'TestInductorOpInfo', 'test_comprehensive', device_type='cpu')],
skips=(
# RuntimeError: !lhs.isAliasOf(rhs)INTERNAL ASSERT FAILED at
# "../torch/csrc/jit/passes/utils/check_alias_annotation.cpp":104, please report a bug to PyTorch.
DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit'),
# RuntimeError: UNSUPPORTED DTYPE: complex
DecorateInfo(unittest.expectedFailure, 'TestNNCOpInfo', 'test_nnc_correctness',
dtypes=(torch.complex64, torch.complex128)),
# RuntimeError: "slow_conv2d_cpu_grad_input" not implemented for 'Long'
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_numpy_ref',
dtypes=(torch.int64,)),
# Reference: https://github.com/pytorch/pytorch/issues/86356
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_numpy_ref',
dtypes=(torch.double, torch.cdouble)),
DecorateInfo(unittest.skip("Unsupported on MPS for now"), 'TestCommon', 'test_numpy_ref_mps'),
# AssertionError: None mismatch: torch.complex64 is not None
DecorateInfo(unittest.expectedFailure, 'TestDtypeCustomRules', 'test_custom_rules',
dtypes=(torch.complex64, torch.complex128)),
),
supports_out=False,),
OpInfo('nn.functional.conv_transpose3d',
aten_name='conv_transpose3d',
aliases=('conv_transpose3d',),
# `ref` for this function is backward of
# corresponding `conv*d`
ref=partial(conv_transpose_ref, fn=torch.nn.functional.conv_transpose3d),
dtypes=floating_and_complex_types_and(torch.int64, torch.float16, torch.bfloat16),
dtypesIfCUDA=floating_and_complex_types_and(
torch.float16, torch.chalf, torch.bfloat16),
sample_inputs_func=sample_inputs_conv_transpose3d,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
assert_jit_shape_analysis=True,
# Runs very slowly on slow-gradcheck - alternatively reduce input sizes
gradcheck_fast_mode=True,
gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
decorators=[
DecorateInfo(
toleranceOverride({torch.float16: tol(atol=5e-2, rtol=5e-2), }),
'TestInductorOpInfo', 'test_comprehensive', device_type='cuda'),
DecorateInfo(
toleranceOverride({torch.float32: tol(atol=1e-04, rtol=1.3e-06),
torch.complex64: tol(atol=1.3e-04, rtol=1.3e-05)}),
'TestCommon', 'test_variant_consistency_eager', device_type='cuda'),
DecorateInfo(
toleranceOverride({torch.float32: tol(atol=2e-04, rtol=2e-04), }),
'TestCompositeCompliance', 'test_operator', device_type='cuda'),
DecorateInfo(
toleranceOverride({torch.float32: tol(atol=1.3e-04, rtol=1.3e-06),
torch.complex64: tol(atol=1.3e-04, rtol=1.3e-05)}),
'TestCommon', 'test_noncontiguous_samples', device_type='cuda'),
DecorateInfo(
toleranceOverride({torch.float32: tol(atol=1e-04, rtol=2e-05), }),
'TestCompositeCompliance', 'test_forward_ad', device_type='cuda',
active_if=TEST_CUDNN),
DecorateInfo(
toleranceOverride({torch.complex64: tol(atol=1e-4, rtol=1e-4)}),
"TestMathBits", "test_conj_view", device_type='cuda'),
DecorateInfo(
toleranceOverride({torch.chalf: tol(atol=9e-2, rtol=9e-2), }),
'TestCommon', 'test_complex_half_reference_testing'),
DecorateInfo(
toleranceOverride({torch.half: tol(atol=9e-3, rtol=2e-1), }),
'TestInductorOpInfo', 'test_comprehensive', device_type='cpu')],
skips=(
# RuntimeError: !lhs.isAliasOf(rhs)INTERNAL ASSERT FAILED at
# "../torch/csrc/jit/passes/utils/check_alias_annotation.cpp":104, please report a bug to PyTorch.
DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit'),
# RuntimeError: "slow_conv3d_cpu_grad_input" not implemented for 'Long'
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_numpy_ref',
dtypes=(torch.int64,)),
# Reference: https://github.com/pytorch/pytorch/issues/86356
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_numpy_ref',
dtypes=(torch.double, torch.cdouble)),
DecorateInfo(unittest.skip("Unsupported on MPS for now"), 'TestCommon', 'test_numpy_ref_mps'),
# RuntimeError: UNSUPPORTED DTYPE: complex
DecorateInfo(unittest.expectedFailure, 'TestNNCOpInfo', 'test_nnc_correctness',
dtypes=(torch.complex64, torch.complex128)),
DecorateInfo(unittest.skip('Skipped for ROCm!'), 'TestCommon', 'test_complex_half_reference_testing',
dtypes=[torch.complex32], active_if=TEST_WITH_ROCM),
),
supports_out=False,),
OpInfo('nn.functional.conv1d',
aliases=('conv1d',),
aten_name='conv1d',
dtypes=floating_and_complex_types_and(torch.int64, torch.float16, torch.bfloat16),
dtypesIfCUDA=floating_and_complex_types_and(torch.float16, torch.chalf,
torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
sample_inputs_func=sample_inputs_conv1d,
error_inputs_func=error_inputs_conv1d,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
assert_jit_shape_analysis=True,
gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
decorators=(
DecorateInfo(
toleranceOverride({torch.chalf: tol(atol=1e-2, rtol=5e-2)}),
'TestCommon', 'test_complex_half_reference_testing'
),
DecorateInfo(
toleranceOverride({torch.float16: tol(atol=2e-3, rtol=1e-3)}),
'TestInductorOpInfo', 'test_comprehensive', device_type='cuda',
),
),
skips=(
# RuntimeError: !lhs.isAliasOf(rhs)INTERNAL ASSERT FAILED at
# "../torch/csrc/jit/passes/utils/check_alias_annotation.cpp":103, please report a bug to PyTorch.
DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit'),
# Ref: https://github.com/pytorch/pytorch/issues/75309
# AssertionError: None mismatch: torch.complex128 is not None
DecorateInfo(unittest.expectedFailure, 'TestDtypeCustomRules',
'test_custom_rules', dtypes=(torch.complex64, torch.complex128)),
# Ref: https://github.com/pytorch/pytorch/issues/75309
# RuntimeError: UNSUPPORTED DTYPE: complex
DecorateInfo(unittest.expectedFailure, 'TestNNCOpInfo',
'test_nnc_correctness', dtypes=(torch.complex64, torch.complex128)),
),
supports_expanded_weight=True,
supports_out=False,),
# OpInfo entry for `nn.functional.conv2d` (also registered under the alias `conv2d`).
OpInfo(
    'nn.functional.conv2d',
    aliases=('conv2d',),
    aten_name='conv2d',
    dtypes=floating_and_complex_types_and(torch.int64, torch.float16, torch.bfloat16),
    dtypesIfCUDA=floating_and_complex_types_and(torch.float16, torch.chalf,
                                                torch.bfloat16),
    dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
    # Passed directly, as in the conv1d/conv3d entries; wrapping it in an
    # argument-less `partial` bound nothing.
    sample_inputs_func=sample_inputs_conv2d,
    error_inputs_func=error_inputs_conv2d,
    gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
    # Runs very slowly on slow gradcheck - alternatively reduce input sizes
    gradcheck_fast_mode=True,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    assert_jit_shape_analysis=True,
    decorators=(
        # Relaxed complex-half tolerance for the reference comparison test.
        DecorateInfo(
            toleranceOverride({torch.chalf: tol(atol=6e-2, rtol=5e-2)}),
            'TestCommon', 'test_complex_half_reference_testing',
        ),
        # Relaxed float16 tolerance for the inductor comprehensive test.
        DecorateInfo(
            toleranceOverride({torch.float16: tol(atol=5e-3, rtol=1e-3)}),
            'TestInductorOpInfo', 'test_comprehensive',
        ),
    ),
    skips=(
        # RuntimeError: !lhs.isAliasOf(rhs)INTERNAL ASSERT FAILED at
        # "../torch/csrc/jit/passes/utils/check_alias_annotation.cpp":103, please report a bug to PyTorch.
        DecorateInfo(unittest.skip("Works on some configs!"), 'TestJit', 'test_variant_consistency_jit'),
        # Ref: https://github.com/pytorch/pytorch/issues/75309
        # AssertionError: None mismatch: torch.complex128 is not None
        DecorateInfo(unittest.expectedFailure, 'TestDtypeCustomRules',
                     'test_custom_rules', dtypes=(torch.complex64, torch.complex128)),
        # RuntimeError: UNSUPPORTED DTYPE: complex
        DecorateInfo(unittest.expectedFailure, 'TestNNCOpInfo',
                     'test_nnc_correctness', dtypes=(torch.complex64, torch.complex128)),
    ),
    supports_expanded_weight=True,
    supports_out=False,
),
OpInfo(
    'nn.functional.conv3d',
    # Identity of the op under test.
    aten_name='conv3d',
    aliases=('conv3d',),
    # The default dtype list includes int64; the CUDA list omits it and adds chalf.
    dtypes=floating_and_complex_types_and(torch.int64, torch.bfloat16, torch.float16),
    dtypesIfCUDA=floating_and_complex_types_and(torch.float16, torch.chalf, torch.bfloat16),
    dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
    # Input generators.
    sample_inputs_func=sample_inputs_conv3d,
    error_inputs_func=error_inputs_conv3d,
    # Capability flags.
    supports_out=False,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    supports_expanded_weight=True,
    # Gradcheck configuration.
    gradcheck_fast_mode=True,
    gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
    # Tolerance overrides, one per affected test.
    decorators=(
        DecorateInfo(
            toleranceOverride({torch.chalf: tol(atol=6e-2, rtol=5e-2)}),
            'TestCommon',
            'test_complex_half_reference_testing',
        ),
        # TF32
        DecorateInfo(
            toleranceOverride({
                torch.float32: tol(atol=5e-3, rtol=1e-3),
                torch.complex64: tol(atol=5e-3, rtol=1e-3),
            }),
            'TestCommon',
            'test_noncontiguous_samples',
        ),
        DecorateInfo(
            toleranceOverride({torch.complex64: tol(atol=2e-5, rtol=3e-6)}),
            'TestCommon',
            'test_variant_consistency_eager',
        ),
        DecorateInfo(
            toleranceOverride({torch.complex64: tol(atol=5e-5, rtol=5e-6)}),
            'TestMathBits',
            'test_conj_view',
        ),
        DecorateInfo(
            toleranceOverride({torch.float32: tol(atol=5e-5, rtol=5e-6)}),
            'TestOperators',
            'test_vjpvmap',
        ),
        DecorateInfo(
            toleranceOverride({torch.float16: tol(atol=5e-3, rtol=1e-3)}),
            'TestInductorOpInfo',
            'test_comprehensive',
        ),
    ),
    skips=(
        # JIT variant consistency hits:
        # RuntimeError: !lhs.isAliasOf(rhs) INTERNAL ASSERT FAILED at
        # "../torch/csrc/jit/passes/utils/check_alias_annotation.cpp":103, please report a bug to PyTorch.
        DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit'),
        # NNC reports: RuntimeError: UNSUPPORTED DTYPE: complex
        DecorateInfo(
            unittest.expectedFailure,
            'TestNNCOpInfo',
            'test_nnc_correctness',
            dtypes=(torch.complex64, torch.complex128),
        ),
        # AssertionError: Tensor-likes are not close!
        # (noted upstream as breaking the slow tests)
        DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_compare_cpu'),
    ),
),
OpInfo(
    'nn.functional.group_norm',
    # Identity and reference implementation.
    aten_name='group_norm',
    aliases=('group_norm',),
    ref=reference_group_norm,
    dtypes=floating_types_and(torch.float16, torch.bfloat16),
    # Input generators.
    sample_inputs_func=sample_inputs_group_norm,
    reference_inputs_func=reference_inputs_group_norm,
    error_inputs_func=error_inputs_group_norm,
    # Capability flags.
    supports_out=False,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    supports_expanded_weight=True,
    decorators=[
        # JIT variant consistency (float32) fails with:
        # RuntimeError: Cannot insert a Tensor that requires grad as a constant.
        # Consider making it a parameter or input, or detaching the gradient
        DecorateInfo(
            unittest.expectedFailure,
            'TestJit',
            'test_variant_consistency_jit',
            dtypes=(torch.float32,),
        ),
        # Looser float32 tolerance for the CPU decomposition test.
        DecorateInfo(
            toleranceOverride({torch.float32: tol(atol=5e-05, rtol=3e-03)}),
            "TestDecomp",
            "test_comprehensive",
            device_type="cpu",
        ),
    ],
),
OpInfo(
    'nn.functional.instance_norm',
    # Deliberately no `ref`: instance_norm is often numerically unstable
    # (large numbers or nan), so a reference comparison is not registered.
    dtypes=floating_types_and(torch.float16, torch.bfloat16),
    dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
    sample_inputs_func=sample_inputs_instance_norm,
    # Capability flags.
    supports_out=False,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    supports_expanded_weight=True,
    allow_cow_input_materialize_forward=['running_mean', 'running_var'],
    decorators=[
        # JIT variant consistency (float32) fails with:
        # RuntimeError: Cannot insert a Tensor that requires grad as a constant.
        # Consider making it a parameter or input, or detaching the gradient
        DecorateInfo(
            unittest.expectedFailure,
            'TestJit',
            'test_variant_consistency_jit',
            dtypes=(torch.float32,),
        ),
    ],
),
OpInfo(
    'nn.functional.layer_norm',
    # Identity and reference implementation.
    aten_name='layer_norm',
    aten_backward_name='layer_norm_backward',
    aliases=('layer_norm',),
    ref=reference_layer_norm,
    dtypes=floating_types_and(torch.half, torch.bfloat16),
    sample_inputs_func=sample_inputs_layer_norm,
    # Capability flags.
    supports_out=False,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    supports_expanded_weight=True,
    assert_jit_shape_analysis=True,
    decorators=[
        # Looser float32 tolerance when comparing against the NumPy reference.
        DecorateInfo(
            toleranceOverride({torch.float32: tol(atol=1e-05, rtol=1e-03)}),
            'TestCommon',
            'test_numpy_refs',
        ),
        DecorateInfo(unittest.skip("Bug in MPS backend!"), 'TestCommon', 'test_numpy_ref_mps'),
    ],
),
OpInfo(
    'nn.functional.rms_norm',
    # Identity and reference implementation.
    aten_name='rms_norm',
    aliases=('rms_norm',),
    ref=reference_rms_norm,
    dtypes=floating_and_complex_types_and(torch.half, torch.bfloat16),
    # Input generators.
    sample_inputs_func=sample_inputs_rms_norm,
    error_inputs_func=error_inputs_rms_norm,
    # Capability flags.
    supports_out=False,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
),
OpInfo(
    'nn.functional.local_response_norm',
    # The default dtype list includes int64; the CUDA list does not.
    dtypes=floating_types_and(torch.int64, torch.float16, torch.bfloat16),
    dtypesIfCUDA=floating_types_and(torch.float16, torch.bfloat16),
    sample_inputs_func=sample_inputs_local_response_norm,
    # Capability flags.
    supports_out=False,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    decorators=[
        # JIT variant consistency (float32) fails with:
        # RuntimeError: false INTERNAL ASSERT FAILED at
        # "../torch/csrc/jit/passes/utils/check_alias_annotation.cpp":185, please report a bug to PyTorch.
        DecorateInfo(
            unittest.expectedFailure,
            'TestJit',
            'test_variant_consistency_jit',
            dtypes=(torch.float32,),
        ),
    ],
),
OpInfo(
    'constant_pad_nd',
    dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.half),
    sample_inputs_func=sample_inputs_constant_pad_nd,
    # Capability flags.
    supports_out=False,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    skips=(
        # The JIT tracer cannot pass a bool to a Scalar argument because
        # BoolType is not a subtype of ScalarType.
        DecorateInfo(
            unittest.expectedFailure,
            'TestNNCOpInfo',
            'test_nnc_correctness',
            dtypes=(torch.bool,),
        ),
    ),
),
OpInfo(
    'nn.functional.pad',
    # Variant exercising mode='constant'.
    variant_test_name='constant',
    aten_name='constant_pad_nd',
    dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.half),
    sample_inputs_func=partial(sample_inputs_nn_pad, mode='constant'),
    # Capability flags.
    supports_out=False,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    # Slow-mode gradcheck is very slow here; the alternative would be smaller inputs.
    gradcheck_fast_mode=True,
),
OpInfo(
    'nn.functional.pad',
    # Variant exercising mode='reflect'.
    variant_test_name='reflect',
    dtypes=all_types_and_complex_and(torch.bfloat16, torch.half),
    sample_inputs_func=partial(sample_inputs_nn_pad, mode='reflect'),
    # Capability flags.
    supports_out=False,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
    skips=(
        # Doesn't have a corresponding aten operator.
        # RuntimeError: false INTERNAL ASSERT FAILED at
        # "../torch/csrc/jit/passes/utils/check_alias_annotation.cpp":185, please report a bug to PyTorch.
        DecorateInfo(
            unittest.skip("Skipped!"),
            'TestJit',
            'test_variant_consistency_jit',
            dtypes=(torch.float32,),
        ),
    ),
),
OpInfo(
    'nn.functional.pad',
    # Variant exercising mode='replicate'.
    variant_test_name='replicate',
    dtypes=all_types_and_complex_and(torch.half, torch.bfloat16),
    sample_inputs_func=partial(sample_inputs_nn_pad, mode='replicate'),
    # Capability flags.
    supports_out=False,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
    skips=(
        # Doesn't have a corresponding aten operator.
        # RuntimeError: false INTERNAL ASSERT FAILED at
        # "../torch/csrc/jit/passes/utils/check_alias_annotation.cpp":185, please report a bug to PyTorch.
        DecorateInfo(
            unittest.skip("Skipped!"),
            'TestJit',
            'test_variant_consistency_jit',
            dtypes=(torch.float32,),
        ),
    ),
),
OpInfo(
    'nn.functional.pad',
    # Variant with its own dedicated sample generator for replicate padding with negative pads.
    variant_test_name='replicate_negative',
    dtypes=all_types_and_complex_and(torch.half, torch.bfloat16),
    sample_inputs_func=sample_inputs_nn_pad_replicate_negative,
    # Capability flags.
    supports_out=False,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
    skips=(
        # Doesn't have a corresponding aten operator.
        # RuntimeError: false INTERNAL ASSERT FAILED at
        # "../torch/csrc/jit/passes/utils/check_alias_annotation.cpp":185, please report a bug to PyTorch.
        DecorateInfo(
            unittest.skip("Skipped!"),
            'TestJit',
            'test_variant_consistency_jit',
            dtypes=(torch.float32,),
        ),
        # Some negative padding cases cause a segfault on MPS
        DecorateInfo(unittest.skip("Not fully supported on MPS"), 'TestConsistency'),
    ),
),
OpInfo(
    'nn.functional.pad',
    # Variant exercising mode='circular'.
    variant_test_name='circular',
    dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.half),
    sample_inputs_func=partial(sample_inputs_nn_pad, mode='circular'),
    # Capability flags.
    supports_out=False,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    # Batched-gradient checks are turned off for this variant.
    check_batched_grad=False,
    # https://github.com/pytorch/pytorch/issues/66357
    check_batched_forward_grad=False,
    skips=(
        # Doesn't have a corresponding aten operator.
        # RuntimeError: false INTERNAL ASSERT FAILED at
        # "../torch/csrc/jit/passes/utils/check_alias_annotation.cpp":185, please report a bug to PyTorch.
        DecorateInfo(
            unittest.skip("Skipped!"),
            'TestJit',
            'test_variant_consistency_jit',
            dtypes=(torch.float32,),
        ),
        # Difference from <type> is larger with decomposition new_empty_strided.default than original on output 0
        DecorateInfo(
            unittest.skip("Expected: new_empty_strided is not comparable"),
            'TestDecomp',
            'test_comprehensive',
        ),
    ),
),
OpInfo(
    'nn.functional.hardswish',
    # Identity of the op under test.
    aten_name="hardswish",
    aten_backward_name='hardswish_backward',
    dtypes=floating_types_and(torch.bfloat16, torch.half),
    sample_inputs_func=sample_inputs_hardswish,
    # Capability flags.
    supports_out=False,
    supports_autograd=True,
    supports_gradgrad=True,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    # JIT autodiff expectations.
    assert_autodiffed=True,
    autodiff_nonfusible_nodes=["aten::hardswish"],
),
OpInfo(
    'nn.functional.unfold',
    # This entry is registered against the aten name `im2col`.
    aten_name='im2col',
    dtypes=floating_and_complex_types_and(torch.half, torch.bfloat16, torch.bool),
    dtypesIfCUDA=floating_and_complex_types_and(torch.half, torch.bfloat16, torch.bool),
    sample_inputs_func=sample_inputs_nn_unfold,
    # Capability flags.
    supports_out=False,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    # Slow-mode gradcheck is very slow here; the alternative would be smaller inputs.
    gradcheck_fast_mode=True,
    skips=(
        # NOTE: this failure may not reproduce consistently on different systems
        # false INTERNAL ASSERT FAILED at "...torch/csrc/jit/passes/utils/check_alias_annotation.cpp":185
        DecorateInfo(
            unittest.skip("Internal assert failed!"),
            'TestJit',
            'test_variant_consistency_jit',
        ),
        # Compiler issue on ROCm. Regression started in ROCm 6.4.
        DecorateInfo(
            unittest.skip('Skipped!'),
            'TestCommon',
            'test_non_standard_bool_values',
            dtypes=[torch.bool],
            active_if=TEST_WITH_ROCM,
        ),
    ),
),
OpInfo(
    'nn.functional.interpolate',
    # Variant bound to the 'nearest' mode via the partial sample generator.
    variant_test_name='nearest',
    aten_name="interpolate",
    dtypes=floating_types_and(torch.uint8, torch.half, torch.bfloat16),
    sample_inputs_func=partial(sample_inputs_interpolate, 'nearest'),
    # Capability flags.
    supports_out=False,
    supports_autograd=True,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    skips=(
        # RuntimeError: false
        # INTERNAL ASSERT FAILED at "../torch/csrc/jit/passes/utils/check_alias_annotation.cpp":185,
        # please report a bug to PyTorch.
        DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
    ),
),
OpInfo(
    'nn.functional.interpolate',
    # Variant bound to the 'nearest-exact' mode via the partial sample generator.
    variant_test_name='nearest-exact',
    aten_name="interpolate",
    dtypes=floating_types_and(torch.half, torch.bfloat16, torch.uint8),
    sample_inputs_func=partial(sample_inputs_interpolate, 'nearest-exact'),
    # Capability flags.
    supports_out=False,
    supports_autograd=True,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    skips=(
        # RuntimeError: false
        # INTERNAL ASSERT FAILED at "../torch/csrc/jit/passes/utils/check_alias_annotation.cpp":185,
        # please report a bug to PyTorch.
        DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
        # RuntimeError: aten::_upsample_nearest_exact*d hit the vmap fallback which is currently disabled
        DecorateInfo(unittest.expectedFailure, 'TestOperators', 'test_vmapjvpall_has_batch_rule'),
        DecorateInfo(unittest.expectedFailure, 'TestOperators', 'test_vmapvjp_has_batch_rule'),
        DecorateInfo(unittest.expectedFailure, 'TestVmapOperatorsOpInfo', 'test_op_has_batch_rule'),
        # NotImplementedError: The operator 'aten::_upsample_nearest_exact3d.out' is not currently implemented
        # for the MPS device.
        DecorateInfo(unittest.expectedFailure, 'TestConsistency'),
    ),
),
OpInfo(
    'nn.functional.interpolate',
    # Variant bound to the 'linear' mode via the partial sample generator.
    variant_test_name='linear',
    aten_name="interpolate",
    dtypes=floating_types_and(torch.half, torch.bfloat16),
    sample_inputs_func=partial(sample_inputs_interpolate, 'linear'),
    # Capability flags.
    supports_out=False,
    supports_autograd=True,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    skips=(
        # RuntimeError: false
        # INTERNAL ASSERT FAILED at "../torch/csrc/jit/passes/utils/check_alias_annotation.cpp":185,
        # please report a bug to PyTorch.
        DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
    ),
),
OpInfo(
    'nn.functional.interpolate',
    # Variant bound to the 'bilinear' mode via the partial input generators.
    variant_test_name='bilinear',
    aten_name="interpolate",
    # The default dtype list includes uint8; the CUDA list does not.
    dtypes=floating_types_and(torch.uint8, torch.half, torch.bfloat16),
    dtypesIfCUDA=floating_types_and(torch.half, torch.bfloat16),
    sample_inputs_func=partial(sample_inputs_interpolate, 'bilinear'),
    reference_inputs_func=partial(reference_inputs_interpolate, 'bilinear'),
    # Capability flags.
    supports_out=False,
    supports_autograd=True,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
    skips=(
        # RuntimeError: false
        # INTERNAL ASSERT FAILED at "../torch/csrc/jit/passes/utils/check_alias_annotation.cpp":185,
        # please report a bug to PyTorch.
        DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
    ),
),
OpInfo(
    'nn.functional.interpolate',
    # Variant bound to the 'bicubic' mode via the partial input generators.
    variant_test_name='bicubic',
    aten_name="interpolate",
    # The default dtype list includes uint8; the CUDA list does not.
    dtypes=floating_types_and(torch.uint8, torch.half, torch.bfloat16),
    dtypesIfCUDA=floating_types_and(torch.half, torch.bfloat16),
    sample_inputs_func=partial(sample_inputs_interpolate, 'bicubic'),
    reference_inputs_func=partial(reference_inputs_interpolate, 'bicubic'),
    # Capability flags.
    supports_out=False,
    supports_autograd=True,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
    skips=(
        # RuntimeError: false
        # INTERNAL ASSERT FAILED at "../torch/csrc/jit/passes/utils/check_alias_annotation.cpp":185,
        # please report a bug to PyTorch.
        DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
    ),
),
OpInfo(
    'nn.functional.interpolate',
    # Variant bound to the 'trilinear' mode via the partial sample generator.
    variant_test_name='trilinear',
    aten_name="interpolate",
    dtypes=floating_types_and(torch.half, torch.bfloat16),
    sample_inputs_func=partial(sample_inputs_interpolate, 'trilinear'),
    # Capability flags.
    supports_out=False,
    supports_autograd=True,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
    skips=(
        # RuntimeError: false
        # INTERNAL ASSERT FAILED at "../torch/csrc/jit/passes/utils/check_alias_annotation.cpp":185,
        # please report a bug to PyTorch.
        DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
    ),
),
OpInfo(
    'nn.functional.interpolate',
    # Variant bound to the 'area' mode via the partial sample generator.
    variant_test_name='area',
    aten_name="interpolate",
    dtypes=floating_types_and(torch.half, torch.bfloat16),
    dtypesIfCUDA=floating_types_and(torch.half, torch.bfloat16),
    sample_inputs_func=partial(sample_inputs_interpolate, 'area'),
    # Capability flags.
    supports_out=False,
    supports_autograd=True,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
    skips=(
        # RuntimeError: false
        # INTERNAL ASSERT FAILED at "../torch/csrc/jit/passes/utils/check_alias_annotation.cpp":185,
        # please report a bug to PyTorch.
        DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
    ),
),
OpInfo(
    'nn.functional.upsample_bilinear',
    # The default dtype list includes uint8; the CUDA list does not.
    dtypes=floating_types_and(torch.uint8, torch.half, torch.bfloat16),
    dtypesIfCUDA=floating_types_and(torch.half, torch.bfloat16),
    # Both generators are bound to the 'bilinear' mode.
    sample_inputs_func=partial(sample_inputs_upsample, 'bilinear'),
    reference_inputs_func=partial(reference_inputs_upsample, 'bilinear'),
    # Capability flags.
    supports_out=False,
    supports_autograd=True,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
    skips=(
        # RuntimeError: false
        # INTERNAL ASSERT FAILED at "../torch/csrc/jit/passes/utils/check_alias_annotation.cpp":185,
        # please report a bug to PyTorch.
        DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
    ),
),
OpInfo(
    '_upsample_bilinear2d_aa',
    # The callable is taken directly from torch.ops.aten.
    op=torch.ops.aten._upsample_bilinear2d_aa,
    aten_name='_upsample_bilinear2d_aa',
    # The default list adds uint8 to the floating types; the CUDA list adds half and bfloat16 instead.
    dtypes=floating_types_and(torch.uint8),
    dtypesIfCUDA=floating_types_and(torch.half, torch.bfloat16),
    sample_inputs_func=partial(sample_inputs_upsample_aa, 'bilinear'),
    # Capability flags.
    supports_out=False,
    supports_autograd=True,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
    # Tests registered as expected failures for this entry.
    skips=(
        DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
        DecorateInfo(unittest.expectedFailure, 'TestDTensorOps', 'test_dtensor_op_db'),
        DecorateInfo(unittest.expectedFailure, 'TestInductorOpInfo', 'test_comprehensive'),
        DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
    ),
),
OpInfo(
    "nn.functional.soft_margin_loss",
    dtypes=floating_types_and(torch.half, torch.bfloat16),
    # The target does not support grad, so samples are built with rhs_requires_grad=False.
    sample_inputs_func=partial(sample_inputs_loss, rhs_requires_grad=False),
    error_inputs_func=error_inputs_soft_margin_loss,
    # Capability flags.
    supports_out=False,
    supports_forward_ad=True,
),
OpInfo(
    'nn.functional.upsample_nearest',
    dtypes=floating_types_and(torch.uint8, torch.half, torch.bfloat16),
    # Sample generator bound to the 'nearest' mode.
    sample_inputs_func=partial(sample_inputs_upsample, 'nearest'),
    # Capability flags.
    supports_out=False,
    supports_autograd=True,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
    skips=(
        # RuntimeError: false
        # INTERNAL ASSERT FAILED at "../torch/csrc/jit/passes/utils/check_alias_annotation.cpp":185,
        # please report a bug to PyTorch.
        DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
    ),
),
OpInfo(
    "nn.functional.margin_ranking_loss",
    dtypes=all_types_and(torch.half, torch.bfloat16),
    # Input generators.
    sample_inputs_func=sample_inputs_margin_ranking_loss,
    reference_inputs_func=reference_inputs_margin_ranking_loss,
    error_inputs_func=error_inputs_margin_ranking_loss,
    # Capability flags.
    supports_out=False,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
),
OpInfo(
    "nn.functional.multi_margin_loss",
    # The CUDA dtype list adds bfloat16 and float16 to the floating types.
    dtypes=floating_types(),
    dtypesIfCUDA=floating_types_and(torch.bfloat16, torch.float16),
    # Input generators.
    sample_inputs_func=sample_inputs_multi_margin_loss,
    reference_inputs_func=reference_inputs_multi_margin_loss,
    error_inputs_func=error_inputs_multi_margin_loss,
    # Capability flags.
    supports_out=False,
    supports_gradgrad=False,
    decorators=(
        # Looser float32 tolerance for JIT variant consistency.
        DecorateInfo(
            toleranceOverride({torch.float32: tol(atol=1e-4, rtol=1e-4)}),
            "TestJit",
            "test_variant_consistency_jit",
        ),
    ),
),
OpInfo(
    "nn.functional.multilabel_margin_loss",
    # The CUDA dtype list adds bfloat16 and float16 to the floating types.
    dtypes=floating_types(),
    dtypesIfCUDA=floating_types_and(torch.bfloat16, torch.float16),
    # Input generators.
    sample_inputs_func=sample_inputs_multilabel_margin_loss,
    reference_inputs_func=reference_inputs_multilabel_margin_loss,
    error_inputs_func=error_inputs_multilabel_margin_loss,
    # Capability flags.
    supports_out=False,
    supports_gradgrad=False,
),
OpInfo(
    'nn.functional.leaky_relu',
    # Identity of the op under test; aliases are explicitly set to None.
    aten_name="leaky_relu",
    aten_backward_name='leaky_relu_backward',
    aliases=None,
    dtypes=floating_types_and(torch.bfloat16, torch.float16),
    sample_inputs_func=sample_inputs_leaky_relu,
    # The in-place variant calls the functional form with inplace=True.
    inplace_variant=lambda x, negative_slope=0.01:
        torch.nn.functional.leaky_relu(x, negative_slope, inplace=True),
    # Capability flags.
    supports_out=False,
    supports_autograd=True,
    supports_gradgrad=True,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    # JIT autodiff expectations.
    assert_autodiffed=True,
    autodiff_nonfusible_nodes=["aten::leaky_relu"],
),
OpInfo(
    "nn.functional.multilabel_soft_margin_loss",
    dtypes=floating_types_and(torch.half, torch.bfloat16),
    sample_inputs_func=sample_inputs_multilabel_soft_margin_loss,
    # Capability flags.
    supports_out=False,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    # Tolerance overrides, one per affected test.
    decorators=(
        DecorateInfo(
            toleranceOverride({torch.float32: tol(atol=1e-4, rtol=1e-4)}),
            "TestJit",
            "test_variant_consistency_jit",
        ),
        DecorateInfo(
            toleranceOverride({torch.float16: tol(atol=4e-3, rtol=1.3e-3)}),
            "TestInductorOpInfo",
            "test_comprehensive",
            device_type="cuda",
        ),
    ),
    skips=(
        # AssertionError: False is not true : Scalars failed to compare as equal! 0 != 4096
        # __main__.TestJitCUDA.test_variant_consistency_jit_nn_functional_multilabel_soft_margin_loss_cuda_float32
        # leaked 4096 bytes CUDA memory on device 0
        DecorateInfo(
            # A skip rather than an expectedFailure: the original author notes
            # that this fails locally but passes in CI.
            unittest.skip("Skipped!"),
            "TestJit",
            "test_variant_consistency_jit",
            device_type="cuda",
        ),
    ),
),
OpInfo(
    'nn.functional.avg_pool2d',
    aten_name='avg_pool2d',
    # The default dtype list includes int64; the CUDA list does not.
    dtypes=floating_types_and(torch.int64, torch.float16, torch.bfloat16),
    dtypesIfCUDA=floating_types_and(torch.float16, torch.bfloat16),
    # Input generators.
    sample_inputs_func=sample_inputs_avgpool2d,
    error_inputs_func=error_inputs_avg_pool2d,
    # Capability flags.
    supports_autograd=True,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    skips=(
        # test_out is registered as an expected failure on CUDA.
        DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out', device_type='cuda'),
    ),
),
OpInfo(
    'nn.functional.fractional_max_pool2d',
    # The op is routed through wrapper_set_seed.
    op=lambda input, *args, **kwargs:
        wrapper_set_seed(torch.nn.functional.fractional_max_pool2d, input, *args, **kwargs),
    dtypes=floating_types_and(torch.bfloat16, torch.float16),
    sample_inputs_func=sample_inputs_fractional_max_pool2d,
    # Capability flags.
    supports_out=False,
    supports_autograd=True,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    # vmap does not support random operations
    check_batched_forward_grad=False,
    test_neg_view=False,
    decorators=(
        # FIXME: AssertionError: False is not true : Tensors failed to compare as equal!
        DecorateInfo(
            unittest.expectedFailure,
            'TestNormalizeOperators',
            'test_normalize_operator_exhaustive',
        ),
        # RuntimeError: input->type()->kind() == TypeKind::OptionalType
        # INTERNAL ASSERT FAILED at "../torch/csrc/jit/passes/utils/check_alias_annotation.cpp":270
        DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
    ),
    skips=(
        DecorateInfo(
            unittest.skip('output is non-deterministic'),
            'TestCommon',
            'test_compare_cpu',
        ),
    ),
),
OpInfo(
    'nn.functional.fractional_max_pool3d',
    # The op is routed through wrapper_set_seed.
    op=lambda input, *args, **kwargs:
        wrapper_set_seed(torch.nn.functional.fractional_max_pool3d, input, *args, **kwargs),
    dtypes=floating_types_and(torch.bfloat16, torch.float16),
    sample_inputs_func=sample_inputs_fractional_max_pool3d,
    # Capability flags.
    supports_out=False,
    supports_autograd=True,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    # vmap does not support random operations
    check_batched_forward_grad=False,
    test_neg_view=False,
    gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
    decorators=(
        # FIXME: both derivatives are implemented incorrectly
        # https://github.com/pytorch/pytorch/issues/69322
        # FIXME: AssertionError: False is not true : Tensors failed to compare as equal!
        DecorateInfo(
            unittest.expectedFailure,
            'TestNormalizeOperators',
            'test_normalize_operator_exhaustive',
        ),
        # RuntimeError: input->type()->kind() == TypeKind::OptionalType
        # INTERNAL ASSERT FAILED at "../torch/csrc/jit/passes/utils/check_alias_annotation.cpp":270
        DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
    ),
    skips=(
        DecorateInfo(
            unittest.skip('output is non-deterministic'),
            'TestCommon',
            'test_compare_cpu',
        ),
    ),
),
OpInfo(
    'nn.functional.max_pool1d',
    aten_name='max_pool1d',
    dtypes=floating_types_and(torch.bfloat16, torch.float16),
    dtypesIfCUDA=floating_types_and(torch.float16, torch.bfloat16),
    # Input generators.
    sample_inputs_func=sample_inputs_max_pool,
    error_inputs_func=error_inputs_max_pool1d,
    # Capability flags.
    supports_out=False,
    supports_autograd=True,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    # got: Batching rule not implemented for aten::flatten.using_ints
    check_batched_forward_grad=False,
    # TODO: add shape checks
    assert_jit_shape_analysis=False,
    skips=(
        # Pre-existing condition; Needs to be fixed
        DecorateInfo(
            unittest.skip("Works on some configs"),
            'TestNNCOpInfo',
            'test_nnc_correctness',
            dtypes=(torch.bfloat16,),
        ),
        # RuntimeError: The tensor has a non-zero number of elements, but its data is not allocated yet.
        # Caffe2 uses a lazy allocation, so you will need to call mutable_data() or raw_mutable_data()
        # to actually allocate memory
        DecorateInfo(unittest.skip("Skipped!"), 'TestTags', 'test_tags'),
    ),
),
OpInfo(
    'nn.functional.max_pool2d',
    aten_name='max_pool2d',
    # The default list uses all_types_and(...); the CUDA list is restricted to floating types.
    dtypes=all_types_and(torch.float16, torch.bfloat16),
    dtypesIfCUDA=floating_types_and(torch.float16, torch.bfloat16),
    # Input generators.
    sample_inputs_func=sample_inputs_max_pool,
    error_inputs_func=error_inputs_max_pool2d,
    # Capability flags.
    supports_out=False,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    # Slow-mode gradcheck is very slow here; the alternative would be smaller inputs.
    gradcheck_fast_mode=True,
    # Vmap is not happy with non-contiguous (channels_last) inputs
    check_batched_gradgrad=False,
    # got: Batching rule not implemented for aten::flatten.using_ints
    check_batched_forward_grad=False,
    assert_jit_shape_analysis=True,
),
OpInfo(
    'max_pool2d_with_indices_backward',
    op=max_pool2d_backward,
    # `op` is a custom callable, so there is no corresponding aten op
    # and every alternative variant is explicitly cleared.
    aten_name=None,
    method_variant=None,
    inplace_variant=None,
    operator_variant=None,
    inplace_operator_variant=None,
    dtypes=floating_types_and(torch.bfloat16, torch.float16),
    sample_inputs_func=sample_inputs_max_pool,
    # Capability flags.
    supports_out=False,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    # Checks that are turned off for this entry.
    check_batched_gradgrad=False,
    check_batched_forward_grad=False,
    assert_jit_shape_analysis=False,
    skips=(
        # We've defined a custom op here, and we don't handle the case where we receive an out kwarg
        DecorateInfo(unittest.skip("Skipped!"), "TestCommon", "test_out"),
        DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning'),
        # FX failed to normalize op - add the op to the op_skip list.
        DecorateInfo(
            unittest.expectedFailure,
            'TestNormalizeOperators',
            'test_normalize_operator_exhaustive',
        ),
        # object has no attribute max_pool2d_with_indices_backward (It's not available on torch -- so expected)
        DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
    ),
),
OpInfo(
    'nn.functional.max_pool3d',
    aten_name='max_pool3d',
    # The default list uses all_types_and(...); the CUDA list is restricted to floating types.
    dtypes=all_types_and(torch.bfloat16, torch.float16),
    dtypesIfCUDA=floating_types_and(torch.float16, torch.bfloat16),
    # Input generators.
    sample_inputs_func=sample_inputs_max_pool,
    error_inputs_func=error_inputs_max_pool3d,
    # Capability flags.
    supports_out=False,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    # Slow-mode gradcheck is very slow here; the alternative would be smaller inputs.
    gradcheck_fast_mode=True,
    # TODO: investigate nondeterminism
    gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
    # got: Batching rule not implemented for aten::flatten.using_ints
    check_batched_forward_grad=False,
    # TODO: add shape checks
    assert_jit_shape_analysis=False,
),
OpInfo(
    'nn.functional.max_unpool1d',
    aten_name='max_unpool1d',
    dtypes=floating_types_and(torch.float16, torch.bfloat16),
    sample_inputs_func=sample_inputs_max_unpool,
    # Capability flags.
    supports_out=False,
    supports_autograd=True,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    assert_jit_shape_analysis=False,
    skips=(
        # Gradient coverage lives in the `variant_test_name=grad` entry for this op.
        # The gradient tests are skipped here because backward with gather is
        # non-deterministic when several indices write into the same memory location;
        # gradcheck is oblivious to that and cannot perturb them all at once
        # (see sample_inputs_max_unpool_grad to find out more).
        DecorateInfo(unittest.skip("Skipped!"), 'TestBwdGradients', 'test_fn_grad'),
        DecorateInfo(unittest.skip("Skipped!"), 'TestBwdGradients', 'test_fn_gradgrad'),
        DecorateInfo(
            unittest.skip("Skipped!"),
            'TestFwdGradients',
            'test_forward_mode_AD',
            active_if=(not IS_MACOS),
        ),
        DecorateInfo(
            unittest.skip("Skipped!"),
            'TestCompositeCompliance',
            'test_forward_ad',
            device_type='cpu',
        ),
        DecorateInfo(unittest.skip("Skipped!"), 'TestDecomp', 'test_quick_core_backward'),
    ),
),
OpInfo(
    'nn.functional.max_unpool1d',
    # Gradient-focused variant; it uses the dedicated *_grad sample generator.
    variant_test_name='grad',
    aten_name='max_unpool1d',
    dtypes=floating_types_and(torch.float16, torch.bfloat16),
    sample_inputs_func=sample_inputs_max_unpool_grad,
    # Capability flags.
    supports_out=False,
    supports_autograd=True,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    assert_jit_shape_analysis=False,
),
OpInfo(
    'nn.functional.max_unpool2d',
    aten_name='max_unpool2d',
    dtypes=floating_types_and(torch.float16, torch.bfloat16),
    sample_inputs_func=sample_inputs_max_unpool,
    # Capability flags.
    supports_out=False,
    supports_autograd=True,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    assert_jit_shape_analysis=False,
    skips=(
        # Gradient coverage lives in the `variant_test_name=grad` entry for this op.
        # The gradient tests are skipped here because backward with gather is
        # non-deterministic when several indices write into the same memory location;
        # gradcheck is oblivious to that and cannot perturb them all at once
        # (see sample_inputs_max_unpool_grad to find out more).
        DecorateInfo(
            unittest.skip("Skipped!"),
            'TestFwdGradients',
            'test_forward_mode_AD',
            active_if=(not IS_MACOS),
        ),
        DecorateInfo(unittest.skip("Skipped!"), 'TestBwdGradients', 'test_fn_gradgrad'),
        DecorateInfo(unittest.skip("Skipped!"), 'TestBwdGradients', 'test_fn_grad'),
        DecorateInfo(unittest.skip("Skipped!"), 'TestCompositeCompliance', 'test_forward_ad'),
        DecorateInfo(unittest.skip("Skipped!"), 'TestDecomp', 'test_quick_core_backward'),
    ),
),
OpInfo(
    'nn.functional.max_unpool2d',
    # Gradient-focused variant; it uses the dedicated *_grad sample generator.
    variant_test_name='grad',
    aten_name='max_unpool2d',
    dtypes=floating_types_and(torch.float16, torch.bfloat16),
    sample_inputs_func=sample_inputs_max_unpool_grad,
    # Capability flags.
    supports_out=False,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    # Slow-mode gradcheck is very slow here; the alternative would be smaller inputs.
    gradcheck_fast_mode=True,
    # Vmap is not happy with non-contiguous (channels_last) inputs
    check_batched_grad=False,
    assert_jit_shape_analysis=False,
),
OpInfo(
    'nn.functional.max_unpool3d',
    aten_name='max_unpool3d',
    dtypes=floating_types_and(torch.float16, torch.bfloat16),
    sample_inputs_func=sample_inputs_max_unpool,
    # Capability flags.
    supports_out=False,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    # Slow-mode gradcheck is very slow here; the alternative would be smaller inputs.
    gradcheck_fast_mode=True,
    assert_jit_shape_analysis=False,
    skips=(
        # Gradient coverage lives in the `variant_test_name=grad` entry for this op.
        # The gradient tests are skipped here because backward with gather is
        # non-deterministic when several indices write into the same memory location;
        # gradcheck is oblivious to that and cannot perturb them all at once
        # (see sample_inputs_max_unpool_grad to find out more).
        DecorateInfo(
            unittest.skip("Skipped!"),
            'TestFwdGradients',
            'test_forward_mode_AD',
            active_if=(not IS_MACOS),
        ),
        DecorateInfo(unittest.skip("Skipped!"), 'TestBwdGradients', 'test_fn_gradgrad'),
        DecorateInfo(unittest.skip("Skipped!"), 'TestBwdGradients', 'test_fn_grad'),
        DecorateInfo(unittest.skip("Skipped!"), 'TestCompositeCompliance', 'test_forward_ad'),
        DecorateInfo(unittest.skip("Skipped!"), 'TestDecomp', 'test_quick_core_backward'),
    ),
),
OpInfo(
    'nn.functional.max_unpool3d',
    # Gradient-focused variant; it uses the dedicated *_grad sample generator.
    variant_test_name='grad',
    aten_name='max_unpool3d',
    dtypes=floating_types_and(torch.float16, torch.bfloat16),
    sample_inputs_func=sample_inputs_max_unpool_grad,
    # Capability flags.
    supports_out=False,
    supports_autograd=True,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    assert_jit_shape_analysis=False,
),
OpInfo(
    'nn.functional.linear',
    aten_name='linear',
    # The default list uses all_types_and_complex_and(...); the ROCm and CUDA lists
    # use floating_and_complex_types_and(...) instead.
    dtypes=all_types_and_complex_and(torch.float16, torch.bfloat16),
    dtypesIfCUDA=floating_and_complex_types_and(torch.float16, torch.bfloat16),
    dtypesIfROCM=floating_and_complex_types_and(torch.float16, torch.bfloat16),
    backward_dtypesIfCUDA=floating_and_complex_types_and(torch.float16, torch.bfloat16),
    sample_inputs_func=sample_inputs_linear,
    # Capability flags.
    supports_autograd=True,
    supports_gradgrad=True,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    supports_expanded_weight=True,
    # linear calls mm under the hood which is nondeterministic on CUDA
    # https://pytorch.org/docs/stable/generated/torch.use_deterministic_algorithms.html#torch.use_deterministic_algorithms
    gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
    # See https://github.com/pytorch/pytorch/issues/66357
    check_batched_forward_grad=False,
    decorators=(
        # Strides are not the same!
        DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out'),
    ),
),
OpInfo(
    'nn.functional.bilinear',
    aten_name='bilinear',
    dtypes=all_types_and(torch.float16, torch.bfloat16),
    # bfloat16 is only added to the CUDA list when SM53OrLater or TEST_WITH_ROCM is truthy.
    dtypesIfCUDA=floating_types_and(
        torch.float16,
        *[torch.bfloat16] if SM53OrLater or TEST_WITH_ROCM else []),
    sample_inputs_func=sample_inputs_bilinear,
    # Capability flags.
    supports_out=False,
    supports_autograd=True,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    # Slow-mode gradcheck is very slow here; the alternative would be smaller inputs.
    gradcheck_fast_mode=True,
    decorators=(
        # Looser float16 tolerance for the Inductor comprehensive test on CPU.
        DecorateInfo(
            toleranceOverride({torch.float16: tol(atol=2e-03, rtol=1.3e-03)}),
            'TestInductorOpInfo',
            'test_comprehensive',
            device_type='cpu',
        ),
    ),
    skips=(
        # NVIDIA only assures that bfloat16 is supported by bmm if SM >= 5.3
        DecorateInfo(
            unittest.skip("Skipped!"),
            'TestCommon',
            'test_dtypes',
            device_type='cuda',
            active_if=not SM53OrLater,
        ),
        DecorateInfo(
            unittest.skip("Skipped!"),
            'TestNNCOpInfo',
            'test_nnc_correctness',
            dtypes=(torch.bfloat16,),
        ),
    ),
),
OpInfo(
    'nn.functional.glu',
    aten_name='glu',
    dtypes=floating_types_and(torch.bfloat16, torch.float16),
    sample_inputs_func=sample_inputs_glu,
    # Capability flags.
    supports_out=False,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    # Slow-mode gradcheck is very slow here; the alternative would be smaller inputs.
    gradcheck_fast_mode=True,
),
UnaryUfuncInfo(
    'nn.functional.elu',
    aten_backward_name='elu_backward',
    # NumPy reference: max(0, x) + min(0, alpha * (exp(x) - 1)); `inplace` is accepted but unused.
    ref=lambda x, alpha=1.0, inplace=False:
        np.maximum(0., x) + np.minimum(0., alpha * (np.exp(x) - 1)),
    dtypes=floating_types_and(torch.bfloat16, torch.float16),
    # The same alpha=0.8 kwargs are returned in both positions of the pair.
    sample_kwargs=lambda device, dtype, input:
        ({'alpha': 0.8}, {'alpha': 0.8}),
    # The in-place variant calls the functional form with inplace=True.
    inplace_variant=lambda x, alpha=1.0:
        torch.nn.functional.elu(x, alpha, inplace=True),
    # Capability flags.
    supports_out=False,
    supports_autograd=True,
    supports_gradgrad=True,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    assert_autodiffed=False,
    decorators=[
        # Looser half-precision tolerances for TestUnaryUfuncs on CUDA.
        DecorateInfo(
            toleranceOverride({
                torch.float16: tol(atol=1e-03, rtol=1.2e-03),
                torch.bfloat16: tol(atol=1e-03, rtol=1.2e-03),
            }),
            'TestUnaryUfuncs',
            device_type='cuda',
        ),
    ],
),
# Marked as a Unary function because it has some rather odd broadcasting semantics in its
# second argument
UnaryUfuncInfo(
'nn.functional.prelu',
aten_backward_name='_prelu_kernel_backward',
# NumPy reference: max(0, x) + min(0, x) * weight.
# For 1-D x the weight is used as-is; otherwise it is reshaped so that all of its
# elements lie along dimension 1 and every other dimension has size 1.
ref=lambda x, weight:
np.maximum(0., x) + np.minimum(0., x) *
(weight if x.ndim == 1 else weight.reshape([weight.size if i == 1 else 1 for i in range(0, x.ndim)])),
dtypes=floating_types_and(torch.bfloat16, torch.float16),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_autograd=True,
assert_autodiffed=False,
supports_gradgrad=True,
supports_out=False,
# test_reference_numerics only tests the case when the weight tensor is a scalar
sample_kwargs=sample_kwargs_prelu_scalar_weight,
error_inputs_func=error_inputs_prelu,
sample_inputs_func=sample_inputs_prelu,
reference_inputs_func=reference_inputs_prelu,
decorators=[
# RuntimeError: Cannot insert a Tensor that requires grad as a constant.
# Consider making it a parameter or input, or detaching the gradient
# https://github.com/pytorch/pytorch/issues/68752
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'), ],
),
# UnaryUfuncInfo entry for nn.functional.celu.
UnaryUfuncInfo(
'nn.functional.celu',
# NumPy reference: max(0, x) + min(0, alpha * (exp(x / alpha) - 1)).
# `inplace` is accepted by the lambda but not used.
ref=lambda x, alpha=1.0, inplace=False:
np.maximum(0., x) + np.minimum(0., alpha * (np.exp(x / alpha) - 1)),
dtypes=floating_types_and(torch.bfloat16, torch.float16),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_autograd=True,
assert_autodiffed=False,
supports_gradgrad=True,
supports_out=False,
# Returns two identical kwarg dicts pinning alpha=0.8
# (presumably one for the torch op and one for `ref` -- confirm against UnaryUfuncInfo).
sample_kwargs=lambda device, dtype, input:
({'alpha': 0.8}, {'alpha': 0.8}),
# The in-place variant calls the functional form with inplace=True.
inplace_variant=lambda x, alpha=1.0:
torch.nn.functional.celu(x, alpha, inplace=True),
decorators=[
# Custom float16/bfloat16 tolerances for TestUnaryUfuncs on CUDA.
DecorateInfo(
toleranceOverride({
torch.float16: tol(atol=1e-03, rtol=1.2e-03),
torch.bfloat16: tol(atol=1e-03, rtol=1.2e-03)
}),
'TestUnaryUfuncs', device_type='cuda',
), ],
),
# UnaryUfuncInfo entry for nn.functional.rrelu (backward ATen name 'rrelu_with_noise_backward').
UnaryUfuncInfo(
'nn.functional.rrelu',
aten_backward_name='rrelu_with_noise_backward',
# Both the out-of-place and in-place variants are routed through wrapper_set_seed
# (presumably to seed the RNG before each call -- confirm against wrapper_set_seed).
op=lambda input, *args, **kwargs:
wrapper_set_seed(torch.nn.functional.rrelu, input, *args, **kwargs),
inplace_variant=lambda input, *args, **kwargs:
wrapper_set_seed(torch.nn.functional.rrelu, input, *args, inplace=True, **kwargs),
# float16 is declared only in the CUDA dtype list.
dtypes=floating_types_and(torch.bfloat16),
dtypesIfCUDA=floating_types_and(torch.float16, torch.bfloat16),
gradcheck_wrapper=wrapper_set_seed,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_out=False,
# Returns two identical kwarg dicts: lower=0., upper=1., training=True.
sample_kwargs=lambda device, dtype, input:
(dict(lower=0., upper=1., training=True), dict(lower=0., upper=1., training=True)),
sample_inputs_func=sample_inputs_rrelu,
error_inputs_func=error_inputs_rrelu,
decorators=(
# Custom float16/bfloat16 tolerances for TestUnaryUfuncs on CUDA.
DecorateInfo(
toleranceOverride({
torch.float16: tol(atol=1e-03, rtol=1.2e-03),
torch.bfloat16: tol(atol=1e-03, rtol=1.2e-03)
}),
'TestUnaryUfuncs', device_type='cuda',
),),
skips=(
# lambda impl
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
# lambda impl
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
# In-place operations do not play well with forward AD
# https://github.com/pytorch/pytorch/issues/77447
DecorateInfo(unittest.expectedFailure, 'TestFwdGradients',
'test_inplace_forward_mode_AD'),
# The noise vector that's generated in these tests is not the same elementwise
DecorateInfo(unittest.skip("Different noise"), 'TestUnaryUfuncs', 'test_batch_vs_slicing'),
DecorateInfo(unittest.skip("Different noise"), 'TestUnaryUfuncs', 'test_contig_vs_every_other'),
DecorateInfo(unittest.skip("Different noise"), 'TestUnaryUfuncs', 'test_non_contig_expand'),
DecorateInfo(unittest.skip("Different noise"), 'TestUnaryUfuncs', 'test_contig_vs_transposed'),
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu')),
skip_correctness_check_compile_vs_eager=True,
),
# UnaryUfuncInfo entry for nn.functional.selu.
UnaryUfuncInfo(
'nn.functional.selu',
# NumPy reference: scale * (max(0, x) + min(0, alpha * (exp(x) - 1))) with the
# scale and alpha constants written out as literals. `inplace` is accepted but not used.
ref=lambda x, inplace=False:
1.0507009873554804934193349852946 * (
np.maximum(0., x) + np.minimum(0., 1.6732632423543772848170429916717 * (np.exp(x) - 1))
),
dtypes=floating_types_and(torch.bfloat16, torch.float16),
supports_forward_ad=True, # depends on 'elu'
supports_fwgrad_bwgrad=True,
supports_autograd=True,
assert_autodiffed=False,
supports_gradgrad=True,
supports_out=False,
# The in-place variant calls the functional form with inplace=True.
inplace_variant=lambda x: torch.nn.functional.selu(x, inplace=True),
decorators=[
# Custom float16/bfloat16 tolerances for TestUnaryUfuncs on CUDA.
DecorateInfo(
toleranceOverride({
torch.float16: tol(atol=1e-2, rtol=1.8e-2),
torch.bfloat16: tol(atol=1e-2, rtol=1.8e-2)
}),
'TestUnaryUfuncs', device_type='cuda',
), ],
),
# OpInfo entry for torch._scaled_mm; autograd and forward AD are declared unsupported.
OpInfo(
'torch._scaled_mm',
sample_inputs_func=sample_inputs_scaled_mm,
dtypes=float8_types(),
# On CUDA the only declared dtype is torch.float8_e4m3fn.
dtypesIfCUDA=empty_types() + (torch.float8_e4m3fn,),
supports_out=True,
supports_forward_ad=False,
supports_autograd=False,
# NOTE(review): the skip condition is also true under TEST_WITH_ROCM, while the
# reason string only mentions the SM >= 8.9 requirement.
decorators=[skipCUDAIf(not SM89OrLater or TEST_WITH_ROCM, 'Requires CUDA SM >= 8.9')],
skips=(
# Sample inputs isn't really parametrized on dtype
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_dtypes'),
# "add_stub" not implemented for 'Float8_e4m3fn'
# "ufunc_add_CUDA" not implemented for 'Float8_e4m3fn'
# https://github.com/pytorch/pytorch/issues/107256
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_out'),
# "mul_cuda" not implemented for float8_e4m3fn
# "mul_cpu_reduced_float" not implemented for 'Float8_e4m3fn'
# https://github.com/pytorch/pytorch/issues/107256
DecorateInfo(unittest.skip("Skipped!"), 'TestSchemaCheckModeOpInfo', 'test_schema_correctness'),
# aten::_scaled_mm hit the vmap fallback which is currently disabled
DecorateInfo(unittest.skip("Skipped!"), "TestVmapOperatorsOpInfo", "test_op_has_batch_rule"),
DecorateInfo(unittest.skip("Skipped!"), "TestVmapOperatorsOpInfo", "test_vmap_exhaustive"),
# NNC correctness is an expected failure for the four listed float8 dtypes.
DecorateInfo(unittest.expectedFailure, 'TestNNCOpInfo', 'test_nnc_correctness',
dtypes=(torch.float8_e4m3fn, torch.float8_e4m3fnuz, torch.float8_e5m2, torch.float8_e5m2fnuz)),
)
),
# OpInfo entry for the ATen overload torch.ops.aten._safe_softmax.default.
OpInfo(
    'torch.ops.aten._safe_softmax.default',
    sample_inputs_func=sample_inputs_safe_softmax,
    dtypes=all_types_and(torch.half, torch.bfloat16, torch.bool),
    # Variant / autograd capability flags.
    supports_out=False,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    supports_cow_input_no_materialize_backward=False,
    # JIT-related assertions.
    assert_autodiffed=True,
    assert_jit_shape_analysis=True,
    # No extra decorators; one expected failure in the JIT consistency test.
    decorators=[],
    skips=(
        DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
    ),
),
# OpInfo entry for nn.functional.scaled_dot_product_attention.
# The op is wrapped in a lambda that routes every call through wrapper_set_seed;
# several skips below exist because the OpInfo is implemented with a lambda or
# because the output is non-deterministic when dropout_p > 0.
OpInfo(
    'nn.functional.scaled_dot_product_attention',
    op=lambda *args, **kwargs:
        wrapper_set_seed(torch.nn.functional.scaled_dot_product_attention, *args, **kwargs),
    sample_inputs_func=sample_inputs_scaled_dot_product_attention,
    dtypes=floating_types_and(torch.float16, torch.bfloat16),
    supports_out=False,
    supports_forward_ad=False,
    supports_fwgrad_bwgrad=True,
    check_batched_forward_grad=False,
    # Custom float32 tolerance for every TestCommon test.
    decorators=[DecorateInfo(toleranceOverride(
        {torch.float32: tol(atol=5e-05, rtol=5e-6)}), 'TestCommon',), ],
    skips=(
        # When attn mask is a composite tensor this fails backward by returning a none
        DecorateInfo(unittest.skip("Skipped!"), 'TestCompositeCompliance', 'test_backward', device_type='cuda'),
        # This is only failing on Linux Bionic 3.10 Cuda 11.6
        DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_dtypes',
                     device_type='cuda', active_if=_get_torch_cuda_version() >= (11, 6)),
        DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_noncontiguous_samples',
                     dtypes=(torch.float32,)),
        # AssertionError: JIT Test does not execute any logic
        DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit'),
        # Forward works for dtype=float64 which is the math path
        DecorateInfo(unittest.skip("Skipped!"), 'TestFwdGradients', 'test_forward_mode_AD'),
        # Not implemented for Forward AD
        DecorateInfo(unittest.skip("Skipped!"), 'TestFwdGradients', 'test_fn_fwgrad_bwgrad',
                     device_type='cpu'),
        # Not implemented for backward derivative
        DecorateInfo(unittest.skip("Skipped!"), 'TestBwdGradients', 'test_fn_gradgrad',
                     device_type='cpu'),
        # CPU and CUDA have inconsistencies for intermediate outputs
        DecorateInfo(unittest.skip("Skipped!"), 'TestMeta', 'test_dispatch_meta_outplace',
                     device_type='cpu'),
        DecorateInfo(unittest.skip("Skipped!"), 'TestMeta', 'test_dispatch_symbolic_meta_outplace',
                     device_type='cpu'),
        # When changing input from Tensor to CompositeCompliantTensor, input.requires_grad() changes from true to false
        DecorateInfo(unittest.skip("Skipped!"), 'TestCompositeCompliance', 'test_backward',
                     device_type='cpu'),
        # OpInfo was implemented with a lambda
        DecorateInfo(unittest.skip("Skipped!"), 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
        # TODO Need to understand what this is testing and why it doesn't work
        DecorateInfo(unittest.skip("Skipped"), 'TestDecomp', 'test_comprehensive'),
        DecorateInfo(unittest.skip('output is non-deterministic (when dropout_p > 0)'), 'TestCommon', 'test_compare_cpu'),
        # TODO skip this for now since we can't skip on runtime arch support
        # (the reason string used to be the truncated text 'This is ').
        DecorateInfo(unittest.skip('Skipped for now: cannot skip on runtime arch support'),
                     'TestInductorOpInfo', 'test_comprehensive'),
        # skip for sm < 80
        DecorateInfo(unittest.skip("Skipped!"), 'TestSchemaCheckModeOpInfo', 'test_schema_correctness',
                     device_type='cuda', dtypes=(torch.bfloat16,), active_if=not SM80OrLater),
        # FIXME
        DecorateInfo(unittest.skip('test_cow_input does not work with efficient attention on ROCM'),
                     'TestCompositeCompliance', 'test_cow_input',
                     device_type='cuda', dtypes=(torch.bfloat16, torch.float16, torch.float32),
                     active_if=TEST_WITH_ROCM and PLATFORM_SUPPORTS_MEM_EFF_ATTENTION),),
),
# OpInfo entry for torch.ops.aten._flash_attention_forward.
# The default dtype list is empty; dtypes are only declared for CUDA.
OpInfo(
'torch.ops.aten._flash_attention_forward',
sample_inputs_func=sample_inputs_flash_attention_forward,
dtypes=empty_types(),
# bfloat16 is added to the CUDA dtypes only when SM80OrLater is truthy.
dtypesIfCUDA=custom_types(torch.float16)
if not SM80OrLater
else custom_types(torch.float16, torch.bfloat16),
supports_out=False,
supports_autograd=True,
supports_fwgrad_bwgrad=False,
supports_forward_ad=False,
check_batched_forward_grad=False,
decorators=[skipCUDAIf(not PLATFORM_SUPPORTS_FLASH_ATTENTION, "This platform doesn't support Flash Attention")],
skips=(
# Checking the scalar value of the philox seed and offset
DecorateInfo(unittest.expectedFailure, 'TestCompositeCompliance', 'test_operator', device_type='cuda'),
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_noncontiguous_samples', device_type='cuda'),
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit', device_type='cuda'),
# None Mismatch Tensor
DecorateInfo(unittest.expectedFailure, 'TestCompositeCompliance', 'test_backward', device_type='cuda'),
)
),
# OpInfo entry for torch.ops.aten._efficient_attention_forward.
# The default dtype list is empty; dtypes are only declared for CUDA.
OpInfo(
'torch.ops.aten._efficient_attention_forward',
sample_inputs_func=sample_inputs_efficient_attention_forward,
dtypes=empty_types(),
# bfloat16 is added to the CUDA dtypes only when SM80OrLater is truthy.
dtypesIfCUDA=custom_types(torch.float16, torch.float32)
if not SM80OrLater
else custom_types(torch.float16, torch.float32, torch.bfloat16),
supports_out=False,
supports_autograd=True,
supports_fwgrad_bwgrad=False,
supports_forward_ad=False,
check_batched_forward_grad=False,
# TODO: Skip because it produces a CUDA illegal memory access for some reason
skip_cow_input_backward=True,
# FIXME: mask_type == 2 (LowerRight)
decorators=[
skipCUDAIf(not PLATFORM_SUPPORTS_MEM_EFF_ATTENTION, "This platform doesn't support efficient attention"),
skipCUDAIf(TEST_WITH_ROCM, "Efficient attention on ROCM doesn't support custom_mask_type==2")],
skips=(
# Checking the scalar value of the philox seed and offset
DecorateInfo(unittest.expectedFailure, 'TestCompositeCompliance', 'test_operator', device_type='cuda'),
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_noncontiguous_samples', device_type='cuda'),
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit', device_type='cuda'),
# None Mismatch Tensor
DecorateInfo(unittest.expectedFailure, 'TestCompositeCompliance', 'test_backward', device_type='cuda'),
)
),
# UnaryUfuncInfo entry for nn.functional.silu on real floating dtypes
# (backward ATen name 'silu_backward').
UnaryUfuncInfo(
'nn.functional.silu',
aten_backward_name='silu_backward',
# NumPy reference: x / (1 + exp(-x)); `inplace` is accepted but not used.
ref=lambda x, inplace=False: x / (1 + np.exp(-x)),
dtypes=floating_types_and(torch.bfloat16, torch.float16),
supports_forward_ad=True,
supports_autograd=True,
supports_fwgrad_bwgrad=True,
assert_autodiffed=True,
supports_out=False,
# The in-place variant calls the functional form with inplace=True.
inplace_variant=lambda x: torch.nn.functional.silu(x, inplace=True),
decorators=[
# Custom float16/bfloat16 tolerances for TestUnaryUfuncs on CUDA.
DecorateInfo(
toleranceOverride({
torch.float16: tol(atol=1e-3, rtol=1e-3),
torch.bfloat16: tol(atol=1e-4, rtol=1e-4)
}),
'TestUnaryUfuncs', device_type='cuda',
), ],
skips=(
# NOTE(review): this skip targets torch.cfloat, which is not in this entry's
# `dtypes` list -- confirm whether it can ever match.
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_normal',
dtypes=(torch.cfloat,), device_type='cpu'),
),
autodiff_nonfusible_nodes=["aten::silu"],
),
# TODO: combine this with the nn.functional.silu OpInfo when
# complex autodiff for silu is supported or when
# the forward bug is fixed
# Note: silu errors when given inputs that require grad
# but it doesn't support grad in their dtype
# This is why the dtypes list above passes test_dtypes,
# because it's getting lucky and failing in forward
# because test_dtypes sets requires_grad to True
# THIS IS A BUG
UnaryUfuncInfo(
'nn.functional.silu',
variant_test_name='complex',
# NumPy reference: x / (1 + exp(-x)); `inplace` is accepted but not used.
ref=lambda x, inplace=False:
x / (1 + np.exp(-x)),
dtypes=complex_types(),
dtypesIfCUDA=complex_types(),
supports_forward_ad=False,
supports_autograd=False,
assert_autodiffed=False,
supports_out=False,
# The in-place variant calls the functional form with inplace=True.
inplace_variant=lambda x: torch.nn.functional.silu(x, inplace=True),
decorators=[
# NOTE(review): the override is keyed on float16/bfloat16 while this variant only
# declares complex dtypes -- confirm whether it has any effect here.
DecorateInfo(
toleranceOverride({
torch.float16: tol(atol=1e-3, rtol=1e-3),
torch.bfloat16: tol(atol=1e-4, rtol=1e-4)
}),
'TestUnaryUfuncs', device_type='cuda',
), ],
skips=(
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_normal',
dtypes=(torch.cfloat,)),
# FIXME: intentionally misreports dtypes
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_dtypes'),
# FIXME: numpy reference diverges: Comparing (nan+nanj) and (-0+0j)
DecorateInfo(unittest.skip("Skipped!"),
'TestUnaryUfuncs', 'test_reference_numerics_large',
dtypes=(torch.complex64, torch.cdouble)),
DecorateInfo(unittest.skip("Skipped!"),
'TestUnaryUfuncs', 'test_reference_numerics_small',
dtypes=(torch.complex64,)),
DecorateInfo(unittest.skip("Skipped!"),
'TestUnaryUfuncs', 'test_reference_numerics_extremal',
dtypes=(torch.complex64,)))),
# UnaryUfuncInfo entry for nn.functional.hardsigmoid
# (backward ATen name 'hardsigmoid_backward'); gradgrad is declared unsupported.
UnaryUfuncInfo(
'nn.functional.hardsigmoid',
aten_backward_name='hardsigmoid_backward',
ref=reference_hardsigmoid,
dtypes=floating_types_and(torch.bfloat16, torch.float16),
supports_autograd=True,
assert_autodiffed=False,
supports_gradgrad=False,
supports_forward_ad=True,
supports_out=False,
# The in-place variant is the functional form with inplace=True bound via partial.
inplace_variant=partial(torch.nn.functional.hardsigmoid, inplace=True),
decorators=[
# Custom float16 tolerance for TestUnaryUfuncs on CUDA.
DecorateInfo(
toleranceOverride({torch.float16: tol(atol=1e-04, rtol=0.001)}), 'TestUnaryUfuncs', device_type='cuda',), ],
skips=[
# still want to test that first derivative works though second derivative isn't supported
DecorateInfo(unittest.expectedFailure, 'TestBwdGradients', "test_inplace_gradgrad"),
# produces 0 instead of nan on ROCM
DecorateInfo(unittest.expectedFailure,
'TestUnaryUfuncs', "test_reference_numerics_extremal",
device_type='cuda',
active_if=(TEST_WITH_ROCM)), ]
),
# UnaryUfuncInfo entry for nn.functional.logsigmoid
# (ATen name 'log_sigmoid', backward ATen name 'log_sigmoid_backward').
UnaryUfuncInfo(
'nn.functional.logsigmoid',
aten_name="log_sigmoid",
aten_backward_name='log_sigmoid_backward',
ref=reference_logsigmoid,
dtypes=floating_types_and(torch.half, torch.bfloat16),
supports_autograd=True,
assert_autodiffed=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_gradgrad=True,
# autodiff_nonfusible_nodes=["aten::log_sigmoid"],
decorators=[
# The same float16/bfloat16 precision override is applied to the small, large
# and extremal reference-numerics tests.
DecorateInfo(
precisionOverride({torch.float16: 1e-2, torch.bfloat16: 5e-3}),
'TestUnaryUfuncs', 'test_reference_numerics_small'),
DecorateInfo(
precisionOverride({torch.float16: 1e-2, torch.bfloat16: 5e-3}),
'TestUnaryUfuncs', 'test_reference_numerics_large'),
DecorateInfo(
precisionOverride({torch.float16: 1e-2, torch.bfloat16: 5e-3}),
'TestUnaryUfuncs', 'test_reference_numerics_extremal'),
],
skips=(
# Resized a non-empty tensor but did not warn about it.
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning', device_type='cpu'),
),
),
# UnaryUfuncInfo entry for nn.functional.mish (backward ATen name 'mish_backward').
UnaryUfuncInfo(
    'nn.functional.mish',
    aten_backward_name='mish_backward',
    # NumPy reference: x * tanh(softplus(x)).
    ref=lambda x: x * np.tanh(reference_softplus(x)),
    # The in-place variant is the functional form with inplace=True bound via partial.
    inplace_variant=partial(torch.nn.functional.mish, inplace=True),
    dtypes=floating_types_and(torch.bfloat16, torch.float16),
    supports_out=False,
    # Autograd capability flags.
    supports_autograd=True,
    supports_gradgrad=True,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    assert_autodiffed=False,
    decorators=[
        # Custom float16 tolerance for every TestUnaryUfuncs test.
        DecorateInfo(
            toleranceOverride({torch.float16: tol(atol=1e-02, rtol=1e-03)}),
            'TestUnaryUfuncs',
        ),
    ],
),
# UnaryUfuncInfo entry for nn.functional.softsign.
UnaryUfuncInfo(
'nn.functional.softsign',
# NumPy reference: x / (|x| + 1).
ref=lambda x: x / (np.abs(x) + 1),
dtypes=all_types_and_complex_and(torch.float16, torch.bfloat16),
# The CUDA dtype list additionally declares torch.bool.
dtypesIfCUDA=all_types_and_complex_and(torch.float16, torch.bfloat16, torch.bool),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_autograd=True,
assert_autodiffed=False,
supports_gradgrad=True,
supports_out=False,
decorators=[
# Custom float16 tolerance for every TestUnaryUfuncs test.
DecorateInfo(
toleranceOverride({torch.float16: tol(atol=1e-03, rtol=1.3e-04)}), 'TestUnaryUfuncs',), ],
skips=(
# The small-value reference-numerics test is skipped for int32 and int8.
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_small',
dtypes=(torch.int, torch.int8)),),
),
# UnaryUfuncInfo entry for nn.functional.tanhshrink.
UnaryUfuncInfo(
'nn.functional.tanhshrink',
# NumPy reference: x - tanh(x).
ref=lambda x: x - np.tanh(x),
dtypes=all_types_and_complex_and(torch.half, torch.bfloat16),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_autograd=True,
assert_autodiffed=False,
supports_gradgrad=True,
supports_out=False,
decorators=[
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_normal',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble]),
# bfloat16 tolerance for every TestUnaryUfuncs test.
DecorateInfo(
toleranceOverride({torch.bfloat16: tol(atol=1e-02, rtol=1.6e-02)}), 'TestUnaryUfuncs',),
# The extremal test on CUDA additionally gets a complex64 tolerance.
DecorateInfo(toleranceOverride({torch.complex64: tol(atol=6e-04, rtol=1e-05),
torch.bfloat16: tol(atol=1e-02, rtol=1.6e-02)}),
'TestUnaryUfuncs', 'test_reference_numerics_extremal', device_type='cuda'),
],
skips=(
# in each case, pytorch will produce a nan while numpy will not
DecorateInfo(unittest.skip("Fails on some jobs works on others!"),
'TestUnaryUfuncs', "test_reference_numerics_large",
dtypes=(torch.complex64, torch.complex128), active_if=(IS_MACOS)),
DecorateInfo(unittest.skip("Fails on some jobs works on others!"),
'TestUnaryUfuncs', "test_reference_numerics_extremal",
dtypes=(torch.complex64, torch.complex128), device_type='cpu',
active_if=(IS_MACOS or IS_WINDOWS)),
),
# tanh(j * pi/2 * odd_number) is nan, which also makes tanhshrink nan.
# The filter flags complex inputs for which x / (pi * 0.5j) is close to an integer
# and replaces them with safe_val=0; for non-complex inputs the condition is a
# False boolean tensor.
reference_numerics_filter=NumericsFilter(
condition=lambda x: (close_to_int(x / (math.pi * 0.5j))
if x.is_complex() else x.new_tensor(False, dtype=torch.bool)),
safe_val=0)
),
# UnaryUfuncInfo entry for nn.functional.threshold.
UnaryUfuncInfo(
'nn.functional.threshold',
# NumPy reference: `value` where x <= threshold, x elsewhere, cast back to x.dtype.
ref=lambda x, threshold, value: np.where(x <= threshold, value, x).astype(x.dtype),
dtypes=all_types_and(torch.half, torch.bfloat16),
# The in-place variant calls the functional form with inplace=True.
inplace_variant=lambda x, threshold, value:
torch.nn.functional.threshold(x, threshold, value, inplace=True),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
assert_autodiffed=False,
supports_gradgrad=True,
supports_out=False,
# Returns two identical kwarg dicts: threshold = float.fromhex('0x1.3ap-3')
# (= 0.1533203125) and value = -9.
sample_kwargs=lambda device, dtype, input: ({'threshold': float.fromhex('0x1.3ap-3'),
'value': -9},
{'threshold': float.fromhex('0x1.3ap-3'),
'value': -9}),
# TODO(whc) should not need sample_inputs_func, but without it
# kwargs aren't being hooked up properly
sample_inputs_func=sample_inputs_threshold,
),
# OpInfo entry for nn.functional.triplet_margin_loss.
OpInfo(
    "nn.functional.triplet_margin_loss",
    dtypes=all_types_and_complex_and(torch.half, torch.bfloat16),
    # Sample and error input generators.
    sample_inputs_func=sample_inputs_triplet_margin_loss,
    error_inputs_func=error_inputs_triplet_margin_loss,
    # Capability flags.
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    supports_out=False,
),
# OpInfo entry for nn.functional.triplet_margin_with_distance_loss.
# It reuses the triplet_margin_loss sample generator with with_distance=True bound.
OpInfo(
    "nn.functional.triplet_margin_with_distance_loss",
    dtypes=all_types_and_complex_and(torch.half, torch.bfloat16),
    sample_inputs_func=partial(sample_inputs_triplet_margin_loss, with_distance=True),
    error_inputs_func=error_inputs_triplet_margin_loss,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    supports_out=False,
    skips=(
        # This test cannot handle a callable passed to `distance_function`. If we would use
        # `distance_function=None`, the test would pass fine.
        DecorateInfo(unittest.expectedFailure, "TestJit", "test_variant_consistency_jit"),
        DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
    ),
),
# BinaryUfuncInfo entry for nextafter: no autograd, no Python-scalar right-hand side.
BinaryUfuncInfo(
    'nextafter',
    dtypes=floating_types_and(torch.bfloat16, torch.half),
    supports_rhs_python_scalar=False,
    supports_autograd=False,
),
# OpInfo entry for Tensor.to.
OpInfo(
"to",
# The op is a lambda that forwards all arguments to the tensor's .to() method.
op=lambda x, *args, **kwargs: x.to(*args, **kwargs),
dtypes=all_types_and_complex_and(torch.bfloat16, torch.float16, torch.bool),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_out=False,
sample_inputs_func=sample_inputs_to,
skips=(
# RuntimeError: undefined value cpu
DecorateInfo(
unittest.skip("Skipped!"),
"TestJit",
"test_variant_consistency_jit",
device_type="cpu",
),
# NotImplementedError: Cannot copy out of meta tensor; no data!
DecorateInfo(
unittest.skip("Skipped!"),
"TestMeta",
"test_meta_outplace",
),
# https://github.com/pytorch/pytorch/issues/84335
DecorateInfo(
unittest.skip("Skipped!"),
"TestProxyTensorOpInfo",
"test_make_fx_symbolic_exhaustive",
),
# No reason is recorded for this skip
# (other entries skip this test because the op is a lambda -- TODO confirm).
DecorateInfo(
unittest.skip("Skipped!"),
"TestNormalizeOperators",
"test_normalize_operator_exhaustive",
),
),
),
# OpInfo entry for topk.
OpInfo(
    'topk',
    sample_inputs_func=sample_inputs_topk,
    dtypes=all_types_and(torch.bfloat16, torch.float16),
    assert_jit_shape_analysis=True,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
),
# Multiple variants for batch_norm to test with and without cuDNN disabled
# See https://github.com/pytorch/pytorch/pull/63218#discussion_r688549391 for more details
OpInfo('nn.functional.batch_norm',
aten_name='batch_norm',
dtypes=floating_types_and(torch.float16, torch.bfloat16),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
assert_jit_shape_analysis=True,
# Inputs at indices 1 and 2 are allowed to be materialized in the COW-input checks
# (presumably running_mean / running_var -- confirm against sample_inputs_batch_norm).
allow_cow_input_materialize_forward=[1, 2],
allow_cow_input_materialize_backward=[1, 2],
sample_inputs_func=sample_inputs_batch_norm,
skips=(
# see https://github.com/pytorch/pytorch/issues/71286
DecorateInfo(unittest.expectedFailure, 'TestNNCOpInfo', 'test_nnc_correctness'),
DecorateInfo(unittest.skip('Skipped!'), 'TestNNCOpInfo', 'test_nnc_correctness',
device_type='cpu', dtypes=(torch.bfloat16, torch.float16)),
# Not a skip: a float32 tolerance override for the CPU forward-AD
# composite-compliance test, listed here alongside the skips.
DecorateInfo(toleranceOverride({torch.float32: tol(atol=5e-05, rtol=1e-05)}),
'TestCompositeCompliance', 'test_forward_ad', device_type="cpu"),
)),
# This variant tests batch_norm with cuDNN disabled only on CUDA devices
OpInfo('nn.functional.batch_norm',
variant_test_name='without_cudnn',
aten_name='batch_norm',
# The default dtype list is empty; dtypes are only declared for CUDA.
dtypes=empty_types(),
dtypesIfCUDA=floating_types_and(torch.float16, torch.bfloat16),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
allow_cow_input_materialize_forward=[1, 2],
allow_cow_input_materialize_backward=[1, 2],
# Every test of this variant is decorated with onlyCUDA and disablecuDNN.
decorators=[onlyCUDA, disablecuDNN],
skips=(
# Not a skip: a float32 tolerance override for the JIT variant-consistency test.
DecorateInfo(toleranceOverride({torch.float32: tol(atol=1e-03, rtol=1e-04)}),
'TestJit', 'test_variant_consistency_jit'),
),
sample_inputs_func=sample_inputs_batch_norm),
# OpInfo entry for nn.functional.binary_cross_entropy
# (backward ATen name 'binary_cross_entropy_backward').
OpInfo(
"nn.functional.binary_cross_entropy",
aten_backward_name='binary_cross_entropy_backward',
sample_inputs_func=sample_inputs_binary_cross_entropy,
# The CUDA dtype list is the same as the default one.
dtypes=floating_types_and(torch.float16, torch.bfloat16),
dtypesIfCUDA=floating_types_and(torch.float16, torch.bfloat16),
supports_out=False,
gradcheck_fast_mode=False,
supports_autograd=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
decorators=(
# RuntimeError: expected int at position 0, but got: Tensor
DecorateInfo(
unittest.skip("Skipped!"),
"TestCudaFuserOpInfo",
),
# RuntimeError: expected int at position 0, but got: Tensor
DecorateInfo(
unittest.skip("Skipped!"),
"TestNNCOpInfo",
"test_nnc_correctness",
),
# Fails for unknown reason: https://github.com/pytorch/pytorch/issues/120783
DecorateInfo(
unittest.skip("Skipped!"),
"TestCompositeCompliance",
"test_cow_input",
device_type='cuda',
),
# float32 tolerance override for the JIT variant-consistency test
# (the same test is also marked expectedFailure under `skips` below).
DecorateInfo(
toleranceOverride({torch.float32: tol(atol=1e-3, rtol=1e-3)}),
"TestJit",
"test_variant_consistency_jit",
),
# RuntimeError: output with shape [] doesn't match the broadcast shape [5, 5]
DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_dispatch_meta_outplace'),
DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_dispatch_symbolic_meta_outplace'),
DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_dispatch_symbolic_meta_outplace_all_strides'),
),
skips=(
# RuntimeError: expected int at position 0, but got: Tensor
DecorateInfo(
unittest.expectedFailure,
"TestJit",
"test_variant_consistency_jit",
),
),
),
# We have to add 2 OpInfo entries each for `igamma` and `igammac`. The first is the
# standard entry; the second is to run gradcheck tests on the second argument.
BinaryUfuncInfo('igamma',
dtypes=floating_types_and(torch.bfloat16, torch.float16),
aliases=('torch.special.gammainc',),
# On CUDA only the plain floating dtypes are declared (no bfloat16/float16).
dtypesIfCUDA=floating_types(),
# TODO: FIXME
supports_rhs_python_scalar=False,
supports_autograd=False,
skips=(
# FIXME: incorrectly tries to pass a rhs scalar
DecorateInfo(unittest.expectedFailure, 'TestJit',
'test_jit_alias_remapping'),
)),
# TODO: FIXME, ideally by implementing grad for both inputs
# BinaryUfuncInfo('igamma',
# variant_test_name='grad_other',
# # Since autograd formula is implemented only for other and
# # gradcheck test verifies the formula for input in SampleInput,
# # we permute the arguments.
# op=lambda self, other, **kwargs: torch.igamma(other, self, **kwargs),
# inplace_variant=None,
# method_variant=None,
# supports_rhs_python_scalar=False,
# rhs_make_tensor_kwargs=dict(requires_grad=False),
# dtypes=floating_types_and(torch.bfloat16, torch.float16),
# backward_dtypesIfCPU=floating_types_and(torch.bfloat16),
# dtypesIfCUDA=floating_types(),
# backward_dtypesIfCUDA=floating_types(),
# supports_inplace_autograd=False,
# skips=(
# # Derivative wrt first tensor not implemented
# DecorateInfo(unittest.expectedFailure, "TestCommon",
# "test_floating_inputs_are_differentiable"),
# # test does not work with passing lambda for op
# # AssertionError: False is not true : Tensors failed to compare as equal!
# DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit'),
# # test fails as we permute the arguments for the function variant
# # but not for inplace or method.
# DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_variant_consistency_eager'),
# # TypeError: igamma(): argument 'input' (position 1) must be Tensor, not float
# DecorateInfo(unittest.skip('Skipped!'), 'TestBinaryUfuncs'),
# )),
# BinaryUfuncInfo entry for igammac (alias torch.special.gammaincc); autograd is
# declared unsupported and a Python-scalar right-hand side is not supported.
BinaryUfuncInfo('igammac',
dtypes=floating_types_and(torch.bfloat16, torch.float16),
aliases=('torch.special.gammaincc',),
# On CUDA only the plain floating dtypes are declared (no bfloat16/float16).
dtypesIfCUDA=floating_types(),
supports_autograd=False,
supports_rhs_python_scalar=False,
skips=(
# FIXME: incorrectly tries to pass a rhs scalar
DecorateInfo(unittest.expectedFailure, 'TestJit',
'test_jit_alias_remapping'),
)),
# TODO: FIXME, ideally by implementing grad for both inputs
# BinaryUfuncInfo('igammac',
# variant_test_name='grad_other',
# # Since autograd formula is implemented only for other and
# # gradcheck test verifies the formula for input in SampleInput,
# # we permute the arguments
# op=lambda self, other, **kwargs: torch.igammac(other, self, **kwargs),
# inplace_variant=None,
# method_variant=None,
# supports_rhs_python_scalar=False,
# rhs_make_tensor_kwargs=dict(requires_grad=False),
# dtypes=floating_types_and(torch.bfloat16, torch.float16),
# backward_dtypesIfCPU=floating_types_and(torch.bfloat16),
# dtypesIfCUDA=floating_types(),
# backward_dtypesIfCUDA=floating_types(),
# supports_inplace_autograd=False,
# decorators=[
# # Derivative wrt first tensor not implemented
# DecorateInfo(unittest.expectedFailure, "TestCommon",
# "test_floating_inputs_are_differentiable"),
# ],
# skips=(
# # test does not work with passing lambda for op
# # AssertionError: False is not true : Tensors failed to compare as equal!
# DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit'),
# # test fails as we permute the arguments for the function variant
# # but not for inplace or method.
# DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_variant_consistency_eager'),
# # TypeError: igammac(): argument 'input' (position 1) must be Tensor, not float
# DecorateInfo(unittest.skip('Skipped!'), 'TestBinaryUfuncs'),
# )),
# UnaryUfuncInfo entry for nn.functional.softshrink
# (ATen name 'softshrink', backward ATen name 'softshrink_backward').
UnaryUfuncInfo(
    'nn.functional.softshrink',
    aten_name="softshrink",
    aten_backward_name='softshrink_backward',
    dtypes=floating_types_and(torch.bfloat16, torch.float16),
    # Sample and error input generators.
    sample_inputs_func=sample_inputs_softshrink,
    error_inputs_func=error_inputs_softshrink,
    # Autograd-related flags.
    assert_autodiffed=False,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
),
# UnaryUfuncInfo entry for nn.functional.hardshrink
# (ATen name 'hardshrink', backward ATen name 'hardshrink_backward').
UnaryUfuncInfo(
    'nn.functional.hardshrink',
    aten_name="hardshrink",
    aten_backward_name='hardshrink_backward',
    dtypes=floating_types_and(torch.bfloat16, torch.float16),
    sample_inputs_func=sample_inputs_hardshrink,
    # Autograd-related flags.
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    assert_autodiffed=True,
    autodiff_nonfusible_nodes=["aten::hardshrink"],
),
# UnaryUfuncInfo entry for nn.functional.hardtanh
# (ATen name 'hardtanh', backward ATen name 'hardtanh_backward').
UnaryUfuncInfo(
    'nn.functional.hardtanh',
    aten_name="hardtanh",
    aten_backward_name='hardtanh_backward',
    # Forward dtypes: floating types plus the four signed integer widths, half and bfloat16.
    dtypes=floating_types_and(
        torch.int8, torch.int16, torch.int32, torch.int64, torch.half, torch.bfloat16,
    ),
    # Backward dtypes are declared separately, with a narrower list for CUDA.
    backward_dtypes=all_types_and(torch.half, torch.bfloat16),
    backward_dtypesIfCUDA=floating_types_and(torch.float16, torch.bfloat16),
    # Sample and error input generators.
    sample_inputs_func=sample_inputs_hardtanh,
    error_inputs_func=error_inputs_hardtanh,
    # Capability flags.
    supports_out=False,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    assert_autodiffed=True,
    autodiff_nonfusible_nodes=["aten::hardtanh"],
),
# OpInfo entry for nn.functional.gelu (ATen name 'gelu', backward ATen name 'gelu_backward').
OpInfo('nn.functional.gelu',
aten_name="gelu",
aten_backward_name='gelu_backward',
# A reference implementation is only provided when TEST_SCIPY is truthy.
ref=reference_gelu if TEST_SCIPY else None,
error_inputs_func=error_inputs_gelu,
supports_autograd=True,
assert_autodiffed=True,
sample_inputs_func=sample_inputs_gelu,
dtypes=floating_types_and(torch.bfloat16, torch.half),
supports_gradgrad=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
autodiff_nonfusible_nodes=["aten::gelu"],
skips=(
# AssertionError: Tensor-likes are not close!
# May not replicate in CI
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_out'),
DecorateInfo(unittest.skip("Unsupported on MPS for now"), 'TestCommon', 'test_numpy_ref_mps'),
)),
# UnaryUfuncInfo entry for nn.functional.relu6 (ATen name 'relu6').
UnaryUfuncInfo(
    'nn.functional.relu6',
    aten_name="relu6",
    # Forward covers all types plus half/bfloat16; backward is restricted to floating types.
    dtypes=all_types_and(torch.half, torch.bfloat16),
    backward_dtypes=floating_types_and(torch.half, torch.bfloat16),
    # Capability flags.
    supports_out=False,
    supports_forward_ad=True,
    supports_fwgrad_bwgrad=True,
    assert_autodiffed=True,
    autodiff_nonfusible_nodes=["aten::relu6"],
),
# OpInfo entry for mm.
OpInfo('mm',
dtypes=all_types_and_complex_and(torch.float16, torch.bfloat16),
# On CUDA only floating and complex dtypes (plus float16/bfloat16) are declared.
dtypesIfCUDA=floating_and_complex_types_and(torch.float16, torch.bfloat16),
assert_autodiffed=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_mm,
skips=(
# Issue with conj and torch dispatch, see https://github.com/pytorch/pytorch/issues/82479
DecorateInfo(
unittest.skip("Skipped!"),
'TestSchemaCheckModeOpInfo',
'test_schema_correctness',
dtypes=(torch.complex64, torch.complex128)),
# Fast math on MacOS-13?
# NOTE(review): `active_if` is given a lambda here, whereas the other entries in
# this section pass a plain boolean expression. A function object is always truthy,
# so unless DecorateInfo calls it, this override applies on every macOS version
# rather than only when MACOS_VERSION < 14.0 -- confirm against DecorateInfo.
DecorateInfo(
toleranceOverride({torch.float32: tol(atol=2e-5, rtol=5e-6)}),
'TestConsistency',
'test_output_match',
active_if=lambda _: MACOS_VERSION < 14.0,
device_type='mps',
dtypes=(torch.float32,)),
)),
# OpInfo entry for mode; the op is bound explicitly to torch.mode.
OpInfo('mode',
op=torch.mode,
dtypes=all_types_and(torch.float16, torch.bfloat16, torch.bool),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
skips=(
# Resized a non-empty tensor but did not warn about it
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning'),
# FIXME:
# Expected 2114 but got 1123.
# Absolute difference: 991 (up to 0.001 allowed)
# Relative difference: 0.46877956480605487 (up to 0.001 allowed)
DecorateInfo(
unittest.skip("Skipped!"),
"TestCommon",
"test_compare_cpu",
dtypes=(torch.float32,),
device_type="cuda",
),
),
sample_inputs_func=sample_inputs_mode,),
make_mvlgamma_opinfo(variant_test_name='mvlgamma_p_1',
domain=(1, None),
skips=skips_mvlgamma(),
sample_kwargs=lambda device, dtype, input: ({'p': 1}, {'d': 1})),
make_mvlgamma_opinfo(variant_test_name='mvlgamma_p_3',
domain=(2, None),
skips=skips_mvlgamma(),
sample_kwargs=lambda device, dtype, input: ({'p': 3}, {'d': 3})),
make_mvlgamma_opinfo(variant_test_name='mvlgamma_p_5',
domain=(3, None),
skips=skips_mvlgamma(),
sample_kwargs=lambda device, dtype, input: ({'p': 5}, {'d': 5})),
BinaryUfuncInfo('ne',
ref=np.not_equal,
aliases=('not_equal',),
dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.float16),
always_returns_bool=True,
supports_autograd=False,
skips=(
)),
OpInfo('narrow',
dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.float16, torch.chalf),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=partial(sample_inputs_narrow_narrow_copy, is_narrow=True),
reference_inputs_func=partial(reference_inputs_narrow_narrow_copy, is_narrow=True),
error_inputs_func=partial(error_inputs_narrow_narrow_copy, is_narrow=True, is_ref=False),
skips=(
# Use of .item()
DecorateInfo(unittest.expectedFailure, 'TestCompositeCompliance', 'test_operator'),
DecorateInfo(unittest.expectedFailure, 'TestCompositeCompliance', 'test_backward'),
DecorateInfo(unittest.expectedFailure, 'TestCompositeCompliance', 'test_forward_ad'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
)),
OpInfo('narrow_copy',
dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.float16, torch.chalf),
supports_out=True,
supports_forward_ad=False,
supports_fwgrad_bwgrad=False,
supports_autograd=False,
# https://github.com/pytorch/pytorch/issues/86931
sample_inputs_func=partial(sample_inputs_narrow_narrow_copy, is_narrow=False),
reference_inputs_func=partial(reference_inputs_narrow_narrow_copy, is_narrow=False),
error_inputs_func=partial(error_inputs_narrow_narrow_copy, is_narrow=False, is_ref=False),
skips=(
# https://github.com/pytorch/pytorch/issues/84577
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out'),
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning'),
# Could not run 'aten::narrow_copy.out' with arguments from the 'CUDA' backend
DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_meta_outplace',
device_type='cuda'),
DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_dispatch_meta_outplace',
device_type='cuda'),
DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_dispatch_symbolic_meta_outplace',
device_type='cuda'),
DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_dispatch_symbolic_meta_outplace_all_strides'),
)),
OpInfo('view_copy',
dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.float16),
ref=lambda x, newshape: np.reshape(x, newshape).copy(),
supports_out=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_autograd=True,
sample_inputs_func=sample_inputs_view_reshape,
error_inputs_func=error_inputs_view_reshape,
skips=(
# RuntimeError: view size is not compatible with input tensor's size and stride
# (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.
DecorateInfo(
unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_outplace_all_strides"
),
)),
UnaryUfuncInfo('neg',
aliases=('negative', ),
ref=np.negative,
dtypes=all_types_and_complex_and(torch.half, torch.bfloat16, torch.chalf),
error_inputs_func=error_inputs_neg,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_sparse=True,
supports_sparse_csr=True,
supports_sparse_csc=True,
supports_sparse_bsr=True,
supports_sparse_bsc=True,
assert_autodiffed=True),
OpInfo('dist',
op=torch.dist,
dtypes=floating_and_complex_types_and(torch.half, torch.bfloat16),
# Runs very slowly on slow gradcheck - alternatively reduce input sizes
gradcheck_fast_mode=True,
supports_out=False,
supports_forward_ad=True,
# torch.autograd.gradcheck.GradcheckError: While computing batched gradients, got:
# Could not allocate memory to change Tensor SizesAndStrides!
check_batched_forward_grad=False,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_dist),
OpInfo('outer',
op=torch.outer,
aliases=('ger', ),
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# See https://github.com/pytorch/pytorch/pull/78358
check_batched_forward_grad=False,
sample_inputs_func=sample_inputs_outer,),
OpInfo('ormqr',
op=torch.ormqr,
dtypes=floating_and_complex_types(),
# https://github.com/pytorch/pytorch/issues/80411
gradcheck_fast_mode=True,
supports_forward_ad=False,
supports_fwgrad_bwgrad=False,
sample_inputs_func=sample_inputs_ormqr,
error_inputs_func=error_inputs_ormqr,
decorators=[skipCUDAIfNoCusolver, skipCPUIfNoLapack],
skips=(
# Strides are not the same!
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out'),
)),
OpInfo('permute',
ref=np.transpose,
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf),
supports_out=False,
assert_autodiffed=True,
autodiff_fusible_nodes=[], # aliases inputs, shouldn't be fused
autodiff_nonfusible_nodes=[], # aliases inputs, shouldn't be fused
assert_jit_shape_analysis=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_varargs=True,
sample_inputs_func=sample_inputs_permute,
reference_inputs_func=reference_inputs_permute),
OpInfo('permute_copy',
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf),
supports_out=True,
assert_autodiffed=True,
assert_jit_shape_analysis=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_varargs=False, # torch.permute is also not varargs
sample_inputs_func=sample_inputs_permute,
reference_inputs_func=reference_inputs_permute,
skips=(
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit', dtypes=(torch.float32,)),
)),
# Test metadata for the binary 'pow' operator, checked against np.power.
BinaryUfuncInfo('pow',
dtypes=all_types_and_complex_and(torch.half, torch.bfloat16),
dtypesIfCUDA=all_types_and_complex_and(torch.half, torch.bfloat16, torch.chalf),
ref=np.power,
# Due to AVX2 currently not being fully supported for Float16, log_vml_cpu can't be enabled
# for Float16, causing this test to fail. pow's autograd for Float16 is thus currently
# unsupported on CPU.
# NOTE(review): backward_dtypes below does list torch.half, which contradicts the
# statement above -- the note looks stale; confirm and remove it if so.
backward_dtypes=floating_and_complex_types_and(torch.half, torch.bfloat16),
backward_dtypesIfCUDA=floating_and_complex_types_and(torch.bfloat16, torch.half, torch.chalf),
# https://github.com/pytorch/pytorch/issues/80411
gradcheck_fast_mode=True,
supports_inplace_autograd=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
assert_autodiffed=True,
supports_one_python_scalar=True,
# Integer types do not support negative exponents
rhs_make_tensor_kwargs=dict(low=0),
# Raising negative real numbers to fractional powers is not supported
lhs_make_tensor_kwargs=dict(low=0),
decorators=(
DecorateInfo(toleranceOverride({torch.complex64: tol(atol=1e-4, rtol=1.3e-05)}),
'TestBinaryUfuncs', 'test_reference_numerics'),
DecorateInfo(toleranceOverride({torch.complex64: tol(atol=1e-4, rtol=1.3e-05),
torch.complex128: tol(atol=1e-4, rtol=1.3e-05)}),
'TestBinaryUfuncs', 'test_scalar_support'),
),
skips=(
# Skipping integers because they are being raised to negative powers causing an error
DecorateInfo(unittest.expectedFailure, 'TestBinaryUfuncs', 'test_reference_numerics_small_values',
dtypes=[torch.int8, torch.int16, torch.int32, torch.int64]),
DecorateInfo(unittest.expectedFailure, 'TestBinaryUfuncs', 'test_reference_numerics_large_values',
dtypes=[torch.int16, torch.int32, torch.int64]),
# FIXME Complex values error with: Greatest absolute difference: nan at index
# Ref: https://github.com/pytorch/pytorch/issues/76853
# For `chalf`, reference computation in `numpy` is computed in `cfloat`.
# Output of `chalf` saturates to `inf` quicker than reference due to its small range
# which leads to failure of this test.
DecorateInfo(unittest.skip("Skipped!"), 'TestDecomp', 'test_quick',
dtypes=(torch.complex32,), active_if=TEST_WITH_ROCM),
# FIXME:
# Mismatched elements: 1 / 500 (0.2%)
# Greatest absolute difference: nan at index (7, 9, 0) (up to 1e-05 allowed)
# Greatest relative difference: nan at index (7, 9, 0) (up to 0.001 allowed)
DecorateInfo(unittest.skip("Skipped!"), 'TestDecomp', 'test_comprehensive',
dtypes=(torch.complex32,)),
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_complex_half_reference_testing',
dtypes=(torch.complex32,), active_if=TEST_WITH_ROCM),
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs', 'test_batch_vs_slicing',
dtypes=(torch.complex32,)),
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs', 'test_non_contig',
dtypes=(torch.complex32,)),
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs', 'test_reference_numerics',
dtypes=(torch.complex32,)),
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs', 'test_reference_numerics_small_values',
dtypes=(torch.complex32, torch.complex64, torch.complex128)),
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs', 'test_reference_numerics_large_values',
dtypes=(torch.complex32, torch.complex64, torch.complex128)),
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs', 'test_reference_numerics_extremal_values',
dtypes=(torch.complex32, torch.complex64, torch.complex128)),
)),
BinaryUfuncInfo('float_power',
ref=np.float_power,
dtypes=all_types_and_complex_and(torch.half, torch.bfloat16, torch.bool),
promotes_int_to_float=True,
# https://github.com/pytorch/pytorch/issues/80411
gradcheck_fast_mode=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_one_python_scalar=True,
# Integer types do not support negative exponents
rhs_make_tensor_kwargs=dict(low=0),
# Raising negative real numbers to fractional powers is not supported
lhs_make_tensor_kwargs=dict(low=0),
decorators=(
DecorateInfo(toleranceOverride({torch.complex64: tol(atol=1e-4, rtol=1.3e-05),
torch.complex128: tol(atol=1e-4, rtol=1.3e-05)}),
'TestBinaryUfuncs', 'test_scalar_support'),
),
skips=(
# FIXME
# AssertionError: Object comparison failed: torch.float64 != torch.float32
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs', 'test_type_promotion'),
# -3.43399e+38 is outside the range of representable values of type 'float'
DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit'),
# Complex values error with: Greatest absolute difference: nan at index
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs', 'test_reference_numerics_small_values',
dtypes=[torch.complex64, torch.complex128]),
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs', 'test_reference_numerics_large_values',
dtypes=[torch.complex64, torch.complex128]),
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs', 'test_reference_numerics_extremal_values',
dtypes=[torch.complex64, torch.complex128]),
# Inplace always promotes to double and thus other floating dtypes are not supported
DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_meta_inplace',
dtypes=[torch.bfloat16, torch.float16, torch.float32]),
)),
OpInfo('qr',
op=torch.qr,
dtypes=floating_and_complex_types(),
sample_inputs_func=sample_inputs_linalg_qr_geqrf,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# In-place ops
check_batched_gradgrad=False,
decorators=[skipCUDAIfNoCusolver, skipCPUIfNoLapack]),
UnaryUfuncInfo('rad2deg',
ref=np.degrees,
decorators=(precisionOverride({torch.bfloat16: 7e-1,
torch.float16: 7e-1}),),
dtypes=all_types_and(torch.bool, torch.half, torch.bfloat16),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_sparse=True,
supports_sparse_csr=True,
supports_sparse_csc=True,
supports_sparse_bsr=True,
supports_sparse_bsc=True,
promotes_int_to_float=True),
UnaryUfuncInfo('real',
ref=np.real,
dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.half, torch.chalf),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# See https://github.com/pytorch/pytorch/issues/66357
check_batched_forward_grad=False,
skips=(
# Skip since real and imag don't have out variants.
DecorateInfo(unittest.expectedFailure, 'TestUnaryUfuncs', 'test_out_arg_all_dtypes'),
)),
OpInfo(
"roll",
ref=np.roll,
dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.half, torch.chalf),
error_inputs_func=error_inputs_roll,
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_roll,
decorators=(onlyNativeDeviceTypes,),
),
OpInfo(
"rot90",
dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.half),
error_inputs_func=error_inputs_rot90,
# Runs very slowly on slow gradcheck - alternatively reduce input sizes
gradcheck_fast_mode=True,
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_rot90,
),
# To test reference numerics against multiple values of argument `decimals`,
# we make multiple OpInfo entries with each entry corresponding to different value of decimals.
UnaryUfuncInfo('round',
ref=np.round,
aliases=('special.round',),
dtypes=all_types_and(torch.half, torch.bfloat16),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
skips=(
DecorateInfo(unittest.expectedFailure,
'TestNNCOpInfo',
'test_nnc_correctness',
dtypes=tuple(t for t in integral_types() if t != torch.uint8)),
DecorateInfo(unittest.skip("Skipped!"),
'TestNNCOpInfo',
'test_nnc_correctness',
dtypes=(torch.bfloat16,)),
),
supports_sparse=True,
supports_sparse_csr=True,
supports_sparse_csc=True,
supports_sparse_bsr=True,
supports_sparse_bsc=True,
assert_autodiffed=True,
),
UnaryUfuncInfo('round',
ref=np.round,
variant_test_name='decimals_0',
aliases=('special.round',),
dtypes=floating_types_and(torch.half, torch.bfloat16),
sample_kwargs=lambda device, dtype, input: ({'decimals': 0}, {'decimals': 0}),
sample_inputs_func=partial(sample_inputs_elementwise_unary, op_kwargs={'decimals': 0}),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
assert_autodiffed=False,
supports_sparse_csr=False),
UnaryUfuncInfo('round',
ref=np.round,
variant_test_name='decimals_3',
aliases=('special.round',),
dtypes=floating_types_and(torch.bfloat16),
dtypesIfCUDA=floating_types_and(torch.half, torch.bfloat16),
sample_kwargs=lambda device, dtype, input: ({'decimals': 3}, {'decimals': 3}),
sample_inputs_func=partial(sample_inputs_elementwise_unary, op_kwargs={'decimals': 3}),
skips=(
# test_ops already tested for this overload with `decimals_0` opinfo entry
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon'),
DecorateInfo(unittest.skip("Skipped!"), 'TestFwdGradients'),
DecorateInfo(unittest.skip("Skipped!"), 'TestBwdGradients'),
DecorateInfo(unittest.skip("Skipped!"), 'TestJit'),
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits'),
DecorateInfo(toleranceOverride({torch.bfloat16: tol(atol=1e-3, rtol=0.016)}),
"TestUnaryUfuncs", "test_reference_numerics_extremal",
device_type="cuda"),
DecorateInfo(toleranceOverride({torch.bfloat16: tol(atol=1e-3, rtol=0.016)}),
"TestUnaryUfuncs", "test_reference_numerics_normal",
device_type="cuda"),
),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
assert_autodiffed=False,
supports_sparse_csr=False),
UnaryUfuncInfo('round',
ref=np.round,
variant_test_name='decimals_neg_3',
aliases=('special.round',),
dtypes=floating_types_and(torch.bfloat16),
dtypesIfCUDA=floating_types_and(torch.half, torch.bfloat16),
sample_kwargs=lambda device, dtype, input: ({'decimals': -3}, {'decimals': -3}),
sample_inputs_func=partial(sample_inputs_elementwise_unary, op_kwargs={'decimals': -3}),
skips=(
# test_ops already tested for this overload with `decimals_0` opinfo entry
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon'),
DecorateInfo(unittest.skip("Skipped!"), 'TestFwdGradients'),
DecorateInfo(unittest.skip("Skipped!"), 'TestBwdGradients'),
DecorateInfo(unittest.skip("Skipped!"), 'TestJit'),
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits'),
),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
assert_autodiffed=False,
supports_sparse_csr=False),
UnaryUfuncInfo('sin',
ref=np.sin,
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
dtypesIfCUDA=all_types_and_complex_and(torch.chalf, torch.bool, torch.half, torch.bfloat16),
assert_autodiffed=True,
handles_large_floats=False,
supports_sparse=True,
supports_sparse_csr=True,
supports_sparse_csc=True,
supports_sparse_bsr=True,
supports_sparse_bsc=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
promotes_int_to_float=True,
skips=(
# Fails on CUDA but passes on ROCm
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large',
dtypes=(torch.cdouble,), device_type='cuda'),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
dtypes=(torch.cfloat, torch.cdouble,), device_type='cpu', active_if=IS_WINDOWS),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large',
dtypes=(torch.cfloat, torch.cdouble,), device_type='cpu', active_if=IS_WINDOWS),
DecorateInfo(unittest.skip("Skipped! sparse backward not supported"),
'TestSparseUnaryUfuncs', 'test_sparse_fn_grad'),
DecorateInfo(toleranceOverride({torch.float16: tol(atol=1e-3, rtol=2e-3)}),
"TestConsistency", "test_output_grad_match", device_type="mps"),
),
decorators=(precisionOverride({torch.bfloat16: 1e-2}),)),
UnaryUfuncInfo('sinc',
ref=np_sinc_with_fp16_as_fp32,
aliases=('special.sinc',),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
handles_large_floats=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
promotes_int_to_float=True),
UnaryUfuncInfo('sinh',
ref=np_unary_ufunc_integer_promotion_wrapper(np.sinh),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
dtypesIfCUDA=all_types_and_complex_and(torch.chalf, torch.bool, torch.half, torch.bfloat16),
assert_autodiffed=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_sparse=True,
supports_sparse_csr=True,
supports_sparse_csc=True,
supports_sparse_bsr=True,
supports_sparse_bsc=True,
promotes_int_to_float=True,
decorators=(precisionOverride({torch.float16: 1e-2}),),
skips=(
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble],
active_if=(IS_MACOS or IS_WINDOWS)),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble],
active_if=(IS_MACOS or IS_WINDOWS)),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large',
dtypes=(torch.cdouble,)),
# Reference: https://github.com/pytorch/pytorch/issues/48641
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large',
device_type='cpu', dtypes=[torch.int8]),
DecorateInfo(unittest.skip("Skipped! sparse backward not supported"),
'TestSparseUnaryUfuncs', 'test_sparse_fn_grad'),
)),
UnaryUfuncInfo('sign',
ref=reference_sign,
dtypes=all_types_and(torch.bool, torch.bfloat16, torch.half),
dtypesIfCUDA=all_types_and(torch.bool, torch.bfloat16, torch.half),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_sparse=True,
supports_sparse_csr=True,
supports_sparse_csc=True,
supports_sparse_bsr=True,
supports_sparse_bsc=True,
skips=(
# Reference: https://github.com/pytorch/pytorch/issues/41245
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
dtypes=[torch.bfloat16, torch.float16, torch.float32, torch.float64]),
)),
UnaryUfuncInfo('sgn',
ref=reference_sgn,
dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.half, torch.chalf),
backward_dtypes=floating_and_complex_types_and(torch.bfloat16, torch.half),
backward_dtypesIfCUDA=floating_and_complex_types_and(torch.bfloat16, torch.half, torch.chalf),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_sparse=True,
supports_sparse_csr=True,
supports_sparse_csc=True,
supports_sparse_bsr=True,
supports_sparse_bsc=True,
skips=(
# Reference: https://github.com/pytorch/pytorch/issues/41245
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
dtypes=[torch.bfloat16, torch.float16, torch.float32, torch.float64]),
DecorateInfo(unittest.skip("Skipped! sparse backward not supported"),
'TestSparseUnaryUfuncs', 'test_sparse_fn_grad'),
)),
OpInfo('split',
dtypes=all_types_and_complex_and(torch.bfloat16, torch.half, torch.bool, torch.chalf),
sample_inputs_func=partial(sample_inputs_split, list_args=False),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_out=False,
autodiff_fusible_nodes=[], # aliases inputs, shouldn't be fused
autodiff_nonfusible_nodes=[], # aliases inputs, shouldn't be fused
assert_autodiffed=True),
OpInfo('split',
# Cannot declare this aten_name because of
# test_variant_consistency_jit_split_list_args_cpu_float32
decomp_aten_name='split_with_sizes',
variant_test_name='list_args',
dtypes=all_types_and_complex_and(torch.bfloat16, torch.half, torch.bool),
sample_inputs_func=partial(sample_inputs_split, list_args=True),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_out=False),
# `unsafe_split` supports only `int` for split_size argument
OpInfo('unsafe_split',
dtypes=all_types_and_complex_and(torch.bfloat16, torch.half, torch.bool, torch.chalf),
sample_inputs_func=partial(sample_inputs_split, list_args=False),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_out=False,
autodiff_fusible_nodes=[], # aliases inputs, shouldn't be fused
autodiff_nonfusible_nodes=[], # aliases inputs, shouldn't be fused
assert_autodiffed=True,
check_batched_forward_grad=False),
OpInfo('split_with_sizes',
dtypes=all_types_and_complex_and(torch.bfloat16, torch.half, torch.bool, torch.chalf),
sample_inputs_func=sample_inputs_split_with_sizes,
autodiff_fusible_nodes=[], # aliases inputs, shouldn't be fused
autodiff_nonfusible_nodes=[], # aliases inputs, shouldn't be fused
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
assert_autodiffed=True),
OpInfo('split_with_sizes_copy',
dtypes=all_types_and_complex_and(torch.bfloat16, torch.half, torch.bool, torch.chalf),
sample_inputs_func=sample_inputs_split_with_sizes,
supports_out=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
skips=(
# No error raised
DecorateInfo(unittest.expectedFailure, "TestCommon", "test_out_requires_grad_error"),
)),
BinaryUfuncInfo('__radd__',
op=torch.Tensor.__radd__,
dtypes=all_types_and_complex_and(torch.bfloat16, torch.half, torch.bool),
supports_out=False,
skips=(
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit',),
),
assert_autodiffed=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
autodiff_nonfusible_nodes=['aten::add'],),
BinaryUfuncInfo('__rdiv__',
op=torch.Tensor.__rdiv__,
dtypes=all_types_and_complex_and(torch.bfloat16, torch.half, torch.bool),
promotes_int_to_float=True,
lhs_make_tensor_kwargs={'exclude_zero': True},
# Runs very slowly on slow gradcheck - alternatively reduce input sizes
gradcheck_fast_mode=True,
supports_out=False,
skips=(
# https://github.com/pytorch/pytorch/issues/76806
DecorateInfo(unittest.expectedFailure, 'TestBinaryUfuncs', 'test_type_promotion'),
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit',),
),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
assert_autodiffed=True,
autodiff_nonfusible_nodes=['aten::mul', 'aten::reciprocal'],),
BinaryUfuncInfo('__rmul__',
op=torch.Tensor.__rmul__,
dtypes=all_types_and_complex_and(torch.bfloat16, torch.half, torch.bool),
supports_out=False,
skips=(
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit',),
),
assert_autodiffed=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
autodiff_nonfusible_nodes=['aten::mul'],),
BinaryUfuncInfo('__rand__',
op=torch.Tensor.__rand__,
dtypes=integral_types_and(torch.bool),
supports_out=False,
supports_autograd=False,
supports_forward_ad=True,
skips=(
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
)),
BinaryUfuncInfo('__ror__',
op=torch.Tensor.__ror__,
dtypes=integral_types_and(torch.bool),
supports_out=False,
supports_autograd=False,
supports_forward_ad=True,
skips=(
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
)),
BinaryUfuncInfo('__rxor__',
op=torch.Tensor.__rxor__,
dtypes=integral_types_and(torch.bool),
supports_out=False,
supports_autograd=False,
supports_forward_ad=True,
skips=(
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
)),
OpInfo('__rmatmul__',
op=torch.Tensor.__rmatmul__,
dtypes=all_types_and_complex_and(torch.bfloat16, torch.float16),
dtypesIfCUDA=floating_and_complex_types_and(torch.float16,
*[torch.bfloat16]
if SM53OrLater or TEST_WITH_ROCM else []),
assert_autodiffed=True,
sample_inputs_func=partial(sample_inputs_matmul, is_rmatmul=True),
# Runs very slowly on slow gradcheck - alternatively reduce input sizes
gradcheck_fast_mode=True,
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
check_batched_forward_grad=False,
decorators=(
# NVIDIA only assures that bfloat16 is supported by bmm if SM >= 5.3
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_dtypes', device_type='cuda', active_if=not SM53OrLater),
DecorateInfo(toleranceOverride({torch.complex64: tol(atol=1e-05, rtol=1.2e-03)}),
'TestMathBits', 'test_conj_view'),
DecorateInfo(toleranceOverride({torch.float32: tol(atol=1e-05, rtol=1.2e-03)}),
'TestCommon', 'test_noncontiguous_samples'),
DecorateInfo(toleranceOverride({torch.complex64: tol(atol=1e-05, rtol=1e-05)}),
"TestDecomp", "test_comprehensive", device_type="cuda",
active_if=TEST_WITH_ROCM),
),
skips=(
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit',),
# https://github.com/pytorch/pytorch/issues/67470
DecorateInfo(unittest.skip("67470!"),
'TestCommon', 'test_noncontiguous_samples',
device_type='cpu', dtypes=(torch.long,)),
# Fails on XLA.
# AssertionError: False is not true : Tensors failed to compare as equal
DecorateInfo(unittest.skip("Skipped!"), 'TestOpInfo', device_type='xla', dtypes=(torch.long,)),
# https://github.com/pytorch/pytorch/issues/71774
DecorateInfo(unittest.skip('Skipped!'), 'TestNNCOpInfo', 'test_nnc_correctness',
device_type='cpu', dtypes=(torch.long,)),
)),
BinaryUfuncInfo('__rmod__',
op=torch.Tensor.__rmod__,
dtypes=floating_types_and(torch.bfloat16, torch.half,),
dtypesIfCUDA=all_types_and(torch.bfloat16, torch.half),
# https://github.com/pytorch/pytorch/issues/80411
gradcheck_fast_mode=True,
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_one_python_scalar=True,
skips=(
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit',),
),
# Support autograd after torch.remainder(Tensor, Tensor) supports
# autograd of the second argument.
# https://github.com/pytorch/pytorch/pull/58476/files#r637167630
# supports_autograd=False,
assert_autodiffed=True,
autodiff_nonfusible_nodes=['aten::remainder'],),
# Test metadata for the reflected power operator, torch.Tensor.__rpow__.
BinaryUfuncInfo('__rpow__',
op=torch.Tensor.__rpow__,
dtypes=all_types_and_complex_and(torch.bfloat16, torch.half),
# Reference: https://github.com/pytorch/pytorch/issues/54774
# "log2" "_vml_cpu" not implemented for Half
# NOTE(review): backward_dtypes below is the same expression as dtypes and
# includes torch.half, so the Half restriction described above is not applied
# here -- the note looks stale; confirm and remove it if so.
backward_dtypes=all_types_and_complex_and(torch.bfloat16, torch.half),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_one_python_scalar=True,
skips=(
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit',),
# TODO: FIXME tolerance is too high
DecorateInfo(unittest.skip('Skipped!'), 'TestFwdGradients'),
DecorateInfo(unittest.skip('Skipped!'), 'TestBwdGradients'),
),
assert_autodiffed=True,
autodiff_nonfusible_nodes=['aten::pow'],),
BinaryUfuncInfo('__rsub__',
op=torch.Tensor.__rsub__,
dtypes=all_types_and_complex_and(torch.bfloat16, torch.half),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_out=False,
supports_one_python_scalar=True,
skips=(
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit',),
),
assert_autodiffed=True,
autodiff_nonfusible_nodes=['aten::rsub'],),
BinaryUfuncInfo('rsub',
dtypes=all_types_and_complex_and(torch.bfloat16, torch.half),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_out=False,
supports_inplace_autograd=False,
assert_autodiffed=None,
sample_inputs_func=sample_inputs_add_sub),
OpInfo('select',
aten_backward_name='select_backward',
dtypes=all_types_and_complex_and(torch.bfloat16, torch.half, torch.bool, torch.chalf),
sample_inputs_func=sample_inputs_select,
assert_jit_shape_analysis=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_out=False),
OpInfo('select_scatter',
dtypes=all_types_and(torch.bfloat16, torch.half, torch.bool),
sample_inputs_func=sample_inputs_select_scatter,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_out=False),
OpInfo('slice',
op=torch.ops.aten.slice.Tensor,
dtypes=all_types_and_complex_and(torch.bfloat16, torch.half, torch.bool, torch.chalf),
sample_inputs_func=sample_inputs_slice,
gradcheck_fast_mode=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_scripting=False,
supports_inplace_autograd=False,
supports_out=False),
OpInfo('slice_scatter',
dtypes=all_types_and(torch.bfloat16, torch.half, torch.bool),
sample_inputs_func=sample_inputs_slice_scatter,
# https://github.com/pytorch/pytorch/issues/80411
gradcheck_fast_mode=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_out=True),
UnaryUfuncInfo('signbit',
ref=np.signbit,
dtypes=all_types_and(torch.bool, torch.bfloat16, torch.half),
supports_sparse=True,
supports_sparse_csr=True,
supports_sparse_csc=True,
supports_sparse_bsr=True,
supports_sparse_bsc=True,
supports_autograd=False,),
UnaryUfuncInfo('tan',
ref=np.tan,
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
dtypesIfCUDA=all_types_and_complex_and(torch.chalf, torch.bool, torch.half, torch.bfloat16),
decorators=(DecorateInfo(
toleranceOverride({torch.complex64: tol(atol=1e-04, rtol=1e-05)}),
'TestUnaryUfuncs', 'test_reference_numerics_extremal',
device_type='cuda'),),
assert_autodiffed=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_sparse=True,
supports_sparse_csr=True,
supports_sparse_csc=True,
supports_sparse_bsr=True,
supports_sparse_bsc=True,
promotes_int_to_float=True,
skips=(
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble],
active_if=(IS_MACOS or IS_WINDOWS)),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble],
active_if=(IS_MACOS or IS_WINDOWS)),
DecorateInfo(unittest.skip("Skipped! sparse backward not supported"),
'TestSparseUnaryUfuncs', 'test_sparse_fn_grad'),
# FIXME:
# Mismatched elements: 2 / 400 (0.5%)
# Greatest absolute difference: inf at index (7, 16) (up to 1e-05 allowed)
# Greatest relative difference: nan at index (7, 16) (up to 0.001 allowed)
DecorateInfo(
unittest.skip("Skipped!"),
"TestInductorOpInfo",
"test_comprehensive",
dtypes=(torch.float16,),
device_type="cuda",
),
DecorateInfo(toleranceOverride({torch.complex64: tol(atol=3e-5, rtol=7e-6)}),
"TestConsistency", "test_output_match", device_type="mps"),
DecorateInfo(toleranceOverride({torch.float16: tol(atol=1e-3, rtol=2e-3)}),
"TestConsistency", "test_output_grad_match", device_type="mps"),
),
# tan(pi/2 * odd_number) is nan
reference_numerics_filter=NumericsFilter(
condition=lambda x: close_to_int(x / (math.pi * 0.5)), safe_val=math.pi)),
UnaryUfuncInfo('tanh',
ref=np.tanh,
aten_backward_name='tanh_backward',
aliases=('nn.functional.tanh',),
decorators=(precisionOverride({torch.bfloat16: 1e-2}),
DecorateInfo(
toleranceOverride({torch.complex64: tol(atol=1e-04, rtol=2e-05)}),
'TestUnaryUfuncs', 'test_reference_numerics_extremal',
device_type='cuda'),),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
dtypesIfCUDA=all_types_and_complex_and(torch.chalf, torch.bool, torch.half, torch.bfloat16),
assert_autodiffed=True,
assert_jit_shape_analysis=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_sparse=True,
supports_sparse_csr=True,
supports_sparse_csc=True,
supports_sparse_bsr=True,
supports_sparse_bsc=True,
promotes_int_to_float=True,
skips=(
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble],
active_if=(IS_MACOS or IS_WINDOWS)),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble],
active_if=(IS_MACOS or IS_WINDOWS)),
DecorateInfo(unittest.skip("Skipped! sparse backward not supported"),
'TestSparseUnaryUfuncs', 'test_sparse_fn_grad'),
DecorateInfo(toleranceOverride({torch.complex64: tol(atol=3e-5, rtol=7e-6)}),
"TestConsistency", "test_output_match", device_type="mps"),
),
# tanh(j * pi/2 * odd_number) is nan
reference_numerics_filter=NumericsFilter(
condition=lambda x: (close_to_int(x / (math.pi * 0.5j))
if x.is_complex() else x.new_tensor(False, dtype=torch.bool)),
safe_val=0)),
OpInfo('tensor_split',
ref=np.array_split,
dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.float16),
dtypesIfCUDA=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.float16),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
skips=(
# Pre-existing condition; Needs to be fixed
DecorateInfo(unittest.expectedFailure, 'TestCompositeCompliance', 'test_operator'),
DecorateInfo(unittest.expectedFailure, 'TestCompositeCompliance', 'test_backward'),
DecorateInfo(unittest.expectedFailure, 'TestCompositeCompliance', 'test_forward_ad'),
),
sample_inputs_func=sample_inputs_tensor_split,),
OpInfo('hsplit',
dtypes=all_types_and_complex_and(torch.complex32, torch.bool, torch.bfloat16, torch.float16),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# See https://github.com/pytorch/pytorch/pull/78358
check_batched_forward_grad=False,
sample_inputs_func=sample_inputs_hsplit,
error_inputs_func=error_inputs_hsplit,),
OpInfo('vsplit',
dtypes=all_types_and_complex_and(torch.complex32, torch.bool, torch.bfloat16, torch.float16),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# See https://github.com/pytorch/pytorch/pull/78358
check_batched_forward_grad=False,
sample_inputs_func=sample_inputs_vsplit,
error_inputs_func=error_inputs_vsplit,),
OpInfo('dsplit',
dtypes=all_types_and_complex_and(torch.complex32, torch.bool, torch.bfloat16, torch.float16),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# See https://github.com/pytorch/pytorch/pull/78358
check_batched_forward_grad=False,
sample_inputs_func=sample_inputs_dsplit,
error_inputs_func=error_inputs_dsplit,),
OpInfo('triangular_solve',
op=torch.triangular_solve,
dtypes=floating_and_complex_types(),
sample_inputs_func=sample_inputs_legacy_solve,
check_batched_gradgrad=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
gradcheck_wrapper=lambda *args, **kwargs: gradcheck_wrapper_triangular_input(*args, idx=1, **kwargs),
decorators=[
skipCUDAIfNoMagma,
skipCPUIfNoLapack,
DecorateInfo(
toleranceOverride({torch.float32: tol(atol=3e-5, rtol=3e-6)}),
'TestConsistency', 'test_output_match', device_type='cpu',
),
],
skips=(
# AssertionError: Scalars are not equal!
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out'),
# Gradcheck fails
DecorateInfo(unittest.expectedFailure, 'TestFwdGradients', 'test_fn_fwgrad_bwgrad',
dtypes=floating_and_complex_types()),
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_out',
device_type='mps', dtypes=[torch.float32]),
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_variant_consistency_eager',
device_type='mps', dtypes=[torch.float32]),
DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit',
device_type='mps', dtypes=[torch.float32]),
)),
UnaryUfuncInfo('trunc',
aliases=('fix', ),
ref=np.trunc,
dtypes=all_types_and(torch.half, torch.bfloat16),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_sparse=True,
skips=(
DecorateInfo(unittest.expectedFailure,
'TestNNCOpInfo',
'test_nnc_correctness',
dtypes=tuple(t for t in integral_types() if t != torch.uint8)),
),
supports_sparse_csr=True,
supports_sparse_csc=True,
supports_sparse_bsr=True,
supports_sparse_bsc=True,
assert_autodiffed=True),
UnaryUfuncInfo('exp2',
aliases=('special.exp2', ),
ref=np_unary_ufunc_integer_promotion_wrapper(np.exp2),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
promotes_int_to_float=True,
skips=(
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large',
dtypes=[torch.cdouble]),
# Reference: https://github.com/pytorch/pytorch/issues/48010
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble], active_if=IS_WINDOWS),
)),
UnaryUfuncInfo('expm1',
aliases=('special.expm1', ),
ref=np_unary_ufunc_integer_promotion_wrapper(np.expm1),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_sparse=True,
supports_sparse_csr=True,
supports_sparse_csc=True,
supports_sparse_bsr=True,
supports_sparse_bsc=True,
promotes_int_to_float=True,
assert_autodiffed=True,
skips=(
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large',
device_type='cuda', dtypes=[torch.complex128]),
DecorateInfo(unittest.skip("Skipped! sparse backward not supported"),
'TestSparseUnaryUfuncs', 'test_sparse_fn_grad'),
)),
UnaryUfuncInfo('nan_to_num',
ref=np.nan_to_num,
dtypes=all_types_and(torch.half, torch.bool, torch.bfloat16),
dtypesIfCUDA=all_types_and(torch.half, torch.bool, torch.bfloat16),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_sparse=True,
skips=(
DecorateInfo(unittest.skip("Skipped! sparse backward not supported"),
'TestSparseUnaryUfuncs', 'test_sparse_fn_grad'),
),
# Passing numpy_kwargs via sample_kwargs, as numpy does comparison
# with BFloat16 in float, since it currently doesn't support BFloat16.
# Ref: https://github.com/pytorch/pytorch/issues/57982#issuecomment-839150556
sample_kwargs=lambda device, dtype, input: ({},
{'posinf': torch.finfo(torch.bfloat16).max,
'neginf': torch.finfo(torch.bfloat16).min})
if dtype is torch.bfloat16 else ({}, {})),
UnaryUfuncInfo('reciprocal',
ref=np_unary_ufunc_integer_promotion_wrapper(np.reciprocal),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
assert_autodiffed=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
promotes_int_to_float=True,
skips=(
# Reference: https://github.com/pytorch/pytorch/issues/45690
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
dtypes=[torch.cfloat, torch.cdouble]),
)),
UnaryUfuncInfo('rsqrt',
ref=lambda x: np.reciprocal(np.sqrt(x)),
domain=(0, None),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
dtypesIfCUDA=all_types_and_complex_and(torch.chalf, torch.bool, torch.half, torch.bfloat16),
decorators=(precisionOverride({torch.half: 5e-2}),),
assert_autodiffed=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
promotes_int_to_float=True,
skips=(
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
dtypes=(torch.cfloat, torch.cdouble)),
# AssertionError: Tensor-likes are not close!
# Greatest absolute difference: nan at index (700,) (up to 0.01 allowed)
# Greatest relative difference: nan at index (700,) (up to 0.001 allowed)
DecorateInfo(unittest.expectedFailure, 'TestUnaryUfuncs', 'test_reference_numerics_large',
dtypes=(torch.chalf,)),
)),
UnaryUfuncInfo('sqrt',
ref=np.sqrt,
supports_sparse=True,
domain=(0, None),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
dtypesIfCUDA=all_types_and_complex_and(torch.chalf, torch.bool, torch.half, torch.bfloat16),
assert_autodiffed=True,
supports_forward_ad=True,
supports_sparse_csr=True,
supports_sparse_csc=True,
supports_sparse_bsr=True,
supports_sparse_bsc=True,
supports_fwgrad_bwgrad=True,
promotes_int_to_float=True,
decorators=(
precisionOverride({torch.bfloat16: 7e-2}),
DecorateInfo(
toleranceOverride({torch.chalf: tol(atol=1e-2, rtol=0)}),
'TestUnaryUfuncs', 'test_reference_numerics_large'),
),
skips=(
# Reference: https://github.com/pytorch/pytorch/issues/47358
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large',
device_type='cpu', dtypes=(torch.cfloat, torch.cdouble),
active_if=IS_MACOS),
DecorateInfo(unittest.skip("Skipped! sparse backward not supported"),
'TestSparseUnaryUfuncs', 'test_sparse_fn_grad'),
DecorateInfo(toleranceOverride({torch.complex64: tol(atol=2e-5, rtol=3e-6)}),
"TestConsistency", "test_output_match", device_type="mps"),
)),
UnaryUfuncInfo('square',
ref=np.square,
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
decorators=(precisionOverride({torch.complex64: 3e-4, torch.bfloat16: 3e-1}),),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
skips=(
# Reference: https://github.com/pytorch/pytorch/issues/52549
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large',
dtypes=[torch.cfloat, torch.cdouble]),
# >>> t = torch.tensor(complex(-0.01, float("inf")))
# >>> np.square(t.numpy())
# (-inf-infj)
# >>> t.square()
# tensor(-inf-infj)
# >>> t.cuda().square()
# tensor(inf+nanj, device='cuda:0')
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
device_type='cuda', dtypes=[torch.cfloat, torch.cdouble]),
DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_meta_inplace',
dtypes=[torch.bool]),
DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_dispatch_meta_inplace',
dtypes=[torch.bool]),
DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_dispatch_symbolic_meta_inplace',
dtypes=[torch.bool]),
),),
OpInfo('lerp',
dtypes=floating_and_complex_types_and(torch.bfloat16, torch.half),
dtypesIfCUDA=floating_and_complex_types_and(torch.chalf, torch.half, torch.bfloat16),
sample_inputs_func=sample_inputs_lerp,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
assert_autodiffed=True),
UnaryUfuncInfo('angle',
ref=np.angle,
dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.float16),
dtypesIfCUDA=all_types_and_complex_and(torch.chalf, torch.bool),
decorators=(precisionOverride({torch.float16: 1e-2,
torch.bfloat16: 1e-2}),),
backward_dtypes=floating_and_complex_types_and(torch.bfloat16, torch.float16),
backward_dtypesIfCUDA=floating_and_complex_types_and(torch.chalf),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_sparse_csr=True,
supports_sparse_csc=True,
supports_sparse_bsr=True,
supports_sparse_bsc=True,
supports_complex_to_float=True,
skips=(
# Ref: https://github.com/pytorch/pytorch/issues/78413
DecorateInfo(unittest.expectedFailure, 'TestUnaryUfuncs', 'test_reference_numerics_small',
dtypes=(torch.bfloat16, torch.float16, torch.float32, torch.float64),),
)),
UnaryUfuncInfo('isfinite',
ref=np.isfinite,
dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.float16, torch.chalf),
supports_out=False,
supports_autograd=False),
UnaryUfuncInfo('isinf',
ref=np.isinf,
dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.float16, torch.chalf),
supports_out=False,
supports_sparse=True,
supports_sparse_csr=True,
supports_sparse_csc=True,
supports_sparse_bsr=True,
supports_sparse_bsc=True,
supports_autograd=False),
UnaryUfuncInfo('isposinf',
ref=np.isposinf,
dtypes=all_types_and(torch.bool, torch.bfloat16, torch.float16),
supports_sparse=True,
supports_sparse_csr=True,
supports_sparse_csc=True,
supports_sparse_bsr=True,
supports_sparse_bsc=True,
supports_autograd=False),
UnaryUfuncInfo('isneginf',
ref=np.isneginf,
dtypes=all_types_and(torch.bool, torch.bfloat16, torch.float16),
supports_sparse=True,
supports_sparse_csr=True,
supports_sparse_csc=True,
supports_sparse_bsr=True,
supports_sparse_bsc=True,
supports_autograd=False),
UnaryUfuncInfo('isreal',
ref=np.isreal,
dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.float16, torch.chalf),
supports_out=False,
supports_autograd=False),
UnaryUfuncInfo('isnan',
ref=np.isnan,
dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.float16),
supports_out=False,
supports_sparse=True,
supports_sparse_csr=True,
supports_sparse_csc=True,
supports_sparse_bsr=True,
supports_sparse_bsc=True,
supports_autograd=False),
OpInfo('einsum',
# torch.einsum takes the equation first and the tensors second, but SampleInput
# expects the tensor input as the first argument, so this lambda swaps the order.
# TODO(@heitorschueroff) update SampleInput to handle such cases
op=lambda tensors, equation: torch.einsum(equation, tensors),
dtypes=all_types_and_complex_and(torch.half, torch.bfloat16),
dtypesIfCUDA=floating_and_complex_types_and(torch.half, torch.bfloat16),
backward_dtypesIfCUDA=floating_and_complex_types_and(torch.half, torch.bfloat16),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# See https://github.com/pytorch/pytorch/issues/66357
check_batched_forward_grad=False,
sample_inputs_func=sample_inputs_einsum,
skips=(
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
# test does not work with passing lambda for op
# there's a test `test_einsum` in `test_jit.py` to handle this case
# AssertionError: JIT Test does not execute any logic
DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit'),
)),
OpInfo('svd',
op=torch.svd,
dtypes=floating_and_complex_types(),
sample_inputs_func=sample_inputs_svd,
# Runs very slowly on slow-gradcheck - alternatively reduce input sizes
gradcheck_fast_mode=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
check_batched_forward_grad=False,
# We're using at::allclose, which does not have a batching rule
check_batched_grad=False,
check_batched_gradgrad=False,
decorators=[skipCUDAIfNoMagmaAndNoCusolver, skipCPUIfNoLapack, with_tf32_off],
skips=(
# Issue with conj and torch dispatch, see https://github.com/pytorch/pytorch/issues/82479
DecorateInfo(
unittest.skip("Skipped!"),
'TestSchemaCheckModeOpInfo',
'test_schema_correctness',
dtypes=(torch.complex64, torch.complex128)),
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_out',
device_type='mps', dtypes=[torch.float32]),
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_variant_consistency_eager',
device_type='mps', dtypes=[torch.float32]),
DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit',
device_type='mps', dtypes=[torch.float32]),
)),
OpInfo('svd_lowrank',
op=lambda *args, **kwargs: wrapper_set_seed(
lambda a, b, **kwargs: torch.svd_lowrank(a @ b.mT, **kwargs),
*args, **kwargs
),
dtypes=floating_and_complex_types(),
# Runs very slowly on slow gradcheck - alternatively reduce input sizes
gradcheck_fast_mode=True,
supports_out=False,
# Due to the use of randomness
check_batched_grad=False,
check_batched_gradgrad=False,
check_batched_forward_grad=False,
supports_fwgrad_bwgrad=True,
supports_forward_ad=True,
sample_inputs_func=sample_inputs_svd_lowrank,
decorators=[skipCUDAIfNoCusolver, skipCPUIfNoLapack, with_tf32_off,
DecorateInfo(toleranceOverride({torch.float32: tol(atol=1e-03, rtol=1e-03),
torch.complex64: tol(atol=1e-02, rtol=1e-02)}),
'TestCommon', 'test_noncontiguous_samples'),
# FIXME This should be the following, but the toleranceOverride does not seem to do anything!
# DecorateInfo(toleranceOverride({torch.complex128: tol(atol=1e-04, rtol=1e-04)}),
# 'TestFwdGradients', 'test_fn_fwgrad_bwgrad'),
DecorateInfo(unittest.skip("See comment above"),
'TestFwdGradients',
'test_fn_fwgrad_bwgrad',
dtypes=[torch.complex128]),
],
skips=(
# test does not work with passing lambda for op
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'),
# Issue with conj and torch dispatch, see https://github.com/pytorch/pytorch/issues/82479
DecorateInfo(unittest.expectedFailure, 'TestSchemaCheckModeOpInfo', 'test_schema_correctness',
dtypes=(torch.complex64, torch.complex128)),
DecorateInfo(slowTest, 'TestCompositeCompliance', 'test_forward_ad'),
)),
OpInfo('pca_lowrank',
op=lambda *args, **kwargs: wrapper_set_seed(
lambda a, b, **kwargs: torch.pca_lowrank(a @ b.mT, **kwargs),
*args, **kwargs
),
dtypes=floating_and_complex_types(),
# Runs very slowly on slow gradcheck - alternatively reduce input sizes
gradcheck_fast_mode=True,
supports_out=False,
check_batched_forward_grad=False,
check_batched_grad=False,
check_batched_gradgrad=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_pca_lowrank,
decorators=[skipCUDAIfNoCusolver, skipCPUIfNoLapack, with_tf32_off,
DecorateInfo(toleranceOverride({torch.float32: tol(atol=1e-03, rtol=1e-03),
torch.complex64: tol(atol=4e-02, rtol=4e-02)}),
'TestCommon', 'test_noncontiguous_samples'),
DecorateInfo(toleranceOverride({torch.float32: tol(atol=1e-05, rtol=5e-05)}),
'TestOperators', 'test_grad'),
# FIXME This should be the following, but the toleranceOverride does not seem to do anything!
# DecorateInfo(toleranceOverride({torch.complex128: tol(atol=1e-04, rtol=1e-04)}),
# 'TestFwdGradients', 'test_fn_fwgrad_bwgrad'),
DecorateInfo(unittest.skip("See comment above"),
'TestFwdGradients',
'test_fn_fwgrad_bwgrad',
dtypes=[torch.complex128]),
DecorateInfo(
toleranceOverride({torch.float32: tol(atol=3e-5, rtol=1e-3)}),
'TestInductorOpInfo', 'test_comprehensive', device_type='cuda'),
],
skips=(
# test does not work with passing lambda for op
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
# Issue with conj and torch dispatch, see https://github.com/pytorch/pytorch/issues/82479
DecorateInfo(unittest.expectedFailure, 'TestSchemaCheckModeOpInfo', 'test_schema_correctness',
dtypes=(torch.complex64, torch.complex128)),
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'),
)),
BinaryUfuncInfo('polar',
dtypes=floating_types(),
supports_forward_ad=True,
# this function is undefined if 'abs' values are <0, so the lhs ('abs') samples
# are generated with low=0
lhs_make_tensor_kwargs=dict(low=0),
supports_rhs_python_scalar=False,
skips=(
# RuntimeError: Expected object of scalar type Float but got scalar type Double for second argument
DecorateInfo(unittest.skip('Skipped!'), 'TestBinaryUfuncs', 'test_type_promotion'),
DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_binary_ufuncs_mixed_dtype'),
# GradcheckError: Jacobian computed with forward mode mismatch for output 0 with respect to input 0
# Numerical:
# tensor([[0.]], dtype=torch.float64)
# Analytical:
# tensor([[-0.0047]], dtype=torch.float64, grad_fn=<CopySlices>)
DecorateInfo(unittest.expectedFailure, 'TestFwdGradients', 'test_fn_fwgrad_bwgrad'),
)),
# TODO(@kshitij12345): Refactor similar to `mvlgamma` entries.
# To test reference numerics against multiple values of argument `n`,
# we make multiple OpInfo entries with each entry corresponding to different value of n (currently 0 to 4).
# We run the op tests from test_ops.py only for `n=0` to avoid redundancy in testing.
UnaryUfuncInfo('polygamma',
op=lambda x, n, **kwargs: torch.polygamma(n, x, **kwargs),
variant_test_name='polygamma_n_0',
ref=reference_polygamma if TEST_SCIPY else None,
dtypes=all_types_and(torch.bool, torch.half, torch.bfloat16),
dtypesIfCUDA=all_types_and(torch.bool, torch.half, torch.bfloat16),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
promotes_int_to_float=True,
sample_inputs_func=sample_inputs_polygamma,
skips=(
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
),
sample_kwargs=lambda device, dtype, input: ({'n': 0}, {'n': 0}),
# polygamma functions have multiple singularities at x having non-positive integer value
reference_numerics_filter=NumericsFilter(condition=lambda x: (x < 0.1) & ((x - x.round()).abs() < 1e-4),
safe_val=1)),
# polygamma variants for n = 1..4: one UnaryUfuncInfo per order `n_`, used to check
# reference numerics for each order. The generic op tests are skipped below as redundant.
*(UnaryUfuncInfo('polygamma',
                 # torch.polygamma takes (n, input); the lambda puts the tensor first.
                 op=lambda x, n, **kwargs: torch.polygamma(n, x, **kwargs),
                 variant_test_name=f'polygamma_n_{n_}',
                 ref=reference_polygamma if TEST_SCIPY else None,
                 dtypes=all_types_and(torch.bool, torch.bfloat16),
                 dtypesIfCUDA=all_types_and(torch.bool, torch.half, torch.bfloat16),
                 supports_forward_ad=True,
                 supports_fwgrad_bwgrad=True,
                 promotes_int_to_float=True,
                 sample_inputs_func=sample_inputs_polygamma,
                 decorators=(
                     DecorateInfo(toleranceOverride({torch.float32: tol(atol=1e-4, rtol=1e-3)}), 'TestUnaryUfuncs'),
                     DecorateInfo(toleranceOverride({torch.bfloat16: tol(atol=1e1, rtol=1e-1),
                                                     torch.float32: tol(atol=1e-4, rtol=1e-2)}),
                                  'TestUnaryUfuncs', 'test_reference_numerics_normal',
                                  active_if=IS_WINDOWS),
                 ),
                 skips=(
                     # Redundant tests
                     DecorateInfo(unittest.skip("Skipped!"), 'TestFwdGradients'),
                     DecorateInfo(unittest.skip("Skipped!"), 'TestBwdGradients'),
                     DecorateInfo(unittest.skip("Skipped!"), 'TestJit'),
                     DecorateInfo(unittest.skip("Skipped!"), 'TestNormalizeOperators'),
                     DecorateInfo(unittest.skip("Skipped!"), 'TestCommon'),
                     # Mismatch: https://github.com/pytorch/pytorch/issues/55357
                     DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal'),
                     DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large'),
                 ),
                 # Bind `n_` as a default argument: a plain closure would look up `n_`
                 # when the lambda is *called*, i.e. after this generator has been
                 # exhausted, so every variant would silently use n=4.
                 sample_kwargs=lambda device, dtype, input, n_=n_: ({'n': n_}, {'n': n_}),
                 # polygamma functions have multiple singularities at x having non-positive integer value
                 reference_numerics_filter=NumericsFilter(condition=lambda x: (x < 0.1) & ((x - x.round()).abs() < 1e-4),
                                                          safe_val=1))
  for n_ in (1, 2, 3, 4)),
OpInfo('ravel',
ref=np.ravel,
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# See https://github.com/pytorch/pytorch/pull/78358
check_batched_forward_grad=False,
sample_inputs_func=sample_inputs_ravel,
),
OpInfo('unravel_index',
ref=np.unravel_index,
dtypes=integral_types_and(),
supports_out=False,
supports_autograd=False,
sample_inputs_func=sample_inputs_unravel_index,
),
OpInfo('reshape',
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf),
sample_inputs_func=sample_inputs_view_reshape,
reference_inputs_func=reference_inputs_view_reshape,
error_inputs_func=error_inputs_view_reshape,
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
),
OpInfo('reshape_as',
op=lambda x, other: x.reshape_as(other),
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf),
sample_inputs_func=partial(sample_inputs_view_reshape, tensor_arg=True),
reference_inputs_func=partial(reference_inputs_view_reshape, tensor_arg=True),
error_inputs_func=partial(error_inputs_view_reshape, tensor_arg=True),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
skips=(
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
)),
OpInfo('view',
op=lambda x, shape: x.view(shape),
dtypes=all_types_and_complex_and(torch.complex32, torch.bool, torch.float16, torch.bfloat16),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
assert_jit_shape_analysis=True,
sample_inputs_func=sample_inputs_view_reshape,
reference_inputs_func=reference_inputs_view_reshape,
error_inputs_func=error_inputs_view_reshape,
skips=(
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
# RuntimeError: view size is not compatible with input tensor's size and stride
# (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_outplace_all_strides"),
)),
OpInfo('view_as',
op=lambda x, other: x.view_as(other),
dtypes=all_types_and_complex_and(torch.complex32, torch.bool, torch.float16, torch.bfloat16),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=partial(sample_inputs_view_reshape, tensor_arg=True),
reference_inputs_func=partial(reference_inputs_view_reshape, tensor_arg=True),
error_inputs_func=partial(error_inputs_view_reshape, tensor_arg=True),
skips=(
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
# RuntimeError: view size is not compatible with input tensor's size and stride
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_outplace_all_strides")
)),
OpInfo('atleast_1d',
dtypes=all_types_and_complex_and(torch.complex32, torch.bool, torch.float16, torch.bfloat16),
# Runs very slowly on slow gradcheck - alternatively reduce input sizes
gradcheck_fast_mode=True,
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# See https://github.com/pytorch/pytorch/pull/78358
check_batched_forward_grad=False,
sample_inputs_func=sample_inputs_atleast1d2d3d,
skips=(
# JIT does not support variadic tensors.
# RuntimeError: input->type()->kind() == TypeKind::OptionalType
# INTERNAL ASSERT FAILED at "../torch/csrc/jit/passes/utils/check_alias_annotation.cpp":252,
# please report a bug to PyTorch.
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit', dtypes=[torch.float32]),
),
),
OpInfo('atleast_2d',
dtypes=all_types_and_complex_and(torch.complex32, torch.bool, torch.float16, torch.bfloat16),
# Runs very slowly on slow gradcheck - alternatively reduce input sizes
gradcheck_fast_mode=True,
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# See https://github.com/pytorch/pytorch/pull/78358
check_batched_forward_grad=False,
skips=(
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit', dtypes=[torch.float32]),
),
sample_inputs_func=sample_inputs_atleast1d2d3d,
),
OpInfo('atleast_3d',
dtypes=all_types_and_complex_and(torch.complex32, torch.bool, torch.float16, torch.bfloat16),
# Runs very slowly on slow gradcheck - alternatively reduce input sizes
gradcheck_fast_mode=True,
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# See https://github.com/pytorch/pytorch/pull/78358
check_batched_forward_grad=False,
skips=(
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit', dtypes=[torch.float32]),
),
sample_inputs_func=sample_inputs_atleast1d2d3d,
),
OpInfo('flatten',
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf),
ref=reference_flatten,
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# See https://github.com/pytorch/pytorch/pull/78358
check_batched_forward_grad=False,
sample_inputs_func=sample_inputs_flatten,
reference_inputs_func=reference_inputs_flatten,
),
OpInfo('unflatten',
op=torch.unflatten,
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_unflatten,
),
OpInfo('column_stack',
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# See https://github.com/pytorch/pytorch/pull/78358
check_batched_forward_grad=False,
sample_inputs_func=sample_inputs_column_stack,),
OpInfo('pinverse',
op=torch.pinverse,
dtypes=floating_and_complex_types(),
check_batched_grad=False,
check_batched_gradgrad=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
supports_out=False,
sample_inputs_func=sample_inputs_linalg_invertible,
decorators=[skipCUDAIfNoMagmaAndNoCusolver, skipCPUIfNoLapack],
skips=(
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_variant_consistency_eager',
device_type='mps', dtypes=[torch.float32]),
DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit',
device_type='mps', dtypes=[torch.float32]),
)),
OpInfo('gather',
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
dtypesIfCUDA=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
sample_inputs_func=sample_inputs_gather,
gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
error_inputs_func=error_inputs_gather,
),
OpInfo('index_fill',
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.complex32),
inplace_variant=torch.Tensor.index_fill_,
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# https://github.com/pytorch/pytorch/issues/66357
check_batched_forward_grad=False,
skips=(
# RuntimeError: Mismatch on aten._unique.default: Shapes torch.Size([2]) and torch.Size([1]) are not equal!
DecorateInfo(unittest.expectedFailure, 'TestFakeTensor', 'test_fake_crossref_backward_no_amp'),
# RuntimeError: Mismatch on aten._unique.default: Shapes torch.Size([2]) and torch.Size([1]) are not equal!
DecorateInfo(unittest.expectedFailure, 'TestFakeTensor', 'test_fake_crossref_backward_amp'),
),
sample_inputs_func=sample_inputs_index,
reference_inputs_func=partial(sample_inputs_index, reference=True)),
OpInfo('index_copy',
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.complex32),
supports_out=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# https://github.com/pytorch/pytorch/issues/66357
check_batched_forward_grad=False,
sample_inputs_func=sample_inputs_index,
reference_inputs_func=partial(sample_inputs_index, reference=True),
gradcheck_nondet_tol=GRADCHECK_NONDET_TOL),
# OpInfo entry for `index_select`; also provides error inputs and opts in to the
# JIT shape-analysis assertion.
OpInfo('index_select',
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf),
# On CUDA, backward is declared only for floating/complex dtypes (plus float16, bfloat16, chalf).
backward_dtypesIfCUDA=floating_and_complex_types_and(torch.float16, torch.bfloat16, torch.chalf),
sample_inputs_func=sample_inputs_index,
# Reference inputs come from the same generator, called with reference=True.
reference_inputs_func=partial(sample_inputs_index, reference=True),
error_inputs_func=error_inputs_index_select,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
assert_jit_shape_analysis=True,
gradcheck_nondet_tol=GRADCHECK_NONDET_TOL),
# OpInfo entry for `index_add`; Tensor.index_add_ is registered as the in-place variant
# and out= support is declared.
OpInfo('index_add',
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf),
inplace_variant=torch.Tensor.index_add_,
supports_out=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# Batched forward-grad checking is disabled; tracked in
# https://github.com/pytorch/pytorch/issues/66357
check_batched_forward_grad=False,
sample_inputs_func=sample_inputs_index,
# Reference inputs come from the same generator, called with reference=True.
reference_inputs_func=partial(sample_inputs_index, reference=True),
error_inputs_func=error_inputs_index_add,
skips=(
# boolean alpha not handled properly
# (expected failure is limited to the torch.bool dtype)
DecorateInfo(unittest.expectedFailure,
'TestNNCOpInfo',
'test_nnc_correctness',
dtypes=(torch.bool,)),
),
gradcheck_nondet_tol=GRADCHECK_NONDET_TOL),
# Expands into one `index_reduce` OpInfo per reduction type ('mean', 'prod', 'amin', 'amax');
# the reduction type is used as the variant_test_name so the entries stay distinct.
*(OpInfo('index_reduce',
variant_test_name=reduction_type,
dtypes=all_types_and(torch.float16, torch.bfloat16),
skips=(
# Loosens float16 tolerances for TestInductorOpInfo.test_comprehensive.
# NOTE(review): this is a tolerance override, not a skip; other entries in this file pass
# toleranceOverride via `decorators=` -- confirm that placing it under `skips=` is intended.
DecorateInfo(toleranceOverride({torch.float16: tol(atol=2e-3, rtol=3e-3)}),
'TestInductorOpInfo', 'test_comprehensive'),
),
supports_out=True,
sample_inputs_func=sample_inputs_index_reduce,
) for reduction_type in ('mean', 'prod', 'amin', 'amax')),
# OpInfo entry for `_unsafe_masked_index`; declared without out=, in-place autograd,
# or scripting support, but with forward/backward AD support.
OpInfo('_unsafe_masked_index',
dtypes=all_types_and_complex_and(torch.float16, torch.bfloat16, torch.bool),
supports_out=False,
supports_inplace_autograd=False,
supports_scripting=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs__unsafe_masked_index,
skips=(
DecorateInfo(unittest.skip("Skipped!"), 'TestNNCOpInfo', 'test_nnc_correctness'),
# Applies the slowTest decorator to the float64 run on Windows only (active_if=IS_WINDOWS).
# NOTE(review): presumably this tags the test as slow rather than skipping it -- confirm.
DecorateInfo(slowTest, 'TestDecomp', 'test_quick_core_backward',
dtypes=(torch.float64,), active_if=IS_WINDOWS),
),),
# OpInfo entry for `_unsafe_masked_index_put_accumulate`; declared without out=, in-place
# autograd, or scripting support, but with forward/backward AD support.
OpInfo('_unsafe_masked_index_put_accumulate',
dtypes=all_types_and_complex_and(torch.float16, torch.bfloat16, torch.bool),
supports_out=False,
supports_inplace_autograd=False,
supports_scripting=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
decorators=(
# Loosens float16 tolerances for TestInductorOpInfo.test_comprehensive on CPU only.
DecorateInfo(
toleranceOverride({torch.float16: tol(atol=2e-3, rtol=3e-2)}),
'TestInductorOpInfo', 'test_comprehensive', device_type='cpu'
),
),
sample_inputs_func=sample_inputs__unsafe_masked_index_put_accumulate,
skips=(
# Applies the slowTest decorator to the float64 run on Windows only (active_if=IS_WINDOWS).
# NOTE(review): presumably this tags the test as slow rather than skipping it -- confirm.
DecorateInfo(slowTest, 'TestDecomp', 'test_quick_core_backward',
dtypes=(torch.float64,), active_if=IS_WINDOWS),
),),
# OpInfo entry for tensor indexing, exercised through torch.Tensor.__getitem__.
OpInfo('__getitem__',
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf),
# Runs very slowly on slow gradcheck - alternatively reduce input sizes
gradcheck_fast_mode=True,
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_inplace_autograd=False,
supports_scripting=False,
op=torch.Tensor.__getitem__,
skips=(
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
# AssertionError: False is not true : Scalars failed to compare as equal! 0 != 104448
# (skipped on CUDA only)
DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit', device_type='cuda'),),
sample_inputs_func=sample_inputs_getitem),
# OpInfo entry for `index_put`; declared without out= support but with in-place autograd support.
OpInfo('index_put',
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf),
supports_out=False,
supports_inplace_autograd=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# Batched forward-grad checking is disabled; tracked in
# https://github.com/pytorch/pytorch/issues/66357
check_batched_forward_grad=False,
test_neg_view=False,
sample_inputs_func=sample_inputs_index_put,
skips=(
# Skip is active only when both TEST_WITH_ROCM and TEST_WITH_TORCHINDUCTOR are set,
# and only for float64 on CUDA devices.
DecorateInfo(unittest.skip("Skipped"), 'TestBwdGradients', 'test_fn_grad', dtypes=[torch.float64],
device_type='cuda', active_if=(TEST_WITH_ROCM and TEST_WITH_TORCHINDUCTOR)),
)),
# OpInfo entry for `sort`; the CUDA dtype list is spelled out explicitly and matches `dtypes`.
OpInfo('sort',
dtypes=all_types_and(torch.bool, torch.float16, torch.bfloat16),
dtypesIfCUDA=all_types_and(torch.bool, torch.float16, torch.bfloat16),
sample_inputs_func=sample_inputs_sort,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
skips=(
# Expected failure for torch.bool on CUDA; not applied when TEST_WITH_ROCM is set.
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_non_standard_bool_values',
dtypes=[torch.bool], device_type='cuda', active_if=not TEST_WITH_ROCM),
)),
# OpInfo entry for `unique`; declared without out= or autograd support, and additionally
# lists the unsigned dtypes uint16/uint32/uint64.
OpInfo('unique',
dtypes=all_types_and(torch.bool, torch.float16, torch.bfloat16, torch.uint16, torch.uint32, torch.uint64),
sample_inputs_func=sample_inputs_unique,
supports_out=False,
supports_autograd=False,
skips=(
# lambda impl
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
DecorateInfo(unittest.skip('Output order is undefined when sorted=False'), 'TestCommon', 'test_compare_cpu'),
)),
# OpInfo entry for `unique_consecutive`; declared without out= or autograd support.
OpInfo('unique_consecutive',
dtypes=all_types_and(torch.bool, torch.float16, torch.bfloat16),
sample_inputs_func=sample_inputs_unique_consecutive,
supports_out=False,
supports_autograd=False,
skips=(
# lambda impl
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
)),
# OpInfo entry for `put`; declared without out= support, with forward/backward AD support,
# and with batched forward-grad and batched grad-grad checks disabled.
OpInfo('put',
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
check_batched_forward_grad=False,
check_batched_gradgrad=False, # vmap complains of the sizes
sample_inputs_func=sample_inputs_put),
# OpInfo entry for `take`; provides both sample inputs and error inputs, with the
# batched-grad check disabled.
OpInfo('take',
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
check_batched_grad=False, # vmap complains of the sizes
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_take,
error_inputs_func=error_inputs_take),
# OpInfo entry for `scatter`; its error inputs come from error_inputs_scatter_and_scatter_add.
OpInfo('scatter',
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_scatter,
error_inputs_func=error_inputs_scatter_and_scatter_add,
skips=(
# Compiler issue on ROCm. Regression started in ROCm 6.4.
# (skip applies to torch.bool and only when TEST_WITH_ROCM is set)
DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_non_standard_bool_values',
dtypes=[torch.bool], active_if=TEST_WITH_ROCM),
)),
# UnaryUfuncInfo entry for the `Tensor.bfloat16` dtype conversion; the op is a lambda
# that forwards all arguments to the method.
UnaryUfuncInfo(
'bfloat16',
op=lambda x, *args, **kwargs: x.bfloat16(*args, **kwargs),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf),
supports_out=False,
sample_inputs_func=sample_inputs_conversion,
skips=(
# autograd tests don't handle operators that change dtype
# (no test name is given, so these target the gradient test classes as a whole)
DecorateInfo(unittest.expectedFailure, 'TestFwdGradients'),
DecorateInfo(unittest.expectedFailure, 'TestBwdGradients'),
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
# RuntimeError: attribute lookup is not defined on builtin
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
DecorateInfo(unittest.skip("Skipped!"), 'TestNNCOpInfo', 'test_nnc_correctness'),
)),
# UnaryUfuncInfo entry for the `Tensor.bool` dtype conversion; the op is a lambda
# that forwards all arguments to the method. Declared without autograd support.
UnaryUfuncInfo(
'bool',
op=lambda x, *args, **kwargs: x.bool(*args, **kwargs),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf),
supports_out=False,
sample_inputs_func=sample_inputs_conversion,
supports_autograd=False,
skips=(
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
# RuntimeError: attribute lookup is not defined on builtin
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
)),
# UnaryUfuncInfo entry for the `Tensor.byte` dtype conversion; the op is a lambda that
# forwards all arguments to the method. Unlike the neighbouring conversion entries, it
# uses its own sample generator (sample_inputs_byte).
UnaryUfuncInfo(
'byte',
op=lambda x, *args, **kwargs: x.byte(*args, **kwargs),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
supports_out=False,
sample_inputs_func=sample_inputs_byte,
# The autograd test runner cannot handle functions that change dtype
supports_autograd=False,
skips=(
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
# RuntimeError: attribute lookup is not defined on builtin
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
DecorateInfo(unittest.skip('Overflow when downcasting signed type is undefined'), 'TestCommon', 'test_compare_cpu'),
)),
# UnaryUfuncInfo entry for the `Tensor.char` dtype conversion; the op is a lambda
# that forwards all arguments to the method.
UnaryUfuncInfo(
'char',
op=lambda x, *args, **kwargs: x.char(*args, **kwargs),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf),
supports_out=False,
sample_inputs_func=sample_inputs_conversion,
# The autograd test runner cannot handle functions that change dtype
supports_autograd=False,
skips=(
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
# RuntimeError: attribute lookup is not defined on builtin
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
DecorateInfo(unittest.skip('Overflow when downcasting signed type is undefined'), 'TestCommon', 'test_compare_cpu'),
)),
# UnaryUfuncInfo entry for the `Tensor.double` dtype conversion; the op is a lambda
# that forwards all arguments to the method. Forward and forward-over-backward AD
# support are declared, and no gradient test classes are marked as expected failures.
UnaryUfuncInfo(
'double',
op=lambda x, *args, **kwargs: x.double(*args, **kwargs),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf),
supports_out=False,
sample_inputs_func=sample_inputs_conversion,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
skips=(
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
# RuntimeError: attribute lookup is not defined on builtin
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
)),
# UnaryUfuncInfo entry for the `Tensor.float` dtype conversion; the op is a lambda
# that forwards all arguments to the method.
UnaryUfuncInfo(
'float',
op=lambda x, *args, **kwargs: x.float(*args, **kwargs),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf),
supports_out=False,
sample_inputs_func=sample_inputs_conversion,
skips=(
# autograd tests don't handle operators that change dtype
# (no test name is given, so these target the gradient test classes as a whole)
DecorateInfo(unittest.expectedFailure, 'TestFwdGradients'),
DecorateInfo(unittest.expectedFailure, 'TestBwdGradients'),
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
# RuntimeError: attribute lookup is not defined on builtin
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
)),
# UnaryUfuncInfo entry for the `Tensor.half` dtype conversion; the op is a lambda
# that forwards all arguments to the method.
UnaryUfuncInfo(
'half',
op=lambda x, *args, **kwargs: x.half(*args, **kwargs),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
supports_out=False,
sample_inputs_func=sample_inputs_conversion,
# Autograd support is declared explicitly, while the gradient test classes below
# are still marked as expected failures.
supports_autograd=True,
skips=(
# autograd tests don't handle operators that change dtype
DecorateInfo(unittest.expectedFailure, 'TestFwdGradients'),
DecorateInfo(unittest.expectedFailure, 'TestBwdGradients'),
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
# RuntimeError: attribute lookup is not defined on builtin
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
)),
# UnaryUfuncInfo entry for the `Tensor.int` dtype conversion; the op is a lambda
# that forwards all arguments to the method. Declared without autograd support.
UnaryUfuncInfo(
'int',
op=lambda x, *args, **kwargs: x.int(*args, **kwargs),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
supports_out=False,
sample_inputs_func=sample_inputs_conversion,
supports_autograd=False,
skips=(
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
# RuntimeError: attribute lookup is not defined on builtin
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
DecorateInfo(unittest.skip('Overflow when downcasting signed type is undefined'), 'TestCommon', 'test_compare_cpu'),
)),
# UnaryUfuncInfo entry for the `Tensor.long` dtype conversion; the op is a lambda
# that forwards all arguments to the method. Declared without autograd support.
UnaryUfuncInfo(
'long',
op=lambda x, *args, **kwargs: x.long(*args, **kwargs),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf),
supports_out=False,
sample_inputs_func=sample_inputs_conversion,
supports_autograd=False,
skips=(
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
# RuntimeError: attribute lookup is not defined on builtin
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
DecorateInfo(unittest.skip('Overflow when downcasting signed type is undefined'), 'TestCommon', 'test_compare_cpu'),
)),
# UnaryUfuncInfo entry for the `Tensor.short` dtype conversion; the op is a lambda
# that forwards all arguments to the method. Declared without autograd support.
UnaryUfuncInfo(
'short',
op=lambda x, *args, **kwargs: x.short(*args, **kwargs),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
supports_out=False,
sample_inputs_func=sample_inputs_conversion,
supports_autograd=False,
skips=(
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
# RuntimeError: attribute lookup is not defined on builtin
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
DecorateInfo(unittest.skip('Overflow when downcasting signed type is undefined'), 'TestCommon', 'test_compare_cpu'),
)),
# UnaryUfuncInfo entry for the `Tensor.cdouble` dtype conversion; here the op is the
# unbound method itself rather than a forwarding lambda.
UnaryUfuncInfo(
'cdouble',
op=torch.Tensor.cdouble,
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf),
supports_out=False,
sample_inputs_func=sample_inputs_conversion,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
skips=(
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
# RuntimeError: attribute lookup is not defined on builtin
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
DecorateInfo(unittest.skip("Skipped!"), 'TestNNCOpInfo', 'test_nnc_correctness'),
)),
# UnaryUfuncInfo entry for the `Tensor.cfloat` dtype conversion; here the op is the
# unbound method itself rather than a forwarding lambda.
UnaryUfuncInfo(
'cfloat',
op=torch.Tensor.cfloat,
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf),
supports_out=False,
sample_inputs_func=sample_inputs_conversion,
skips=(
# autograd tests don't handle operators that change dtype
# (no test name is given, so these target the gradient test classes as a whole)
DecorateInfo(unittest.expectedFailure, 'TestFwdGradients'),
DecorateInfo(unittest.expectedFailure, 'TestBwdGradients'),
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
# RuntimeError: attribute lookup is not defined on builtin
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
DecorateInfo(unittest.skip("Skipped!"), 'TestNNCOpInfo', 'test_nnc_correctness'),
)),
# UnaryUfuncInfo entry for the `Tensor.chalf` dtype conversion; the op is a lambda
# that forwards all arguments to the method. Several expected failures below stem
# from kernels that are not implemented for ComplexHalf.
UnaryUfuncInfo(
'chalf',
op=lambda x, *args, **kwargs: x.chalf(*args, **kwargs),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf),
supports_out=False,
sample_inputs_func=sample_inputs_conversion,
skips=(
# autograd tests don't handle operators that change dtype
DecorateInfo(unittest.expectedFailure, 'TestFwdGradients'),
DecorateInfo(unittest.expectedFailure, 'TestBwdGradients'),
# use of lambda doesn't work with test_normalize_operator_exhaustive
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
# RuntimeError: "sum_cpu" not implemented for 'ComplexHalf'
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_variant_consistency_eager',
device_type='cpu'),
# TypeError: 'int' object is not iterable
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
# RuntimeError: "sum_cpu" not implemented for 'ComplexHalf'
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_conj_view',
device_type='cpu'),
# RuntimeError: "sum_cpu" not implemented for 'ComplexHalf'
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view',
device_type='cpu'),
# RuntimeError: "sum_cpu" not implemented for 'ComplexHalf'
# RuntimeError: "neg_conj_cuda" not implemented for 'ComplexHalf'
# (no device_type restriction: expected to fail on every device)
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_conj_view'),
)
),
# OpInfo entry for `empty_like`. Because the output holds uninitialized data, every test
# that compares output values is skipped below.
OpInfo('empty_like',
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf),
supports_out=False,
sample_inputs_func=sample_inputs_like_fns,
reference_inputs_func=reference_inputs_like_fns,
supports_autograd=False,
skips=(
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"),
"TestNormalizeOperators", "test_normalize_operator_exhaustive"),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_noncontiguous_samples'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_conj_view'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_neg_view'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_neg_conj_view'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestNNCOpInfo', 'test_nnc_correctness'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
# (no test name is given for this class)
DecorateInfo(unittest.skip("Skipped!"), 'TestCudaFuserOpInfo'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_complex_half_reference_testing'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_non_standard_bool_values'),
DecorateInfo(unittest.skip("Expected: empty_like is not comparable"), 'TestCompositeCompliance',
'test_operator'),
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'),
)),
# OpInfo entry for `zeros_like`; besides dense samples it registers sparse sample
# generators for the COO, CSR, CSC, BSR and BSC layouts plus sparse error inputs.
OpInfo('zeros_like',
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf),
supports_out=False,
sample_inputs_func=sample_inputs_like_fns,
supports_autograd=False,
error_inputs_sparse_func=error_inputs_sparse_like_fns,
# One partial per sparse layout, all built on sample_inputs_sparse_like_fns.
sample_inputs_sparse_coo_func=partial(sample_inputs_sparse_like_fns, layout=torch.sparse_coo),
sample_inputs_sparse_csr_func=partial(sample_inputs_sparse_like_fns, layout=torch.sparse_csr),
sample_inputs_sparse_csc_func=partial(sample_inputs_sparse_like_fns, layout=torch.sparse_csc),
sample_inputs_sparse_bsr_func=partial(sample_inputs_sparse_like_fns, layout=torch.sparse_bsr),
sample_inputs_sparse_bsc_func=partial(sample_inputs_sparse_like_fns, layout=torch.sparse_bsc),
# No skips are currently registered for this op.
skips=(
)),
# OpInfo entry for `ones_like`; declared without out= or autograd support.
OpInfo('ones_like',
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf),
supports_out=False,
sample_inputs_func=sample_inputs_like_fns,
supports_autograd=False,
# No skips are currently registered for this op.
skips=(
)),
# OpInfo entry for `randn`; the op is invoked through wrapper_set_seed and takes no
# tensor input, so tests that expect a tensor input are skipped or expected to fail.
OpInfo('randn',
dtypes=floating_and_complex_types_and(torch.half, torch.bfloat16, torch.complex32),
op=lambda *args, **kwargs: wrapper_set_seed(torch.randn, *args, **kwargs),
supports_out=True,
sample_inputs_func=sample_inputs_randn,
supports_autograd=False,
skips=(
# Tests that assume input is a tensor or sequence of tensors
DecorateInfo(unittest.skip("Test expects tensor input"), "TestCommon", "test_noncontiguous_samples"),
DecorateInfo(unittest.skip("Test expects tensor input"), "TestVmapOperatorsOpInfo", "test_vmap_exhaustive"),
DecorateInfo(unittest.skip("Test expects tensor input"), "TestVmapOperatorsOpInfo", "test_op_has_batch_rule"),
# CPU randn generates different values based on the strides of out tensor
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out', device_type='cpu'),
# randn fails to warn when resizing its out tensor
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning'),
# FX failed to normalize op - add the op to the op_skip list.
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
# Tests that assume input tensor has a meaningful effect on output tensor
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_variant_consistency_eager'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_conj_view'),
# AssertionError: JIT Test does not execute any logic
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
DecorateInfo(unittest.expectedFailure, 'TestDecomp', 'test_quick'),
)),
# OpInfo entry for `randn_like`; the op is invoked through wrapper_set_seed, and sparse
# sample generators are registered for the COO, CSR, CSC, BSR and BSC layouts.
OpInfo('randn_like',
dtypes=floating_and_complex_types_and(torch.half, torch.bfloat16, torch.complex32),
op=lambda inp, *args, **kwargs:
wrapper_set_seed(torch.randn_like, inp, *args, **kwargs),
supports_out=False,
sample_inputs_func=sample_inputs_like_fns,
supports_autograd=False,
error_inputs_sparse_func=error_inputs_sparse_like_fns,
# One partial per sparse layout, all built on sample_inputs_sparse_like_fns.
sample_inputs_sparse_coo_func=partial(sample_inputs_sparse_like_fns, layout=torch.sparse_coo),
sample_inputs_sparse_csr_func=partial(sample_inputs_sparse_like_fns, layout=torch.sparse_csr),
sample_inputs_sparse_csc_func=partial(sample_inputs_sparse_like_fns, layout=torch.sparse_csc),
sample_inputs_sparse_bsr_func=partial(sample_inputs_sparse_like_fns, layout=torch.sparse_bsr),
sample_inputs_sparse_bsc_func=partial(sample_inputs_sparse_like_fns, layout=torch.sparse_bsc),
skips=(
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
# AssertionError: JIT Test does not execute any logic
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
DecorateInfo(unittest.skip("Expected: randn_like is not comparable between dtypes"),
'TestCommon', 'test_complex_half_reference_testing'),
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'),
)),
# OpInfo entry for `rand_like`; like the other random entries in this list, the op is
# invoked through the wrapper_set_seed helper.
OpInfo('rand_like',
       dtypes=floating_types_and(torch.half, torch.bfloat16, torch.complex32, torch.complex64, torch.complex128),
       # Must dispatch to torch.rand_like (not torch.randn_like) so that this entry
       # actually exercises the operator it is named after.
       op=lambda inp, *args, **kwargs:
           wrapper_set_seed(torch.rand_like, inp, *args, **kwargs),
       supports_out=False,
       sample_inputs_func=sample_inputs_like_fns,
       supports_autograd=False,
       skips=(
           DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
           # AssertionError: JIT Test does not execute any logic
           DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
           # Random output cannot be compared across dtypes.
           DecorateInfo(unittest.skip("Expected: rand_like is not comparable between dtypes"),
                        'TestCommon', 'test_complex_half_reference_testing'),
           DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'),
       )),
# OpInfo entry for `randint`; the op is invoked through wrapper_set_seed and takes no
# tensor input, so tests that expect a tensor input are skipped or expected to fail.
OpInfo('randint',
dtypes=all_types_and(torch.half, torch.bfloat16),
op=lambda *args, **kwargs:
wrapper_set_seed(torch.randint, *args, **kwargs),
supports_out=False,
sample_inputs_func=sample_inputs_randint,
supports_autograd=False,
skips=(
# Tests that assume input is a tensor or sequence of tensors
DecorateInfo(unittest.skip("Test expects tensor input"), "TestCommon", "test_noncontiguous_samples"),
DecorateInfo(unittest.skip("Test expects tensor input"), "TestVmapOperatorsOpInfo", "test_vmap_exhaustive"),
DecorateInfo(unittest.skip("Test expects tensor input"), "TestVmapOperatorsOpInfo", "test_op_has_batch_rule"),
# CPU randint generates different values based on the strides of out tensor
# NOTE(review): this entry sets supports_out=False and, unlike the `randn` entry, the
# decorator below has no device_type='cpu' restriction -- confirm both are intended.
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out'),
# randint fails to warn when resizing its out tensor
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning'),
# FX failed to normalize op - add the op to the op_skip list.
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
# Tests that assume input tensor has a meaningful effect on output tensor
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_variant_consistency_eager'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_conj_view'),
# AssertionError: JIT Test does not execute any logic
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
# Might need to skip until ROCm5.5
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_multiple_devices',
dtypes=[torch.float32, torch.int64], active_if=TEST_WITH_ROCM),
)),
# OpInfo entry for `randint_like`; the op is invoked through wrapper_set_seed.
OpInfo('randint_like',
dtypes=all_types_and(torch.half, torch.bfloat16),
op=lambda inp, *args, **kwargs:
wrapper_set_seed(torch.randint_like, inp, *args, **kwargs),
supports_out=False,
sample_inputs_func=sample_inputs_randint_like,
supports_autograd=False,
skips=(
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
# AssertionError: JIT Test does not execute any logic
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'),
)),
# OpInfo entry for `full_like`; additionally lists the unsigned dtypes uint16 and uint32.
# Declared without out= or autograd support.
OpInfo('full_like',
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16,
torch.uint16, torch.uint32),
supports_out=False,
sample_inputs_func=sample_inputs_full_like,
supports_autograd=False,
),
# OpInfo entry for the `Tensor.new_zeros` method; the op is a lambda that forwards
# all arguments to the method on the input tensor.
OpInfo('new_zeros',
op=lambda x, *args, **kwargs: x.new_zeros(*args, **kwargs),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf),
supports_out=False,
sample_inputs_func=sample_inputs_new_fns,
skips=(
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
),
supports_autograd=False),
# OpInfo entry for the `Tensor.new_ones` method; the op is a lambda that forwards
# all arguments to the method on the input tensor.
OpInfo('new_ones',
op=lambda x, *args, **kwargs: x.new_ones(*args, **kwargs),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf),
supports_out=False,
sample_inputs_func=sample_inputs_new_fns,
skips=(
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
),
supports_autograd=False),
# OpInfo entry for the factory function `torch.ones`; flagged as a factory function
# with varargs and out= support, sharing sample_inputs_ones_zeros with `zeros`.
OpInfo('ones',
op=torch.ones,
supports_autograd=False,
supports_varargs=True,
is_factory_function=True,
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf),
supports_out=True,
sample_inputs_func=sample_inputs_ones_zeros,
skips=(
# Tests that assume input is a tensor or sequence of tensors
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_variant_consistency_eager'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_conj_view'),
# Same failure as arange: cannot find linspace in captured graph
DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit'),
# UserWarning not triggered : Resized a non-empty tensor but did not warn about it.
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning'),
)),
# OpInfo entry for the factory function `torch.zeros`; flagged as a factory function
# with out= support, sharing sample_inputs_ones_zeros with `ones`.
# NOTE(review): unlike the `ones` entry, supports_varargs is not set here -- confirm
# whether that difference is intentional.
OpInfo('zeros',
op=torch.zeros,
supports_autograd=False,
is_factory_function=True,
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf),
supports_out=True,
sample_inputs_func=sample_inputs_ones_zeros,
skips=(
# Tests that assume input is a tensor or sequence of tensors
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_variant_consistency_eager'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_conj_view'),
# Same failure as arange: cannot find linspace in captured graph
DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit'),
# UserWarning not triggered : Resized a non-empty tensor but did not warn about it.
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning'),
)),
# OpInfo entry for the factory function `torch.full`; flagged as a factory function
# with out= support.
OpInfo('full',
op=torch.full,
supports_autograd=False,
is_factory_function=True,
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf),
supports_out=True,
sample_inputs_func=sample_inputs_full,
skips=(
# Tests that assume input is a tensor or sequence of tensors
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_variant_consistency_eager'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_conj_view'),
# Same failure as arange: cannot find linspace in captured graph
DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit'),
# UserWarning not triggered : Resized a non-empty tensor but did not warn about it.
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning'),
# RuntimeError: UNSUPPORTED DTYPE: bool
# (expected failure is limited to the torch.bool dtype)
DecorateInfo(unittest.expectedFailure, 'TestNNCOpInfo', 'test_nnc_correctness', dtypes=(torch.bool,)),
)),
# OpInfo entry for the `Tensor.new_empty` method; the op is a lambda that forwards all
# arguments to the method. Because the output holds uninitialized data, every test that
# compares output values is skipped below.
OpInfo('new_empty',
op=lambda x, *args, **kwargs: x.new_empty(*args, **kwargs),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf),
supports_out=False,
sample_inputs_func=sample_inputs_new_fns,
skips=(
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_variant_consistency_eager'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_noncontiguous_samples'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_conj_view'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_neg_view'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_neg_conj_view'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestNNCOpInfo', 'test_nnc_correctness'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
# (no test name is given for this class)
DecorateInfo(unittest.skip("Skipped!"), 'TestCudaFuserOpInfo'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_non_standard_bool_values'),
DecorateInfo(unittest.skip("Expected: new_empty is not comparable"), 'TestCompositeCompliance',
'test_operator'),
DecorateInfo(unittest.skip("Expected: new_empty is not comparable"),
'TestCommon', 'test_complex_half_reference_testing'),
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'),
),
supports_autograd=False),
# OpInfo entry for the `Tensor.new_empty_strided` method; the op is a lambda that forwards
# all arguments to the method, and samples come from sample_inputs_new_fns with
# is_strided=True. Because the output holds uninitialized data, every test that compares
# output values is skipped below.
OpInfo('new_empty_strided',
op=lambda x, *args, **kwargs: x.new_empty_strided(*args, **kwargs),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf),
supports_out=False,
sample_inputs_func=partial(sample_inputs_new_fns, is_strided=True),
supports_autograd=False,
skips=(
# FX failed to normalize op
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
# Lazy tensor failures
DecorateInfo(unittest.skip("Skipped!"), 'TestLazyOpInfo', 'test_correctness'),
DecorateInfo(unittest.skip("Skipped!"), 'TestLazyOpInfo', 'test_correctness_with_reusing_ir'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Expected: new_empty_strided is not comparable"),
'TestCommon', 'test_variant_consistency_eager'),
DecorateInfo(unittest.skip("Expected: new_empty_strided is not comparable"),
'TestCommon', 'test_noncontiguous_samples'),
DecorateInfo(unittest.skip("Expected: new_empty_strided is not comparable"),
'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.skip("Expected: new_empty_strided is not comparable"),
'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.skip("Expected: new_empty_strided is not comparable"),
'TestMathBits', 'test_neg_conj_view'),
DecorateInfo(unittest.skip("Expected: new_empty_strided is not comparable"),
'TestCommon', 'test_non_standard_bool_values'),
DecorateInfo(unittest.skip("Expected: new_empty_strided is not comparable"),
'TestCommon', 'test_complex_half_reference_testing'),
DecorateInfo(unittest.skip("Expected: new_empty_strided is not comparable"),
'TestCompositeCompliance', 'test_operator'),
DecorateInfo(unittest.skip("Expected: new_empty_strided is not comparable"),
'TestDecomp', 'test_comprehensive'),
DecorateInfo(unittest.skip("Expected: new_empty_strided is not comparable"),
'TestDecomp', 'test_quick'),
DecorateInfo(unittest.skip("Expected: new_empty_strided is not comparable"),
'TestJit', 'test_variant_consistency_jit'),
DecorateInfo(unittest.skip("Expected: new_empty_strided is not comparable"),
'TestProxyTensorOpInfo', 'test_make_fx_exhaustive'),
DecorateInfo(unittest.skip("Expected: new_empty_strided is not comparable"),
'TestProxyTensorOpInfo', 'test_make_fx_fake_exhaustive'),
DecorateInfo(unittest.skip("Expected: new_empty_strided is not comparable"),
'TestProxyTensorOpInfo', 'test_make_fx_symbolic_exhaustive'),
DecorateInfo(unittest.skip("Expected: new_empty_strided is not comparable"),
'TestNNCOpInfo', 'test_nnc_correctness'),
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'),
)),
OpInfo('empty_strided',
op=lambda inp, *args, **kwargs: wrapper_set_seed(torch.empty_strided, inp, *args, **kwargs),
dtypes=all_types_and_complex_and(torch.bfloat16, torch.bool, torch.half),
supports_out=False,
supports_autograd=False,
sample_inputs_func=sample_inputs_empty_strided,
skips=(
# FX failed to normalize op - add the op to the op_skip list.
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
# AssertionError: JIT Test does not execute any logic
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_noncontiguous_samples'),
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_variant_consistency_eager'),
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_non_standard_bool_values'),
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_neg_conj_view'),
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_compare_cpu'),
DecorateInfo(unittest.skip("Expected: empty is not comparable"), 'TestCompositeCompliance', 'test_operator'),
# Lazy tensor failures
DecorateInfo(unittest.skip("Expected: empty is not comparable"), 'TestLazyOpInfo'),
# RuntimeError: unsupported operation: more than one element of the written-to tensor refers to a single
# memory location. Please clone() the tensor before performing the operation.
DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_dispatch_meta_outplace'),
DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_dispatch_symbolic_meta_outplace'),
DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_dispatch_symbolic_meta_outplace_all_strides'),
)),
OpInfo('empty',
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf),
sample_inputs_func=sample_inputs_empty,
supports_autograd=False,
skips=(
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_variant_consistency_eager'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_noncontiguous_samples'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_conj_view'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_neg_view'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_neg_conj_view'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestNNCOpInfo', 'test_nnc_correctness'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestCudaFuserOpInfo'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_non_standard_bool_values'),
DecorateInfo(unittest.skip("Expected: empty is not comparable"), 'TestCompositeCompliance',
'test_operator'),
# requires_grad doesn't exist in the jit schema
DecorateInfo(unittest.expectedFailure, 'TestOperatorSignatures', 'test_get_torch_func_signature_exhaustive'),
DecorateInfo(unittest.skip("Expected: empty is not comparable"),
'TestCommon',
'test_out'),
DecorateInfo(unittest.skip("Expected: empty is not comparable"),
'TestCommon',
'test_out_warning'),
DecorateInfo(unittest.skip("Expected: empty is not comparable"),
'TestLazyOpInfo'),
DecorateInfo(unittest.skip("Expected: empty is not comparable"),
'TestCommon', 'test_complex_half_reference_testing'),
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'),
)),
OpInfo('eye',
dtypes=all_types_complex_float8_and(torch.bool, torch.half, torch.bfloat16),
sample_inputs_func=sample_inputs_eye,
error_inputs_func=error_inputs_eye,
supports_out=True,
supports_autograd=False,
skips=(
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
# TODO: confirm whether this is the same failure as
# https://github.com/pytorch/pytorch/issues/81774
# also see: arange, new_full
# fails to match any schemas despite working in the interpreter
DecorateInfo(unittest.expectedFailure, 'TestOperatorSignatures', 'test_get_torch_func_signature_exhaustive'),
# fails to match any schemas despite working in the interpreter
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
# skip these tests since we have non tensor input
DecorateInfo(unittest.skip('Skipped!'), "TestCommon", "test_noncontiguous_samples"),
DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_variant_consistency_eager'),
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_neg_conj_view'),
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_neg_view'),
# UserWarning not triggered : Resized a non-empty tensor but did not warn about it.
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning'),
# "mul_cpu_reduced_float" not implemented for 'Float8_e4m3fn'
DecorateInfo(unittest.expectedFailure, 'TestNNCOpInfo', 'test_nnc_correctness',
dtypes=(torch.float8_e4m3fn, torch.float8_e4m3fnuz, torch.float8_e5m2, torch.float8_e5m2fnuz)),
)),
OpInfo('empty_permuted',
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf),
sample_inputs_func=sample_inputs_empty_permuted,
error_inputs_func=error_inputs_empty_permuted,
supports_out=False,
supports_autograd=False,
skips=(
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_variant_consistency_eager'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_noncontiguous_samples'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_conj_view'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_neg_view'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_neg_conj_view'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestNNCOpInfo', 'test_nnc_correctness'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestCudaFuserOpInfo'),
# Empty tensor data is garbage so it's hard to make comparisons with it.
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_non_standard_bool_values'),
DecorateInfo(unittest.skip("Expected: empty_permuted is not comparable"), 'TestCompositeCompliance',
'test_operator'),
# requires_grad doesn't exist in the jit schema
DecorateInfo(unittest.expectedFailure, 'TestOperatorSignatures', 'test_get_torch_func_signature_exhaustive'),
DecorateInfo(unittest.skip("Expected: empty_permuted is not comparable"),
'TestCommon',
'test_out'),
DecorateInfo(unittest.skip("Expected: empty_permuted is not comparable"),
'TestCommon',
'test_out_warning'),
DecorateInfo(unittest.skip("Expected: empty_permuted is not comparable"),
'TestLazyOpInfo'),
DecorateInfo(unittest.skip("Expected: empty_permuted is not comparable"),
'TestCommon', 'test_complex_half_reference_testing'),
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'),
)),
OpInfo('scalar_tensor',
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf),
sample_inputs_func=sample_inputs_scalar_tensor,
supports_autograd=False,
supports_out=False,
skips=(
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
# fails to match any schemas despite working in the interpreter
DecorateInfo(unittest.expectedFailure, 'TestOperatorSignatures', 'test_get_torch_func_signature_exhaustive'),
# fails to match any schemas despite working in the interpreter
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
# skip these tests since we have non tensor input
DecorateInfo(unittest.skip('Skipped!'), "TestCommon", "test_noncontiguous_samples"),
DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_variant_consistency_eager'),
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_neg_conj_view'),
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_neg_view'),
)),
OpInfo('new_full',
op=lambda x, *args, **kwargs: x.new_full(*args, **kwargs),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf),
supports_out=False,
sample_inputs_func=sample_inputs_new_full,
skips=(
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
),
supports_autograd=False),
OpInfo('multinomial',
op=lambda inp, *args, **kwargs:
wrapper_set_seed(torch.multinomial, inp, *args, **kwargs),
method_variant=lambda inp, *args, **kwargs:
wrapper_set_seed(torch.Tensor.multinomial, inp, *args, **kwargs),
dtypes=floating_types_and(torch.bfloat16, torch.half),
supports_out=True,
sample_inputs_func=sample_inputs_multinomial,
error_inputs_func=error_inputs_multinomial,
skips=(
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
# Strides are not the same!
# This may not be reproducible in CI
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_out'),
# AssertionError: JIT Test does not execute any logic
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
# UserWarning not triggered : Resized a non-empty tensor but did not warn about it.
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning'),
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu')),
supports_autograd=False),
OpInfo('normal',
op=lambda inp, *args, **kwargs:
wrapper_set_seed(torch.normal, inp, *args, **kwargs),
# The inplace variant (Tensor.normal_) is different from torch.normal
inplace_variant=None,
dtypes=floating_types_and(torch.bfloat16, torch.half),
dtypesIfCUDA=floating_types_and(torch.bfloat16, torch.half),
supports_out=True,
sample_inputs_func=sample_inputs_normal_tensor_first,
skips=(
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
# Tensor-likes are not close!
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out'),
# AssertionError: JIT Test does not execute any logic
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
# UserWarning not triggered : Resized a non-empty tensor but did not warn about it.
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning'),
# Computed gradient is incorrect -- would be an expected failure (xfail), but gradgrad somehow passes
DecorateInfo(unittest.skip("Gradients are incorrect!"), 'TestFwdGradients'),
DecorateInfo(unittest.skip("Gradients are incorrect!"), 'TestBwdGradients'),
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'),
# RuntimeError: Difference from {dtype} is larger with decomposition
DecorateInfo(unittest.skip("Skipped!"), 'TestDecomp', 'test_comprehensive'),
DecorateInfo(unittest.skip("Skipped!"), 'TestDecomp', 'test_quick'),
# The inplace variant (Tensor.normal_) is different from torch.normal
# inplace variant Tensor.normal_ is decomposed using randn_like()
DecorateInfo(unittest.skip("Skipped!"), 'TestMeta', 'test_dispatch_symbolic_meta_outplace_all_strides'))),
OpInfo('normal',
# This has its own variant because OpInfos assume the first arg is a Tensor, but here it is not:
# the op lambda below takes `std` first and passes (mean, std) to torch.normal
variant_test_name='number_mean',
op=lambda std, mean, *args, **kwargs:
wrapper_set_seed(torch.normal, mean, std, *args, **kwargs),
# The inplace variant (Tensor.normal_) is different from torch.normal
inplace_variant=None,
dtypes=floating_types_and(torch.bfloat16, torch.half),
dtypesIfCUDA=floating_types_and(torch.bfloat16, torch.half),
supports_out=True,
sample_inputs_func=sample_inputs_normal_tensor_second,
skips=(
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
# AssertionError: JIT Test does not execute any logic
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_noncontiguous_samples'),
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_variant_consistency_eager'),
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_out'),
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_out_warning'),
DecorateInfo(unittest.skip("Skipped!"), 'TestCompositeCompliance', 'test_backward'),
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.skip("Skipped!"), 'TestFwdGradients'),
DecorateInfo(unittest.skip("Skipped!"), 'TestBwdGradients'),
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_compare_cpu'),
DecorateInfo(unittest.skip("Skipped!"), 'TestEagerFusionOpInfo'),
DecorateInfo(unittest.skip("Skipped!"), 'TestOperators'),
# AssertionError
DecorateInfo(unittest.skip("Skipped!"), 'TestDecomp', 'test_comprehensive'),
# AssertionError
DecorateInfo(unittest.skip("Skipped!"), 'TestDecomp', 'test_quick'),
# AssertionError in CUDA variant
DecorateInfo(unittest.skip("Skipped!"), 'TestFakeTensor', device_type='cuda'),
DecorateInfo(unittest.skip("Skipped!"), 'TestDeviceUtils', 'test_device_mode_ops'))),
OpInfo('bernoulli',
op=lambda inp, *args, **kwargs:
wrapper_set_seed(torch.bernoulli, inp, *args, **kwargs),
# The inplace variant (Tensor.bernoulli_) is different from torch.bernoulli
inplace_variant=None,
method_variant=lambda inp, *args, **kwargs:
wrapper_set_seed(torch.Tensor.bernoulli, inp, *args, **kwargs),
dtypes=floating_types_and(torch.bfloat16, torch.half),
supports_out=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_bernoulli,
error_inputs_func=error_inputs_bernoulli,
skips=(
# vmap: We do not yet support calling random operations inside of vmap
DecorateInfo(unittest.expectedFailure, 'TestFwdGradients', 'test_forward_mode_AD'),
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
# AssertionError: JIT Test does not execute any logic
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
# Expected RuntimeError when doing an unsafe cast from a result of
# dtype torch.float32 into an out= with dtype torch.long
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out'),
# UserWarning not triggered : Resized a non-empty tensor but did not warn about it.
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning'),
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'))),
OpInfo('scatter_add',
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
inplace_variant=torch.Tensor.scatter_add_,
sample_inputs_func=sample_inputs_scatter_add,
error_inputs_func=error_inputs_scatter_and_scatter_add,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
skips=(
# Compiler issue on ROCm. Regression started in ROCm 6.4.
DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_non_standard_bool_values',
dtypes=[torch.bool], active_if=TEST_WITH_ROCM),
)),
OpInfo('stack',
dtypes=all_types_and_complex_and(torch.complex32, torch.bool, torch.float16, torch.bfloat16),
sample_inputs_func=sample_inputs_stack,
assert_autodiffed=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
skips=(
# https://github.com/pytorch/pytorch/issues/77046
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
)),
OpInfo('_chunk_cat',
dtypes=all_types_and_complex_and(torch.complex32, torch.bool, torch.float16, torch.bfloat16),
sample_inputs_func=sample_inputs_chunk_cat,
error_inputs_func=error_inputs_chunk_cat,
supports_autograd=False,
supports_out=True,
),
OpInfo('hstack',
dtypes=all_types_and_complex_and(torch.complex32, torch.bool, torch.float16, torch.bfloat16),
sample_inputs_func=sample_inputs_hstack_dstack_vstack,
error_inputs_func=error_inputs_hstack_dstack_vstack,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
),
BinaryUfuncInfo('hypot',
dtypes=floating_types_and(torch.bfloat16, torch.half),
dtypesIfCUDA=floating_types_and(torch.half, torch.bfloat16),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_rhs_python_scalar=False),
OpInfo('histogram',
dtypes=floating_types(),
dtypesIfCUDA=_dispatch_dtypes(), # histogram is only implemented on CPU
sample_inputs_func=sample_inputs_histogram,
supports_autograd=False,
skips=(
# JIT tests don't work with Tensor keyword arguments
# https://github.com/pytorch/pytorch/issues/58507
# RuntimeError:
# undefined value tensor:
# File "<string>", line 3
# def the_method(i0):
# return torch.histogram(i0, 1, weight=tensor(-0.5735, dtype=torch.float32), density=False)
# ~~~~~~ <--- HERE
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
# Not Implemented on XLA.
DecorateInfo(unittest.skip("Skipped!"), 'TestOpInfo', device_type='xla'),
)),
OpInfo('histogramdd',
dtypes=floating_types(),
dtypesIfCUDA=_dispatch_dtypes(), # histogramdd is only implemented on CPU
sample_inputs_func=sample_inputs_histogramdd,
error_inputs_func=error_inputs_histogramdd,
supports_autograd=False,
skips=(
# Not implemented on CUDA
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_errors', device_type='cuda'),
# JIT tests don't work with Tensor keyword arguments
# https://github.com/pytorch/pytorch/issues/58507
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
)),
OpInfo('histc',
dtypes=floating_types_and(torch.bfloat16, torch.float16),
dtypesIfCUDA=floating_types_and(torch.int8, torch.uint8, torch.int16, torch.int32, torch.int64),
sample_inputs_func=sample_inputs_histc,
supports_out=True,
supports_autograd=False,
skips=(
# CUDA histc returns a float tensor but does not correctly warn when passed an integral out tensor
# "AssertionError: RuntimeError not raised : Expected RuntimeError when doing an unsafe cast
# from a result of dtype torch.float32 into an out= with dtype torch.long"
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out', device_type='cuda'),
)),
OpInfo('bincount',
dtypes=integral_types_and(),
sample_inputs_func=sample_inputs_bincount,
supports_out=False,
supports_autograd=False,
skips=(
# JIT tests don't work with Tensor keyword arguments
# https://github.com/pytorch/pytorch/issues/58507
DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit'),
)),
OpInfo('bucketize',
dtypes=all_types_and(torch.float16, torch.bfloat16),
dtypesIfCUDA=all_types_and(torch.bfloat16, torch.float16),
sample_inputs_func=sample_inputs_bucketize,
reference_inputs_func=reference_inputs_bucketize,
error_inputs_func=error_inputs_bucketize,
supports_autograd=False,
skips=(
# JIT tests don't work with Tensor keyword arguments
DecorateInfo(unittest.skip("Expected failure!"), 'TestJit', 'test_variant_consistency_jit'),
)),
OpInfo('searchsorted',
dtypes=all_types_and(torch.bfloat16, torch.float16),
dtypesIfCUDA=all_types_and(torch.bfloat16, torch.float16),
sample_inputs_func=sample_inputs_searchsorted,
supports_autograd=False,
ref=reference_searchsorted,
skips=(
# JIT tests don't work with Tensor keyword arguments
# https://github.com/pytorch/pytorch/issues/58507
DecorateInfo(unittest.skip("Expected failure!"), 'TestJit', 'test_variant_consistency_jit'),
)),
OpInfo('cat',
ref=_cat_np,
aliases=('concat', 'concatenate'),
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.complex32),
sample_inputs_func=sample_inputs_cat_concat,
reference_inputs_func=reference_inputs_cat,
error_inputs_func=error_inputs_cat,
# https://github.com/pytorch/pytorch/issues/80411
gradcheck_fast_mode=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# See https://github.com/pytorch/pytorch/issues/66357
check_batched_forward_grad=False,
assert_autodiffed=True,
skips=(
# https://github.com/pytorch/pytorch/issues/89353
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_numpy_ref_mps'),
# RuntimeError: Arguments for call not valid.
# Expected a value of type 'List[Tensor]' for argument
# 'tensors' but instead found type 'Tensor (inferred)'.
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_jit_alias_remapping'),
# see https://github.com/pytorch/pytorch/issues/71286
DecorateInfo(unittest.expectedFailure, 'TestNNCOpInfo', 'test_nnc_correctness'),
# see https://github.com/pytorch/pytorch/issues/99806
# RuntimeError: The size of tensor a (25) must match the size of tensor b (0) at non-singleton dimension 0.
DecorateInfo(unittest.expectedFailure, 'TestBwdGradients', 'test_fn_gradgrad'),
)),
OpInfo('unbind',
dtypes=all_types_and_complex_and(torch.complex32, torch.bool, torch.float16, torch.bfloat16),
ref=reference_unbind,
sample_inputs_func=sample_inputs_unbind,
error_inputs_func=error_inputs_unbind,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_gradgrad=True,
supports_out=False,
),
OpInfo('unbind_copy',
dtypes=all_types_and_complex_and(torch.complex32, torch.bool, torch.float16, torch.bfloat16),
ref=reference_unbind,
sample_inputs_func=sample_inputs_unbind,
error_inputs_func=error_inputs_unbind,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_gradgrad=True,
supports_out=True,
check_batched_grad=False,
skips=(
# Expected __torch_dispatch__ for aten::unbind_copy.int_out to return None
# but it returned something else instead.
DecorateInfo(
unittest.expectedFailure,
'TestProxyTensorOpInfo',
'test_make_fx_symbolic_exhaustive_out'
),
)),
OpInfo('vstack',
aliases=('row_stack',),
dtypes=all_types_and_complex_and(torch.complex32, torch.bool, torch.float16, torch.bfloat16),
sample_inputs_func=sample_inputs_hstack_dstack_vstack,
error_inputs_func=error_inputs_hstack_dstack_vstack,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
skips=(
# RuntimeError: _fn() Expected a value of type
# 'Tensor (inferred)' for argument 't0' but instead found type 'tuple'.
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_jit_alias_remapping'),)),
OpInfo('dstack',
dtypes=all_types_and_complex_and(torch.complex32, torch.bool, torch.float16, torch.bfloat16),
sample_inputs_func=sample_inputs_hstack_dstack_vstack,
error_inputs_func=error_inputs_hstack_dstack_vstack,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# See https://github.com/pytorch/pytorch/pull/78358
check_batched_forward_grad=False,
),
OpInfo('unfold',
op=lambda x, *args: x.unfold(*args),
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf),
backward_dtypes=floating_and_complex_types_and(torch.float16, torch.bfloat16),
# Runs very slowly on slow gradcheck - alternatively reduce input sizes
gradcheck_fast_mode=True,
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
check_batched_gradgrad=False,
# See https://github.com/pytorch/pytorch/issues/66357
check_batched_forward_grad=False,
skips=(
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
# Operator schema test is expected to fail because this is a functional and not an operator
DecorateInfo(unittest.expectedFailure, 'TestOperatorSignatures', 'test_get_torch_func_signature_exhaustive'),
),
sample_inputs_func=sample_inputs_unfold),
OpInfo('unfold_copy',
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf),
backward_dtypes=floating_and_complex_types_and(torch.float16, torch.bfloat16),
# Runs very slowly on slow gradcheck - alternatively reduce input sizes
gradcheck_fast_mode=True,
supports_out=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
check_batched_gradgrad=False,
# See https://github.com/pytorch/pytorch/issues/66357
check_batched_forward_grad=False,
sample_inputs_func=sample_inputs_unfold),
OpInfo('msort',
dtypes=all_types_and(torch.bool, torch.float16, torch.bfloat16),
dtypesIfCUDA=all_types_and(torch.bool, torch.float16, torch.bfloat16),
check_batched_gradgrad=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_msort),
OpInfo('movedim',
aliases=('moveaxis',),
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# See https://github.com/pytorch/pytorch/pull/78358
check_batched_forward_grad=False,
sample_inputs_func=sample_movedim_moveaxis,
reference_inputs_func=reference_movedim_moveaxis,
error_inputs_func=error_movedim_moveaxis),
OpInfo('renorm',
dtypes=floating_and_complex_types_and(torch.float16, torch.bfloat16),
sample_inputs_func=sample_inputs_renorm,
error_inputs_func=error_inputs_renorm,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
skips=(
# RuntimeError: Difference from float64 is larger with decomposition
# linalg_vector_norm.default than original on output 0.
# Original max diff: 2.560596747969157e-07,
# Decomp max diff: 1.8187482915266173e-06
DecorateInfo(unittest.skip("Inconsistent accuracy"), 'TestDecomp', 'test_comprehensive',
device_type='cpu', dtypes=(torch.float16,)),
DecorateInfo(toleranceOverride({torch.float16: tol(atol=3e-4, rtol=3e-6)}),
"TestConsistency", "test_output_match", device_type="mps"),
)),
ShapeFuncInfo('repeat',
op=lambda x, dims: x.repeat(dims),
ref=np.tile,
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
# https://github.com/pytorch/pytorch/issues/80411
gradcheck_fast_mode=True,
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_repeat_tile,
skips=(
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
)),
OpInfo('squeeze',
ref=_squeeze_ref,
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf),
supports_out=False,
assert_autodiffed=True,
autodiff_fusible_nodes=[], # aliases inputs, shouldn't be fused
autodiff_nonfusible_nodes=[], # aliases inputs, shouldn't be fused
assert_jit_shape_analysis=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# vmap does not support inplace views
check_inplace_batched_forward_grad=False,
# https://github.com/pytorch/pytorch/issues/66357
check_batched_forward_grad=False,
sample_inputs_func=sample_inputs_squeeze),
OpInfo('squeeze',
ref=_squeeze_ref,
variant_test_name="multiple",
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf),
supports_out=False,
assert_autodiffed=True,
autodiff_fusible_nodes=[], # aliases inputs, shouldn't be fused
autodiff_nonfusible_nodes=[], # aliases inputs, shouldn't be fused
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# vmap does not support inplace views
check_inplace_batched_forward_grad=False,
# https://github.com/pytorch/pytorch/issues/66357
check_batched_forward_grad=False,
sample_inputs_func=sample_inputs_squeeze_multiple),
OpInfo('squeeze_copy',
ref=_squeeze_ref,
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf),
supports_out=True,
assert_autodiffed=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# vmap does not support inplace views
check_inplace_batched_forward_grad=False,
# https://github.com/pytorch/pytorch/issues/66357
check_batched_forward_grad=False,
sample_inputs_func=sample_inputs_squeeze,
skips=(
DecorateInfo(
unittest.expectedFailure,
'TestJit',
'test_variant_consistency_jit',
dtypes=(torch.float32,),
),
)),
UnaryUfuncInfo(
'fill',
ref=_fill_np,
method_variant=None,
sample_kwargs=_fill_sample_kwargs,
sample_inputs_func=partial(sample_inputs_elementwise_unary, op_kwargs={'value': True}),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# https://github.com/pytorch/pytorch/issues/66357
check_batched_forward_grad=False,
dtypes=all_types_and_complex_and(torch.complex32, torch.bool, torch.float16, torch.bfloat16),
supports_out=False,
skips=(
# JIT has an issue when op is passed as a lambda
# AssertionError: JIT Test does not execute any logic
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
DecorateInfo(unittest.skip("No fill_ op"), 'TestCudaFuserOpInfo'),
DecorateInfo(unittest.skip("No fill_ op"), 'TestNNCOpInfo'),
)),
OpInfo('resize_',
op=lambda x, shape: x.clone().resize_(shape),
method_variant=None,
inplace_variant=torch.Tensor.resize_,
# the test fails because resize_ doesn't work with imag views as expected by the test
# https://github.com/pytorch/pytorch/issues/65945
test_neg_view=False,
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
supports_out=False,
supports_autograd=False,
skips=(
# Cannot resize variables that require grad
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_dtypes'),
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
DecorateInfo(unittest.skip("Allowed exception"), 'TestCompositeCompliance', 'test_operator'),
),
sample_inputs_func=sample_inputs_resize_ops),
OpInfo('resize_as_',
op=lambda x, other: torch.resize_as_(x.clone(), other),
method_variant=None,
inplace_variant=torch.Tensor.resize_as_,
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
supports_out=False,
supports_autograd=False,
skips=(
# Cannot resize variables that require grad
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_dtypes'),
DecorateInfo(unittest.skip('Allowed exemption'), 'TestCompositeCompliance', 'test_operator'),
),
sample_inputs_func=sample_inputs_resize_ops),
# `take_along_dim`: the CPU and CUDA dtype lists are identical. Forward-mode AD is
# enabled, but the batched forward-grad check is turned off (see the PR linked below).
OpInfo('take_along_dim',
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
dtypesIfCUDA=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
supports_inplace_autograd=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# See https://github.com/pytorch/pytorch/pull/78358
check_batched_forward_grad=False,
sample_inputs_func=sample_inputs_take_along_dim,
gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
decorators=(
# RuntimeError: view size is not compatible with input tensor's size and stride
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_outplace_all_strides"),
)),
# `tile`, compared against `np.tile` as its NumPy reference. Gradcheck runs in fast
# mode because of the issue linked below.
ShapeFuncInfo('tile',
ref=np.tile,
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
# https://github.com/pytorch/pytorch/issues/80411
gradcheck_fast_mode=True,
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_repeat_tile),
# `trapz` is registered as its own entry (not as an alias) with the same settings and
# sample generator as the `trapezoid` entry that follows it.
OpInfo('trapz',  # TODO: in the future, 'trapz' should be made a proper alias of 'trapezoid'
dtypes=all_types_and_complex_and(torch.float16, torch.bfloat16),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# See https://github.com/pytorch/pytorch/pull/78358
check_batched_forward_grad=False,
decorators=[
# Looser float16 tolerance for the Inductor comprehensive test on CUDA only
DecorateInfo(
toleranceOverride({torch.half: tol(atol=9e-4, rtol=4.3e-3)}),
'TestInductorOpInfo', 'test_comprehensive', device_type='cuda'
),
],
sample_inputs_func=sample_trapezoid),
# `trapezoid`: no `out=` variant declared; forward-mode AD enabled with the batched
# forward-grad check disabled (see the PR linked below).
OpInfo('trapezoid',
dtypes=all_types_and_complex_and(torch.float16, torch.bfloat16),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# See https://github.com/pytorch/pytorch/pull/78358
check_batched_forward_grad=False,
decorators=[
# Looser float16 tolerance for the Inductor comprehensive test on CUDA only
DecorateInfo(
toleranceOverride({torch.half: tol(atol=9e-4, rtol=4.3e-3)}),
'TestInductorOpInfo', 'test_comprehensive', device_type='cuda'
),
],
sample_inputs_func=sample_trapezoid),
# `cumulative_trapezoid`: same dtype coverage as `trapezoid`, with its own sample
# generator. The float16 tolerance override below is not restricted to a device type.
OpInfo('cumulative_trapezoid',
dtypes=all_types_and_complex_and(torch.bfloat16, torch.float16),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# See https://github.com/pytorch/pytorch/pull/78358
check_batched_forward_grad=False,
supports_out=False,
decorators=(
DecorateInfo(
toleranceOverride({torch.float16: tol(atol=4e-3, rtol=4e-3)}),
'TestInductorOpInfo', 'test_comprehensive',
),
),
sample_inputs_func=sample_cumulative_trapezoid,),
# `unsqueeze`: JIT shape analysis and autodiff are asserted, but both fusible and
# non-fusible node lists are empty because the op aliases its input.
OpInfo('unsqueeze',
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# See https://github.com/pytorch/pytorch/pull/78358
check_batched_forward_grad=False,
# vmap does not support inplace views
check_inplace_batched_forward_grad=False,
assert_jit_shape_analysis=True,
assert_autodiffed=True,
autodiff_fusible_nodes=[],  # aliases inputs, shouldn't be fused
autodiff_nonfusible_nodes=[],  # aliases inputs, shouldn't be fused
sample_inputs_func=sample_unsqueeze),
# `unsqueeze_copy`: mirrors the `unsqueeze` entry (same samples and flags) except that
# an `out=` variant is declared and two tests are marked as expected failures.
# NOTE(review): the "inplace views" / "aliases inputs" comments below look inherited
# from the `unsqueeze` entry; confirm they still apply to the copy variant.
OpInfo('unsqueeze_copy',
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf),
supports_out=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# See https://github.com/pytorch/pytorch/pull/78358
check_batched_forward_grad=False,
# vmap does not support inplace views
check_inplace_batched_forward_grad=False,
assert_jit_shape_analysis=True,
assert_autodiffed=True,
autodiff_fusible_nodes=[],  # aliases inputs, shouldn't be fused
autodiff_nonfusible_nodes=[],  # aliases inputs, shouldn't be fused
sample_inputs_func=sample_unsqueeze,
skips=(
DecorateInfo(unittest.expectedFailure, 'TestDTensorOps', 'test_dtensor_op_db'),
# Expected failure is scoped to float32 only
DecorateInfo(
unittest.expectedFailure,
'TestJit',
'test_variant_consistency_jit',
dtypes=(torch.float32,),
),
)),
# `xlogy` (aliased as `special.xlogy`): integer inputs promote to float, and
# right-hand-side sample values are generated with a lower bound of 0.01.
BinaryUfuncInfo('xlogy',
aliases=('special.xlogy',),
dtypes=all_types_and(torch.bool, torch.half, torch.bfloat16),
promotes_int_to_float=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_one_python_scalar=True,
# We don't test 0 as the gradient will be NaN and it'll break
rhs_make_tensor_kwargs=dict(low=0.01)),
# In-place `zero_`. `op` zeroes a clone of the input so the sample tensor is not
# mutated; the in-place method is registered as `inplace_variant`.
OpInfo('zero_',
op=lambda x: torch.zero_(x.clone()),
method_variant=None,
inplace_variant=torch.Tensor.zero_,
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
# https://github.com/pytorch/pytorch/issues/80411
gradcheck_fast_mode=True,
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_gradgrad=True,
skips=(
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
),
sample_inputs_func=sample_inputs_zero_),
# `logsumexp` (aliased as `special.logsumexp`): has both a sample-input and a
# reference-input generator; gradcheck fast mode is explicitly disabled.
OpInfo('logsumexp',
aliases=('special.logsumexp',),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
assert_autodiffed=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
gradcheck_fast_mode=False,
sample_inputs_func=sample_inputs_logsumexp,
reference_inputs_func=reference_inputs_logsumexp),
# `trace`: the CUDA dtype list is wider than the default one (adds chalf, bool, half
# and bfloat16). Also registers an error-input generator.
OpInfo('trace',
dtypes=all_types_and_complex(),
dtypesIfCUDA=all_types_and_complex_and(torch.chalf, torch.bool, torch.half, torch.bfloat16),
error_inputs_func=error_inputs_trace,
supports_inplace_autograd=False,
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_trace),
# `transpose`, also reachable as `swapdims` / `swapaxes`; checked against the
# `_numpy_ref_transpose` reference helper.
OpInfo('transpose',
ref=_numpy_ref_transpose,
aliases=('swapdims', 'swapaxes'),
assert_jit_shape_analysis=True,
dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.half, torch.chalf),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# vmap does not support inplace views
check_inplace_batched_forward_grad=False,
sample_inputs_func=sample_inputs_transpose_swapdims),
# `transpose_copy`: reuses the `transpose` sample generator but declares an `out=`
# variant, has no NumPy reference or aliases, and marks two tests as expected failures.
# NOTE(review): the "inplace views" comment below looks inherited from the `transpose`
# entry; confirm it still applies to the copy variant.
OpInfo('transpose_copy',
assert_jit_shape_analysis=True,
dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.half, torch.chalf),
supports_out=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# vmap does not support inplace views
check_inplace_batched_forward_grad=False,
sample_inputs_func=sample_inputs_transpose_swapdims,
skips=(
DecorateInfo(unittest.expectedFailure, 'TestDTensorOps', 'test_dtensor_op_db'),
# Expected failure is scoped to float32 only
DecorateInfo(
unittest.expectedFailure,
'TestJit',
'test_variant_consistency_jit',
dtypes=(torch.float32,)
),
)),
# The `Tensor.T` attribute, wrapped in a lambda so it can be invoked like an op.
# Registers both sample inputs and error inputs.
OpInfo('T',
op=lambda x: x.T,
dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.half, torch.chalf),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
skips=(
# lambda impl
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
DecorateInfo(unittest.expectedFailure, "TestJit", "test_variant_consistency_jit"),),
sample_inputs_func=sample_inputs_T,
error_inputs_func=error_inputs_T),
# The `Tensor.H` attribute, wrapped in a lambda so it can be invoked like an op.
# Uses the same sample generator as the `T` entry (`sample_inputs_T`).
OpInfo('H',
op=lambda x: x.H,
dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.half, torch.chalf),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# See https://github.com/pytorch/pytorch/pull/78358
check_batched_forward_grad=False,
skips=(
# lambda impl
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
DecorateInfo(unittest.expectedFailure, "TestJit", "test_variant_consistency_jit"),),
sample_inputs_func=sample_inputs_T),
# The `Tensor.mT` attribute, wrapped in a lambda so it can be invoked like an op.
# Samples come from `sample_inputs_adjoint`.
OpInfo('mT',
op=lambda x: x.mT,
dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.half, torch.chalf),
# Runs very slowly on slow gradcheck - alternatively reduce input sizes
gradcheck_fast_mode=True,
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
skips=(
# lambda impl
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
DecorateInfo(unittest.expectedFailure, "TestJit", "test_variant_consistency_jit"),),
sample_inputs_func=sample_inputs_adjoint),
# The `Tensor.mH` attribute, wrapped in a lambda so it can be invoked like an op;
# `adjoint` is registered as an alias. Samples come from `sample_inputs_adjoint`.
OpInfo('mH',
op=lambda x: x.mH,
aliases=('adjoint',),
dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.half, torch.chalf),
# Runs very slowly on slow gradcheck - alternatively reduce input sizes
gradcheck_fast_mode=True,
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# See https://github.com/pytorch/pytorch/pull/78358
check_batched_forward_grad=False,
skips=(
# lambda impl
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
DecorateInfo(unittest.expectedFailure, "TestJit", "test_variant_consistency_jit"),),
sample_inputs_func=sample_inputs_adjoint),
# `tril`: shares its sample-input and error-input generators with the `triu` entry.
# The only skip applies to bool inputs when running on ROCm.
OpInfo('tril',
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
error_inputs_func=error_inputs_tril_triu,
sample_inputs_func=sample_inputs_tril_triu,
skips=(
# Compiler issue on ROCm. Regression started in ROCm 6.4.
DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_non_standard_bool_values',
dtypes=[torch.bool], active_if=TEST_WITH_ROCM),
)),
# `triu`: shares its sample-input and error-input generators with the `tril` entry.
# The only skip applies to bool inputs when running on ROCm.
OpInfo('triu',
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
error_inputs_func=error_inputs_tril_triu,
sample_inputs_func=sample_inputs_tril_triu,
skips=(
# Compiler issue on ROCm. Regression started in ROCm 6.4.
DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_non_standard_bool_values',
dtypes=[torch.bool], active_if=TEST_WITH_ROCM),
)),
# `triu_indices`: only int32/int64 outputs are declared, autograd and `out=` are
# unsupported, and the op takes no tensor input (see the skips below).
OpInfo('triu_indices',
dtypes=_dispatch_dtypes((torch.int32, torch.int64)),
sample_inputs_func=sample_inputs_trilu_indices,
# NumPy reference. np.triu_indices takes (n, k, m), so the offset is moved between
# the two sizes. The default dtype is a NumPy dtype because np.array cannot interpret
# a torch.dtype (the previous `torch.long` default raised TypeError whenever the
# reference was called without an explicit dtype). `device` is accepted and ignored.
ref=lambda h, w, ofs=0, dtype=np.int64, device='cpu' : np.array(np.triu_indices(h, ofs, w), dtype=dtype),
supports_out=False,
supports_autograd=False,
skips=(
# skip these tests since we have non tensor input
DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_noncontiguous_samples'),
DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_variant_consistency_eager'),
DecorateInfo(unittest.skip('Skipped!'), 'TestJit', 'test_variant_consistency_jit'),
DecorateInfo(unittest.skip('Skipped!'), 'TestMathBits', 'test_neg_view'),
)),
# `tril_indices`: only int32/int64 outputs are declared, autograd and `out=` are
# unsupported, and the op takes no tensor input (see the skips below).
OpInfo('tril_indices',
dtypes=_dispatch_dtypes((torch.int32, torch.int64)),
sample_inputs_func=sample_inputs_trilu_indices,
# NumPy reference. np.tril_indices takes (n, k, m), so the offset is moved between
# the two sizes. The default dtype is a NumPy dtype because np.array cannot interpret
# a torch.dtype (the previous `torch.long` default raised TypeError whenever the
# reference was called without an explicit dtype). `device` is accepted and ignored.
ref=lambda h, w, ofs=0, dtype=np.int64, device='cpu' : np.array(np.tril_indices(h, ofs, w), dtype=dtype),
supports_out=False,
supports_autograd=False,
skips=(
# skip these tests since we have non tensor input
DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_noncontiguous_samples'),
DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_variant_consistency_eager'),
DecorateInfo(unittest.skip('Skipped!'), 'TestJit', 'test_variant_consistency_jit'),
DecorateInfo(unittest.skip('Skipped!'), 'TestMathBits', 'test_neg_view'),
)),
# `kron`: the CPU and CUDA dtype lists are identical; in-place autograd is declared
# unsupported and gradcheck runs in fast mode.
OpInfo('kron',
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
dtypesIfCUDA=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16),
# Runs very slowly on slow gradcheck - alternatively reduce input sizes
gradcheck_fast_mode=True,
supports_inplace_autograd=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_kron,
decorators=(
# RuntimeError: view size is not compatible with input tensor's size and stride
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_outplace_all_strides"),
)),
# `inner`: the default dtype list includes integer types, while the CUDA and ROCm
# lists are restricted to floating/complex types plus half and bfloat16.
OpInfo('inner',
dtypes=all_types_and_complex_and(torch.float16, torch.bfloat16),
dtypesIfCUDA=floating_and_complex_types_and(torch.float16, torch.bfloat16),
dtypesIfROCM=floating_and_complex_types_and(torch.half, torch.bfloat16),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# See https://github.com/pytorch/pytorch/pull/78358
check_batched_forward_grad=False,
sample_inputs_func=sample_inputs_inner,
),
# `tensordot`: same dtype split as the `inner` entry (integer types only in the
# default list); the operator-signature test is skipped.
OpInfo('tensordot',
dtypes=all_types_and_complex_and(torch.float16, torch.bfloat16),
dtypesIfCUDA=floating_and_complex_types_and(torch.float16, torch.bfloat16),
dtypesIfROCM=floating_and_complex_types_and(torch.half, torch.bfloat16),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# See https://github.com/pytorch/pytorch/pull/78358
check_batched_forward_grad=False,
sample_inputs_func=sample_inputs_tensordot,
skips=(
# Skip operator schema test because this is a functional and not an operator.
# Reference: https://github.com/pytorch/pytorch/issues/54574
DecorateInfo(unittest.skip("Skipped!"), 'TestOperatorSignatures', 'test_get_torch_func_signature_exhaustive'),
)
),
# `Tensor.to_sparse`, invoked through a lambda that forwards any extra positional
# arguments. Backward dtypes are narrower than forward dtypes, and batched
# grad/gradgrad checks are disabled.
OpInfo('to_sparse',
op=lambda x, *args: x.to_sparse(*args),
sample_inputs_func=sample_inputs_to_sparse,
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
backward_dtypes=floating_types(),
backward_dtypesIfCUDA=floating_types_and(torch.float16, torch.bfloat16),
supports_out=False,
supports_sparse_csr=True,
supports_sparse_csc=True,
check_batched_grad=False,
check_batched_gradgrad=False,
skips=(
# NotImplementedError: Could not run 'aten::normal_' with arguments from the 'SparseCPU' backend
DecorateInfo(unittest.skip(""), 'TestCommon', 'test_noncontiguous_samples'),
# TODO: FIXME: complex inputs requiring grad error in forward
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_dtypes'),
# lambda impl
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
# Allowed exception: sparse tensors don't have strides
DecorateInfo(unittest.skip("Allowed exception"), 'TestCompositeCompliance', 'test_operator'),
DecorateInfo(unittest.skip("Allowed exception"), 'TestCompositeCompliance', 'test_backward'),
DecorateInfo(unittest.skip("Allowed exception"), 'TestTags', 'test_tags'),
# TODO: implement csr.to_sparse(sample_dim) where sampled_dim is 1.
DecorateInfo(unittest.skip("csr.to_sparse(1) not implemented. Skipped!"),
'TestSparseCSR', 'test_sparse_csr_consistency'),
# Compiler issue on ROCm. Might need to skip until ROCm5.5
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_non_standard_bool_values',
dtypes=[torch.bool], active_if=TEST_WITH_ROCM),
)
),
# `logcumsumexp`: forward supports half, but the backward dtype lists (CPU and CUDA)
# only add bfloat16. Forward-mode AD tests are expected to fail for complex128.
OpInfo('logcumsumexp',
dtypes=floating_and_complex_types_and(torch.bfloat16, torch.half),
backward_dtypes=floating_and_complex_types_and(torch.bfloat16),
backward_dtypesIfCUDA=floating_and_complex_types_and(torch.bfloat16),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
skips=(
# AssertionError: UserWarning not triggered : Resized a non-empty tensor but did not warn about it.
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning', device_type='cuda'),
# RuntimeError: "max_values_cpu" not implemented for 'ComplexDouble'
# Falling back to non-numerically stabilized exp, causing nan in the results.
DecorateInfo(unittest.expectedFailure, 'TestFwdGradients', 'test_forward_mode_AD', dtypes=[torch.complex128]),
DecorateInfo(unittest.expectedFailure, 'TestFwdGradients', 'test_fn_fwgrad_bwgrad', dtypes=[torch.complex128]),
# Looser float16 tolerance for the Inductor comprehensive test on CUDA only
DecorateInfo(
toleranceOverride({
torch.float16: tol(atol=7e-5, rtol=6e-3),
}),
"TestInductorOpInfo",
"test_comprehensive",
device_type="cuda"
),
),
sample_inputs_func=sample_inputs_logcumsumexp,
error_inputs_func=error_inputs_logcumsumexp),
# `sigmoid` (aliased as `special.expit` and `nn.functional.sigmoid`). A reference
# implementation is only attached when SciPy is available. The CUDA dtype list
# additionally includes complex32.
UnaryUfuncInfo('sigmoid',
aliases=('special.expit', 'nn.functional.sigmoid'),
aten_backward_name='sigmoid_backward',
ref=reference_sigmoid if TEST_SCIPY else None,
decorators=(precisionOverride({torch.float16: 1e-2,
torch.complex64: 1e-1,
torch.bfloat16: 1e-2}),),
skips=(
# Reference: https://github.com/pytorch/pytorch/issues/56012
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
dtypes=[torch.complex64, torch.cdouble], device_type='cuda'),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large',
dtypes=[torch.chalf, torch.complex64, torch.cdouble], device_type='cuda')),
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
dtypesIfCUDA=all_types_and_complex_and(torch.complex32, torch.bool, torch.half, torch.bfloat16),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
promotes_int_to_float=True,
assert_autodiffed=True,
# sigmoid(z) = 1 / (1 + exp(-z)), at z = j * pi * odd_number, the denominator is zero
# The condition only flags complex inputs; for real inputs it is a constant False.
reference_numerics_filter=NumericsFilter(
condition=lambda x: (close_to_int(x / (math.pi * 1j))
if x.is_complex() else x.new_tensor(False, dtype=torch.bool)),
safe_val=0)),
# `digamma` (aliased as `special.psi` and `special.digamma`), checked against
# `scipy.special.digamma` when SciPy is available. float16 precision is relaxed to 5e-1.
UnaryUfuncInfo('digamma',
ref=scipy.special.digamma if TEST_SCIPY else None,
aliases=('special.psi', 'special.digamma',),
decorators=(precisionOverride({torch.float16: 5e-1}),),
dtypes=all_types_and(torch.bool, torch.half, torch.bfloat16),
dtypesIfCUDA=all_types_and(torch.bool, torch.half, torch.bfloat16),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
promotes_int_to_float=True),
# `erf` (aliased as `special.erf`), checked against `scipy.special.erf` when SciPy is
# available. Declares support for all sparse layouts (COO, CSR, CSC, BSR, BSC), but
# the sparse gradient test is skipped.
UnaryUfuncInfo('erf',
ref=scipy.special.erf if TEST_SCIPY else None,
aliases=('special.erf', ),
decorators=(precisionOverride({torch.float16: 1e-2,
torch.bfloat16: 1e-2}),),
skips=(
DecorateInfo(unittest.skip("Skipped! sparse backward not supported"),
'TestSparseUnaryUfuncs', 'test_sparse_fn_grad'),
),
dtypes=all_types_and(torch.bool, torch.half, torch.bfloat16),
assert_autodiffed=True,
assert_jit_shape_analysis=True,
supports_sparse=True,
supports_sparse_csr=True,
supports_sparse_csc=True,
supports_sparse_bsr=True,
supports_sparse_bsc=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
promotes_int_to_float=True),
# `erfc` (aliased as `special.erfc`), checked against `scipy.special.erfc` when SciPy
# is available. Unlike the `erf` entry, no sparse support flags are set here.
UnaryUfuncInfo('erfc',
ref=scipy.special.erfc if TEST_SCIPY else None,
aliases=('special.erfc', ),
decorators=(precisionOverride({torch.float16: 1e-2,
torch.bfloat16: 1e-2}),),
dtypes=all_types_and(torch.bool, torch.half, torch.bfloat16),
assert_autodiffed=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
promotes_int_to_float=True),
# `erfinv` (aliased as `special.erfinv`), checked against `scipy.special.erfinv` when
# SciPy is available. The declared domain is (-1, 1). The three reference-numerics
# skips only activate when SciPy is installed and older than 1.4.0.
UnaryUfuncInfo('erfinv',
ref=scipy.special.erfinv if TEST_SCIPY else None,
aliases=('special.erfinv', ),
decorators=(precisionOverride({torch.float16: 1e-2,
torch.bfloat16: 1e-2,
torch.float32: 1e-4}),),
dtypes=all_types_and(torch.bool, torch.half, torch.bfloat16),
dtypesIfCUDA=all_types_and(torch.bool, torch.half, torch.bfloat16),
supports_sparse_csr=True,
supports_sparse_csc=True,
supports_sparse_bsr=True,
supports_sparse_bsc=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
promotes_int_to_float=True,
domain=(-1, 1),
skips=(
# Reference: https://github.com/pytorch/pytorch/pull/49155#issuecomment-742664611
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
active_if=TEST_SCIPY and version.parse(scipy.__version__) < version.parse("1.4.0")),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large',
active_if=TEST_SCIPY and version.parse(scipy.__version__) < version.parse("1.4.0")),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_small',
active_if=TEST_SCIPY and version.parse(scipy.__version__) < version.parse("1.4.0")),
)),
# `nn.functional.smooth_l1_loss`, checked against `reference_smooth_l1_loss`.
# The default backward dtype list omits float16; the CUDA backward list includes it.
OpInfo("nn.functional.smooth_l1_loss",
ref=reference_smooth_l1_loss,
sample_inputs_func=sample_inputs_smooth_l1_loss,
dtypes=floating_types_and(torch.float16, torch.bfloat16),
backward_dtypes=floating_types_and(torch.bfloat16),
dtypesIfCUDA=floating_types_and(torch.float16, torch.bfloat16),
backward_dtypesIfCUDA=floating_types_and(torch.float16, torch.bfloat16),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
skips=(
# RuntimeError: input->type()->kind() == TypeKind::OptionalTypeINTERNAL ASSERT FAILED
# at "../torch/csrc/jit/passes/utils/check_alias_annotation.cpp":270, please report a bug to PyTorch.
DecorateInfo(unittest.expectedFailure, "TestJit", "test_variant_consistency_jit"),)),
# `nn.functional.l1_loss`. The reference is the elementwise `np.abs(input - target)`
# passed through `loss_reference_reduction_wrapper`.
OpInfo(
"nn.functional.l1_loss",
ref=loss_reference_reduction_wrapper(lambda input, target: np.abs(input - target)),
sample_inputs_func=sample_inputs_l1_loss,
error_inputs_func=error_inputs_l1_loss,
dtypes=floating_and_complex_types_and(torch.float16, torch.bfloat16),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
skips=(
# RuntimeError: input->type()->kind() == TypeKind::OptionalTypeINTERNAL ASSERT FAILED
# at "../torch/csrc/jit/passes/utils/check_alias_annotation.cpp":270, please report a bug to PyTorch.
# (expected failure is scoped to float32 only)
DecorateInfo(
unittest.expectedFailure,
"TestJit",
"test_variant_consistency_jit",
dtypes=(torch.float32,),
),
),
),
# `lgamma` (aliased as `special.gammaln`), checked against `reference_lgamma` when
# SciPy is available. float16 precision is relaxed to 7e-1, and two reference-numerics
# tests are skipped for float32/float64 on Windows.
UnaryUfuncInfo('lgamma',
ref=reference_lgamma if TEST_SCIPY else None,
aliases=('special.gammaln', ),
decorators=(precisionOverride({torch.float16: 7e-1}),),
dtypes=all_types_and(torch.bool, torch.half, torch.bfloat16),
dtypesIfCUDA=all_types_and(torch.bool, torch.half, torch.bfloat16),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
promotes_int_to_float=True,
skips=(
# Reference: https://github.com/pytorch/pytorch/pull/50140#issuecomment-756150214
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
dtypes=[torch.float32, torch.float64], active_if=IS_WINDOWS),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large',
dtypes=[torch.float32, torch.float64], active_if=IS_WINDOWS),
),
# lgamma has multiple singularities at x <= 0
reference_numerics_filter=NumericsFilter(condition=lambda x: x < 0.1, safe_val=1)),
# `logdet`: floating and complex dtypes only. Shares its sample generator with the
# other determinant ops and is gated on MAGMA (CUDA) / LAPACK (CPU) availability.
OpInfo(
'logdet',
dtypes=floating_and_complex_types(),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_linalg_det_logdet_slogdet,
decorators=[skipCUDAIfNoMagma, skipCPUIfNoLapack]),
# `log_softmax` supports different dtypes depending on whether the `dtype` argument
# is passed or not. Hence two OpInfo entries, one with `dtype` and one without.
# `log_softmax` called without a `dtype` argument: floating dtypes (plus half and
# bfloat16) only.
OpInfo(
'log_softmax',
aliases=('special.log_softmax', 'nn.functional.log_softmax'),
supports_out=True,
aten_backward_name='_log_softmax_backward_data',
dtypes=floating_types_and(torch.float16, torch.bfloat16),
sample_inputs_func=sample_inputs_softmax_variant,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
assert_autodiffed=True),
# `log_softmax` variant whose samples are generated with `with_dtype=True`; it
# declares a wider dtype list than the plain entry and no `aten_backward_name`.
OpInfo(
'log_softmax',
variant_test_name='with_dtype',
aliases=('special.log_softmax', 'nn.functional.log_softmax'),
supports_out=True,
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf),
sample_inputs_func=partial(sample_inputs_softmax_variant, with_dtype=True),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
assert_autodiffed=True),
# `logit` (aliased as `special.logit`), checked against `scipy.special.logit` when
# SciPy is available. The declared domain is (0, 1); bfloat16 and float16 precision
# are relaxed to 5e-1.
UnaryUfuncInfo('logit',
aten_backward_name='logit_backward',
ref=scipy.special.logit if TEST_SCIPY else None,
domain=(0, 1),
aliases=('special.logit', ),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
promotes_int_to_float=True,
decorators=(precisionOverride({torch.bfloat16: 5e-1,
torch.float16: 5e-1}),),
dtypes=all_types_and(torch.bool, torch.half, torch.bfloat16),
sample_inputs_func=sample_inputs_logit),
# `torch.where`, wrapped so that `self` is the first argument rather than `condition`.
# The NumPy reference is wrapped the same way.
OpInfo('where',
# Currently only the `input` is tested in gradcheck.
# If we pass `condition` first, none of the input which supports
# autograd will be tested. Hence the following lambda.
op=lambda self, condition, other, **kwargs: torch.where(condition, self, other, **kwargs),
ref=lambda self, condition, other: np.where(condition, self, other),
sample_inputs_func=sample_inputs_where,
reference_inputs_func=reference_inputs_where,
error_inputs_func=error_inputs_where,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
decorators=(
# The error-input test is restricted to CUDA
DecorateInfo(onlyCUDA, "TestCommon", 'test_errors'),),
skips=(
# lambda impl
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit'),
),
dtypes=all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.chalf)),
# `nonzero`: autograd is declared unsupported. Several `out=`-related, JIT and
# signature tests are marked as expected failures for the reasons noted inline.
OpInfo('nonzero',
dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.float16, torch.chalf),
sample_inputs_func=sample_inputs_nonzero,
supports_autograd=False,
skips=(
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
# nonzero(): argument 'out' must be Tensor, not tuple
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out'),
# https://github.com/pytorch/pytorch/issues/67458
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
# nonzero is not raising a warning when the out is resized
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning'),
# Can't find schemas for this operator for some reason
DecorateInfo(unittest.expectedFailure, 'TestOperatorSignatures', 'test_get_torch_func_signature_exhaustive'),
# Compiler issue on ROCm. Might need to skip until ROCm5.5
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_non_standard_bool_values',
dtypes=[torch.bool], active_if=TEST_WITH_ROCM),
)),
# `nonzero_static`: restricted to CPU via the `onlyCPU` decorator; neither `out=`
# nor autograd is declared as supported.
OpInfo('nonzero_static',
dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.float16, torch.chalf),
sample_inputs_func=sample_inputs_nonzero_static,
supports_out=False,
supports_autograd=False,
decorators=[onlyCPU],
skips=(
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out'),
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning'),
DecorateInfo(unittest.expectedFailure, 'TestDTensorOps', 'test_dtensor_op_db'),
DecorateInfo(unittest.expectedFailure, 'TestInductorOpInfo', 'test_comprehensive'),
DecorateInfo(unittest.expectedFailure, 'TestVmapOperatorsOpInfo', 'test_op_has_batch_rule'),
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_non_standard_bool_values',
dtypes=[torch.bool], active_if=TEST_WITH_ROCM),
)),
# The following entries test jiterator's Python interface.
# Jiterator can be used to author elementwise CUDA kernels:
# jiterator._create_jit_fn returns a callable that behaves like a regular PyTorch op.
# See create_jit_fn in jiterator.py for more information.
# Jiterator-defined unary kernel computing `x * x + x`, with a Python lambda of the
# same expression as reference. CUDA only.
UnaryUfuncInfo(
'jiterator_unary',
op=torch.cuda.jiterator._create_jit_fn("template <typename T> T unary(T x) { return x * x + x; }"),
ref=lambda x: x * x + x,
dtypes=all_types_and_complex_and(torch.bfloat16, torch.float16, torch.bool),
supports_out=False,
supports_autograd=False,  # jiterator ops don't have backward defined
decorators=[
onlyCUDA,
# Same relaxed float16 tolerance for four of the reference-numerics tests
DecorateInfo(toleranceOverride({torch.float16: tol(atol=1e-02, rtol=1e-02)}),
'TestUnaryUfuncs', 'test_reference_numerics_extremal'),
DecorateInfo(toleranceOverride({torch.float16: tol(atol=1e-02, rtol=1e-02)}),
'TestUnaryUfuncs', 'test_reference_numerics_hard'),
DecorateInfo(toleranceOverride({torch.float16: tol(atol=1e-02, rtol=1e-02)}),
'TestUnaryUfuncs', 'test_reference_numerics_normal'),
DecorateInfo(toleranceOverride({torch.float16: tol(atol=1e-02, rtol=1e-02)}),
'TestUnaryUfuncs', 'test_reference_numerics_small'),
],
skips=(
# Jiterator ops don't support neg or conj view
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_conj_view'),
# Jiterator ops don't support CompositeCompliantTensor
# Following test should expectedFailure, but it's causing cascading failures in CUDA, thus skipped
DecorateInfo(unittest.skip("skip"), 'TestCompositeCompliance', 'test_operator'),
# Skip reference_numerics tests for bool type, as the defined function doesn't work for bool
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
dtypes=[torch.bool]),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_hard',
dtypes=[torch.bool]),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_normal',
dtypes=[torch.bool]),
# ROCm generates -inf+infj instead of nan+infj for complex64 for some of the results
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large',
dtypes=[torch.complex64], active_if=TEST_WITH_ROCM),
# Newer numpy generates -inf+infj instead of nan+infj for complex64 for some of the results
# NOTE(review): this targets the same test and dtype as the ROCm-only skip above;
# the two may overlap -- confirm whether both are still needed.
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_large',
dtypes=[torch.complex64], device_type='cuda'),
# Expected failure: torch.jiterator_unary is not a valid op
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
# Skip Nvfuser
DecorateInfo(unittest.skip('Skipped!'), 'TestCudaFuserOpInfo'),
)
),
# Jiterator-defined binary kernel computing `x + alpha * y` (kernel default alpha=1).
# The reference uses a plain `np.add` when alpha == 1 and otherwise scales `other`
# by alpha first. CUDA only.
BinaryUfuncInfo(
'jiterator_binary',
op=torch.cuda.jiterator._create_jit_fn(
"template <typename T> T binary(T x, T y, T alpha) { return x + alpha * y; }", alpha=1),
ref=lambda input, other, *, alpha=1: np.add(input, other) if alpha == 1 \
else np.add(input, np.multiply(alpha, other)),
dtypes=all_types_and_complex_and(torch.bfloat16, torch.float16, torch.bool),
sample_inputs_func=partial(sample_inputs_jiterator, num_inputs=2, alpha=-3.14),
supports_out=False,
supports_autograd=False,  # jiterator ops don't have backward defined
supports_rhs_python_scalar=False,
decorators=[onlyCUDA],
skips=(
# Jiterator ops don't support neg or conj view
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_conj_view'),
# Jiterator ops don't support CompositeCompliantTensor
# Following test should expectedFailure, but it's causing cascading failures in CUDA, thus skipped
DecorateInfo(unittest.skip("skip"), 'TestCompositeCompliance', 'test_operator'),
# Expected failure: torch.jiterator_binary is not a valid op
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
# Skip Nvfuser
DecorateInfo(unittest.skip('Skipped!'), 'TestCudaFuserOpInfo'),
)
),
# Jiterator-defined kernel with four tensor inputs and two scalar extras, computing
# `alpha * i0 + beta * i1 + i2 + i3` (kernel defaults alpha=1, beta=1). The reference
# lambda evaluates the same expression. CUDA only.
OpInfo(
'jiterator_4inputs_with_extra_args',
op=torch.cuda.jiterator._create_jit_fn(
"template <typename T> T binary(T i0, T i1, T i2, T i3, T alpha, T beta) { return alpha * i0 + beta * i1 + i2 + i3; }",
alpha=1, beta=1),
ref=lambda i0, i1, i2, i3, *, alpha=1, beta=1: alpha * i0 + beta * i1 + i2 + i3,
dtypes=all_types_and_complex_and(torch.bfloat16, torch.float16, torch.bool),
sample_inputs_func=partial(sample_inputs_jiterator, num_inputs=4, alpha=3.14, beta=-4.20),
supports_out=False,
supports_autograd=False,  # jiterator ops don't have backward defined
decorators=[onlyCUDA],
skips=(
# Jiterator ops don't support neg or conj view
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_conj_view'),
# Jiterator ops don't support CompositeCompliantTensor
# Following test should expectedFailure, but it's causing cascading failures in CUDA, thus skipped
DecorateInfo(unittest.skip("skip"), 'TestCompositeCompliance', 'test_operator'),
# Expected failure: torch.jiterator_4inputs_with_extra_args is not a valid op
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
# Skip Nvfuser
DecorateInfo(unittest.skip('Skipped!'), 'TestCudaFuserOpInfo'),
)
),
# Jiterator multi-output kernel (created with num_outputs=1) that writes `i0 + i1`
# through a reference parameter; `operator.add` is the reference. CUDA only.
BinaryUfuncInfo(
'jiterator_binary_return_by_ref',
op=torch.cuda.jiterator._create_multi_output_jit_fn(
"""
template <typename T>
void binary_return_by_ref(T i0, T i1, T& out0) {
out0 = i0 + i1;
}
""",
num_outputs=1),
ref=operator.add,
dtypes=all_types_and_complex_and(torch.bfloat16, torch.float16, torch.bool),
sample_inputs_func=partial(sample_inputs_jiterator, num_inputs=2, alpha=-0.42),
supports_out=False,
supports_autograd=False,  # jiterator ops don't have backward defined
supports_rhs_python_scalar=False,
decorators=[onlyCUDA],
skips=(
# Jiterator ops don't support neg or conj view
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_conj_view'),
# Jiterator ops don't support CompositeCompliantTensor
# Following test should expectedFailure, but it's causing cascading failures in CUDA, thus skipped
DecorateInfo(unittest.skip("skip"), 'TestCompositeCompliance', 'test_operator'),
# Expected failure: torch.jiterator_binary_return_by_ref is not a valid op
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
# Skip Nvfuser
DecorateInfo(unittest.skip('Skipped!'), 'TestCudaFuserOpInfo'),
)
),
# Jiterator multi-output kernel (num_outputs=2) producing `i0 + i1` and `i0 - i1`.
# The reference returns the same pair as a tuple; its `alpha` keyword is accepted
# but not used. CUDA only.
OpInfo(
'jiterator_2inputs_2outputs',
op=torch.cuda.jiterator._create_multi_output_jit_fn(
"""
template <typename T>
void binary_2outputs(T i0, T i1, T& out0, T& out1) {
out0 = i0 + i1;
out1 = i0 - i1;
}
""",
num_outputs=2),
ref=lambda i0, i1, *, alpha=1: (i0 + i1, i0 - i1),
dtypes=all_types_and_complex_and(torch.bfloat16, torch.float16, torch.bool),
sample_inputs_func=partial(sample_inputs_jiterator, num_inputs=2),
supports_out=False,
supports_autograd=False,  # jiterator ops don't have backward defined
decorators=[onlyCUDA],
skips=(
# Jiterator ops don't support neg or conj view
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_conj_view'),
# Jiterator ops don't support CompositeCompliantTensor
# Following test should expectedFailure, but it's causing cascading failures in CUDA, thus skipped
DecorateInfo(unittest.skip("skip"), 'TestCompositeCompliance', 'test_operator'),
# Expected failure: torch.jiterator_2inputs_2outputs is not a valid op
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
# Skip Nvfuser
DecorateInfo(unittest.skip('Skipped!'), 'TestCudaFuserOpInfo'),
)
),
# `torch.norm` has multiple code paths depending on the value of `p`.
# These paths have different dtype support. Also, JIT supports
# most variants but not all of them. So we split the OpInfo entries
# for `norm` based on the code paths and JIT support.
OpInfo(
"norm",
sample_inputs_func=sample_inputs_norm,
dtypes=floating_and_complex_types_and(torch.float16, torch.bfloat16, torch.chalf),
dtypesIfCUDA=floating_and_complex_types_and(torch.float16, torch.bfloat16),
# TODO Benchmark again with the new implementation
# Runs very slowly on slow gradcheck - alternatively reduce input sizes
gradcheck_fast_mode=True,
check_batched_forward_grad=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
skips=(
# Dispatches in Python to vector_norm. Not sure how to make this test happy
# Happens to pass on complex64. Also a mystery
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit',
dtypes=(torch.float32,)),)
),
OpInfo('norm',
variant_test_name='nuc',
sample_inputs_func=sample_inputs_norm_nuc,
decorators=[skipCUDAIfNoMagmaAndNoCusolver, skipCPUIfNoLapack],
check_batched_gradgrad=False,
# torch.autograd.gradcheck.GradcheckError: While computing batched gradients
# got: Could not allocate memory to change Tensor SizesAndStrides!
check_batched_forward_grad=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
dtypes=floating_and_complex_types(),
dtypesIfCUDA=floating_and_complex_types(),
skips=(
# Dispatches in Python to matrix_norm. Not sure how to make this test happy
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit',
dtypes=(torch.complex64, torch.float32,)),)
),
OpInfo('norm',
variant_test_name='fro',
sample_inputs_func=sample_inputs_norm_fro,
dtypes=floating_and_complex_types_and(torch.bfloat16, torch.float16),
dtypesIfCUDA=floating_and_complex_types_and(torch.float16, torch.bfloat16),
supports_forward_ad=True,
# torch.autograd.gradcheck.GradcheckError: While computing batched gradients
# got: Could not allocate memory to change Tensor SizesAndStrides!
check_batched_forward_grad=False,
supports_fwgrad_bwgrad=True,
skips=(
# MPS has some mild accuracy issues for float16. We divide the tolerances by 10
DecorateInfo(
toleranceOverride({torch.float16: tol(atol=1e-4, rtol=0.01)}),
'TestConsistency',
'test_output_match',
),
# Issue with conj and torch dispatch, see https://github.com/pytorch/pytorch/issues/82479
DecorateInfo(
unittest.skip("Skipped!"),
'TestSchemaCheckModeOpInfo',
'test_schema_correctness',
dtypes=(torch.complex64, torch.complex128)),
# Dispatches in Python to vector_norm. Not sure how to make this test happy
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit',
dtypes=(torch.complex64, torch.float32,)),)
),
OpInfo(
"norm",
variant_test_name="inf",
sample_inputs_func=sample_inputs_norm_inf,
dtypes=floating_and_complex_types_and(torch.float16, torch.bfloat16, torch.chalf),
dtypesIfCUDA=floating_and_complex_types_and(torch.float16, torch.bfloat16),
supports_forward_ad=True,
check_batched_forward_grad=False,
supports_fwgrad_bwgrad=True,
# fast gradcheck produces NaNs
gradcheck_fast_mode=False,
skips=(
DecorateInfo(
toleranceOverride({torch.float16: tol(atol=2e-3, rtol=1e-3)}),
'TestInductorOpInfo', 'test_comprehensive', device_type='cuda',
),
# Dispatches in Python to vector_norm. Not sure how to make this test happy
# Happens to pass on complex64. Also a mystery
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit',
dtypes=(torch.float32,))
),
),
OpInfo('t',
sample_inputs_func=sample_inputs_t,
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# See https://github.com/pytorch/pytorch/pull/78358
check_batched_forward_grad=False,
# vmap does not support inplace views
check_inplace_batched_forward_grad=False,
autodiff_fusible_nodes=[], # aliases inputs, shouldn't be fused
autodiff_nonfusible_nodes=[], # aliases inputs, shouldn't be fused
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
assert_autodiffed=True,
error_inputs_func=error_inputs_t),
OpInfo('t_copy',
sample_inputs_func=sample_inputs_t,
supports_out=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# See https://github.com/pytorch/pytorch/pull/78358
check_batched_forward_grad=False,
# vmap does not support inplace views
check_inplace_batched_forward_grad=False,
autodiff_fusible_nodes=[], # aliases inputs, shouldn't be fused
autodiff_nonfusible_nodes=[], # aliases inputs, shouldn't be fused
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
assert_autodiffed=True,
error_inputs_func=error_inputs_t),
OpInfo(
"nn.functional.dropout",
op=lambda input, *args, **kwargs:
wrapper_set_seed(torch.nn.functional.dropout, input, *args, **kwargs),
dtypes=floating_types_and(torch.float16, torch.bfloat16),
skips=(
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
# Probably because we have used lambda for the op here
# AssertionError: JIT Test does not execute any logic
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
# inplace variant dispatches to dropout kernel, while on CUDA
# the op dispatches to _fused_dropout (with a few more conditions)
# hence, different values and this skip here
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_neg_view', device_type='cuda'),
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu')),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# https://github.com/pytorch/pytorch/issues/66357
check_batched_forward_grad=False,
supports_out=False,
sample_inputs_func=sample_inputs_dropout,
inplace_variant=lambda input, *args, **kwargs:
wrapper_set_seed(torch.nn.functional.dropout, input, *args, **kwargs, inplace=True)),
OpInfo(
"native_dropout_backward",
op=torch.ops.aten.native_dropout_backward.default,
aten_name="native_dropout_backward",
dtypes=all_types_and(torch.float16, torch.bfloat16, torch.bool),
dtypesIfCUDA=floating_types_and(torch.float16, torch.bfloat16),
supports_out=False,
sample_inputs_func=sample_inputs_dropout_backward,
skips=(
DecorateInfo(unittest.skip('Skipped!'), 'TestJit', 'test_variant_consistency_jit'),
# Lazy tensor failures
DecorateInfo(unittest.skip('Skipped!'), 'TestLazyOpInfo', 'test_dispatched_to_lazy'),
# These tests fail only when built with ASAN
DecorateInfo(unittest.skip("Fails with ASAN"), 'TestLazyOpInfo', 'test_correctness', active_if=TEST_WITH_ASAN),
DecorateInfo(
unittest.skip("Fails with ASAN"),
'TestLazyOpInfo',
'test_correctness_with_reusing_ir',
active_if=TEST_WITH_ASAN
),
),
),
OpInfo(
"nn.functional.dropout2d",
op=lambda input, *args, **kwargs:
wrapper_set_seed(torch.nn.functional.dropout2d, input, *args, **kwargs),
dtypes=floating_types_and(torch.float16, torch.bfloat16),
skips=(
# lambda impl
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu')),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_out=False,
check_batched_forward_grad=False,
# As per the docs, valid input dims are (3, 4)
sample_inputs_func=partial(sample_inputs_dropout, valid_input_dim=(3, 4)),
inplace_variant=lambda input, *args, **kwargs:
wrapper_set_seed(torch.nn.functional.dropout2d, input, *args, **kwargs, inplace=True)),
OpInfo(
"nn.functional.dropout3d",
op=lambda input, *args, **kwargs:
wrapper_set_seed(torch.nn.functional.dropout3d, input, *args, **kwargs),
dtypes=floating_types_and(torch.float16, torch.bfloat16),
skips=(
# lambda impl
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu')),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_out=False,
check_batched_forward_grad=False,
# As per the docs, valid input dims are (4, 5)
sample_inputs_func=partial(sample_inputs_dropout, valid_input_dim=(4, 5)),
inplace_variant=lambda input, *args, **kwargs:
wrapper_set_seed(torch.nn.functional.dropout3d, input, *args, **kwargs, inplace=True)),
OpInfo(
"nn.functional.alpha_dropout",
op=lambda input, *args, **kwargs:
wrapper_set_seed(torch.nn.functional.alpha_dropout, input, *args, **kwargs),
dtypes=floating_types_and(torch.float16, torch.bfloat16),
gradcheck_wrapper=wrapper_set_seed,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_out=False,
sample_inputs_func=sample_inputs_dropout,
check_batched_forward_grad=False,
inplace_variant=lambda input, *args, **kwargs:
wrapper_set_seed(torch.nn.functional.alpha_dropout, input, *args, **kwargs, inplace=True),
skips=(
# lambda impl
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
# AssertionError: Tensor-likes are not close!
# Fails in cuda11.7
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_compare_cpu', device_type='cuda'),
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),),),
# In training mode, feature_alpha_dropout currently doesn't support inputs of complex dtype,
# whereas with `train=False` it does support complex inputs; hence 2 OpInfos to cover all cases
OpInfo(
"nn.functional.feature_alpha_dropout",
op=lambda input, *args, **kwargs:
wrapper_set_seed(torch.nn.functional.feature_alpha_dropout, input, *args, **kwargs),
variant_test_name="with_train",
dtypes=floating_types_and(torch.float16, torch.bfloat16),
skips=(
# lambda impl
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
# torch.autograd.gradcheck.GradcheckError: While computing batched gradients, got:
# vmap: We do not yet support calling random operations inside of vmap.
# Please perform random operations outside of vmap as a workaround
DecorateInfo(unittest.expectedFailure, 'TestFwdGradients', "test_forward_mode_AD"),
DecorateInfo(unittest.expectedFailure, 'TestFwdGradients', "test_inplace_forward_mode_AD"),
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu')),
# Runs very slowly on slow gradcheck - alternatively reduce input sizes
gradcheck_fast_mode=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_out=False,
# As per the docs, valid input dims are (4, 5)
sample_inputs_func=partial(sample_inputs_dropout, train=True, valid_input_dim=(4, 5)),
inplace_variant=lambda input, *args, **kwargs:
wrapper_set_seed(torch.nn.functional.feature_alpha_dropout, input, *args, **kwargs, inplace=True)),
OpInfo(
"nn.functional.feature_alpha_dropout",
op=lambda input, *args, **kwargs:
wrapper_set_seed(torch.nn.functional.feature_alpha_dropout, input, *args, **kwargs),
variant_test_name="without_train",
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
skips=(
# lambda impl
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),),
gradcheck_wrapper=wrapper_set_seed,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
supports_out=False,
sample_inputs_func=partial(sample_inputs_dropout, train=False),
inplace_variant=lambda input, *args, **kwargs:
wrapper_set_seed(torch.nn.functional.feature_alpha_dropout, input, *args, **kwargs, inplace=True)),
OpInfo(
"nn.functional.one_hot",
ref=reference_one_hot,
supports_out=False,
dtypes=_dispatch_dtypes((torch.int64,)),
sample_inputs_func=sample_inputs_one_hot,
),
OpInfo(
"nn.functional.embedding",
aten_backward_name="embedding_dense_backward",
# We use lambda to reshuffle the positional arguments.
# This is because currently only the `input` field of SampleInput
# is tested in gradient tests.
op=lambda weight, idx, **kwargs: torch.nn.functional.embedding(idx, weight, **kwargs),
dtypes=floating_types_and(torch.bfloat16, torch.float16),
sample_inputs_func=sample_inputs_embedding,
allow_cow_input_materialize_forward=[0],
error_inputs_func=error_inputs_embedding,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
skips=(
# lambda impl
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
# Fails on CI https://github.com/pytorch/pytorch/issues/85377
DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_compare_cpu'),
# Reference: https://github.com/pytorch/pytorch/issues/67084
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_neg_view', device_type='cuda'),
# Not a problem: embedding does weird stuff to its input (it renormalizes)
DecorateInfo(unittest.skip('Allowed exemption'), 'TestCompositeCompliance', 'test_operator'),
# Fails due to non-determinism (see issue #74679)
# TODO: Investigate why more granular skips in the test don't work in CI
DecorateInfo(unittest.skip('Skipped!'),
'TestExpandedWeightFunctional',
'test_expanded_weight_forward'),
),
supports_expanded_weight=True,
supports_out=False,
),
OpInfo(
"nn.functional.embedding_bag",
# We use lambda to reshuffle the positional arguments.
# This is because currently only the `input` field of SampleInput
# is tested in gradient tests.
op=lambda weight, idx, **kwargs: torch.nn.functional.embedding_bag(idx, weight, **kwargs),
dtypes=floating_types_and(torch.bfloat16, torch.float16),
dtypesIfCUDA=floating_types_and(torch.bfloat16, torch.float16),
# backward is not supported for mode `max` and dtype `bfloat16`
backward_dtypesIfCUDA=floating_types_and(torch.float16),
sample_inputs_func=sample_inputs_embedding_bag,
skips=(
# lambda impl
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
DecorateInfo(unittest.expectedFailure, 'TestNormalizeOperators', 'test_normalize_operator_exhaustive'),
# Not a problem: embedding_bag does weird stuff to its input (it renormalizes)
DecorateInfo(unittest.skip('Allowed exemption'), 'TestCompositeCompliance', 'test_operator'),
),
gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
supports_out=False,
supports_gradgrad=False,
allow_cow_input_materialize_forward=[0],
),
OpInfo(
"nn.functional.multi_head_attention_forward",
op=lambda input, *args, **kwargs:
wrapper_set_seed(torch.nn.functional.multi_head_attention_forward, input, *args, **kwargs),
dtypes=floating_types_and(torch.bfloat16, torch.float16),
sample_inputs_func=sample_inputs_multi_head_attention_forward,
skips=(
# Tensor-likes are not close
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_noncontiguous_samples', dtypes=(torch.float32,)),
DecorateInfo(toleranceOverride({torch.float32: tol(atol=5e-3, rtol=0)}), 'TestDecomp', 'test_comprehensive'),
# TODO skip this for now since we can't skip on runtime arch support (taken from scaled_dot_product_attention)
DecorateInfo(unittest.skip("Skipped!"), 'TestInductorOpInfo', 'test_comprehensive'),
# randomness
DecorateInfo(unittest.skip("Skipped!"), 'TestFwdGradients', 'test_forward_mode_AD'),
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'),
# lambda impl
# AssertionError: JIT Test does not execute any logic
DecorateInfo(unittest.expectedFailure, 'TestJit', 'test_variant_consistency_jit'),
DecorateInfo(unittest.expectedFailure, "TestNormalizeOperators", "test_normalize_operator_exhaustive"),
# tests running very slowly break slow tests, so we skip them instead of using `slowTest`.
DecorateInfo(unittest.skip("Skipped!"), 'TestCompositeCompliance', 'test_forward_ad'),
DecorateInfo(unittest.skip("Skipped!"), 'TestCompositeCompliance', 'test_operator'),
DecorateInfo(
unittest.skip("Skipped - baddbmm decomp does not have enough precision for 16-bit float"),
'TestDecomp',
'test_comprehensive',
dtypes=(torch.bfloat16, torch.float16),
),
DecorateInfo(
unittest.skip("Skipped - baddbmm decomp does not have enough precision for 16-bit float"),
'TestDecomp',
'test_quick',
dtypes=(torch.bfloat16, torch.float16))),
supports_out=False,
supports_gradgrad=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# Runs very slowly on slow gradcheck - alternatively reduce input sizes
gradcheck_fast_mode=True,
),
UnaryUfuncInfo(
"nn.functional.softplus",
aten_backward_name='softplus_backward',
ref=reference_softplus,
sample_kwargs=lambda device, dtype, input: ({'beta': 3, 'threshold': .2}, {'beta': 3, 'threshold': .2}),
sample_inputs_func=partial(sample_inputs_elementwise_unary, op_kwargs={'beta': 3, 'threshold': .2}),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
dtypes=floating_types_and(torch.bfloat16, torch.float16),
decorators=(
DecorateInfo(
toleranceOverride
({
torch.half: tol(atol=1e-2, rtol=1e-2),
torch.bfloat16: tol(atol=1e-2, rtol=1e-2),
}),
'TestUnaryUfuncs'),
),
),
OpInfo(
"nn.functional.mse_loss",
aten_backward_name='mse_loss_backward',
ref=loss_reference_reduction_wrapper(lambda input, target: (input - target) ** 2),
sample_inputs_func=sample_inputs_loss,
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
dtypes=floating_types_and(torch.float16, torch.bfloat16),
skips=(
# RuntimeError: input->type()->kind() == TypeKind::OptionalType
# INTERNAL ASSERT FAILED at "../torch/csrc/jit/passes/utils/check_alias_annotation.cpp":252,
# please report a bug to PyTorch.
DecorateInfo(unittest.expectedFailure, "TestJit", "test_variant_consistency_jit", dtypes=(torch.float32,),),
),
),
OpInfo(
"nn.functional.grid_sample",
dtypes=floating_types_and(torch.float16, torch.bfloat16),
supports_out=False,
sample_inputs_func=sample_inputs_grid_sample,
reference_inputs_func=reference_inputs_grid_sample,
supports_gradgrad=False,
gradcheck_nondet_tol=1e-15),
# TODO: delete this OpInfo once we add meta support for grid_sampler_3d
OpInfo(
"grid_sampler_2d",
dtypes=floating_types_and(torch.float16, torch.bfloat16),
supports_out=False,
sample_inputs_func=sample_inputs_grid_sampler_2d,
supports_gradgrad=False,
gradcheck_nondet_tol=1e-15,
skips=(
DecorateInfo(slowTest, 'TestDecomp', 'test_comprehensive', dtypes=(torch.float32, torch.float64),
active_if=IS_WINDOWS),
),),
OpInfo(
"argwhere",
ref=np.argwhere,
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
supports_out=False,
supports_autograd=False,
sample_inputs_func=sample_inputs_argwhere,
skips=(
# Compiler issue on ROCm. Might need to skip until ROCm5.5
DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_non_standard_bool_values',
dtypes=[torch.bool], active_if=TEST_WITH_ROCM),
),
),
ReductionOpInfo(
'all',
identity=True,
supports_autograd=False,
result_dtype=torch.bool,
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
ref=reference_reduction_numpy(np.all),
skips=(
# FIXME: uint8 input returns uint8 instead of bool
DecorateInfo(unittest.expectedFailure, 'TestReductions', 'test_result_dtype', dtypes=[torch.uint8]),
),
),
ReductionOpInfo(
'any',
identity=False,
supports_autograd=False,
result_dtype=torch.bool,
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
ref=reference_reduction_numpy(np.any),
skips=(
# FIXME: uint8 input returns uint8 instead of bool
DecorateInfo(unittest.expectedFailure, 'TestReductions', 'test_result_dtype', dtypes=[torch.uint8]),
),
),
ReductionOpInfo(
'amax',
nan_policy='propagate',
supports_forward_ad=True,
check_batched_forward_grad=False,
supports_fwgrad_bwgrad=True,
dtypes=all_types_and(torch.float16, torch.bfloat16, torch.bool),
ref=reference_reduction_numpy(np.amax),
skips=(
# FIXME: reduces all dimensions when dim=[]
DecorateInfo(unittest.expectedFailure, 'TestReductions', 'test_dim_empty'),
DecorateInfo(unittest.expectedFailure, 'TestReductions', 'test_dim_empty_keepdim'),
),
error_inputs_func=error_inputs_aminmax_amax_amin,
),
ReductionOpInfo(
'amin',
nan_policy='propagate',
supports_forward_ad=True,
check_batched_forward_grad=False,
supports_fwgrad_bwgrad=True,
dtypes=all_types_and(torch.float16, torch.bfloat16, torch.bool),
ref=reference_reduction_numpy(np.amin),
skips=(
# FIXME: reduces all dimensions when dim=[]
DecorateInfo(unittest.expectedFailure, 'TestReductions', 'test_dim_empty'),
DecorateInfo(unittest.expectedFailure, 'TestReductions', 'test_dim_empty_keepdim'),
),
error_inputs_func=error_inputs_aminmax_amax_amin,
),
ReductionOpInfo(
'argmax',
supports_multiple_dims=False,
supports_autograd=False,
assert_jit_shape_analysis=True,
result_dtype=torch.int64,
dtypes=all_types_and(torch.float16, torch.bfloat16),
ref=reference_reduction_numpy(np.argmax, supports_keepdims=False),
),
ReductionOpInfo(
'argmin',
supports_multiple_dims=False,
supports_autograd=False,
result_dtype=torch.int64,
dtypes=all_types_and(torch.float16, torch.bfloat16),
ref=reference_reduction_numpy(np.argmin, supports_keepdims=False),
),
ReductionOpInfo(
'count_nonzero',
identity=0,
supports_out=False,
supports_autograd=False,
result_dtype=torch.int64,
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
sample_inputs_func=sample_inputs_reduction_count_nonzero,
ref=reference_reduction_numpy(np.count_nonzero),
skips=(
# FIXME: count_nonzero does not accept keepdim kwarg
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_dim_default_keepdim'),
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_dim_none_keepdim'),
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_dim_single_keepdim'),
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_dim_empty_keepdim'),
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_dim_multi_keepdim'),
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_dim_multi_unsorted_keepdim'),
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_dim_offbounds_keepdim'),
# FIXME: dim=[] reduces all dimensions
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_dim_empty'),
),
),
ReductionOpInfo(
'mean',
nan_policy='propagate',
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# FIXME: mean needs 'dim' parameter when using the 'out' overload.
# Adding it with 'generate_args_kwargs' does not work, since these also get passed
# onto the reference implementations.
supports_out=True,
assert_autodiffed=True,
assert_jit_shape_analysis=True,
promotes_int_to_float=True,
dtypes=floating_and_complex_types_and(torch.float16, torch.bfloat16),
ref=reference_reduction_numpy(np.mean),
error_inputs_func=error_inputs_mean,
skips=(
# AssertionError: RuntimeError not raised : Expected RuntimeError when doing an unsafe cast from a result
# of dtype torch.float32 into an out= with dtype torch.long
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_out', device_type='cuda', dtypes=[torch.float32]),
# FIXME: mean does not support passing keepdim without passing dim
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_dim_default_keepdim'),
# FIXME: mean reduces all dimensions when dim=[]
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_dim_empty'),
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_dim_empty_keepdim'),
# FIXME: improve precision
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_ref_small_input',
dtypes=[torch.float16]),
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_ref_extremal_values',
device_type='cuda', dtypes=[torch.complex64]),
),
),
ReductionOpInfo(
'nanmean',
nan_policy='omit',
assert_autodiffed=True,
promotes_int_to_float=True,
supports_forward_ad=True,
check_batched_forward_grad=False,
supports_fwgrad_bwgrad=True,
dtypes=floating_types_and(torch.float16, torch.bfloat16),
dtypesIfCUDA=floating_and_complex_types_and(torch.float16, torch.bfloat16, torch.chalf),
sample_inputs_func=sample_inputs_nan_reduction(supports_multiple_dims=True),
ref=reference_reduction_numpy(np.nanmean),
skips=(
# AssertionError: False is not true :
# Failure in testing nodes' autodifferentiation.
DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit'),
# FIXME: nanmean reduces all dimensions when dim=[]
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_dim_empty'),
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_dim_empty_keepdim'),
# FIXME: improve precision
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_ref_small_input',
dtypes=[torch.float16]),
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_ref_duplicate_values',
device_type='cuda', dtypes=[torch.float16]),
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_ref_extremal_values',
device_type='cuda', dtypes=[torch.complex64]),
DecorateInfo(toleranceOverride({torch.float16: tol(atol=2e-5, rtol=4e-2)}),
"TestConsistency", "test_output_match", device_type="mps"),
),
),
ReductionOpInfo(
'std',
nan_policy='propagate',
supports_out=True,
complex_to_real=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
assert_autodiffed=True,
promotes_int_to_float=True,
check_batched_forward_grad=False,
dtypes=floating_and_complex_types_and(torch.half, torch.bfloat16),
dtypesIfCUDA=floating_and_complex_types_and(torch.half, torch.bfloat16),
sample_inputs_func=sample_inputs_std_var,
ref=reference_std_var(np.std),
generate_args_kwargs=generate_std_var_kwargs,
skips=(
# FIXME: cannot specify keepdim without dim
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_dim_default_keepdim'),
# FIXME: dim=[] reduces all dimensions
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_dim_empty'),
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_dim_empty_keepdim'),
# FIXME: improve precision
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_ref_small_input',
dtypes=(torch.float16,)),
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_ref_duplicate_values',
dtypes=(torch.float16,)),
),
),
ReductionOpInfo(
'std',
variant_test_name='unbiased',
nan_policy='propagate',
supports_out=False,
complex_to_real=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
assert_autodiffed=True,
promotes_int_to_float=True,
check_batched_forward_grad=False,
dtypes=floating_and_complex_types_and(torch.half, torch.bfloat16),
dtypesIfCUDA=floating_and_complex_types_and(torch.half, torch.bfloat16),
sample_inputs_func=sample_inputs_std_var_unbiased,
skips=(
# FIXME: dim=[] reduces all dimensions
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_dim_empty'),
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_dim_empty_keepdim'),
),
),
ReductionOpInfo(
'var',
nan_policy='propagate',
supports_out=True,
assert_autodiffed=True,
promotes_int_to_float=True,
complex_to_real=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
check_batched_forward_grad=False,
dtypes=floating_and_complex_types_and(torch.half, torch.bfloat16),
dtypesIfCUDA=floating_and_complex_types_and(torch.half, torch.bfloat16),
sample_inputs_func=sample_inputs_std_var,
ref=reference_std_var(np.var),
generate_args_kwargs=generate_std_var_kwargs,
skips=(
# FIXME: cannot specify keepdim without dim
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_dim_default_keepdim'),
# FIXME: dim=[] reduces all dimensions
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_dim_empty'),
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_dim_empty_keepdim'),
# FIXME: improve precision
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_ref_small_input'),
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_ref_duplicate_values'),
# NumPy is giving NaN for this
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_ref_large_input'),
),
),
ReductionOpInfo(
'var',
variant_test_name='unbiased',
nan_policy='propagate',
supports_out=False,
complex_to_real=True,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
assert_autodiffed=True,
promotes_int_to_float=True,
check_batched_forward_grad=False,
dtypes=floating_and_complex_types_and(torch.half, torch.bfloat16),
dtypesIfCUDA=floating_and_complex_types_and(torch.half, torch.bfloat16),
sample_inputs_func=sample_inputs_std_var_unbiased,
skips=(
# FIXME: dim=[] reduces all dimensions
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_dim_empty'),
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_dim_empty_keepdim'),
),
),
ReductionOpInfo(
'prod',
identity=1,
nan_policy='propagate',
supports_multiple_dims=False,
# https://github.com/pytorch/pytorch/issues/80411
gradcheck_fast_mode=True,
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
promotes_int_to_int64=True,
gradcheck_nondet_tol=GRADCHECK_NONDET_TOL,
dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.float16),
dtypesIfCUDA=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf),
sample_inputs_func=sample_inputs_prod,
ref=prod_numpy,
skips=(
# FIXME: prod does not support passing keepdim without passing dim
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_dim_default_keepdim'),
# FIXME: prod reduces all dimensions when dim=[]
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_dim_empty'),
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_dim_empty_keepdim'),
# FIXME: prod does not support passing None to dim
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_dim_none'),
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_dim_none_keepdim'),
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_ref_small_input',
dtypes=[torch.float16, torch.complex64]),
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_ref_duplicate_values',
dtypes=[torch.uint8, torch.float16, torch.complex64]),
# FIXME: ValueError: The data in MaskedTensor a and Tensor b do not match
DecorateInfo(unittest.skip("Skipped!"), 'TestOperators', 'test_reduction_all',
dtypes=[torch.float16]),
),
),
ReductionOpInfo(
'sum',
identity=0,
nan_policy='propagate',
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
promotes_int_to_int64=True,
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
dtypesIfCUDA=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf),
ref=reference_reduction_numpy(np.sum),
error_inputs_sparse_func=error_inputs_sparse_reduction_sum,
sample_inputs_sparse_coo_func=partial(sample_inputs_sparse_reduction_sum, layout=torch.sparse_coo),
sample_inputs_sparse_csr_func=partial(sample_inputs_sparse_reduction_sum, layout=torch.sparse_csr),
sample_inputs_sparse_csc_func=partial(sample_inputs_sparse_reduction_sum, layout=torch.sparse_csc),
sample_inputs_sparse_bsr_func=partial(sample_inputs_sparse_reduction_sum, layout=torch.sparse_bsr),
sample_inputs_sparse_bsc_func=partial(sample_inputs_sparse_reduction_sum, layout=torch.sparse_bsc),
skips=(
# FIXME: sum does not support passing keepdim without passing dim
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_dim_default_keepdim'),
# FIXME: sum reduces all dimensions when dim=[]
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_dim_empty'),
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_dim_empty_keepdim'),
# FIXME: improve precision
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_ref_small_input',
dtypes=[torch.float16]),
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_ref_duplicate_values',
dtypes=[torch.float16]),
DecorateInfo(unittest.skip("Skipped!"), 'TestOperators', 'test_reduction_all',
dtypes=[torch.float32]),
),
),
ReductionOpInfo(
'nansum',
identity=0,
nan_policy='omit',
supports_out=True,
promotes_int_to_int64=True,
supports_forward_ad=True,
check_batched_forward_grad=False,
supports_fwgrad_bwgrad=True,
dtypes=all_types_and(torch.bool, torch.float16, torch.bfloat16),
dtypesIfCUDA=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf),
sample_inputs_func=sample_inputs_nan_reduction(supports_multiple_dims=True),
ref=reference_reduction_numpy(np.nansum),
skips=(
# Fails with an error whose message ends in "please report a bug to PyTorch."
# (the beginning of the message is not recorded here).
DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit'),
# FIXME: nansum reduces all dimensions when dim=[]
DecorateInfo(unittest.expectedFailure, 'TestReductions', 'test_dim_empty'),
DecorateInfo(unittest.expectedFailure, 'TestReductions', 'test_dim_empty_keepdim'),
# FIXME: flaky test so skipped instead of xfailed
# possibly bad low precision reference in numpy
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_ref_small_input',
dtypes=[torch.float16]),
DecorateInfo(toleranceOverride({torch.float16: tol(atol=3e-3, rtol=4e-2)}),
"TestConsistency", "test_output_match", device_type="mps"),
),
),
OpInfo(
"nn.functional.ctc_loss",
dtypes=floating_types(),
supports_out=False,
sample_inputs_func=sample_inputs_ctc_loss,
skips=(
# https://github.com/pytorch/pytorch/issues/67462
# torch.autograd.gradcheck.GradcheckError: Jacobian mismatch for output 0 with respect to input 0
DecorateInfo(
unittest.expectedFailure,
"TestBwdGradients",
"test_fn_grad",
dtypes=(torch.float64,),
),
# RuntimeError: derivative for aten::_ctc_loss_backward is not implemented
DecorateInfo(
unittest.expectedFailure,
"TestBwdGradients",
"test_fn_gradgrad",
dtypes=(torch.float64,),
),
# RuntimeError: derivative for aten::_ctc_loss_backward is not implemented
DecorateInfo(
unittest.skip("Skipped!"),
"TestJit",
"test_variant_consistency_jit",
dtypes=(torch.float32,),
),
# Ref: https://github.com/pytorch/pytorch/issues/85231
DecorateInfo(unittest.skip("Fails with ASAN"),
'TestProxyTensorOpInfo',
'test_make_fx_fake_exhaustive', active_if=TEST_WITH_ASAN),
),
),
OpInfo(
"nn.functional.cosine_embedding_loss",
dtypes=all_types_and(torch.half, torch.bfloat16, torch.bool),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
decorators=[
DecorateInfo(
toleranceOverride({torch.float16: tol(atol=1e-4, rtol=2e-3)}),
'TestInductorOpInfo', 'test_comprehensive', device_type="cuda",
),
],
sample_inputs_func=sample_inputs_cosine_embedding_loss,
),
OpInfo(
"nn.functional.nll_loss",
dtypes=floating_types_and(torch.float16, torch.bfloat16),
supports_out=False,
sample_inputs_func=sample_inputs_nll_loss,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
assert_jit_shape_analysis=True,
skips=(
# RuntimeError:
# undefined value tensor:
# File "<string>", line 3
# def the_method(i0, i1):
# return torch.nn.functional.nll_loss(i0, i1, weight=tensor([8.4784, 1.7658, 4.3228], dtype=torch.float32))
# ~~~~~~ <--- HERE
DecorateInfo(unittest.skip("Skipped!"), "TestJit", "test_variant_consistency_jit", dtypes=(torch.float32,),),
# Fails for unknown reason: https://github.com/pytorch/pytorch/issues/120782
DecorateInfo(
unittest.skip("Skipped!"),
"TestCompositeCompliance",
"test_cow_input",
device_type='cuda',
),
DecorateInfo(unittest.skip("FP16 nll_loss cases have not been enabled on MPS yet"),
dtypes=(torch.half,), device_type="mps"),
),
),
OpInfo(
"nn.functional.gaussian_nll_loss",
dtypes=floating_types_and(torch.half, torch.bfloat16),
# Runs very slowly on slow gradcheck - alternatively reduce input sizes
gradcheck_fast_mode=True,
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_gaussian_nll_loss,
error_inputs_func=error_inputs_gaussian_nll_loss,
skips=(
# Pre-existing condition (calls .item); needs to be fixed
DecorateInfo(unittest.expectedFailure, 'TestCompositeCompliance', 'test_backward'),
DecorateInfo(unittest.expectedFailure, 'TestCompositeCompliance', 'test_forward_ad'),
# Pre-existing condition (calls .item); needs to be fixed
DecorateInfo(unittest.expectedFailure, 'TestCompositeCompliance', 'test_operator'),
# JIT does not support variadic tensors.
# RuntimeError: input->type()->kind() == TypeKind::OptionalType
# INTERNAL ASSERT FAILED at "../torch/csrc/jit/passes/utils/check_alias_annotation.cpp":270,
# please report a bug to PyTorch.
DecorateInfo(unittest.skip("Skipped!"), "TestJit", "test_variant_consistency_jit", dtypes=(torch.float32,),),
DecorateInfo(toleranceOverride({torch.float16: tol(atol=8e-3, rtol=2e-3)}),
"TestConsistency", "test_output_match", device_type="mps"),
DecorateInfo(toleranceOverride({torch.float16: tol(atol=8e-3, rtol=2e-3)}),
"TestConsistency", "test_output_grad_match", device_type="mps"),
),
),
OpInfo(
"nn.functional.hinge_embedding_loss",
dtypes=floating_types_and(torch.half, torch.bfloat16),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_hinge_embedding_loss,
error_inputs_func=error_inputs_hinge_embedding_loss,
reference_inputs_func=reference_inputs_hinge_embedding_loss,
),
OpInfo(
"nn.functional.huber_loss",
aten_backward_name='huber_loss_backward',
dtypes=floating_types_and(torch.float16, torch.bfloat16),
supports_out=False,
supports_forward_ad=True,
sample_inputs_func=sample_inputs_huber_loss,
error_inputs_func=error_inputs_huber_loss,
skips=(
# JIT does not support variadic tensors.
# RuntimeError: input->type()->kind() == TypeKind::OptionalType
# INTERNAL ASSERT FAILED at "../torch/csrc/jit/passes/utils/check_alias_annotation.cpp":270,
# please report a bug to PyTorch.
DecorateInfo(unittest.skip("Skipped!"), "TestJit", "test_variant_consistency_jit", dtypes=(torch.float32,),),
)
),
OpInfo(
"nn.functional.pdist",
ref=reference_pdist,
sample_inputs_func=sample_inputs_pdist,
dtypes=floating_types(),
supports_out=False,
supports_gradgrad=False,
skips=(
DecorateInfo(unittest.skip("Unsupported on MPS for now"), 'TestCommon', 'test_numpy_ref_mps'),
)
),
OpInfo(
"nn.functional.poisson_nll_loss",
dtypes=all_types_and(torch.half, torch.bfloat16),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_poisson_nll_loss,
error_inputs_func=error_inputs_poisson_nll_loss,
),
OpInfo(
"argsort",
dtypes=all_types_and(torch.bool, torch.float16, torch.bfloat16),
dtypesIfCUDA=all_types_and(torch.bool, torch.float16, torch.bfloat16),
sample_inputs_func=sample_inputs_sort,
supports_out=False,
supports_autograd=False,
skips=(
DecorateInfo(
unittest.skip("Skipped!"),
"TestJit",
"test_variant_consistency_jit",
dtypes=(torch.float32,),
),
DecorateInfo(
unittest.expectedFailure,
"TestCommon",
"test_non_standard_bool_values",
dtypes=[torch.bool],
device_type='cuda',
active_if=not TEST_WITH_ROCM
),
),
),
OpInfo(
"repeat_interleave",
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16, torch.chalf),
backward_dtypesIfCUDA=floating_and_complex_types_and(torch.float16, torch.bfloat16, torch.chalf),
sample_inputs_func=sample_inputs_repeat_interleave,
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# See https://github.com/pytorch/pytorch/pull/78358
check_batched_forward_grad=False,
skips=(
DecorateInfo(
unittest.skip("Skipped!"),
"TestJit",
"test_variant_consistency_jit",
dtypes=(torch.float32, torch.complex64),
),
),
),
OpInfo(
"nn.functional.pairwise_distance",
ref=lambda a, b, p=2.0, eps=1e-6, keepdim=False: (
np.sum(np.abs(a - b + eps) ** p, axis=-1, keepdims=keepdim) ** (1 / p)
),
sample_inputs_func=sample_inputs_pairwise_distance,
dtypes=all_types_and_complex_and(torch.float16, torch.bfloat16),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
skips=(
DecorateInfo(
unittest.skip("Skipped!"),
"TestJit",
"test_variant_consistency_jit",
dtypes=(torch.float32, torch.complex64),
),
),
),
OpInfo(
"nn.functional.pixel_shuffle",
sample_inputs_func=sample_inputs_pixel_shuffle,
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
skips=(
DecorateInfo(
unittest.skip("Skipped!"),
"TestJit",
"test_variant_consistency_jit",
dtypes=(torch.float32, torch.complex64),
),
),
),
OpInfo(
"nn.functional.pixel_unshuffle",
sample_inputs_func=sample_inputs_pixel_unshuffle,
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
skips=(
DecorateInfo(
unittest.skip("Skipped!"),
"TestJit",
"test_variant_consistency_jit",
dtypes=(torch.float32, torch.complex64),
),
),
),
OpInfo(
"nn.functional.channel_shuffle",
sample_inputs_func=sample_inputs_channel_shuffle,
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
allow_cow_input_materialize_forward=[0],
allow_cow_input_materialize_backward=[0, 'output grad 0'],
skips=(
# Skip due to NotImplementedError for MPS device.
DecorateInfo(unittest.expectedFailure, 'TestConsistency'),
DecorateInfo(unittest.expectedFailure, 'TestDTensorOps', 'test_dtensor_op_db'),
DecorateInfo(unittest.expectedFailure, "TestMeta", "test_dispatch_symbolic_meta_outplace_all_strides"),
),
),
OpInfo(
"nn.functional.kl_div",
sample_inputs_func=sample_inputs_kl_div,
dtypes=floating_types_and(torch.float16, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
),
OpInfo(
"diagflat",
ref=lambda input, offset=0: np.diagflat(input, k=offset),
sample_inputs_func=sample_inputs_diagflat,
dtypes=all_types_and_complex_and(torch.bool, torch.bfloat16, torch.float16),
dtypesIfCUDA=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
supports_out=False,
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
# See https://github.com/pytorch/pytorch/pull/78358
check_batched_forward_grad=False,
),
OpInfo(
'scatter_reduce',
variant_test_name='sum',
inplace_variant=torch.Tensor.scatter_reduce_,
# complex not added to dtypes as complex gradients are not properly handled
# and scatter_reduce hasn't been added to the whitelist in gen_variable_type yet
dtypes=all_types_and(torch.float16, torch.bfloat16, torch.bool),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_scatter_reduce,
skips=(
# Compiler issue on ROCm. Regression started in ROCm 6.4.
DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_non_standard_bool_values',
dtypes=[torch.bool], active_if=TEST_WITH_ROCM),
),
),
OpInfo(
'scatter_reduce',
variant_test_name='prod',
# complex not added to dtypes as complex gradients are not properly handled
# and scatter_reduce hasn't been added to the whitelist in gen_variable_type yet
dtypes=all_types_and(torch.float16, torch.bfloat16, torch.bool),
dtypesIfCUDA=all_types_and(torch.float16, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
sample_inputs_func=sample_inputs_scatter_reduce,
skips=(
# Not implemented
DecorateInfo(unittest.expectedFailure, 'TestFwdGradients', 'test_forward_mode_AD'),
DecorateInfo(unittest.expectedFailure, 'TestFwdGradients', 'test_inplace_forward_mode_AD'),
DecorateInfo(unittest.expectedFailure, 'TestFwdGradients', 'test_fn_fwgrad_bwgrad'),
),
),
OpInfo(
'scatter_reduce',
variant_test_name='mean',
# complex not added to dtypes as complex gradients are not properly handled
# and scatter_reduce hasn't been added to the whitelist in gen_variable_type yet
dtypes=all_types_and(torch.float16, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
dtypesIfCUDA=all_types_and(torch.float16, torch.bfloat16),
supports_forward_ad=True,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_scatter_reduce,
),
OpInfo(
'scatter_reduce',
variant_test_name='amin',
dtypes=all_types_and(torch.float16, torch.bfloat16, torch.bool),
dtypesIfCUDA=all_types_and(torch.float16, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
supports_forward_ad=True,
check_batched_forward_grad=False,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_scatter_reduce,
),
OpInfo(
'scatter_reduce',
variant_test_name='amax',
dtypes=all_types_and(torch.float16, torch.bfloat16, torch.bool),
dtypesIfCUDA=all_types_and(torch.float16, torch.bfloat16),
dtypesIfHpu=custom_types(torch.float32, torch.bfloat16),
supports_forward_ad=True,
check_batched_forward_grad=False,
supports_fwgrad_bwgrad=True,
sample_inputs_func=sample_inputs_scatter_reduce,
),
OpInfo(
'_segment_reduce',
aten_name='segment_reduce',
variant_test_name='lengths',
dtypes=floating_types_and(torch.float16, torch.bfloat16),
supports_out=False,
# RuntimeError: derivative for aten::_segment_reduce_backward is not implemented
supports_gradgrad=False,
sample_inputs_func=sample_inputs_segment_reduce,
skips=(
# FIXME: CUDA driver API confirmed a leak in
# __main__.TestJitCUDA.test_variant_consistency_jit_segment_reduce_cuda_float32
DecorateInfo(
unittest.skip("Skipped!"),
"TestJit",
"test_variant_consistency_jit",
device_type="cuda",
),
),
),
OpInfo(
'_segment_reduce',
aten_name='segment_reduce',
variant_test_name='offsets',
dtypes=floating_types_and(torch.float16, torch.bfloat16),
supports_out=False,
# RuntimeError: derivative for aten::_segment_reduce_backward is not implemented
supports_gradgrad=False,
sample_inputs_func=partial(sample_inputs_segment_reduce, mode='offsets'),
skips=(
# FIXME: CUDA driver API confirmed a leak in
# __main__.TestJitCUDA.test_variant_consistency_jit_segment_reduce_cuda_float32
DecorateInfo(
unittest.skip("Skipped!"),
"TestJit",
"test_variant_consistency_jit",
device_type="cuda",
),
),
),
]
# Extend op_db with the entries exported as opinfo.definitions.op_db, so that
# consumers of op_db see both the entries listed above and those ones.
op_db += opinfo.definitions.op_db
# Separate registry for experimental Python Reference OpInfos.
python_ref_db = [
#
# Elementwise Unary OpInfos
#
ElementwiseUnaryPythonRefInfo(
"_refs.abs",
torch_opinfo_name="abs",
skips=(
# Reference: https://github.com/pytorch/pytorch/issues/49224
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_small',
dtypes=[torch.int8], active_if=TEST_WITH_ASAN),
),
),
ElementwiseUnaryPythonRefInfo(
"_refs.acos",
torch_opinfo_name="acos",
skips=(
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_normal',
device_type='cuda', dtypes=[torch.cdouble],
active_if=IS_WINDOWS),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_extremal',
device_type='cuda', dtypes=[torch.cdouble],
active_if=IS_WINDOWS),
# Failing with wrong imaginary sign on at least some Windows jobs
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_small',
device_type='cuda', dtypes=[torch.cdouble],
active_if=IS_WINDOWS),
# Failing with wrong imaginary sign on at least some Windows jobs
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_large',
device_type='cuda', dtypes=[torch.cdouble],
active_if=IS_WINDOWS),
)
),
# Python reference entry for acosh; linked to the "acosh" OpInfo through
# torch_opinfo_name. Every decorator below skips a TestUnaryUfuncs
# reference-numerics test for a specific device/dtype combination.
ElementwiseUnaryPythonRefInfo(
    "_refs.acosh",
    torch_opinfo_name="acosh",
    skips=(
        # CUDA + complex128, gated on IS_WINDOWS.
        DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
                     'test_reference_numerics_normal',
                     device_type='cuda', dtypes=[torch.cdouble],
                     active_if=IS_WINDOWS),
        # NOTE: this skip used to be listed twice with identical arguments;
        # a single entry is sufficient.
        DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
                     'test_reference_numerics_extremal',
                     device_type='cuda', dtypes=[torch.cdouble],
                     active_if=IS_WINDOWS),
        # CPU + complex64/complex128, unconditional.
        DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
                     'test_reference_numerics_extremal',
                     device_type='cpu', dtypes=[torch.cfloat, torch.cdouble]),
        DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
                     'test_reference_numerics_large',
                     device_type='cpu', dtypes=[torch.cfloat, torch.cdouble]),
        # CUDA + complex128, gated on IS_WINDOWS.
        DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
                     'test_reference_numerics_large',
                     device_type='cuda', dtypes=[torch.cdouble],
                     active_if=IS_WINDOWS),
        # Failing with wrong imaginary sign on at least some Windows jobs
        DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
                     'test_reference_numerics_small',
                     device_type='cuda', dtypes=[torch.cdouble],
                     active_if=IS_WINDOWS),
    ),
),
ElementwiseUnaryPythonRefInfo(
"_refs.asin",
torch_opinfo_name="asin",
decorators=[
DecorateInfo(
toleranceOverride({torch.float16: tol(atol=1e-05, rtol=1e-03)}),
'TestUnaryUfuncs', device_type='cuda'),
DecorateInfo(
toleranceOverride({torch.complex64: tol(atol=5e-05, rtol=2e-05)}),
'TestUnaryUfuncs', 'test_reference_numerics_extremal', device_type='cpu'
),
precisionOverride({torch.bfloat16: 1e-2}),
],
skips=(
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_extremal',
device_type='cuda', dtypes=[torch.cdouble],
active_if=IS_WINDOWS),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_large',
device_type='cuda', dtypes=[torch.cdouble],
active_if=IS_WINDOWS),
),
),
ElementwiseUnaryPythonRefInfo(
"_refs.asinh",
torch_opinfo_name="asinh",
decorators=(precisionOverride({torch.bfloat16: 5e-2}),),
skips=(
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_extremal',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble]),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_large',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble]),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_small',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble]),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_normal',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble]),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_extremal',
device_type='cuda', dtypes=[torch.cdouble],
active_if=IS_WINDOWS),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_large',
device_type='cuda', dtypes=[torch.cdouble],
active_if=IS_WINDOWS),
),
),
PythonRefInfo(
"_refs.lerp",
torch_opinfo_name="lerp",
),
PythonRefInfo(
"_refs.ones",
torch_opinfo_name="ones",
skips=(
# Tests that assume input is a tensor or sequence of tensors
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_conj_view'),
),
),
PythonRefInfo(
"_refs.zeros",
torch_opinfo_name="zeros",
skips=(
# Tests that assume input is a tensor or sequence of tensors
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_conj_view'),
),
),
PythonRefInfo(
"_refs.cauchy",
torch_opinfo_name="cauchy",
decorators=(
# TODO: RuntimeError: no _refs support for torch.rand_like
DecorateInfo(unittest.skip("TODO: RuntimeError: no _refs support for torch.rand_like"),
'TestCommon',
'test_python_ref'),
# AssertionError: Tensor-likes are not close!
DecorateInfo(unittest.skip("Expected: cauchy is not comparable"),
'TestCommon',
'test_out'),
DecorateInfo(unittest.skip("Expected: cauchy is not comparable"),
'TestCommon',
'test_out_warning'),
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref_executor'),
DecorateInfo(unittest.skip("Expected: cauchy is not comparable"),
'TestCommon',
'test_python_ref_torch_fallback'),
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
)
),
PythonRefInfo(
"_refs.exponential",
torch_opinfo_name="exponential",
supports_out=True,
decorators=(
# dtypes that do not support check_uniform_bounds of rand_like
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref_meta',
dtypes=(torch.int8, torch.uint8, torch.int16, torch.int32, torch.int64)),
DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_dtypes'),
# TODO: RuntimeError: no _refs support for torch.rand_like
DecorateInfo(unittest.skip("TODO: RuntimeError: no _refs support for torch.rand_like"),
'TestCommon',
'test_python_ref'),
# AssertionError: Tensor-likes are not close!
DecorateInfo(unittest.skip("Expected: exponential is not comparable"),
'TestCommon',
'test_out'),
DecorateInfo(unittest.skip("Expected: exponential is not comparable"),
'TestCommon',
'test_out_warning'),
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref_executor'),
DecorateInfo(unittest.skip("Expected: exponential is not comparable"),
'TestCommon',
'test_python_ref_torch_fallback'),
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
)
),
PythonRefInfo(
"_refs.geometric",
torch_opinfo_name="geometric",
supports_out=True,
decorators=(
# dtypes that do not support check_uniform_bounds of rand_like
DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_dtypes'),
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref_meta',
dtypes=(torch.int8, torch.uint8, torch.int16, torch.int32, torch.int64)),
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref_torch_fallback',
dtypes=(torch.int8, torch.uint8, torch.int16, torch.int32, torch.int64)),
# TODO: RuntimeError: no _refs support for torch.rand_like
DecorateInfo(unittest.skip("TODO: RuntimeError: no _refs support for torch.rand_like"),
'TestCommon',
'test_python_ref'),
DecorateInfo(unittest.skip("Expected: geometric is not comparable"),
'TestCommon',
'test_python_ref_executor', device_type='cuda'),
# AssertionError: Tensor-likes are not close!
DecorateInfo(unittest.skip("Expected: geometric is not comparable"),
'TestCommon',
'test_out'),
DecorateInfo(unittest.skip("Expected: geometric is not comparable"),
'TestCommon',
'test_out_warning'),
DecorateInfo(unittest.skip("Expected: geometric is not comparable"),
'TestCommon',
'test_python_ref_torch_fallback'),
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
)
),
PythonRefInfo(
"_refs.log_normal",
torch_opinfo_name="log_normal",
supports_out=True,
decorators=(
# TODO: RuntimeError: no _refs support for torch.rand_like
DecorateInfo(unittest.skip("TODO: RuntimeError: no _refs support for torch.rand_like"),
'TestCommon',
'test_python_ref'),
DecorateInfo(unittest.skip("Expected: log_normal is not comparable"),
'TestCommon',
'test_python_ref_executor', device_type='cuda'),
# AssertionError: Tensor-likes are not close!
DecorateInfo(unittest.skip("Expected: log_normal is not comparable"),
'TestCommon',
'test_out'),
DecorateInfo(unittest.skip("Expected: log_normal is not comparable"),
'TestCommon',
'test_out_warning'),
DecorateInfo(unittest.skip("Expected: log_normal is not comparable"),
'TestCommon',
'test_python_ref_torch_fallback'),
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
)
),
PythonRefInfo(
"_refs.normal",
torch_opinfo_name="normal",
supports_out=True,
decorators=(
# TODO: RuntimeError: no _refs support for torch.rand_like
DecorateInfo(unittest.skip("TODO: RuntimeError: no _refs support for torch.rand_like"),
'TestCommon',
'test_python_ref'),
# AssertionError: Tensor-likes are not close!
DecorateInfo(unittest.skip("Expected: normal is not comparable"),
'TestCommon',
'test_out'),
DecorateInfo(unittest.skip("Expected: normal is not comparable"),
'TestCommon',
'test_out_warning'),
DecorateInfo(unittest.skip("Expected: normal is not comparable"),
'TestCommon',
'test_python_ref_torch_fallback'),
DecorateInfo(unittest.skip("Expected: normal is not comparable"), 'TestDecomp', 'test_comprehensive'),
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'),
DecorateInfo(unittest.skip("make_traced() doesn't set seed properly!"), 'TestCommon', 'test_python_ref_executor'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_conj_view'),
)
),
PythonRefInfo(
"_refs.normal",
torch_opinfo_name="normal",
torch_opinfo_variant_name="number_mean",
supports_out=True,
decorators=(
# TODO: RuntimeError: no _refs support for torch.rand_like
DecorateInfo(unittest.skip("TODO: RuntimeError: no _refs support for torch.rand_like"),
'TestCommon',
'test_python_ref'),
# AssertionError: Tensor-likes are not close!
DecorateInfo(unittest.skip("Expected: normal is not comparable"),
'TestCommon',
'test_out'),
DecorateInfo(unittest.skip("Expected: normal is not comparable"),
'TestCommon',
'test_out_warning'),
DecorateInfo(unittest.skip("Expected: normal is not comparable"),
'TestCommon',
'test_python_ref_torch_fallback'),
DecorateInfo(unittest.skip("Expected: normal is not comparable"), 'TestDecomp', 'test_comprehensive'),
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'),
DecorateInfo(unittest.skip("make_traced() doesn't set seed properly!"), 'TestCommon', 'test_python_ref_executor'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_conj_view'),
)
),
PythonRefInfo(
"_refs.normal_",
op=torch.Tensor.normal_,
torch_opinfo_name="normal",
torch_opinfo_variant_name="in_place",
supports_out=False,
decorators=(
# TODO: RuntimeError: no _refs support for torch.rand_like
DecorateInfo(unittest.skip("TODO: RuntimeError: no _refs support for torch.rand_like"),
'TestCommon',
'test_python_ref'),
# AssertionError: Tensor-likes are not close!
DecorateInfo(unittest.skip("Expected: normal is not comparable"),
'TestCommon',
'test_out'),
DecorateInfo(unittest.skip("Expected: normal is not comparable"),
'TestCommon',
'test_out_warning'),
DecorateInfo(unittest.skip("Expected: normal is not comparable"),
'TestCommon',
'test_python_ref_torch_fallback'),
DecorateInfo(unittest.skip("Expected: normal is not comparable"), 'TestDecomp', 'test_comprehensive'),
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'),
DecorateInfo(unittest.skip("make_traced() doesn't set seed properly!"), 'TestCommon', 'test_python_ref_executor'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_conj_view'),
)
),
PythonRefInfo(
"_refs.arange",
torch_opinfo_name="arange",
skips=(
# Tests that assume input is a tensor or sequence of tensors
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_conj_view'),
),
),
PythonRefInfo(
"_refs.linspace",
torch_opinfo_name="linspace",
skips=(
# Tests that assume input is a tensor or sequence of tensors
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_conj_view'),
# cpu implementation is wrong on some integral types
# https://github.com/pytorch/pytorch/issues/81996
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref_torch_fallback',
dtypes=(torch.int8, torch.uint8, torch.int16, torch.int32, torch.int64), device_type="cpu"),
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref',
dtypes=(torch.int8, torch.uint8, torch.int16, torch.int32, torch.int64), device_type="cpu"),
# cuda implementation is off-by-one on some inputs due to precision issues
# https://github.com/pytorch/pytorch/issues/82230
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref_torch_fallback',
dtypes=(torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64),
device_type="cuda"),
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref',
dtypes=(torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64),
device_type="cuda"),
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref_executor',
dtypes=(torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64),
device_type="cuda"),
),
),
PythonRefInfo(
"_refs.linspace",
torch_opinfo_name="linspace",
torch_opinfo_variant_name="tensor_overload",
skips=(
# TypeError: 'int' object is not subscriptable
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_conj_view'),
# cpu implementation is wrong on some integral types
# https://github.com/pytorch/pytorch/issues/81996
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref_torch_fallback',
dtypes=(torch.int8, torch.uint8, torch.int16, torch.int32, torch.int64), device_type="cpu"),
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref',
dtypes=(torch.int8, torch.uint8, torch.int16, torch.int32, torch.int64), device_type="cpu"),
# cuda implementation is off-by-one on some inputs due to precision issues
# https://github.com/pytorch/pytorch/issues/82230
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref_torch_fallback',
dtypes=(torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64),
device_type="cuda"),
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref',
dtypes=(torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64),
device_type="cuda"),
# TODO torch.ops.aten.copy is not in _refs
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref',
dtypes=(torch.float32, torch.float64, torch.float16, torch.complex64, torch.complex128, torch.bfloat16),
device_type="cuda"),
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref',
dtypes=(torch.float32, torch.float64, torch.float16, torch.complex64, torch.complex128, torch.bfloat16),
device_type="cpu"),
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref_executor',
dtypes=(torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64),
device_type="cuda"),
),
),
PythonRefInfo(
"_refs.logspace",
torch_opinfo_name="logspace",
skips=(
# Tests that assume input is a tensor or sequence of tensors
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_conj_view'),
# Off-by-one issue when casting floats to ints
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref_torch_fallback',
dtypes=(torch.int16, torch.int32, torch.int64),
device_type="cuda"),
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref',
dtypes=(torch.int16, torch.int32, torch.int64),
device_type="cuda"),
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref_executor',
dtypes=(torch.int16, torch.int32, torch.int64),
device_type="cuda"),
),
),
# Reference entry for the "tensor_overload" variant of the logspace OpInfo.
# Every decorator below is an expectedFailure (nothing is skipped outright).
PythonRefInfo(
"_refs.logspace",
torch_opinfo_name="logspace",
torch_opinfo_variant_name="tensor_overload",
skips=(
# TypeError: 'int' object is not subscriptable
# NOTE(review): the other _refs.logspace entry in this list also xfails
# 'test_neg_conj_view'; it is not listed here — confirm that is intentional.
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.expectedFailure, 'TestMathBits', 'test_conj_view'),
# Off-by-one issue when casting floats to ints
# (CUDA only, and only for int16/int32/int64, across the three python_ref tests)
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref_torch_fallback',
dtypes=(torch.int16, torch.int32, torch.int64),
device_type="cuda"),
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref',
dtypes=(torch.int16, torch.int32, torch.int64),
device_type="cuda"),
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref_executor',
dtypes=(torch.int16, torch.int32, torch.int64),
device_type="cuda"),
# TODO copy doesn't have prim refs
# The CUDA entry omits int16/int32/int64, which already have a CUDA
# 'test_python_ref' xfail above; together they cover the same dtype set
# that the CPU entry lists in one tuple.
DecorateInfo(
unittest.expectedFailure, 'TestCommon', 'test_python_ref',
dtypes=(
torch.float32, torch.float64, torch.float16, torch.complex64,
torch.complex128, torch.bfloat16, torch.int8, torch.uint8
),
device_type="cuda"
),
DecorateInfo(
unittest.expectedFailure, 'TestCommon', 'test_python_ref',
dtypes=(
torch.float32, torch.float64, torch.float16,
torch.complex64, torch.complex128, torch.bfloat16,
torch.int16, torch.int32, torch.int64, torch.int8, torch.uint8
),
device_type="cpu"),
),
),
PythonRefInfo(
"_refs.meshgrid",
torch_opinfo_name="meshgrid",
torch_opinfo_variant_name="variadic_tensors",
),
# Reference entry for take_along_dim.
PythonRefInfo(
"_refs.take_along_dim",
torch_opinfo_name="take_along_dim",
skips=(
# Unconditional xfail of TestCommon.test_python_ref (no dtype or device filter).
# NOTE(review): no failure reason or tracking issue is recorded here —
# TODO document why this is expected to fail.
DecorateInfo(unittest.expectedFailure,
'TestCommon',
'test_python_ref'),
),
),
PythonRefInfo(
"_refs.to",
torch_opinfo_name="to",
),
PythonRefInfo(
"_refs.triu",
torch_opinfo_name="triu",
),
PythonRefInfo(
"_refs.tril",
torch_opinfo_name="tril",
),
PythonRefInfo(
"_refs.triu_indices",
torch_opinfo_name="triu_indices",
# the implementation uses torch.stack that violates view consistency
validate_view_consistency=False,
skips=(
# skip these tests since we have non tensor input
DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_noncontiguous_samples'),
DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_variant_consistency_eager'),
DecorateInfo(unittest.skip('Skipped!'), 'TestJit', 'test_variant_consistency_jit'),
DecorateInfo(unittest.skip('Skipped!'), 'TestMathBits', 'test_neg_view'),
)),
PythonRefInfo(
"_refs.tril_indices",
torch_opinfo_name="tril_indices",
# the implementation uses torch.stack that violates view consistency
validate_view_consistency=False,
skips=(
# skip these tests since we have non tensor input
DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_noncontiguous_samples'),
DecorateInfo(unittest.skip('Skipped!'), 'TestCommon', 'test_variant_consistency_eager'),
DecorateInfo(unittest.skip('Skipped!'), 'TestJit', 'test_variant_consistency_jit'),
DecorateInfo(unittest.skip('Skipped!'), 'TestMathBits', 'test_neg_view'),
)),
PythonRefInfo(
"_refs.meshgrid",
torch_opinfo_name="meshgrid",
torch_opinfo_variant_name="list_of_tensors",
),
PythonRefInfo(
"_refs.movedim",
aliases=('moveaxis',),
torch_opinfo_name="movedim",
),
PythonRefInfo(
"_refs.bucketize",
torch_opinfo_name="bucketize",
skips=(
# RuntimeError: It appears that you're trying to get value out of a tracing tensor with
# aten._local_scalar_dense.default - erroring out! [...]
# triggered by mid_val = boundaries[mid]
DecorateInfo(unittest.expectedFailure, "TestCommon", "test_python_ref_executor"),
)
),
PythonRefInfo(
"_refs.equal",
torch_opinfo_name="equal",
skips=(
# RuntimeError: Cannot cast FakeTensor to number
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref_meta',),
)
),
# Reference entry for atan. bfloat16 comparisons use a 1e-2 precision override.
ElementwiseUnaryPythonRefInfo(
"_refs.atan",
torch_opinfo_name="atan",
decorators=(precisionOverride({torch.bfloat16: 1e-2}),),
skips=(
# CPU: complex dtypes are skipped for the extremal, large and small
# reference-numerics tests on every platform.
# NOTE(review): no failure reason or tracking issue is recorded for these skips.
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_extremal',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble]),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_large',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble]),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_small',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble]),
# CUDA: complex dtypes are skipped only when IS_WINDOWS is true, and only
# for the extremal and large tests.
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_extremal',
device_type='cuda', dtypes=[torch.cfloat, torch.cdouble],
active_if=IS_WINDOWS),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_large',
device_type='cuda', dtypes=[torch.cfloat, torch.cdouble],
active_if=IS_WINDOWS),
),
),
# Reference entry for atanh. bfloat16 comparisons use a 1e-2 precision override.
ElementwiseUnaryPythonRefInfo(
"_refs.atanh",
torch_opinfo_name="atanh",
decorators=(precisionOverride({torch.bfloat16: 1e-2}),),
skips=(
# CPU: complex dtypes are skipped for the small, extremal and large
# reference-numerics tests on every platform.
# NOTE(review): no failure reason or tracking issue is recorded for these skips.
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_small',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble]),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_extremal',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble]),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_large',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble]),
# CUDA: skipped only when IS_WINDOWS is true.
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_extremal',
device_type='cuda', dtypes=[torch.cfloat, torch.cdouble],
active_if=IS_WINDOWS),
# NOTE(review): this skip lists cfloat only, while the extremal skip just
# above lists cfloat and cdouble — confirm cdouble is intentionally left enabled.
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_large',
device_type='cuda', dtypes=[torch.cfloat],
active_if=IS_WINDOWS),
),
),
ElementwiseUnaryPythonRefInfo(
"_refs.bitwise_not",
torch_opinfo_name="bitwise_not",
),
# Reference entry for ceil; it registers no skips or decorators of its own.
ElementwiseUnaryPythonRefInfo(
"_refs.ceil",
torch_opinfo_name="ceil",
# Fails on int32
# https://github.com/pytorch/pytorch/issues/85258
# NOTE(review): no skip/xfail is attached to the comment above in this
# entry — confirm whether the comment is stale or a skip is missing.
),
PythonRefInfo(
"_refs.item",
torch_opinfo_name="item",
skips=(
# RuntimeError: Cannot cast FakeTensor(FakeTensor(..., device='meta', size=()), cpu) to number
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref_meta'),
# ValueError: Can't convert a tensor with 10 elements to a number!
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref_errors'),),
),
ElementwiseUnaryPythonRefInfo(
"_refs.conj_physical",
torch_opinfo_name="conj_physical",
),
ElementwiseUnaryPythonRefInfo(
"_refs.cos",
torch_opinfo_name="cos",
decorators=(precisionOverride({torch.bfloat16: 1e-2}),),
skips=(
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_large',
dtypes=(torch.cfloat, torch.cdouble,), device_type='cpu',
active_if=IS_WINDOWS),
# This fails on CUDA but passes on ROCm
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_large',
dtypes=(torch.cdouble,), device_type='cuda'),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_extremal',
dtypes=[torch.cfloat, torch.cdouble], active_if=IS_WINDOWS),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_extremal',
device_type='cpu',
dtypes=[torch.cfloat, torch.cdouble], active_if=IS_MACOS),
# AssertionError: Tensor-likes are not close!
# Greatest absolute difference: nan at index (700,) (up to 1e-05 allowed)
# Greatest relative difference: nan at index (700,) (up to 0.001 allowed)
DecorateInfo(unittest.expectedFailure, 'TestUnaryUfuncs',
'test_reference_numerics_large',
device_type='cuda',
dtypes=(torch.chalf,), active_if=IS_WINDOWS),
),
),
ElementwiseUnaryPythonRefInfo(
"_refs.cosh",
torch_opinfo_name="cosh",
skips=(
# Reference: https://github.com/pytorch/pytorch/issues/48641
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_large',
device_type='cpu', dtypes=[torch.int8]),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_large',
dtypes=[torch.cdouble]),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_extremal',
dtypes=[torch.cfloat, torch.cdouble], active_if=IS_WINDOWS),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_large',
dtypes=[torch.cfloat, torch.cdouble], active_if=IS_WINDOWS),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_extremal',
device_type='cpu',
dtypes=[torch.cfloat, torch.cdouble], active_if=IS_MACOS),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_large',
device_type='cpu',
dtypes=[torch.cfloat, torch.cdouble], active_if=IS_MACOS),
# AssertionError: Tensor-likes are not close!
# Greatest absolute difference: nan at index (6000,) (up to 1e-05 allowed)
# Greatest relative difference: nan at index (6000,) (up to 0.001 allowed)
DecorateInfo(unittest.expectedFailure, 'TestUnaryUfuncs',
'test_reference_numerics_large',
device_type='cuda',
dtypes=(torch.chalf,), active_if=IS_WINDOWS),
),
),
ElementwiseUnaryPythonRefInfo(
"_refs.digamma",
torch_opinfo_name="digamma",
),
ElementwiseUnaryPythonRefInfo(
"_refs.erf",
torch_opinfo_name="erf",
),
ElementwiseUnaryPythonRefInfo(
"_refs.erfinv",
torch_opinfo_name="erfinv",
decorators=(precisionOverride({torch.float16: 1e-2,
torch.bfloat16: 1e-2,
torch.float32: 1e-4}),),
skips=(
# Reference: https://github.com/pytorch/pytorch/pull/49155#issuecomment-742664611
DecorateInfo(
unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_extremal',
active_if=TEST_SCIPY and version.parse(scipy.__version__) < version.parse("1.4.0")),
DecorateInfo(
unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_large',
active_if=TEST_SCIPY and version.parse(scipy.__version__) < version.parse("1.4.0")),
DecorateInfo(
unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_small',
active_if=TEST_SCIPY and version.parse(scipy.__version__) < version.parse("1.4.0")),
),
),
ElementwiseUnaryPythonRefInfo(
"_refs.erfc",
torch_opinfo_name="erfc",
),
ElementwiseUnaryPythonRefInfo(
"_refs.exp",
torch_opinfo_name="exp",
skips=(
# Reference: https://github.com/pytorch/pytorch/issues/48010
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_extremal',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble], active_if=IS_WINDOWS),
),
),
ElementwiseUnaryPythonRefInfo(
"_refs.expm1",
torch_opinfo_name="expm1",
),
ElementwiseUnaryPythonRefInfo(
"_refs.exp2",
torch_opinfo_name="exp2",
skips=(
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_large',
dtypes=[torch.cdouble]),
# Reference: https://github.com/pytorch/pytorch/issues/48010
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_extremal',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble], active_if=IS_WINDOWS),
),
),
ElementwiseUnaryPythonRefInfo(
"_refs.fill",
torch_opinfo_name="fill",
supports_out=True,
),
# Reference entry for floor; it registers no skips or decorators of its own.
ElementwiseUnaryPythonRefInfo(
"_refs.floor",
torch_opinfo_name="floor",
# Fails on int32
# https://github.com/pytorch/pytorch/issues/85258
# NOTE(review): no skip/xfail is attached to the comment above in this
# entry — confirm whether the comment is stale or a skip is missing.
),
ElementwiseUnaryPythonRefInfo(
"_refs.frexp",
torch_opinfo_name="frexp",
# Skipped due to numerical failures on Windows CI.
# This is also skipped in frexp earlier in the file.
skips=(
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs', 'test_reference_numerics_extremal',
active_if=IS_WINDOWS),
),
),
ElementwiseUnaryPythonRefInfo(
"_refs.frac",
torch_opinfo_name="frac",
skips=(
DecorateInfo(
unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_extremal',
dtypes=(torch.bfloat16, torch.float16, torch.float32, torch.float64)),
),
),
ElementwiseUnaryPythonRefInfo(
"_refs.imag",
torch_opinfo_name="imag",
),
ElementwiseUnaryPythonRefInfo(
"_refs.isfinite",
torch_opinfo_name="isfinite",
supports_out=True,
),
ElementwiseUnaryPythonRefInfo(
"_refs.isinf",
torch_opinfo_name="isinf",
supports_out=True,
),
ElementwiseUnaryPythonRefInfo(
"_refs.isposinf",
torch_opinfo_name="isposinf",
supports_out=True,
),
ElementwiseUnaryPythonRefInfo(
"_refs.isneginf",
torch_opinfo_name="isneginf",
supports_out=True,
),
ElementwiseUnaryPythonRefInfo(
"_refs.isnan",
torch_opinfo_name="isnan",
supports_out=True,
),
ElementwiseUnaryPythonRefInfo(
"_refs.isreal",
torch_opinfo_name="isreal",
supports_out=True,
),
ElementwiseUnaryPythonRefInfo(
"_refs.i0",
torch_opinfo_name="i0",
decorators=(precisionOverride({torch.bfloat16: 3e-1,
torch.float16: 5e-1}),),
skips=(
DecorateInfo(unittest.skip("Skipped!"),
'TestUnaryUfuncs',
'test_reference_numerics_large',
dtypes=(torch.int8,)),
),
),
ElementwiseUnaryPythonRefInfo(
"_refs.lgamma",
torch_opinfo_name="lgamma",
decorators=(precisionOverride({torch.float16: 7e-1}),),
skips=(
# Reference: https://github.com/pytorch/pytorch/pull/50140#issuecomment-756150214
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_extremal',
dtypes=[torch.float32, torch.float64], active_if=IS_WINDOWS),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_large',
dtypes=[torch.float32, torch.float64], active_if=IS_WINDOWS),
),
),
# Reference entry for the "mvlgamma_p_1" variant of the mvlgamma OpInfo.
# The skip list is whatever skips_mvlgamma() returns.
ElementwiseUnaryPythonRefInfo(
"_refs.special.multigammaln",
torch_opinfo_name="mvlgamma",
torch_opinfo_variant_name="mvlgamma_p_1",
skips=skips_mvlgamma(),
decorators=(
# Both decorators apply to TestUnaryUfuncs.test_reference_numerics_large only.
# NOTE(review): the mvlgamma_p_3 and mvlgamma_p_5 entries in this list carry
# no Dynamo decorators — presumably the Dynamo failure is specific to this
# variant; confirm and record the reason here.
DecorateInfo(torch.testing._internal.common_utils.markDynamoStrictTest, 'TestUnaryUfuncs',
'test_reference_numerics_large'),
DecorateInfo(torch.testing._internal.common_utils.xfailIfTorchDynamo, 'TestUnaryUfuncs',
'test_reference_numerics_large'),
),
),
ElementwiseUnaryPythonRefInfo(
"_refs.special.multigammaln",
torch_opinfo_name="mvlgamma",
torch_opinfo_variant_name="mvlgamma_p_3",
skips=skips_mvlgamma(),
),
ElementwiseUnaryPythonRefInfo(
"_refs.special.multigammaln",
torch_opinfo_name="mvlgamma",
torch_opinfo_variant_name="mvlgamma_p_5",
skips=skips_mvlgamma(),
),
ElementwiseUnaryPythonRefInfo(
"_refs.log",
torch_opinfo_name="log",
decorators=(precisionOverride({torch.bfloat16: 5e-2}),),
skips=(
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_extremal',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble],
active_if=IS_WINDOWS),
),
),
ElementwiseUnaryPythonRefInfo(
"_refs.log1p",
torch_opinfo_name="log1p",
),
ElementwiseUnaryPythonRefInfo(
"_refs.log10",
torch_opinfo_name="log10",
decorators=(precisionOverride({torch.bfloat16: 5e-2}),),
skips=(
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_extremal',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble],
active_if=IS_WINDOWS),
),
),
ElementwiseUnaryPythonRefInfo(
"_refs.log2",
torch_opinfo_name="log2",
decorators=(precisionOverride({torch.bfloat16: 1e-1}),),
skips=(
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_extremal',
dtypes=[torch.cfloat, torch.cdouble]),
),
),
# Reference entry for logsumexp; it registers no skips or decorators of its own.
PythonRefInfo(
"_refs.logsumexp",
torch_opinfo_name="logsumexp",
# When keepdim=False logsumexp function uses squeeze operation
# that is not yet exposed in nvFuser's Python API.
# NOTE(review): nothing in this entry is tied to the comment above (no
# skip, xfail or flag follows it) — confirm whether it is still relevant.
),
PythonRefInfo(
"_refs.log_softmax",
torch_opinfo_name="log_softmax",
torch_opinfo_variant_name="with_dtype",
),
ElementwiseUnaryPythonRefInfo(
"_refs.nan_to_num",
torch_opinfo_name="nan_to_num",
),
ElementwiseUnaryPythonRefInfo(
"_refs.neg",
torch_opinfo_name="neg",
),
ElementwiseUnaryPythonRefInfo(
"_refs.positive",
torch_opinfo_name="positive",
),
ElementwiseUnaryPythonRefInfo(
"_refs.real",
torch_opinfo_name="real",
),
ElementwiseUnaryPythonRefInfo(
"_refs.reciprocal",
torch_opinfo_name="reciprocal",
skips=(
# Reference: https://github.com/pytorch/pytorch/issues/45690
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_extremal',
dtypes=[torch.cfloat, torch.cdouble]),
),
),
# Reference entry for round.
ElementwiseUnaryPythonRefInfo(
"_refs.round",
torch_opinfo_name="round",
# Fails on int32
# https://github.com/pytorch/pytorch/issues/85258
# NOTE(review): no int32 skip/xfail is registered below — confirm whether
# the comment above is stale.
#
# The DecorateInfos passed via `skips` do not skip anything: each applies a
# bfloat16 toleranceOverride (atol=1e-3, rtol=0.016) to one CUDA
# TestUnaryUfuncs reference-numerics test.
# NOTE(review): other entries in this list (e.g. _refs.sqrt, _refs.tan) pass
# toleranceOverride through `decorators=` — confirm `skips=` is intentional here.
skips=(
DecorateInfo(toleranceOverride({torch.bfloat16: tol(atol=1e-3, rtol=0.016)}),
"TestUnaryUfuncs", "test_reference_numerics_extremal",
device_type="cuda"),
DecorateInfo(toleranceOverride({torch.bfloat16: tol(atol=1e-3, rtol=0.016)}),
"TestUnaryUfuncs", "test_reference_numerics_normal",
device_type="cuda"),
),
),
ElementwiseUnaryPythonRefInfo(
"_refs.rsqrt",
torch_opinfo_name="rsqrt",
decorators=(precisionOverride({torch.half: 5e-2}),),
skips=(
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_extremal',
dtypes=(torch.cfloat, torch.cdouble)),
# AssertionError: Tensor-likes are not close!
# Greatest absolute difference: nan at index (700,) (up to 0.01 allowed)
# Greatest relative difference: nan at index (700,) (up to 0.001 allowed)
DecorateInfo(unittest.expectedFailure, 'TestUnaryUfuncs',
'test_reference_numerics_large',
dtypes=(torch.chalf,)),
),
),
ElementwiseUnaryPythonRefInfo(
"_refs.sigmoid",
torch_opinfo_name="sigmoid",
aliases=('_refs.special.expit',),
# Reference: https://github.com/pytorch/pytorch/issues/56012
handles_complex_extremal_values=False,
handles_large_floats=False,
decorators=(precisionOverride({torch.float16: 1e-2,
torch.complex64: 1e-1,
torch.bfloat16: 1e-2}),),
skips=(
# Reference: https://github.com/pytorch/pytorch/issues/56012
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_extremal',
dtypes=[torch.complex64, torch.cdouble], device_type='cuda'),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_large',
dtypes=[torch.chalf, torch.complex64, torch.cdouble], device_type='cuda')
),
),
ElementwiseUnaryPythonRefInfo(
"_refs.sign",
torch_opinfo_name="sign",
skips=(
# Reference: https://github.com/pytorch/pytorch/issues/41245
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_extremal',
dtypes=[torch.bfloat16, torch.float16, torch.float32,
torch.float64]),
),
),
ElementwiseUnaryPythonRefInfo(
"_refs.sgn",
torch_opinfo_name="sgn",
# This is an issue with the vectorised abs on CPU
handles_complex_extremal_values=False,
handles_large_floats=False,
skips=(
# Reference: https://github.com/pytorch/pytorch/issues/41245
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_extremal',
dtypes=[torch.bfloat16, torch.float16, torch.float32,
torch.float64]),
),
),
ElementwiseUnaryPythonRefInfo(
"_refs.signbit",
torch_opinfo_name="signbit",
),
ElementwiseUnaryPythonRefInfo(
"_refs.sin",
torch_opinfo_name="sin",
decorators=(precisionOverride({torch.bfloat16: 1e-2}),),
skips=(
# Fails on CUDA but passes on ROCm
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_large',
dtypes=(torch.cdouble,), device_type='cuda'),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_extremal',
dtypes=(torch.cfloat, torch.cdouble,), device_type='cpu',
active_if=IS_WINDOWS),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_large',
dtypes=(torch.cfloat, torch.cdouble,), device_type='cpu',
active_if=IS_WINDOWS),
),
),
ElementwiseUnaryPythonRefInfo(
"_refs.sinc",
torch_opinfo_name="sinc",
decorators=(precisionOverride({torch.bfloat16: 1e-2,
torch.float16: 1e-2}),),
skips=(
# Reference: https://github.com/pytorch/pytorch/issues/49133
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_small',
dtypes=[torch.cfloat]),
),
),
ElementwiseUnaryPythonRefInfo(
"_refs.sinh",
torch_opinfo_name="sinh",
decorators=(precisionOverride({torch.float16: 1e-2}),),
skips=(
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_extremal',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble],
active_if=(IS_MACOS or IS_WINDOWS)),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_large',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble],
active_if=(IS_MACOS or IS_WINDOWS)),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_large',
dtypes=(torch.cdouble,)),
# Reference: https://github.com/pytorch/pytorch/issues/48641
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_large',
device_type='cpu', dtypes=[torch.int8]),
),
),
PythonRefInfo(
"_refs.softmax",
torch_opinfo_name="softmax",
torch_opinfo_variant_name="with_dtype",
),
ElementwiseUnaryPythonRefInfo(
"_refs.sqrt",
torch_opinfo_name="sqrt",
decorators=(
precisionOverride({torch.bfloat16: 7e-2}),
DecorateInfo(
toleranceOverride({torch.chalf: tol(atol=1e-2, rtol=0)}),
'TestUnaryUfuncs', 'test_reference_numerics_large'),
),
skips=(
# Reference: https://github.com/pytorch/pytorch/issues/47358
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_large',
device_type='cpu', dtypes=(torch.cfloat, torch.cdouble),
active_if=IS_MACOS),
# Reference: https://github.com/pytorch/pytorch/pull/47293#issuecomment-721774436
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_large',
dtypes=(torch.bfloat16,)),
),
),
ElementwiseUnaryPythonRefInfo(
"_refs.square",
torch_opinfo_name="square",
decorators=(precisionOverride({torch.complex64: 3e-4, torch.bfloat16: 3e-1}),),
skips=(
# AssertionError: Reference result was farther (2.2417024338305655e-07) from the precise computation
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_python_ref_executor', dtypes=(torch.complex64,)),
# Reference: https://github.com/pytorch/pytorch/issues/52549
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_large',
dtypes=[torch.cfloat, torch.cdouble]),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_extremal',
device_type='cuda', dtypes=[torch.cfloat, torch.cdouble]),
),
),
ElementwiseUnaryPythonRefInfo(
"_refs.tan",
torch_opinfo_name="tan",
decorators=[
DecorateInfo(
toleranceOverride({torch.complex64: tol(atol=1e-04, rtol=1e-05)}),
'TestUnaryUfuncs', 'test_reference_numerics_extremal', device_type='cuda'),
],
skips=(
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_extremal',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble],
active_if=(IS_MACOS or IS_WINDOWS)),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_large',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble],
active_if=(IS_MACOS or IS_WINDOWS)),
)
),
ElementwiseUnaryPythonRefInfo(
"_refs.tanh",
torch_opinfo_name="tanh",
decorators=[
DecorateInfo(
toleranceOverride({torch.complex64: tol(atol=1e-04, rtol=2e-05)}),
'TestUnaryUfuncs', 'test_reference_numerics_extremal', device_type='cuda'),
],
skips=(
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_extremal',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble],
active_if=(IS_MACOS or IS_WINDOWS)),
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_large',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble],
active_if=(IS_MACOS or IS_WINDOWS)),
),
),
# Reference entry for trunc; it registers no skips or decorators of its own.
ElementwiseUnaryPythonRefInfo(
"_refs.trunc",
torch_opinfo_name="trunc",
# Fails on int32
# https://github.com/pytorch/pytorch/issues/85258
# NOTE(review): no skip/xfail is attached to the comment above in this
# entry — confirm whether the comment is stale or a skip is missing.
),
PythonRefInfo(
"_refs.special.log_softmax",
torch_opinfo_name="log_softmax", # alias
torch_opinfo_variant_name="with_dtype",
supports_out=False,
),
PythonRefInfo(
"_refs.special.softmax",
torch_opinfo_name="softmax", # alias
torch_opinfo_variant_name="with_dtype",
supports_out=False,
),
#
# Elementwise Unary Special OpInfos
#
ElementwiseUnaryPythonRefInfo(
"_refs.special.logit",
torch_opinfo_name="logit",
),
#
# Elementwise Unary nn.functional OpInfos
#
# Reference entry for nn.functional.alpha_dropout.
# Every decorator below is a unittest.skip carrying the same
# "Expected: dropout is not comparable" message. All apply on every device
# except the test_python_ref_executor skip, which is restricted to CUDA.
PythonRefInfo(
"_refs.nn.functional.alpha_dropout",
torch_opinfo_name="nn.functional.alpha_dropout",
decorators=(
DecorateInfo(unittest.skip("Expected: dropout is not comparable"),
'TestCommon',
'test_python_ref'),
# AssertionError: Tensor-likes are not close!
DecorateInfo(unittest.skip("Expected: dropout is not comparable"),
'TestCommon',
'test_python_ref_torch_fallback'),
DecorateInfo(unittest.skip("Expected: dropout is not comparable"),
'TestCommon',
'test_python_ref_executor', device_type='cuda'),
# AssertionError: Tensor-likes are not close!
DecorateInfo(unittest.skip("Expected: dropout is not comparable"),
'TestMathBits',
'test_neg_view'),
# AssertionError: Tensor-likes are not close!
DecorateInfo(unittest.skip("Expected: dropout is not comparable"),
'TestCommon',
'test_compare_cpu'),
)
),
ElementwiseUnaryPythonRefInfo(
"_refs.nn.functional.celu",
torch_opinfo_name="nn.functional.celu",
supports_out=True,
),
PythonRefInfo(
"_refs.nn.functional.channel_shuffle",
torch_opinfo_name="nn.functional.channel_shuffle",
supports_out=True,
),
ElementwiseUnaryPythonRefInfo(
"_refs.nn.functional.threshold",
torch_opinfo_name="nn.functional.threshold",
supports_out=True,
),
# Reference entry for nn.functional.dropout.
# Most decorators below are unconditional skips with the message
# "Expected: dropout is not comparable"; the two exceptions are called out inline.
PythonRefInfo(
"_refs.nn.functional.dropout",
torch_opinfo_name="nn.functional.dropout",
decorators=(
DecorateInfo(unittest.skip("Expected: dropout is not comparable"),
'TestCommon',
'test_python_ref'),
DecorateInfo(unittest.skip("Expected: dropout is not comparable"),
'TestCommon',
'test_python_ref_torch_fallback'),
DecorateInfo(unittest.skip("Expected: dropout is not comparable"),
'TestCommon',
'test_out'),
DecorateInfo(unittest.skip("Expected: dropout is not comparable"),
'TestCommon',
'test_out_warning'),
DecorateInfo(unittest.skip("Expected: dropout is not comparable"),
'TestMathBits',
'test_conj_view'),
DecorateInfo(unittest.skip("Expected: dropout is not comparable"),
'TestMathBits',
'test_neg_conj_view'),
DecorateInfo(unittest.skip("Expected: dropout is not comparable"),
'TestMathBits',
'test_neg_view'),
# dropout is not comparable
# (this one is an expectedFailure rather than a skip, unlike the entries above)
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref_executor'),
# Skipped with a different message from the entries above.
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'),
)
),
ElementwiseUnaryPythonRefInfo(
"_refs.nn.functional.elu",
torch_opinfo_name="nn.functional.elu",
supports_out=True,
decorators=[
DecorateInfo(
toleranceOverride({
torch.float16: tol(atol=1e-03, rtol=1.2e-03),
torch.bfloat16: tol(atol=1e-03, rtol=1.2e-03)
}),
'TestUnaryUfuncs', device_type='cuda',
), ],
),
ElementwiseUnaryPythonRefInfo(
"_refs.nn.functional.hardtanh",
torch_opinfo_name="nn.functional.hardtanh",
supports_out=True,
),
PythonRefInfo( # TODO: Port this to a UnaryOpInfo
"_refs.nn.functional.gelu",
torch_opinfo_name="nn.functional.gelu",
),
PythonRefInfo(
"_refs.nn.functional.layer_norm",
torch_opinfo_name="nn.functional.layer_norm",
skips=(
# Reference result was farther (3.5762786809723224e-07) from the precise computation
# than the torch result was (2.5068410824946596e-07)!
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_python_ref',
dtypes=(torch.float32,), device_type='cpu'),
),
),
PythonRefInfo(
"_refs.nn.functional.glu",
torch_opinfo_name="nn.functional.glu",
supports_out=True,
),
PythonRefInfo(
"_refs.nn.functional.pairwise_distance",
torch_opinfo_name="nn.functional.pairwise_distance",
supports_out=True,
),
PythonRefInfo(
"_refs.nn.functional.pdist",
torch_opinfo_name="nn.functional.pdist",
supports_out=True,
skips=(
# RuntimeError: no _refs support for torch.Tensor.index_select
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref'),
# Reference result was farther (1.946091651916504e-05) from the precise
# computation than the torch result was (1.1920928955078125e-06)!
DecorateInfo(
unittest.expectedFailure,
'TestCommon',
'test_python_ref_torch_fallback',
dtypes=(torch.float32,),
device_type='cpu',
),
)),
PythonRefInfo(
"_refs.nn.functional.leaky_relu",
torch_opinfo_name="nn.functional.leaky_relu",
supports_out=True,
),
PythonRefInfo(
"_refs.nn.functional.log_softmax",
torch_opinfo_name="log_softmax", # alias
torch_opinfo_variant_name="with_dtype",
supports_out=False,
),
PythonRefInfo(
"_refs.nn.functional.pixel_shuffle",
torch_opinfo_name="nn.functional.pixel_shuffle",
),
PythonRefInfo(
"_refs.nn.functional.pixel_unshuffle",
torch_opinfo_name="nn.functional.pixel_unshuffle",
),
PythonRefInfo(
"_refs.nn.functional.poisson_nll_loss",
torch_opinfo_name="nn.functional.poisson_nll_loss",
),
ElementwiseUnaryPythonRefInfo(
"_refs.nn.functional.prelu",
torch_opinfo_name="nn.functional.prelu",
),
ElementwiseUnaryPythonRefInfo(
"_refs.nn.functional.relu",
torch_opinfo_name="nn.functional.relu",
supports_out=True,
),
ElementwiseUnaryPythonRefInfo(
"_refs.nn.functional.relu6",
torch_opinfo_name="nn.functional.relu6",
supports_out=True,
),
ElementwiseUnaryPythonRefInfo(
"_refs.nn.functional.mish",
torch_opinfo_name="nn.functional.mish",
supports_out=True,
decorators=[
DecorateInfo(
toleranceOverride({torch.float16: tol(atol=1e-02, rtol=1e-03)}),
'TestUnaryUfuncs',), ],
),
ElementwiseUnaryPythonRefInfo(
"_refs.nn.functional.selu",
torch_opinfo_name="nn.functional.selu",
supports_out=True,
decorators=[
DecorateInfo(
toleranceOverride({
torch.float16: tol(atol=1e-2, rtol=1.8e-2),
torch.bfloat16: tol(atol=1e-2, rtol=1.8e-2)
}),
'TestUnaryUfuncs', device_type='cuda',
), ],
),
PythonRefInfo(
"_refs.nn.functional.softmax",
torch_opinfo_name="softmax", # alias
torch_opinfo_variant_name="with_dtype",
supports_out=False,
),
PythonRefInfo(
"_refs.nn.functional.softmin",
torch_opinfo_name="nn.functional.softmin",
torch_opinfo_variant_name="with_dtype",
supports_out=False,
),
ElementwiseUnaryPythonRefInfo(
"_refs.nn.functional.softplus",
torch_opinfo_name="nn.functional.softplus",
),
PythonRefInfo(
"_refs.nn.functional.l1_loss",
torch_opinfo_name="nn.functional.l1_loss",
),
PythonRefInfo(
"_refs.nn.functional.margin_ranking_loss",
torch_opinfo_name="nn.functional.margin_ranking_loss",
),
PythonRefInfo(
"_refs.nn.functional.mse_loss",
torch_opinfo_name="nn.functional.mse_loss",
),
PythonRefInfo(
"_refs.nn.functional.smooth_l1_loss",
torch_opinfo_name="nn.functional.smooth_l1_loss",
),
PythonRefInfo(
"_refs.nn.functional.hinge_embedding_loss",
torch_opinfo_name="nn.functional.hinge_embedding_loss"
),
PythonRefInfo(
"_refs.nn.functional.nll_loss",
torch_opinfo_name="nn.functional.nll_loss",
# The corresponding PyTorch op doesn't support out. But the ref is
# registered as a decomp and ATen has an out variant.
supports_out=True,
# For simpler indexing, we flatten target indices, then reshape the result tensor.
# This creates inconsistent view state with reference impl.
validate_view_consistency=False,
skips=(
# RuntimeError: It appears that you're trying to get value out of a tracing tensor - erroring out!
DecorateInfo(
unittest.expectedFailure, 'TestCommon', 'test_python_ref_executor', device_type="cuda"
),
),
),
PythonRefInfo(
"_refs.nn.functional.huber_loss",
torch_opinfo_name="nn.functional.huber_loss",
# The corresponding PyTorch op doesn't support out. But the ref is
# registered as a decomp and ATen has an out variant.
supports_out=True,
),
ElementwiseUnaryPythonRefInfo(
"_refs.nn.functional.tanhshrink",
torch_opinfo_name="nn.functional.tanhshrink",
decorators=[
DecorateInfo(unittest.skip("Skipped!"), 'TestUnaryUfuncs',
'test_reference_numerics_normal',
device_type='cpu', dtypes=[torch.cfloat, torch.cdouble]),
DecorateInfo(
toleranceOverride({torch.bfloat16: tol(atol=1e-02, rtol=1.6e-02),
torch.complex64: tol(atol=6e-04, rtol=1e-05)}),
'TestUnaryUfuncs', 'test_reference_numerics_extremal', device_type='cuda'),
],
skips=(
# in each case, pytorch will produce a nan while numpy will not
DecorateInfo(unittest.skip("Fails on some jobs works on others!"),
'TestUnaryUfuncs', "test_reference_numerics_large",
dtypes=(torch.complex64, torch.complex128),
active_if=(IS_MACOS)),
DecorateInfo(unittest.skip("Fails on some jobs works on others!"),
'TestUnaryUfuncs', "test_reference_numerics_extremal",
dtypes=(torch.complex64, torch.complex128),
device_type='cpu',
active_if=(IS_MACOS or IS_WINDOWS)),
),
),
ElementwiseUnaryPythonRefInfo(
"_refs.nn.functional.hardshrink",
torch_opinfo_name="nn.functional.hardshrink",
),
ElementwiseUnaryPythonRefInfo(
"_refs.nn.functional.softshrink",
torch_opinfo_name="nn.functional.softshrink",
),
#
# Elementwise Binary Reference OpInfos
#
ElementwiseBinaryPythonRefInfo(
"_refs.add",
torch_opinfo_name="add",
# https://github.com/pytorch/pytorch/issues/76944
supports_two_python_scalars=True,
supports_one_python_scalar=True,
decorators=(
DecorateInfo(
toleranceOverride({torch.chalf: tol(atol=1e-2, rtol=0)}),
'TestBinaryUfuncs', 'test_reference_numerics'),
),
skips=(
DecorateInfo(unittest.skip("Skipped!"),
'TestBinaryUfuncs',
'test_reference_numerics_extremal_values',
dtypes=(torch.complex64, torch.complex128)),
),
),
ElementwiseBinaryPythonRefInfo(
"_refs.atan2",
torch_opinfo_name="atan2",
),
ElementwiseBinaryPythonRefInfo(
"_refs.bitwise_and",
torch_opinfo_name="bitwise_and",
),
ElementwiseBinaryPythonRefInfo(
"_refs.bitwise_left_shift",
torch_opinfo_name="bitwise_left_shift",
skips=(
# https://github.com/pytorch/pytorch/issues/70904
DecorateInfo(unittest.skip("Some inputs produce undefined outputs"), 'TestCommon', 'test_compare_cpu'),
),
),
ElementwiseBinaryPythonRefInfo(
"_refs.bitwise_right_shift",
torch_opinfo_name="bitwise_right_shift",
skips=(
# https://github.com/pytorch/pytorch/issues/70904
DecorateInfo(unittest.skip("Skipped some inputs produce undefined outputs"), 'TestCommon', 'test_compare_cpu'),
),
),
ElementwiseBinaryPythonRefInfo(
"_refs.bitwise_or",
torch_opinfo_name="bitwise_or",
),
ElementwiseBinaryPythonRefInfo(
"_refs.bitwise_xor",
torch_opinfo_name="bitwise_xor",
),
ElementwiseBinaryPythonRefInfo(
"_refs.copysign",
torch_opinfo_name="copysign",
skips=(
# RuntimeError: Expected divisor (b) to be on the same device (cuda:0) as dividend (a), but it is found on cpu!
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs', 'test_type_promotion'),
# FIXME output 0: meta disagrees with real impl
DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_binary_ufuncs_mixed_dtype'),
)
),
ElementwiseBinaryPythonRefInfo(
"_refs.div",
torch_opinfo_name="div",
torch_opinfo_variant_name="no_rounding_mode",
# https://github.com/pytorch/pytorch/issues/76944
supports_two_python_scalars=True,
supports_one_python_scalar=True,
skips=(
# NotImplementedError: argument of type: <class 'complex'>
DecorateInfo(
unittest.skip("Skipped!"), 'TestCommon', 'test_python_ref_executor',
dtypes=(torch.complex32, torch.complex64, torch.complex128,)
),
# Reference result was farther (0.7433461727239705) from the precise
# computation than the torch result was (nan)!
DecorateInfo(
unittest.expectedFailure, 'TestCommon', 'test_python_ref',
dtypes=(torch.complex32,), device_type="cuda"
),
# Reference result was farther (0.7433461727239705) from the precise
# computation than the torch result was (nan)!
DecorateInfo(
unittest.expectedFailure, 'TestCommon', 'test_python_ref_torch_fallback',
dtypes=(torch.complex32,), device_type="cuda"
),
),
),
ElementwiseBinaryPythonRefInfo(
"_refs.div",
torch_opinfo_name="div",
torch_opinfo_variant_name="trunc_rounding",
# https://github.com/pytorch/pytorch/issues/76944
supports_two_python_scalars=True,
supports_one_python_scalar=True,
decorators=(
# See https://github.com/pytorch/pytorch/issues/111126
DecorateInfo(unittest.expectedFailure, 'TestBinaryUfuncs', 'test_type_promotion'),
),
),
ElementwiseBinaryPythonRefInfo(
"_refs.div",
torch_opinfo_name="div",
torch_opinfo_variant_name="floor_rounding",
# https://github.com/pytorch/pytorch/issues/76944
supports_two_python_scalars=True,
supports_one_python_scalar=True,
decorators=(
# See https://github.com/pytorch/pytorch/issues/111126
DecorateInfo(unittest.expectedFailure, 'TestBinaryUfuncs', 'test_type_promotion'),
# Reference result was farther (nan) from the precise computation than the
# torch result was (inf)!
DecorateInfo(
unittest.expectedFailure,
"TestCommon",
"test_python_ref",
dtypes=(torch.bfloat16,),
device_type="cpu",
active_if=not IS_S390X,
),
),
),
ElementwiseBinaryPythonRefInfo(
"_refs.eq",
torch_opinfo_name="eq",
),
ElementwiseBinaryPythonRefInfo(
"_refs.float_power",
torch_opinfo_name="float_power",
skips=(
# Test doesn't account for float -> double type promotion
DecorateInfo(unittest.expectedFailure, 'TestBinaryUfuncs', 'test_type_promotion'),
# Complex values error with: Greatest absolute difference: nan at index
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs',
'test_reference_numerics_small_values',
dtypes=[torch.complex64, torch.complex128]),
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs',
'test_reference_numerics_large_values',
dtypes=[torch.complex64, torch.complex128]),
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs',
'test_reference_numerics_extremal_values',
dtypes=[torch.complex64, torch.complex128]),
),
),
ElementwiseBinaryPythonRefInfo(
"_refs.logaddexp",
torch_opinfo_name="logaddexp",
skips=(
# failure due to mismatch in edge cases, which boils down to what torch.exp(inf + infj) should be
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref', device_type='cpu',
dtypes=(torch.complex64, torch.complex128)),
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref_torch_fallback', device_type='cpu',
dtypes=(torch.complex64, torch.complex128)),
),
),
PythonRefInfo(
"_refs.logaddexp2",
torch_opinfo_name="logaddexp2",
),
ElementwiseBinaryPythonRefInfo(
"_refs.floor_divide",
torch_opinfo_name="floor_divide",
rhs_make_tensor_kwargs=dict(exclude_zero=True),
# https://github.com/pytorch/pytorch/issues/76944
supports_two_python_scalars=True,
supports_one_python_scalar=True,
# bfloat16 floor_divide compared with a float32 reference works inconsistently
skips=(
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_python_ref',
dtypes=(torch.bfloat16,)),
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_python_ref_torch_fallback',
dtypes=(torch.bfloat16,)),
# bfloat16 floor_divide compared with a float32 reference works inconsistently
DecorateInfo(unittest.skip('Skipped!'), 'TestBinaryUfuncs',
dtypes=(torch.bfloat16,)),
# int8 floor divide has different results for -128 // -1 vs. NumPy
DecorateInfo(unittest.skip('Skipped!'), 'TestBinaryUfuncs',
'test_reference_numerics_small_values',
dtypes=(torch.int8,)),
# The following test fails on some jobs
DecorateInfo(unittest.skip('Skipped!'), 'TestBinaryUfuncs',
'test_reference_numerics_extremal_values',
dtypes=(torch.float16,)),
DecorateInfo(toleranceOverride({torch.float16: tol(atol=1e-3, rtol=5e-3)}),
'TestBinaryUfuncs', 'test_reference_numerics'),
# FIXME output 0: meta disagrees with real impl
DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_binary_ufuncs_mixed_dtype'),
),
),
ElementwiseBinaryPythonRefInfo(
"_refs.fmax",
torch_opinfo_name="fmax",
supports_rhs_python_scalar=False,
),
ElementwiseBinaryPythonRefInfo(
"_refs.fmin",
torch_opinfo_name="fmin",
supports_rhs_python_scalar=False,
),
ElementwiseBinaryPythonRefInfo(
"_refs.fmod",
torch_opinfo_name="fmod",
rhs_make_tensor_kwargs={'exclude_zero': True},
supports_rhs_python_scalar=True,
skips=(
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_python_ref',
dtypes=(torch.bfloat16,), device_type='cpu'),
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_python_ref_torch_fallback',
dtypes=(torch.bfloat16,), device_type='cpu'),
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs',
'test_contig_vs_every_other',
dtypes=(torch.bfloat16,)),
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs',
'test_non_contig',
dtypes=(torch.bfloat16,)),
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs',
'test_reference_numerics',
dtypes=(torch.bfloat16,)),
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs',
'test_reference_numerics_small_values',
dtypes=(torch.uint8,)),
),
),
ElementwiseBinaryPythonRefInfo(
"_refs.gcd",
torch_opinfo_name="gcd",
skips=(
DecorateInfo(unittest.expectedFailure,
'TestBinaryUfuncs',
'test_reference_numerics_small_values',
dtypes=(torch.int8,)),
),
),
ElementwiseBinaryPythonRefInfo(
"_refs.ge",
torch_opinfo_name="ge",
),
ElementwiseBinaryPythonRefInfo(
"_refs.gt",
torch_opinfo_name="gt",
),
ElementwiseBinaryPythonRefInfo(
"_refs.heaviside",
torch_opinfo_name="heaviside",
supports_rhs_python_scalar=False,
skips=(
# PyTorch's heaviside does not appear to propagate NaNs
DecorateInfo(unittest.skip("Skipped!"),
'TestBinaryUfuncs',
'test_reference_numerics_extremal_values'),
),
),
ElementwiseBinaryPythonRefInfo(
"_refs.hypot",
torch_opinfo_name="hypot",
supports_rhs_python_scalar=False,
),
ElementwiseBinaryPythonRefInfo(
"_refs.igamma",
torch_opinfo_name="igamma",
),
ElementwiseBinaryPythonRefInfo(
"_refs.igammac",
torch_opinfo_name="igammac",
),
ElementwiseBinaryPythonRefInfo(
"_refs.isclose",
torch_opinfo_name="isclose",
skips=(
# Intentional xfail -- isclose does not type promote
DecorateInfo(unittest.expectedFailure, 'TestBinaryUfuncs', 'test_type_promotion'),
DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_binary_ufuncs_mixed_dtype'),
DecorateInfo(unittest.skip("Skipped!"),
'TestBinaryUfuncs',
'test_reference_numerics_extremal_values'),
),
),
ElementwiseBinaryPythonRefInfo(
"_refs.lcm",
torch_opinfo_name="lcm",
),
ElementwiseBinaryPythonRefInfo(
"_refs.le",
torch_opinfo_name="le",
),
ElementwiseBinaryPythonRefInfo(
"_refs.logical_and",
torch_opinfo_name="logical_and",
),
ElementwiseUnaryPythonRefInfo(
"_refs.logical_not",
torch_opinfo_name="logical_not",
),
ElementwiseBinaryPythonRefInfo(
"_refs.logical_or",
torch_opinfo_name="logical_or",
),
ElementwiseBinaryPythonRefInfo(
"_refs.logical_xor",
torch_opinfo_name="logical_xor",
),
ElementwiseBinaryPythonRefInfo(
"_refs.lt",
torch_opinfo_name="lt",
),
ElementwiseBinaryPythonRefInfo(
"_refs.maximum",
torch_opinfo_name="maximum",
skips=(
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref_errors'),
),
),
ElementwiseBinaryPythonRefInfo(
"_refs.minimum",
torch_opinfo_name="minimum",
skips=(
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref_errors'),
),
),
ElementwiseBinaryPythonRefInfo(
"_refs.mul",
torch_opinfo_name="mul",
# https://github.com/pytorch/pytorch/issues/76944
supports_two_python_scalars=True,
supports_one_python_scalar=True,
skips=(
# Reference result was farther (0.0) from the precise computation
# than the torch result was (nan)!
DecorateInfo(
unittest.expectedFailure, 'TestCommon', 'test_python_ref_executor',
dtypes=(torch.complex32,),
),
# Reference result was farther (0.0) from the precise computation
# than the torch result was (nan)!
DecorateInfo(
unittest.expectedFailure, 'TestCommon', 'test_python_ref',
dtypes=(torch.complex32,), device_type='cuda'
),
# Reference result was farther (0.0) from the precise computation
# than the torch result was (nan)!
DecorateInfo(
unittest.expectedFailure, 'TestCommon', 'test_python_ref_torch_fallback',
dtypes=(torch.complex32,), device_type='cuda'
),
)
),
ElementwiseBinaryPythonRefInfo(
"_refs.ne",
torch_opinfo_name="ne",
),
ElementwiseBinaryPythonRefInfo(
"_refs.nextafter",
torch_opinfo_name="nextafter",
),
ElementwiseBinaryPythonRefInfo(
"_refs.pow",
torch_opinfo_name="pow",
decorators=(
DecorateInfo(
toleranceOverride({torch.complex64: tol(atol=1e-4, rtol=1.3e-05)}),
'TestBinaryUfuncs', 'test_reference_numerics'),
DecorateInfo(
toleranceOverride({torch.complex64: tol(atol=1e-4, rtol=1.3e-05),
torch.complex128: tol(atol=1e-4, rtol=1.3e-05)}),
'TestBinaryUfuncs', 'test_scalar_support'),
),
skips=(
# Reference result was farther (inf) from the precise
# computation than the torch result was (nan)!
DecorateInfo(
unittest.expectedFailure, 'TestCommon', 'test_python_ref_executor',
dtypes=(torch.complex32,),
),
# Reference result was farther (inf) from the precise
# computation than the torch result was (nan)!
DecorateInfo(
unittest.expectedFailure, 'TestCommon', 'test_python_ref',
dtypes=(torch.complex32,), device_type="cuda"
),
# Reference result was farther (inf) from the precise
# computation than the torch result was (nan)!
DecorateInfo(
unittest.expectedFailure, 'TestCommon', 'test_python_ref_torch_fallback',
dtypes=(torch.complex32,), device_type="cuda"
),
# Expected failures for integer dtypes: they are raised to negative powers, which causes an error
DecorateInfo(unittest.expectedFailure, 'TestBinaryUfuncs',
'test_reference_numerics_small_values',
dtypes=[torch.int8, torch.int16, torch.int32, torch.int64]),
DecorateInfo(unittest.expectedFailure, 'TestBinaryUfuncs',
'test_reference_numerics_large_values',
dtypes=[torch.int16, torch.int32, torch.int64]),
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs',
'test_reference_numerics',
dtypes=(torch.complex32,)),
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs',
'test_reference_numerics_small_values',
dtypes=(torch.complex32, torch.complex64, torch.complex128)),
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs',
'test_reference_numerics_large_values',
dtypes=(torch.complex32, torch.complex64, torch.complex128)),
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs',
'test_reference_numerics_extremal_values',
dtypes=(torch.complex32, torch.complex64, torch.complex128)),
),
),
ElementwiseBinaryPythonRefInfo(
"_refs.remainder",
torch_opinfo_name="remainder",
skips=(
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_python_ref',
dtypes=(torch.bfloat16,), device_type='cpu'),
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_python_ref_torch_fallback',
dtypes=(torch.bfloat16,), device_type='cpu'),
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs',
'test_reference_numerics',
dtypes=(torch.bfloat16,)),
DecorateInfo(unittest.skip("Skipped!"), 'TestBinaryUfuncs',
'test_reference_numerics_small_values',
dtypes=(torch.uint8,)),
),
),
ElementwiseBinaryPythonRefInfo(
"_refs.rsub",
torch_opinfo_name="rsub",
# https://github.com/pytorch/pytorch/issues/76944
skips=(
# Reference result was farther (nan) from the precise computation than
# the torch result was (nan)!
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref',
dtypes=(torch.chalf,), device_type='cpu'),
# Reference result was farther (nan) from the precise computation than
# the torch result was (nan)!
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref_torch_fallback',
dtypes=(torch.chalf,), device_type='cpu'),
),
),
ElementwiseBinaryPythonRefInfo(
"_refs.sub",
torch_opinfo_name="sub",
# https://github.com/pytorch/pytorch/issues/76944
supports_two_python_scalars=True,
supports_one_python_scalar=True,
decorators=(
DecorateInfo(
toleranceOverride({torch.float16: tol(atol=1e-2, rtol=0),
torch.bfloat16: tol(atol=1e-5, rtol=5e-3),
torch.complex32: tol(atol=1e-5, rtol=1e-3)}),
'TestBinaryUfuncs', 'test_reference_numerics'),
DecorateInfo(
toleranceOverride({torch.chalf: tol(atol=1e-2, rtol=0)}),
'TestCommon', 'test_complex_half_reference_testing', device_type='cpu'),
DecorateInfo(
toleranceOverride({torch.chalf: tol(atol=5e-3, rtol=0)}),
'TestDecomp', 'test_comprehensive', device_type='cpu'),
DecorateInfo(
toleranceOverride({torch.chalf: tol(atol=5e-3, rtol=0)}),
'TestDecomp', 'test_quick', device_type='cpu'),
),
skips=(
DecorateInfo(unittest.skip("Skipped!"),
'TestBinaryUfuncs',
'test_reference_numerics',
dtypes=(torch.uint8,)),
DecorateInfo(unittest.skip("Skipped!"),
'TestBinaryUfuncs',
'test_reference_numerics_small_values',
dtypes=(torch.uint8,)),
),
),
ElementwiseBinaryPythonRefInfo(
"_refs.true_divide",
torch_opinfo_name="true_divide",
# https://github.com/pytorch/pytorch/issues/76944
supports_two_python_scalars=True,
supports_one_python_scalar=True,
skips=(
# Reference result was farther (0.7433461727239705) from the precise
# computation than the torch result was (nan)!
DecorateInfo(
unittest.expectedFailure, 'TestCommon', 'test_python_ref_executor',
dtypes=(torch.complex32,),
),
# Reference result was farther (0.7433461727239705) from the precise
# computation than the torch result was (nan)!
DecorateInfo(
unittest.expectedFailure, 'TestCommon', 'test_python_ref',
dtypes=(torch.complex32,), device_type="cuda"
),
# Reference result was farther (0.7433461727239705) from the precise
# computation than the torch result was (nan)!
DecorateInfo(
unittest.expectedFailure, 'TestCommon', 'test_python_ref_torch_fallback',
dtypes=(torch.complex32,), device_type="cuda"
),
),
),
#
# Elementwise Ternary Reference OpInfos
#
PythonRefInfo(
"_refs.addcdiv",
torch_opinfo_name="addcdiv",
),
PythonRefInfo(
"_refs.addcmul",
torch_opinfo_name="addcmul",
skips=(
# Reference result was farther (1.3343989849090576e-05)
# from the precise computation than the torch result
# was (9.592622518539429e-06)!
# FIXME: enable dtype-based tolerances in test_ops.py:TestCommon._ref_test_helper
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_python_ref',
dtypes=(torch.float16,), device_type="cpu"),
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_python_ref_torch_fallback',
dtypes=(torch.float16,), device_type="cpu"),
),
),
ElementwiseBinaryPythonRefInfo(
"_refs.clamp_min",
torch_opinfo_name="clamp_min",
skips=(
# test error disabled since rhs non-tensor python scalar is supported
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref_errors'),
),
),
ElementwiseBinaryPythonRefInfo(
"_refs.clamp_max",
torch_opinfo_name="clamp_max",
skips=(
# test error disabled since rhs non-tensor python scalar is supported
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref_errors'),
),
),
PythonRefInfo(
"_refs.clamp",
torch_opinfo_name="clamp",
),
PythonRefInfo(
"_refs.nn.functional.triplet_margin_loss",
torch_opinfo_name="nn.functional.triplet_margin_loss",
supports_out=False,
# TODO: Uses minimum and clamp
skips=(
# AssertionError: Tensor-likes are not close!
# Greatest absolute difference: 6.103515625e-05 at index (4,) (up to 1e-05 allowed)
# Greatest relative difference: 8.519846983548175e-06 at index (4,) (up to 1.3e-06 allowed)
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_python_ref',
dtypes=(torch.uint8,), device_type="cpu"),
)
),
ElementwiseBinaryPythonRefInfo(
"_refs.xlogy",
torch_opinfo_name="xlogy",
supports_one_python_scalar=True,
),
#
# Elementwise Binary Special OpInfos
#
ElementwiseBinaryPythonRefInfo(
"_refs.special.xlog1py",
torch_opinfo_name="special.xlog1py",
supports_one_python_scalar=True,
),
#
# Data Conversion & Data Movement OpInfos
#
ElementwiseUnaryPythonRefInfo(
"_refs._conversions.bfloat16",
torch_opinfo_name="bfloat16",
# TODO: If self already has the correct dtype and device, then self is
# returned ignoring memory_format.
# https://github.com/pytorch/pytorch/issues/86558
validate_view_consistency=False,
),
ElementwiseUnaryPythonRefInfo(
"_refs._conversions.bool",
torch_opinfo_name="bool",
# TODO: If self already has the correct dtype and device, then self is
# returned ignoring memory_format.
# https://github.com/pytorch/pytorch/issues/86558
validate_view_consistency=False,
),
ElementwiseUnaryPythonRefInfo(
"_refs._conversions.byte",
torch_opinfo_name="byte",
# TODO: If self already has the correct dtype and device, then self is
# returned ignoring memory_format.
# https://github.com/pytorch/pytorch/issues/86558
validate_view_consistency=False,
skips=(
DecorateInfo(unittest.skip('Overflow when downcasting signed type is undefined'), 'TestCommon', 'test_compare_cpu'),
)
),
ElementwiseUnaryPythonRefInfo(
"_refs._conversions.char",
torch_opinfo_name="char",
# TODO: If self already has the correct dtype and device, then self is
# returned ignoring memory_format.
# https://github.com/pytorch/pytorch/issues/86558
validate_view_consistency=False,
skips=(
DecorateInfo(unittest.skip('Overflow when downcasting signed type is undefined'), 'TestCommon', 'test_compare_cpu'),
)
),
ElementwiseBinaryPythonRefInfo(
"_refs._conversions.complex",
torch_opinfo_name="complex",
error_inputs_func=partial(error_inputs_complex, is_ref=True),
skips=(
# Tests don't account for complex's type promotion semantics
DecorateInfo(unittest.expectedFailure, 'TestBinaryUfuncs', 'test_type_promotion'),
DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_binary_ufuncs_mixed_dtype'),
)
),
ElementwiseBinaryPythonRefInfo(
"_refs._conversions.polar",
torch_opinfo_name="polar",
skips=(
# Tests don't account for complex's type promotion semantics
DecorateInfo(unittest.expectedFailure, 'TestBinaryUfuncs', 'test_type_promotion'),
DecorateInfo(unittest.expectedFailure, 'TestMeta', 'test_binary_ufuncs_mixed_dtype'),
)
),
ElementwiseUnaryPythonRefInfo(
"_refs._conversions.double",
torch_opinfo_name="double",
# TODO: If self already has the correct dtype and device, then self is
# returned ignoring memory_format.
# https://github.com/pytorch/pytorch/issues/86558
validate_view_consistency=False,
),
ElementwiseUnaryPythonRefInfo(
"_refs._conversions.float",
torch_opinfo_name="float",
# TODO: If self already has the correct dtype and device, then self is
# returned ignoring memory_format.
# https://github.com/pytorch/pytorch/issues/86558
validate_view_consistency=False,
),
ElementwiseUnaryPythonRefInfo(
"_refs._conversions.half",
torch_opinfo_name="half",
# TODO: If self already has the correct dtype and device, then self is
# returned ignoring memory_format.
# https://github.com/pytorch/pytorch/issues/86558
validate_view_consistency=False,
),
ElementwiseUnaryPythonRefInfo(
"_refs._conversions.int",
torch_opinfo_name="int",
# TODO: If self already has the correct dtype and device, then self is
# returned ignoring memory_format.
# https://github.com/pytorch/pytorch/issues/86558
validate_view_consistency=False,
skips=(
DecorateInfo(unittest.skip('Overflow when downcasting signed type is undefined'), 'TestCommon', 'test_compare_cpu'),
)
),
ElementwiseUnaryPythonRefInfo(
"_refs._conversions.long",
torch_opinfo_name="long",
# TODO: If self already has the correct dtype and device, then self is
# returned ignoring memory_format.
# https://github.com/pytorch/pytorch/issues/86558
validate_view_consistency=False,
skips=(
DecorateInfo(unittest.skip('Overflow when downcasting signed type is undefined'), 'TestCommon', 'test_compare_cpu'),
)
),
ElementwiseUnaryPythonRefInfo(
"_refs._conversions.short",
torch_opinfo_name="short",
# TODO: If self already has the correct dtype and device, then self is
# returned ignoring memory_format.
# https://github.com/pytorch/pytorch/issues/86558
validate_view_consistency=False,
skips=(
DecorateInfo(unittest.skip('Overflow when downcasting signed type is undefined'), 'TestCommon', 'test_compare_cpu'),
)
),
ElementwiseUnaryPythonRefInfo(
"_refs._conversions.chalf",
torch_opinfo_name="chalf",
# TODO: If self already has the correct dtype and device, then self is
# returned ignoring memory_format.
# https://github.com/pytorch/pytorch/issues/86558
validate_view_consistency=False,
),
ElementwiseUnaryPythonRefInfo(
"_refs._conversions.cfloat",
torch_opinfo_name="cfloat",
# TODO: If self already has the correct dtype and device, then self is
# returned ignoring memory_format.
# https://github.com/pytorch/pytorch/issues/86558
validate_view_consistency=False,
),
ElementwiseUnaryPythonRefInfo(
"_refs._conversions.cdouble",
torch_opinfo_name="cdouble",
# TODO: If self already has the correct dtype and device, then self is
# returned ignoring memory_format.
# https://github.com/pytorch/pytorch/issues/86558
validate_view_consistency=False,
),
PythonRefInfo(
"_refs.clone",
torch_opinfo_name="clone",
),
#
# View & Shape OpInfos
#
PythonRefInfo(
"_refs.alias_copy",
torch_opinfo_name="alias_copy",
supports_out=True,
),
PythonRefInfo(
"_refs.atleast_1d",
torch_opinfo_name="atleast_1d",
validate_view_consistency=False,
),
PythonRefInfo(
"_refs.atleast_2d",
torch_opinfo_name="atleast_2d",
validate_view_consistency=False,
),
PythonRefInfo(
"_refs.atleast_3d",
torch_opinfo_name="atleast_3d",
validate_view_consistency=False,
),
PythonRefInfo(
"_refs.as_strided",
torch_opinfo_name="as_strided",
# FIXME: doesn't support chalf
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
skips=(
# cloned_mutable_input.is_same(returned_output) INTERNAL ASSERT FAILED
DecorateInfo(unittest.skip("Errors when storage_offset is included"), 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.skip("Errors when storage_offset is included"), 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.skip("Errors when storage_offset is included"), 'TestMathBits', 'test_neg_conj_view'),
),
),
PythonRefInfo(
"_refs.as_strided_copy",
torch_opinfo_name="as_strided_copy",
supports_out=True,
# FIXME: doesn't support chalf
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
skips=(
# cloned_mutable_input.is_same(returned_output) INTERNAL ASSERT FAILED
DecorateInfo(unittest.skip("Errors when storage_offset is included"), 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.skip("Errors when storage_offset is included"), 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.skip("Errors when storage_offset is included"), 'TestMathBits', 'test_neg_conj_view'),
# The view function this decomposes into does not have a ref
DecorateInfo(unittest.expectedFailure, "TestCommon", "test_python_ref"),
),
),
PythonRefInfo(
"_refs.as_strided",
torch_opinfo_name="as_strided",
torch_opinfo_variant_name="partial_views",
# FIXME: doesn't support chalf
dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
skips=(
# cloned_mutable_input.is_same(returned_output) INTERNAL ASSERT FAILED
DecorateInfo(unittest.skip("Errors when storage_offset is included"), 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.skip("Errors when storage_offset is included"), 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.skip("Errors when storage_offset is included"), 'TestMathBits', 'test_neg_conj_view'),
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_compare_cpu'),
),
),
PythonRefInfo(
"_refs.as_strided_scatter",
torch_opinfo_name="as_strided_scatter",
# returns a view of an intermediate tensor (as_strided)
validate_view_consistency=False,
),
PythonRefInfo(
"_refs.block_diag",
torch_opinfo_name="block_diag",
),
PythonRefInfo(
"_refs.broadcast_shapes",
torch_opinfo_name="broadcast_shapes",
),
PythonRefInfo(
"_refs.broadcast_tensors",
torch_opinfo_name="broadcast_tensors",
),
PythonRefInfo(
"_refs.broadcast_to",
torch_opinfo_name="broadcast_to",
),
PythonRefInfo(
"_refs.cat",
torch_opinfo_name="cat",
skips=(
# FIXME: AssertionError: RuntimeError not raised
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref_errors'),
),
),
PythonRefInfo(
"_refs.chunk",
torch_opinfo_name="chunk",
),
PythonRefInfo(
"_refs.column_stack",
torch_opinfo_name="column_stack",
),
ElementwiseUnaryPythonRefInfo(
"_refs.conj",
torch_opinfo_name="conj",
),
PythonRefInfo(
"_refs.constant_pad_nd",
torch_opinfo_name="constant_pad_nd",
),
PythonRefInfo(
"_refs.contiguous",
torch_opinfo_name="contiguous",
),
ElementwiseUnaryPythonRefInfo(
"_refs.deg2rad",
torch_opinfo_name="deg2rad",
decorators=(precisionOverride({torch.bfloat16: 7e-1,
torch.float16: 7e-1}),),
),
PythonRefInfo(
"_refs.dsplit",
torch_opinfo_name="dsplit",
),
PythonRefInfo(
"_refs.diag",
torch_opinfo_name="diag",
),
PythonRefInfo(
"_refs.diagonal",
torch_opinfo_name="diagonal",
),
PythonRefInfo(
"_refs.diagonal_copy",
torch_opinfo_name="diagonal_copy",
supports_out=True,
),
PythonRefInfo(
"_refs.diagonal_scatter",
torch_opinfo_name="diagonal_scatter",
supports_out=True,
# returns a view of an intermediate tensor (as_strided)
validate_view_consistency=False,
),
PythonRefInfo(
"_refs.diag_embed",
torch_opinfo_name="diag_embed",
supports_out=True,
),
PythonRefInfo(
"_refs.dstack",
torch_opinfo_name="dstack",
skips=(
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref_errors'),
),
),
PythonRefInfo(
"_refs.expand",
torch_opinfo_name="expand",
),
PythonRefInfo(
"_refs.expand_as",
torch_opinfo_name="expand_as",
),
PythonRefInfo(
"_refs.expand_copy",
torch_opinfo_name="expand_copy",
supports_out=True,
),
PythonRefInfo(
"_refs.flatten",
torch_opinfo_name="flatten",
),
PythonRefInfo(
"_refs.flip",
torch_opinfo_name="flip",
),
PythonRefInfo(
"_refs.fliplr",
torch_opinfo_name="fliplr",
),
PythonRefInfo(
"_refs.flipud",
torch_opinfo_name="flipud",
),
PythonRefInfo(
"_refs.hstack",
torch_opinfo_name="hstack",
skips=(
# https://github.com/pytorch/pytorch/issues/78613
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref_errors'),
),
),
PythonRefInfo(
"_refs.narrow",
torch_opinfo_name="narrow",
error_inputs_func=partial(error_inputs_narrow_narrow_copy, is_narrow=True, is_ref=True),
),
PythonRefInfo(
"_refs.narrow_copy",
torch_opinfo_name="narrow_copy",
supports_out=True,
error_inputs_func=partial(error_inputs_narrow_narrow_copy, is_narrow=False, is_ref=True),
skips=(
# The view function this decomposes into does not have a ref
DecorateInfo(unittest.expectedFailure, "TestCommon", "test_python_ref"),
),
),
PythonRefInfo(
"_refs.nn.functional.group_norm",
torch_opinfo_name="nn.functional.group_norm",
validate_view_consistency=False,
),
PythonRefInfo(
"_refs.native_layer_norm",
torch_opinfo_name="native_layer_norm",
skips=(
DecorateInfo(unittest.skip("Skipped!"), "TestCommon", "test_python_ref",
device_type="cpu", dtypes=(torch.float32,)),
DecorateInfo(unittest.skip("Skipped!"), "TestCommon", "test_python_ref_torch_fallback",
device_type="cpu", dtypes=(torch.float32,)),
),
),
PythonRefInfo(
"_refs.permute",
torch_opinfo_name="permute",
),
PythonRefInfo(
"_refs.permute_copy",
torch_opinfo_name="permute_copy",
supports_out=True,
),
ElementwiseUnaryPythonRefInfo(
"_refs.rad2deg",
torch_opinfo_name="rad2deg",
decorators=(precisionOverride({torch.bfloat16: 7e-1,
torch.float16: 7e-1}),),
),
PythonRefInfo(
"_refs.ravel",
torch_opinfo_name="ravel",
),
PythonRefInfo(
"_refs.renorm",
torch_opinfo_name="renorm",
),
PythonRefInfo(
"_refs.repeat",
torch_opinfo_name="repeat",
validate_view_consistency=False,
),
PythonRefInfo(
"_refs.reshape",
torch_opinfo_name="reshape",
),
PythonRefInfo(
"_refs.reshape_as",
torch_opinfo_name="reshape_as",
),
PythonRefInfo(
"_refs.roll",
torch_opinfo_name="roll",
validate_view_consistency=False,
),
PythonRefInfo(
"_refs.rot90",
torch_opinfo_name="rot90",
validate_view_consistency=False,
),
PythonRefInfo(
"_refs.select_scatter",
torch_opinfo_name="select_scatter",
),
PythonRefInfo(
"_refs.stack",
torch_opinfo_name="stack",
validate_view_consistency=False,
),
PythonRefInfo(
"_refs.squeeze",
torch_opinfo_name="squeeze",
),
PythonRefInfo(
"_refs.squeeze_copy",
torch_opinfo_name="squeeze_copy",
supports_out=True,
),
PythonRefInfo(
"_refs.squeeze",
torch_opinfo_name="squeeze",
torch_opinfo_variant_name="multiple",
),
PythonRefInfo(
"_refs.tensor_split",
torch_opinfo_name="tensor_split",
skips=(
# RuntimeError: no _refs support for torch.Tensor.tolist
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref'),
),
),
PythonRefInfo(
"_refs.hsplit",
torch_opinfo_name="hsplit",
),
PythonRefInfo(
"_refs.vsplit",
torch_opinfo_name="vsplit",
),
PythonRefInfo(
"_refs.dot",
torch_opinfo_name="dot",
error_inputs_func=partial(error_inputs_dot_vdot, is_ref=True),
# .conj() does not set ._is_view() correctly in ATen
validate_view_consistency=False,
skips=(
# RuntimeError: no _refs support for torch.Tensor.is_conj
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref', dtypes=[torch.complex64, torch.complex128]),
),
),
PythonRefInfo(
"_refs.vdot",
torch_opinfo_name="vdot",
error_inputs_func=partial(error_inputs_dot_vdot, is_ref=True),
# .conj() does not set ._is_view() correctly in ATen
validate_view_consistency=False,
skips=(
# RuntimeError: no _refs support for torch.Tensor.is_conj
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref', dtypes=[torch.complex64, torch.complex128]),
),
),
PythonRefInfo(
"_refs.transpose",
torch_opinfo_name="transpose",
),
PythonRefInfo(
"_refs.transpose_copy",
torch_opinfo_name="transpose_copy",
supports_out=True,
),
PythonRefInfo(
"_refs.t",
torch_opinfo_name="t",
),
PythonRefInfo(
"_refs.t_copy",
torch_opinfo_name="t_copy",
supports_out=True,
),
PythonRefInfo(
"_refs.T",
torch_opinfo_name="T",
error_inputs_func=partial(error_inputs_T, has_ndims_error=True),
),
PythonRefInfo(
"_refs.unbind_copy",
torch_opinfo_name="unbind_copy",
),
PythonRefInfo(
"_refs.unfold",
torch_opinfo_name="unfold",
),
PythonRefInfo(
"_refs.unfold_copy",
torch_opinfo_name="unfold_copy",
supports_out=True,
),
PythonRefInfo(
"_refs.unsqueeze",
torch_opinfo_name="unsqueeze",
),
PythonRefInfo(
"_refs.unsqueeze_copy",
torch_opinfo_name="unsqueeze_copy",
supports_out=True,
),
PythonRefInfo(
"_refs.view",
torch_opinfo_name="view",
),
PythonRefInfo(
"_refs.view_as",
torch_opinfo_name="view_as",
),
PythonRefInfo(
"_refs.view_copy",
torch_opinfo_name="view_copy",
supports_out=True,
),
PythonRefInfo(
"_refs.vstack",
torch_opinfo_name="vstack",
skips=(
# https://github.com/pytorch/pytorch/issues/78613
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref_errors'),
),
),
PythonRefInfo(
"_refs.unflatten",
torch_opinfo_name="unflatten",
),
PythonRefInfo(
"_refs.unbind",
torch_opinfo_name="unbind",
),
#
# Reduction Reference OpInfos
#
ReductionPythonRefInfo(
"_refs.all",
torch_opinfo_name="all",
skips=(
# FIXME: uint8 input returns uint8 instead of bool
DecorateInfo(
unittest.expectedFailure, 'TestReductions', 'test_result_dtype',
dtypes=[torch.uint8]),
),
),
ReductionPythonRefInfo(
"_refs.amax",
torch_opinfo_name="amax",
error_inputs_func=partial(error_inputs_aminmax_amax_amin, is_ref=True),
skips=(
# FIXME: reduces all dimensions when dim=[]
DecorateInfo(
unittest.expectedFailure, 'TestReductions', 'test_dim_empty'),
DecorateInfo(
unittest.expectedFailure, 'TestReductions', 'test_dim_empty_keepdim'),
),
),
ReductionPythonRefInfo(
"_refs.amin",
torch_opinfo_name="amin",
error_inputs_func=partial(error_inputs_aminmax_amax_amin, is_ref=True),
skips=(
# FIXME: reduces all dimensions when dim=[]
DecorateInfo(
unittest.expectedFailure, 'TestReductions', 'test_dim_empty'),
DecorateInfo(
unittest.expectedFailure, 'TestReductions', 'test_dim_empty_keepdim'),
),
),
ReductionPythonRefInfo(
"_refs.any",
torch_opinfo_name="any",
skips=(
# FIXME: uint8 input returns uint8 instead of bool
DecorateInfo(
unittest.expectedFailure, 'TestReductions', 'test_result_dtype',
dtypes=[torch.uint8]),
),
),
ReductionPythonRefInfo(
"_refs.count_nonzero",
torch_opinfo_name="count_nonzero",
skips=(
# FIXME: count_nonzero does not accept keepdim kwarg
DecorateInfo(
unittest.skip("Skipped!"), 'TestReductions',
'test_dim_default_keepdim'),
DecorateInfo(
unittest.skip("Skipped!"), 'TestReductions', 'test_dim_none_keepdim'),
DecorateInfo(
unittest.skip("Skipped!"), 'TestReductions', 'test_dim_single_keepdim'),
DecorateInfo(
unittest.skip("Skipped!"), 'TestReductions', 'test_dim_empty_keepdim'),
DecorateInfo(
unittest.skip("Skipped!"), 'TestReductions', 'test_dim_multi_keepdim'),
DecorateInfo(
unittest.skip("Skipped!"), 'TestReductions',
'test_dim_multi_unsorted_keepdim'),
# FIXME: dim=[] reduces all dimensions
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_dim_empty'),
),
),
ReductionPythonRefInfo(
"_refs.mean",
torch_opinfo_name="mean",
supports_out=True,
error_inputs_func=partial(error_inputs_mean, is_ref=True),
skips=(
# FIXME: reduces all dimensions when dim=[]
DecorateInfo(
unittest.expectedFailure, 'TestReductions', 'test_dim_empty'),
DecorateInfo(
unittest.expectedFailure, 'TestReductions', 'test_dim_empty_keepdim'),
),
),
ReductionPythonRefInfo(
"_refs.std",
torch_opinfo_name="std",
supports_out=True,
skips=(
# FIXME: reduces all dimensions when dim=[]
DecorateInfo(
unittest.expectedFailure, 'TestReductions', 'test_dim_empty'),
DecorateInfo(
unittest.expectedFailure, 'TestReductions', 'test_dim_empty_keepdim'),
# FIXME: improve precision
DecorateInfo(
unittest.skip("Skipped!"), 'TestReductions', 'test_ref_small_input',
dtypes=(torch.float16,)),
DecorateInfo(
unittest.skip("Skipped!"), 'TestReductions',
'test_ref_duplicate_values',
dtypes=(torch.float16,)),
),
),
# std_mean and var_mean are not ReductionInfos
PythonRefInfo(
"_refs.std_mean",
torch_opinfo_name="std_mean",
),
ReductionPythonRefInfo(
"_refs.sum",
torch_opinfo_name="sum",
supports_out=True,
skips=(
# FIXME: doesn't test out behavior properly for this operator
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out'),
# FIXME: sum reduces all dimensions when dim=[]
DecorateInfo(unittest.skip("Skipped!"), 'TestReductions', 'test_dim_empty'),
DecorateInfo(
unittest.skip("Skipped!"), 'TestReductions', 'test_dim_empty_keepdim'),
# FIXME: improve precision
DecorateInfo(
unittest.skip("Skipped!"), 'TestReductions', 'test_ref_small_input',
dtypes=[torch.float16]),
DecorateInfo(
unittest.skip("Skipped!"), 'TestReductions',
'test_ref_duplicate_values',
dtypes=[torch.float16]),
DecorateInfo(
unittest.skip("Skipped!"), 'TestOperators', 'test_reduction_all',
dtypes=[torch.float32]),
),
),
PythonRefInfo(
"_refs.cumsum",
torch_opinfo_name="cumsum",
supports_out=True,
skips=(
# doesn't test out behavior properly for this operator
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out'),
),
),
PythonRefInfo(
"_refs.cumprod",
torch_opinfo_name="cumprod",
supports_out=True,
skips=(
# doesn't test out behavior properly for this operator
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out'),
),
),
PythonRefInfo(
"_refs.sum_to_size",
torch_opinfo_name="sum_to_size",
validate_view_consistency=False,
),
ReductionPythonRefInfo(
"_refs.prod",
torch_opinfo_name="prod",
supports_out=True,
supports_multiple_dims=True,
skips=(
# FIXME: doesn't test out behavior properly for this operator
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out'),
# FIXME: reduces all dimensions when dim=[]
DecorateInfo(
unittest.expectedFailure, 'TestReductions', 'test_dim_empty'),
DecorateInfo(
unittest.expectedFailure, 'TestReductions', 'test_dim_empty_keepdim'),
# FIXME: improve precision
DecorateInfo(
unittest.skip("Skipped!"), 'TestReductions', 'test_ref_small_input',
dtypes=[torch.float16, torch.complex64]),
),
),
ReductionPythonRefInfo(
"_refs.var",
torch_opinfo_name="var",
supports_out=True,
skips=(
# FIXME: reduces all dimensions when dim=[]
DecorateInfo(
unittest.expectedFailure, 'TestReductions', 'test_dim_empty'),
DecorateInfo(
unittest.expectedFailure, 'TestReductions', 'test_dim_empty_keepdim'),
# FIXME: improve precision
DecorateInfo(
unittest.skip("Skipped!"), 'TestReductions', 'test_ref_small_input'),
),
),
PythonRefInfo(
"_refs.var_mean",
torch_opinfo_name="var_mean",
validate_view_consistency=False,
),
#
# Linear Algebra Operators
#
PythonRefInfo(
"_refs.addr",
torch_opinfo_name="addr",
decorators=(
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref',),
),
),
PythonRefInfo(
"_refs.trace",
torch_opinfo_name="trace",
),
PythonRefInfo(
"_refs.norm",
torch_opinfo_name="norm",
supports_out=True,
# Uses vector_norm inside and vector_norm is affected by
# https://github.com/pytorch/pytorch/issues/77216
validate_view_consistency=False,
),
#
# Tensor Creation Reference OpInfos
#
PythonRefInfo(
"_refs.empty",
torch_opinfo_name="empty",
skips=(
DecorateInfo(unittest.skip("Expected: empty is not comparable"),
'TestCommon',
'test_python_ref'),
DecorateInfo(unittest.skip("Expected: empty is not comparable"),
'TestCommon',
'test_python_ref_torch_fallback'),
DecorateInfo(unittest.skip("Expected: empty is not comparable"),
'TestCommon',
'test_out'),
DecorateInfo(unittest.skip("Expected: empty is not comparable"),
'TestCommon',
'test_out_warning'),
DecorateInfo(unittest.skip("Expected: empty is not comparable"),
'TestMathBits',
'test_conj_view'),
DecorateInfo(unittest.skip("Expected: empty is not comparable"),
'TestMathBits',
'test_neg_conj_view'),
DecorateInfo(unittest.skip("Expected: empty is not comparable"),
'TestMathBits',
'test_neg_view'),
# FIXME: shouldn't check empty results
DecorateInfo(unittest.skip("Can't check result for empty"), 'TestCommon', 'test_python_ref_executor'),
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'),
),
),
PythonRefInfo(
"_refs.empty_like",
torch_opinfo_name="empty_like",
skips=(
DecorateInfo(unittest.skip("Expected: empty is not comparable"),
'TestCommon',
'test_python_ref'),
DecorateInfo(unittest.skip("Expected: empty is not comparable"),
'TestCommon',
'test_python_ref_torch_fallback'),
DecorateInfo(unittest.skip("Expected: empty is not comparable"),
'TestCommon',
'test_out'),
DecorateInfo(unittest.skip("Expected: empty is not comparable"),
'TestCommon',
'test_out_warning'),
DecorateInfo(unittest.skip("Expected: empty is not comparable"),
'TestMathBits',
'test_conj_view'),
DecorateInfo(unittest.skip("Expected: empty is not comparable"),
'TestMathBits',
'test_neg_conj_view'),
DecorateInfo(unittest.skip("Expected: empty is not comparable"),
'TestMathBits',
'test_neg_view'),
# FIXME: should not compare results of empty_like
DecorateInfo(unittest.skip("Can't check result for empty_like"), 'TestCommon', 'test_python_ref_executor'),
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'),
),
),
PythonRefInfo(
"_refs.randn",
torch_opinfo_name="randn",
op=lambda *args, **kwargs: wrapper_set_seed(refs.randn, *args, **kwargs),
skips=(
# see https://github.com/pytorch/pytorch/issues/85121
DecorateInfo(unittest.skip("make_traced() doesn't set seed properly!"),
'TestCommon',
'test_python_ref_executor'),
# These tests expect the input to be a tensor or a sequence of tensors
DecorateInfo(unittest.skip("Test expects tensor input"), "TestCommon", "test_noncontiguous_samples"),
DecorateInfo(unittest.skip("Test expects tensor input"), 'TestMathBits', 'test_neg_view'),
DecorateInfo(unittest.skip("Test expects tensor input"), 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.skip("Test expects tensor input"), 'TestMathBits', 'test_neg_conj_view'),
),
),
PythonRefInfo(
"_refs.eye",
torch_opinfo_name="eye",
skips=(
# skip these tests since we have non tensor input
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_conj_view'),
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_neg_conj_view'),
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_neg_view'),
),
),
PythonRefInfo(
"_refs.new_empty",
torch_opinfo_name="new_empty",
skips=(
DecorateInfo(unittest.skip("Expected: empty is not comparable"),
'TestCommon',
'test_python_ref'),
DecorateInfo(unittest.skip("Expected: empty is not comparable"),
'TestCommon',
'test_python_ref_torch_fallback'),
DecorateInfo(unittest.skip("Expected: empty is not comparable"),
'TestCommon',
'test_out'),
DecorateInfo(unittest.skip("Expected: empty is not comparable"),
'TestCommon',
'test_out_warning'),
DecorateInfo(unittest.skip("Expected: empty is not comparable"),
'TestMathBits',
'test_conj_view'),
DecorateInfo(unittest.skip("Expected: empty is not comparable"),
'TestMathBits',
'test_neg_conj_view'),
DecorateInfo(unittest.skip("Expected: empty is not comparable"),
'TestMathBits',
'test_neg_view'),
# FIXME: should not compare results of new_empty
DecorateInfo(unittest.skip("Can't check result for new_empty"), 'TestCommon', 'test_python_ref_executor'),
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'),
),
),
PythonRefInfo(
"_refs.new_empty_strided",
torch_opinfo_name="new_empty_strided",
skips=(
DecorateInfo(unittest.skip("Expected: empty_strided is not comparable"),
'TestCommon',
'test_python_ref'),
DecorateInfo(unittest.skip("Expected: empty_strided is not comparable"),
'TestCommon',
'test_python_ref_torch_fallback'),
DecorateInfo(unittest.skip("Expected: empty_strided is not comparable"),
'TestMathBits',
'test_conj_view'),
DecorateInfo(unittest.skip("Expected: empty_strided is not comparable"),
'TestMathBits',
'test_neg_conj_view'),
DecorateInfo(unittest.skip("Expected: empty_strided is not comparable"),
'TestMathBits',
'test_neg_view'),
DecorateInfo(unittest.skip("Expected: empty_strided is not comparable"),
'TestCommon',
'test_python_ref_executor'),
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'),
),
),
PythonRefInfo(
"_refs.empty_strided",
torch_opinfo_name="empty_strided",
skips=(
DecorateInfo(unittest.skip("Expected: empty_strided is not comparable"),
'TestCommon',
'test_python_ref'),
DecorateInfo(unittest.skip("Expected: empty_strided is not comparable"),
'TestCommon',
'test_python_ref_torch_fallback'),
DecorateInfo(unittest.skip("Expected: empty_strided is not comparable"),
'TestMathBits',
'test_conj_view'),
DecorateInfo(unittest.skip("Expected: empty_strided is not comparable"),
'TestMathBits',
'test_neg_conj_view'),
DecorateInfo(unittest.skip("Expected: empty_strided is not comparable"),
'TestMathBits',
'test_neg_view'),
DecorateInfo(unittest.skip("Expected: empty_strided is not comparable"),
'TestCommon',
'test_python_ref_executor'),
DecorateInfo(unittest.skip('output is non-deterministic'), 'TestCommon', 'test_compare_cpu'),
),
),
PythonRefInfo(
"_refs.new_full",
torch_opinfo_name="new_full",
),
PythonRefInfo(
"_refs.new_ones",
torch_opinfo_name="new_ones",
),
PythonRefInfo(
"_refs.new_zeros",
torch_opinfo_name="new_zeros",
),
#
# Conditional Reference OpInfos
#
PythonRefInfo(
"_refs.masked_fill",
torch_opinfo_name="masked_fill",
skips=(
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref_errors'),
),
),
PythonRefInfo(
"_refs.where",
torch_opinfo_name="where",
op=lambda self, condition, other: refs.where(condition, self, other),
supports_out=False,
skips=(
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref_errors', device_type='cuda'),
),
),
PythonRefInfo(
"_refs.index_select",
torch_opinfo_name="index_select",
# empty_strided
skips=(
# no _refs support for Tensor.__setitem__
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref'),
# Sample out= with a stride of zero. This _out operation checks that the input has no
# inner overlap
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref_errors'),)
),
PythonRefInfo(
"_refs.index_copy",
torch_opinfo_name="index_copy",
# empty_strided
skips=(
# no _refs support for Tensor.__setitem__
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref'),
),
),
PythonRefInfo(
"_refs.index_add",
torch_opinfo_name="index_add",
# empty_strided
skips=(
# no _refs support for Tensor.__setitem__
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref'),
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref_errors'),
),
),
PythonRefInfo(
"_refs.index_fill",
torch_opinfo_name="index_fill",
# empty_strided
skips=(
# no _refs support for Tensor.__setitem__
DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_python_ref'),)
),
#
# Test-related functions
#
PythonRefInfo(
"_refs.allclose",
torch_opinfo_name="allclose",
),
#
# Misc functions
#
PythonRefInfo(
"_refs.stft",
torch_opinfo_name="stft",
skips=[
# RuntimeError: no _refs support for aten.pad
DecorateInfo(
unittest.expectedFailure, 'TestCommon', 'test_python_ref'
),
],
),
PythonRefInfo(
"_refs.istft",
torch_opinfo_name="istft",
skips=[
# RuntimeError: no _refs support for aten.unfold_backward
DecorateInfo(
unittest.expectedFailure, 'TestCommon', 'test_python_ref'
),
DecorateInfo(
unittest.skip("Expected: unfold_backward() got an unexpected keyword argument 'input_sizes'"),
'TestCommon',
'test_python_ref_executor',
dtypes=(torch.complex64, torch.complex128),
),
],
),
PythonRefInfo(
"_refs.view_as_complex",
torch_opinfo_name="view_as_complex",
),
PythonRefInfo(
"_refs.split_with_sizes",
torch_opinfo_name="split_with_sizes",
),
]
# Append the reference OpInfos declared in the opinfo.definitions package.
python_ref_db.extend(opinfo.definitions.python_ref_db)

# Common operator groupings
# Every OpInfo: the operator database followed by the Python reference database.
ops_and_refs = op_db + python_ref_db

# Groupings selected by OpInfo subclass, drawn from operators and references alike.
unary_ufuncs = [
    info for info in ops_and_refs if isinstance(info, UnaryUfuncInfo)
]
binary_ufuncs = [
    info for info in ops_and_refs if isinstance(info, BinaryUfuncInfo)
]
# Same members and order as binary_ufuncs, exposed as a tuple.
binary_ufuncs_and_refs = tuple(binary_ufuncs)
spectral_funcs = [
    info for info in ops_and_refs if isinstance(info, SpectralFuncInfo)
]

# Sparse groupings only look at op_db (the Python references are not included).
sparse_unary_ufuncs = [
    info for info in op_db
    if isinstance(info, UnaryUfuncInfo) and info.supports_sparse
]
sparse_csr_unary_ufuncs = [
    info for info in op_db
    if isinstance(info, UnaryUfuncInfo) and info.supports_sparse_csr
]
sparse_reduction_ops = [
    info for info in op_db
    if isinstance(info, ReductionOpInfo) and info.supports_sparse
]

shape_funcs = [
    info for info in ops_and_refs if isinstance(info, ShapeFuncInfo)
]
reduction_ops = [
    info for info in ops_and_refs if isinstance(info, ReductionOpInfo)
]
# Reductions that carry a reference implementation (``ref`` is set).
reference_filtered_ops = [
    info for info in reduction_ops if info.ref is not None
]
# Subsets whose operator name lives under the ``masked.`` prefix.
reference_masked_ops = [
    info for info in reference_filtered_ops if info.name.startswith('masked.')
]
sparse_masked_reduction_ops = [
    info for info in sparse_reduction_ops if info.name.startswith('masked.')
]
def index_variable(shape, max_indices, device=torch.device('cpu')):
    """Create a ``torch.long`` tensor of ``shape`` via ``make_tensor``.

    Args:
        shape: a tuple of dimensions; any non-tuple value is wrapped into a
            one-element tuple before being unpacked into ``make_tensor``.
        max_indices: passed to ``make_tensor`` as ``high`` (``low`` is 0).
        device: device on which the tensor is created.

    Returns:
        The tensor produced by ``torch.testing.make_tensor``.
    """
    dims = shape if isinstance(shape, tuple) else (shape,)
    return torch.testing.make_tensor(
        *dims,
        dtype=torch.long,
        device=device,
        low=0,
        high=max_indices,
    )
def gather_variable(shape, index_dim, max_indices, duplicate=False, device=torch.device('cpu')):
    """Build a 2-D ``torch.long`` index tensor filled from random permutations.

    Every slice along ``index_dim`` receives the leading
    ``shape[1 - index_dim]`` entries of a fresh
    ``torch.randperm(max_indices)`` draw.

    Args:
        shape: two-element shape of the result (asserted).
        index_dim: dimension whose slices are filled one at a time; must be
            less than 2 (asserted).
        max_indices: size of the permutation each slice is drawn from.
        duplicate: when true, slice 0 along the other dimension is
            overwritten with slice 1 along that dimension.
        device: device for the result and for the permutations.

    Returns:
        The populated index tensor.
    """
    assert len(shape) == 2
    assert index_dim < 2
    other_dim = 1 - index_dim
    out = torch.zeros(*shape, dtype=torch.long, device=device)
    for pos in range(shape[index_dim]):
        target = out.select(index_dim, pos)
        perm = torch.randperm(max_indices, device=device)
        target.copy_(perm[:shape[other_dim]])
    if duplicate:
        dst = out.select(other_dim, 0)
        src = out.select(other_dim, 1)
        dst.copy_(src)
    return out
def bernoulli_scalar():
    """Return a 0-dim boolean tensor after an in-place ``bernoulli_()`` draw."""
    scalar = torch.tensor(0, dtype=torch.bool)
    return scalar.bernoulli_()
def mask_not_all_zeros(shape):
    """Return a random boolean mask of ``shape`` with at least one True entry.

    A mask is drawn as ``torch.randn(shape) > 0`` and redrawn until it
    contains at least one True element.

    Args:
        shape: sequence of dimension sizes; must be non-empty and must not
            contain a zero-sized dimension.

    Returns:
        A ``torch.bool`` tensor of the requested shape with some True entry.

    Raises:
        AssertionError: if ``shape`` has no dimensions.
        ValueError: if any dimension is 0. Such a mask has no elements, so
            it can never contain a True entry and the redraw loop would
            never terminate.
    """
    assert len(shape) > 0
    if 0 in shape:
        raise ValueError(
            f"mask_not_all_zeros requires a shape with no zero-sized dimension, got {tuple(shape)}"
        )
    while True:
        result = torch.randn(shape).gt(0)
        if result.any():
            return result
# Copied from functorch
def xfail(op_name, variant_name='', *, device_type=None, dtypes=None):
    """Describe an expected-failure entry as a 5-tuple.

    Returns:
        ``(op_name, variant_name, device_type, dtypes, True)``; the trailing
        ``True`` is the expected-failure flag.
    """
    expected_failure = True
    return (op_name, variant_name, device_type, dtypes, expected_failure)
def skip(op_name, variant_name='', *, device_type=None, dtypes=None):
    """Describe a skip entry as a 5-tuple.

    Returns:
        ``(op_name, variant_name, device_type, dtypes, False)``; the trailing
        ``False`` is the expected-failure flag.
    """
    expected_failure = False
    return (op_name, variant_name, device_type, dtypes, expected_failure)
def skipOps(test_case_name, base_test_name, to_skip):
    """Attach skip / expected-failure decorators to OpInfos in ``op_db``.

    Args:
        test_case_name: test class name forwarded to ``DecorateInfo``.
        base_test_name: test method name forwarded to ``DecorateInfo``.
        to_skip: iterable of 5-tuples
            ``(op_name, variant_name, device_type, dtypes, expected_failure)``,
            the layout returned by ``xfail`` and ``skip``.

    Returns:
        A decorator that returns the decorated function unchanged. The effect
        of this call is the update of ``decorators`` on the matching OpInfos.

    Raises:
        AssertionError: if an entry matches no OpInfo in ``op_db``.
    """
    candidates = op_db
    for entry in to_skip:
        op_name, variant_name, device_type, dtypes, expected_failure = entry
        matches = [
            info for info in candidates
            if info.name == op_name and info.variant_test_name == variant_name
        ]
        assert len(matches) >= 1, f"Couldn't find OpInfo for {entry}"
        for matched in matches:
            # The flag picks the unittest marker; the rest of the DecorateInfo
            # is the same for both kinds of entry.
            if expected_failure:
                marker = unittest.expectedFailure
            else:
                marker = unittest.skip("Skipped!")
            extra = DecorateInfo(
                marker,
                test_case_name,
                base_test_name,
                device_type=device_type,
                dtypes=dtypes,
            )
            matched.decorators = (*matched.decorators, extra)

    # This decorator doesn't modify fn in any way
    def wrapped(fn):
        return fn

    return wrapped