import unittest
from collections.abc import Sequence
from typing import Optional

import torch

from .common_utils import MACOS_VERSION
from .opinfo.core import DecorateInfo, OpInfo


if torch.backends.mps.is_available():

    def mps_ops_modifier(
        ops: Sequence[OpInfo],
        device_type: str = "mps",
        xfail_exclusion: Optional[list[str]] = None,
        sparse: bool = False,
    ) -> Sequence[OpInfo]:
        if xfail_exclusion is None:
            xfail_exclusion = []

        # Supported complex OPS
        SUPPORTED_COMPLEX_OPS = {
            "__radd__", "__rmul__", "__rsub__", "__getitem__", "_unsafe_masked_index",
            "_unsafe_masked_index_put_accumulate", "abs", "add", "alias_copy", "argwhere",
            "atleast_1d", "atleast_2d", "atleast_3d", "as_strided", "as_strided_copy",
            "as_strided_scatter", "asin", "asinh", "acos", "atan", "broadcast_tensors",
            "broadcast_to", "chalf", "cfloat", "chunk", "clone", "conj", "conj_physical",
            "contiguous", "cos", "cosh", "diag", "diag_embed", "diagflat", "diagonal",
            "diagonal_copy", "diagonal_scatter", "divno_rounding_mode", "dsplit", "empty",
            "empty_permuted", "empty_strided", "exp", "expm1", "exp2", "expand", "expand_as",
            "expand_copy", "flatten", "fill", "full", "full_like", "H", "hsplit", "imag",
            "index_add", "index_copy", "index_select", "index_put", "isfinite", "isinf",
            "isreal", "item", "kron", "linalg.diagonal", "linalg.svd", "log10", "log1p",
            "log2", "log", "mH", "mT", "masked_fill", "masked_scatter", "masked_select",
            "meshgridlist_of_tensors", "meshgridvariadic_tensors", "movedim", "mul",
            "narrow", "narrow_copy", "neg", "new_full", "new_ones", "new_zeros",
            "nn.functional.conv1d", "nn.functional.conv2d", "nn.functional.conv_transpose1d",
            "nn.functional.conv_transpose2d", "nn.functional.conv_transpose3d",
            "nn.functional.feature_alpha_dropoutwithout_train", "nn.functional.padcircular",
            "nn.functional.softsign", "nn.functional.tanhshrink", "nn.functional.unfold",
            "nonzero", "ones", "ones_like", "outer", "permute", "permute_copy", "positive",
            "randn", "ravel", "real", "repeat_interleave", "reshape_as", "reshape",
            "resolve_conj", "resolve_neg", "rsqrt", "rsub", "scalar_tensor", "select", "sgn",
            "sigmoid", "sin", "sinc", "sinh", "slice", "special.spherical_bessel_j0",
            "special.entr", "special.xlog1py", "special.zeta", "split", "split_with_sizes",
            "split_with_sizes_copy", "splitlist_args", "sqrt", "squeeze", "squeeze_copy",
            "squeezemultiple", "sub", "svd", "t", "t_copy", "tanh", "tan", "tensor_split",
            "transpose", "transpose_copy", "tril", "triu", "true_divide", "T", "unbind",
            "unbind_copy", "unflatten", "unfold", "unfold_copy", "unsafe_chunk",
            "unsafe_split", "unsqueeze", "unsqueeze_copy", "view_as", "view_as_real",
            "view", "view_copy", "vsplit", "zero_", "zeros", "zeros_like", "__rdiv__",
            "__rmatmul__", "_chunk_cat", "acosh", "all", "allclose", "angle", "any",
            "addcdiv", "addcmul", "addmmdecomposed", "addmv", "atanh", "bfloat16", "bmm",
            "bool", "cartesian_prod", "cat", "char", "column_stack", "combinations",
            "corrcoef", "constant_pad_nd", "cov", "count_nonzero", "diff", "div", "dot",
            "dstack", "einsum", "eq", "equal", "eye", "fft.fft", "fft.fft2", "fft.fftn",
            "fft.fftshift", "fft.ifft", "fft.ifft2", "fft.ifftn", "fft.ifftshift",
            "fft.irfftn", "fft.irfft2", "fft.irfft", "fft.hfftn", "fft.hfft2", "fft.hfft",
            "flip", "fliplr", "flipud", "float", "gradient", "half", "hstack", "inner",
            "int", "isclose", "isnan", "ldexp", "lerp", "linalg.multi_dot", "linalg.pinv",
            "linspace", "linspacetensor_overload", "logical_and", "logical_not",
            "logical_or", "logical_xor", "logsumexp", "long", "masked.mean", "masked.prod",
            "masked.std", "masked.sum", "masked.var", "masked.logsumexp", "matmul", "mean",
            "mm", "mv", "ne", "nn.functional.padconstant", "nn.functional.padreflect",
            "nn.functional.padreplicate", "nn.functional.pixel_shuffle",
            "nn.functional.pixel_unshuffle", "nn.functional.rms_norm", "pinverse", "prod",
            "reciprocal", "roll", "rot90", "short", "square", "stack", "stft", "sum",
            "sum_to_size", "tensordot", "trace", "trapz", "trapezoid", "vstack", "where",
            "byte",
        }

        MACOS_BEFORE_14_4_XFAILLIST = {
            # These ops work fine in 14.4 but fail in 14.2 or 13.x
            "fft.hfft2": [torch.complex64],
        }

        # Those ops are not expected to work
        UNIMPLEMENTED_XFAILLIST: dict[str, Optional[list]] = {
            # Failures due to lack of op implementation on MPS backend
            "logspace": None,
            "logspacetensor_overload": None,
            "linalg.eig": None,
            "linalg.eigvals": None,
            "put": None,
            "cauchy_": None,
            "cauchy": None,
            "cholesky_inverse": None,
            "cholesky_solve": None,
            "frexp": None,
            "gcd": None,
            "geqrf": None,
            "nn.functional.grid_sample": None,  # Unsupported Border padding mode
            "hash_tensor": None,
            "heaviside": None,
            "index_reduceprod": None,
            "index_reducemean": None,
            "index_reduceamax": None,
            "index_reduceamin": None,
            # "kthvalue": None,
            "lcm": None,
            "linalg.cond": None,
            "linalg.eigh": None,
            "linalg.eigvalsh": None,
            "linalg.householder_product": None,
            "linalg.ldl_factor": None,
            "linalg.ldl_factor_ex": None,
            "linalg.ldl_solve": None,
            "linalg.lstsq": None,
            "linalg.lstsqgrad_oriented": None,
            "linalg.lu": None,
            "linalg.lu_solve": None,
            "linalg.matrix_norm": [torch.float32],
            "linalg.norm": [torch.float32],
            "linalg.normsubgradients_at_zero": [torch.float32],
            "linalg.qr": None,
            "linalg.svdvals": None,
            "linalg.vecdot": None,
            "lu_solve": None,
            "masked.median": None,
            "matrix_exp": None,
            "mode": None,
            "normnuc": None,
            "nn.functional.fractional_max_pool2d": None,
            "nn.functional.fractional_max_pool3d": None,
            "nn.functional.adaptive_avg_pool3d": None,
            "nn.functional.adaptive_max_pool3d": None,
            "nn.functional.interpolatearea": None,
            "nn.functional.interpolatebicubic": [torch.uint8],
            "nn.functional.ctc_loss": None,
            "nn.functional.multi_margin_loss": None,
            "nn.functional.multilabel_margin_loss": None,
            "nn.functional.pdist": None,
            "nn.functional.rrelu": None,
            "nn.functional.norm": None,
            "ormqr": None,
            "pca_lowrank": None,
            "qr": None,
            "scatter_reduceamax": [torch.int32, torch.int64] if MACOS_VERSION < 15.0 else [torch.int64],
            "scatter_reduceamin": [torch.int32, torch.int64] if MACOS_VERSION < 15.0 else [torch.int64],
            "segment_reduce": None,
            "_segment.reduce": None,
            "segment.reduce": None,
            "segment_reduce_offsets": None,
            "_segment_reduce_offsets": None,
            "_segment_reduce_lengths": None,
            "_segment_reducelengths": None,
            "_segment_reduceoffsets": None,
            "sparse.mm": None,
            "sparse.sampled_addmm": None,
            "sparse.mmreduce": None,
            "special.airy_ai": None,
            "special.erfcx": None,
            "special.laguerre_polynomial_l": None,
            "special.legendre_polynomial_p": None,
            "special.log_ndtr": None,
            "special.ndtri": None,
            "svd_lowrank": None,
            "symeig": None,
            "take": None,
            "to": None,
            "vdot": None,
            "segment_reduce_": None,
            "_upsample_bilinear2d_aa": [torch.uint8],  # uint8 is for CPU only
            "_upsample_bicubic2d_aa": [torch.uint8],  # uint8 is for CPU only
            "geometric": None,
            "geometric_": None,
            "log_normal_": None,
            "log_normal": None,
            "cdouble": None,
            "double": None,
            "nn.functional.softminwith_dtype": None,
            "log_softmaxwith_dtype": None,
            "softmaxwith_dtype": None,
            "float_power": None,
            "linalg.matrix_rankhermitian": None,
            "linalg.pinvhermitian": None,
            "nonzero_static": None,
output sizes "nn.functional.adaptive_avg_pool1d": None, "nn.functional.adaptive_avg_pool2d": None, # Convolution for integral types is not supported on MPS "nn.functional.conv1d": [torch.int64], "nn.functional.conv2d": [torch.int64], "nn.functional.conv3d": [torch.int64], "nn.functional.conv_transpose1d": [torch.int64], "nn.functional.conv_transpose2d": [torch.int64, torch.bfloat16], "nn.functional.conv_transpose3d": [ torch.int64, torch.bfloat16, torch.float16, ], # Unsupported dtypes "histc": [torch.float16, torch.bfloat16], # GEMM on MPS is not supported for integral types "nn.functional.linear": [ torch.int16, torch.int32, torch.int64, torch.uint8, torch.int8, ], "addbmm": [torch.int16, torch.int32, torch.int64, torch.uint8, torch.int8], "baddbmm": [torch.int16, torch.int32, torch.int64, torch.uint8, torch.int8], "mat": [torch.int16, torch.int32, torch.int64, torch.uint8, torch.int8], # returned output on CPU is float64 "bincount": [ torch.int16, torch.int32, torch.int64, torch.uint8, torch.int8, ], } UNIMPLEMENTED_XFAILLIST_SPARSE: dict[str, Optional[list]] = { "logspace": None, "logspacetensor_overload": None, "linalg.eig": None, "linalg.eigvals": None, "put": None, } if MACOS_VERSION < 15.0: UNIMPLEMENTED_XFAILLIST.update( { "quantile": None, "nanquantile": None, } ) if sparse: UNIMPLEMENTED_XFAILLIST.update(UNIMPLEMENTED_XFAILLIST_SPARSE) UNDEFINED_XFAILLIST: dict[str, Optional[list]] = { # Top 60 operators # topk fails with duplicate indices "topk": [ torch.int16, torch.int32, torch.int64, torch.uint8, torch.int8, ], # Failures due to random output that they generate using # Philox engine causing mismatch with CPU results "multinomial": [ torch.float16, torch.float32, torch.bfloat16, ], # random results "uniform": [torch.float16, torch.float32, torch.bfloat16], "rand_like": [torch.float16, torch.float32, torch.bfloat16], "randint": None, "randint_like": None, "randn": None, "randn_like": None, "bernoulli": [torch.float16, torch.float32, torch.bfloat16], "exponential": [torch.float16, torch.float32, torch.bfloat16], "nn.functional.feature_alpha_dropoutwith_train": [ torch.float16, torch.float32, torch.bfloat16, ], "normal": [torch.float16, torch.float32, torch.bfloat16], "normalin_place": [torch.float16, torch.float32, torch.bfloat16], "normalnumber_mean": [torch.float16, torch.float32, torch.bfloat16], "nn.functional.alpha_dropout": [ torch.float16, torch.float32, torch.bfloat16, ], "nn.functional.dropout": [torch.float16, torch.float32, torch.bfloat16], "nn.functional.dropout2d": [torch.float16, torch.float32, torch.bfloat16], "nn.functional.dropout3d": [torch.float16, torch.float32, torch.bfloat16], # See https://github.com/pytorch/pytorch/issues/111479 "nn.functional.multi_head_attention_forward": [ torch.float32, torch.float16, torch.bfloat16, ], # zero to negative integer powers are undefined "__rpow__": [torch.int8, torch.int16, torch.int32, torch.int64], "resize_": [torch.float16, torch.float32, torch.bfloat16], "resize_as_": [torch.float16, torch.float32, torch.bfloat16], # CPU Errors: "addr": [ torch.bool, torch.int16, torch.int32, torch.int64, torch.uint8, torch.int8, ], # "addmv_impl_cpu" not implemented for 'Half' "as_stridedpartial_views": None, # cpu result off, showing random values # random results # mps vs cpu: # Mismatched elements: 40 / 96 (41.7%) # Greatest absolute difference: 17.892311096191406 at index (1, 0, 2) (up to 1e-05 allowed) # Greatest relative difference: inf at index (1, 0, 0) (up to 1.3e-06 allowed) # cuda(2.0.0.dev20230301+cu117) vs cpu: # 
Mismatched elements: 56 / 96 (58.3%) # Greatest absolute difference: 17.892311096191406 at index (1, 0, 2) (up to 1e-05 allowed) # Greatest relative difference: inf at index (1, 0, 0) (up to 1.3e-06 allowed) "nn.functional.scaled_dot_product_attention": [ torch.float32, torch.float16, torch.bfloat16, ], } ON_MPS_XFAILLIST: dict[str, Optional[list]] = { # Failures due to lack of implementation of downstream functions on MPS backend # TODO: remove these once downstream function 'aten::_linalg_svd.U' have been implemented "linalg.matrix_rank": None, # Exception: Caused by `torch.arange(-8.001, -4.0, dtype=torch.uint8, device="mps")` "arange": [torch.uint8], # before macOS 13.2 it falls back to cpu and pass the forward pass "grid_sampler_2d": [ torch.float32, torch.float16, torch.bfloat16, ], # Unsupported Border padding mode # Failure due to precision issue for fp16 # on both cpu and mps there are test cases that might produce inf result # 'nn.functional.pairwise_distance': [torch.float16], # test blow pass on macOS 12 as it falls back to cpu # Argsort case using duplicate indices (undefined behaviour): # - CPU output: tensor([2546, 6917, 3181, ..., 7128, 5133, 30], device='cpu') # - MPS output: tensor([2546, 6917, 3181, ..., 7128, 30, 5133], device='mps:0') # Elements from index 30 and 5133 are both equal. # Since CPU is not using argsort with stable=True, these cases result in undefined behaviour. "argsort": [ torch.float16, torch.int8, torch.uint8, torch.bool, torch.bfloat16, ], # Same issue as `argsort` with duplicate indices. This test checks both the sorted values and the indices. # The values of the sorted tensor match the CPU, # but in case of the returned indices this results in undefined behaviour. "sort": [ torch.int8, torch.uint8, torch.bool, torch.float16, torch.bfloat16, ], } EMPTY_OPS_SKIPLIST = { # Fill tensors with uninitialized data, causing mismatch with CPU. # They occasionally match, thus skipping them. # See https://github.com/pytorch/pytorch/issues/100175 "new_empty": None, "new_empty_strided": None, "empty_strided": None, # CPU: empty is returning all 0's and there is a mismatch with MPS # allocation (MacOS 13). According to # https://pytorch.org/docs/2.0/generated/torch.empty.html "empty": None, "empty_like": None, "empty_permuted": None, } SKIPLIST = { # Unsupported # This doesn't work on M1, but is partially working on M2 with the exception of torch.float16 "nn.functional.conv3d": None, # The CPU impl of grid_sampler_3d does not use opmath_t, so it has a # large amount of error compared with the MPS impl for half # precision types. So we have to skip these for now. 
"grid_sampler_3d": [torch.float16, torch.bfloat16], } def addDecorator(op: OpInfo, d: DecorateInfo) -> None: if device_type is not None: d.device_type = device_type op.decorators = op.decorators + (d,) for op in ops: key = op.name + op.variant_test_name addDecorator( op, DecorateInfo( unittest.expectedFailure, dtypes=[ torch.double, torch.cdouble, ], ), ) if sparse: # Skipped due to test_sparse_zero_dims test in test_sparse.py which allocates empty tensor # which leads to unexpected success with it addDecorator( op, DecorateInfo( unittest.skip( "Skipped due to MPS not supporting complex128 tensors" ), dtypes=[ torch.complex128, ], ), ) if key in EMPTY_OPS_SKIPLIST: addDecorator( op, DecorateInfo( unittest.skip("Skipping empty ops."), dtypes=EMPTY_OPS_SKIPLIST[key], ), ) if key in SKIPLIST: addDecorator( op, DecorateInfo(unittest.skip("Skipped!"), dtypes=SKIPLIST[key]) ) for xfaillist in [ UNIMPLEMENTED_XFAILLIST, UNDEFINED_XFAILLIST, ON_MPS_XFAILLIST, ]: if key in xfaillist and key not in xfail_exclusion: addDecorator( op, DecorateInfo(unittest.expectedFailure, dtypes=xfaillist[key]), ) if ( key in MACOS_BEFORE_14_4_XFAILLIST and key not in xfail_exclusion and (MACOS_VERSION < 14.4) ): addDecorator( op, DecorateInfo( unittest.expectedFailure, dtypes=MACOS_BEFORE_14_4_XFAILLIST[key], ), ) # If ops is not supported for complex types, expect it to fail if key not in SUPPORTED_COMPLEX_OPS: addDecorator( op, DecorateInfo( unittest.expectedFailure, dtypes=[torch.complex32, torch.complex64], ), ) return ops def mps_ops_grad_modifier(ops: Sequence[OpInfo]) -> Sequence[OpInfo]: XFAILLIST_GRAD = { # Unimplemented ops "_segment_reduce": [torch.float16, torch.float32], "_chunk_cat": [torch.float16, torch.float32], "_upsample_bilinear2d_aa": None, # `_upsample_bilinear2d_aa_backward_out` not implemented for MPS "_upsample_bicubic2d_aa": None, # `_upsample_bilinear2d_aa_backward_out` not implemented for MPS "sparse.mmreduce": [torch.float32], # csr not supported "unique_consecutive": [torch.float16, torch.float32], "scalar_tensor": [torch.float16, torch.float32], "cdist": [torch.float32], "masked.scatter": [torch.float16, torch.float32], "grid_sampler_3d": None, "index_fill": [torch.float16, torch.float32], # missing `aten::_unique`. "igamma": None, # currently not supported for any device "igammac": None, # currently not supported for any device "linalg.solve": [torch.float16, torch.float32], # missing `aten::lu_solve`. "linalg.solve_ex": [ torch.float16, torch.float32, ], # missing `aten::lu_solve`. "linalg.tensorsolve": [ torch.float16, torch.float32, ], # missing `aten::lu_solve`. "linalg.det": [torch.float16, torch.float32], # missing aten::lu_solve.out "linalg.slogdet": [ torch.float16, torch.float32, ], # missing aten::lu_solve.out "logdet": [torch.float16, torch.float32], # missing aten::lu_solve.out "aminmax": [torch.float32, torch.float16], "special.i1": [torch.float16], # "i1_backward" not implemented for 'Half' "special.i1e": [torch.float16], # "i1e_backward" not implemented for 'Half' # Correctness issues "atanh": [torch.float32], # Same issue as `argsort` and `sort` with duplicate elements (undefined behaviour). # Forward pass is passing since `msort` doesn't return the indices, just the values, which match the CPU. # On the backward pass for `sort` both are used (values and indices), thus resulting in a issmatch between CPU and MPS. # Running `msort` with stable `sort` passes. 
"msort": [torch.float16], # Random output "exponential": [torch.float16, torch.float32], # CPU errors # derivative for zeta is not implemented "special.zeta": None, # derivative for aten::nextafter is not implemented on CPU "nextafter": None, # derivative for aten::floor_divide is not implemented on CPU "floor_divide": [torch.float16, torch.float32], # derivative for aten::narrow_copy is not implemented on CPU "narrow_copy": [torch.float16, torch.float32], # derivative for aten::_histogramdd_from_bin_cts is not implemented on CPU "histogramdd": [torch.float16, torch.float32], # derivative for aten::histogram is not implemented "histogram": [torch.float16, torch.float32], # 'bool' object is not iterable "allclose": [torch.float16, torch.float32], "equal": [torch.float16, torch.float32], # 'float' object is not iterable "item": [torch.float16, torch.float32], # "smooth_l1_backward_cpu_out" not implemented for 'Half' "nn.functional.smooth_l1_loss": [torch.float16], # cpu error: grad requires non-empty inputs "randn": [torch.float16, torch.float32], "signal.windows.bartlett": [torch.float32], "signal.windows.blackman": [torch.float32], "signal.windows.cosine": [torch.float32], "signal.windows.exponential": [torch.float32], "signal.windows.gaussian": [torch.float32], "signal.windows.general_cosine": [torch.float32], "signal.windows.general_hamming": [torch.float32], "signal.windows.hamming": [torch.float32], "signal.windows.hann": [torch.float32], "signal.windows.kaiser": [torch.float32], "signal.windows.nuttall": [torch.float32], "eye": [torch.float16, torch.float32], # topk fails with duplicate indices "topk": [torch.float16], # Could not run 'aten::uniform_' with arguments from the 'SparseCPU' backend "to_sparse": None, # Exception: the derivative for '_unique2' is not implemented. "unique": None, } SKIPLIST_GRAD = { "nn.functional.pairwise_distance": [torch.float16], # failed assertion `destination datatype must be fp32' "nn.functional.conv1d": [torch.float16], "nn.functional.conv2d": [torch.float16], "nn.functional.conv3d": [torch.float16], "nn.functional.conv_transpose1d": [torch.float16], "nn.functional.conv_transpose2d": [torch.float16], "nn.functional.conv_transpose3d": [torch.float16], } ON_MPS_XFAILLIST = { # Failures due to lack of implementation of downstream functions on MPS backend # TODO: remove these once downstream function 'aten::_linalg_svd.U' have been implemented "linalg.matrix_rank": None, # Exception: Caused by sample input at index 3 on MPS "nn.functional.conv3d": [torch.float32], } def addDecorator(op: OpInfo, d: DecorateInfo) -> None: op.decorators = op.decorators + (d,) for op in ops: key = op.name + op.variant_test_name if key in XFAILLIST_GRAD: addDecorator( op, DecorateInfo(unittest.expectedFailure, dtypes=XFAILLIST_GRAD[key]), ) if key in SKIPLIST_GRAD: addDecorator(op, DecorateInfo(unittest.skip, dtypes=SKIPLIST_GRAD[key])) if key in ON_MPS_XFAILLIST: addDecorator( op, DecorateInfo( unittest.expectedFailure, dtypes=ON_MPS_XFAILLIST[key] ), ) return ops def mps_ops_error_inputs_modifier(ops: Sequence[OpInfo]) -> Sequence[OpInfo]: # Error input samples do not take a dtype argument. 
        XFAILLIST = {
            # Exceptions are not raised
            "__rmod__",
            "__rsub__",
            "__rpow__",
            "bernoulli",
            "clamp_max",
            "clamp_min",
            "masked_scatter",
            # unsupported float64 dtype
            "multinomial",
            "nn.functional.conv1d",
            "nn.functional.conv2d",
            "nn.functional.conv3d",
            "gather",
            "scatter",
            "scatter_add",
            # MPS does not support tensor dimensions > 16
            "amax",
            "amin",
            "aminmax",
            # memory overlapping checks
            "index_select",
        }

        def addDecorator(op: OpInfo, d: DecorateInfo) -> None:
            op.decorators = op.decorators + (d,)

        for op in ops:
            key = op.name + op.variant_test_name
            if key in XFAILLIST:
                addDecorator(op, DecorateInfo(unittest.expectedFailure))
        return ops

else:

    def mps_ops_modifier(
        ops: Sequence[OpInfo],
        device_type: str = "mps",
        xfail_exclusion: Optional[list[str]] = None,
        sparse: bool = False,
    ) -> Sequence[OpInfo]:
        return ops
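
# Illustrative usage sketch (not part of the original module). The `op_db`
# import, the `ops` decorator, and the TestCase wiring below are assumptions
# about how a test file might consume these modifiers, shown only for context:
#
#     from torch.testing._internal.common_methods_invocations import op_db
#     from torch.testing._internal.common_device_type import ops
#
#     mps_op_db = mps_ops_modifier(op_db)
#
#     class TestConsistency(TestCase):
#         @ops(mps_op_db, allowed_dtypes=[torch.float32])
#         def test_output_match(self, device, dtype, op):
#             ...
#
# Each modifier mutates the OpInfo entries it is given (appending DecorateInfo
# objects to `op.decorators`) and returns the same sequence, so it should be
# applied once, at collection time, before the device-type tests are instantiated.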