[MPS] Move sparsemps testing from test_mps to test_sparse (#161852)

Moves Sparse MPS testing from test_mps to test_sparse. There are a lot of skips for now, but I expect to remove them iteratively as the corresponding ops are implemented.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/161852
Approved by: https://github.com/malfet
Isalia20
2025-09-02 19:04:07 +00:00
committed by PyTorch MergeBot
parent 600c25e9a1
commit dcf385395d
7 changed files with 634 additions and 220 deletions
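
For orientation, a minimal sketch of the kind of sparse COO usage on MPS that the relocated tests exercise (illustrative only, not part of this diff; requires an MPS-capable machine):

import torch

if torch.backends.mps.is_available():
    indices = torch.tensor([[0, 1], [2, 0]], dtype=torch.int64, device="mps")
    values = torch.tensor([1.0, 2.0], device="mps")
    s = torch.sparse_coo_tensor(indices, values, (2, 3), device="mps")
    dense = torch.zeros((2, 3), device="mps")
    out = torch.add(dense, s, alpha=2.0)   # dense + alpha * sparse, evaluated on MPS
    print(out)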

View File

@ -6917,7 +6917,7 @@
variants: function, method
dispatch:
CompositeExplicitAutograd: clone
SparseCPU, SparseCUDA: clone_sparse
SparseCPU, SparseCUDA, SparseMPS: clone_sparse
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: clone_sparse_compressed
MkldnnCPU: mkldnn_clone
QuantizedCPU, QuantizedCUDA: quantized_clone
@ -6952,7 +6952,7 @@
CPU, CUDA: zero_
MPS: zero_mps_
Meta: zero_meta_
SparseCPU, SparseCUDA, SparseMeta: zero_sparse_
SparseCPU, SparseCUDA, SparseMPS, SparseMeta: zero_sparse_
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: zero_sparse_csr_
MkldnnCPU: mkldnn_zero_
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: zero_nested_
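
These two entries route Tensor.clone and Tensor.zero_ on sparse MPS tensors to the generic sparse kernels. A small hedged sketch of what that enables (assumes an MPS device; zero_ on a sparse tensor clears it in place to nnz == 0):

import torch

i = torch.tensor([[0, 1], [2, 0]], dtype=torch.int64, device="mps")
v = torch.tensor([3.0, 4.0], device="mps")
s = torch.sparse_coo_tensor(i, v, (2, 3))

c = s.clone()                     # SparseMPS -> clone_sparse
assert c.device.type == "mps" and c.layout == torch.sparse_coo
assert torch.equal(c._values().cpu(), s._values().cpu())

c.zero_()                         # SparseMPS -> zero_sparse_
assert c._nnz() == 0              # zero_ clears the sparse tensor in place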

View File

@ -123,10 +123,6 @@ SparseTensor& add_out_sparse_mps(const SparseTensor& self,
TORCH_CHECK(self.sizes().equals(other.sizes()),
"add: expected 'self' and 'other' to have same size, but ", self.sizes(), " != ", other.sizes());
TORCH_CHECK(is_same_density(self, other),
"add: expected 'self' and 'other' to have same density, but 'self' has ",
self.sparse_dim(), " sparse dimensions while 'other' has ", other.sparse_dim(), " sparse dimensions");
if (other._nnz() == 0) {
out.resize_as_(self);
Tensor vals = self._values();
@ -138,6 +134,24 @@ SparseTensor& add_out_sparse_mps(const SparseTensor& self,
return out;
}
if (self._nnz() == 0) {
out.resize_as_(other);
Tensor vals = other._values();
if (!alpha.isIntegral(false) || alpha.to<double>() != 1.0) {
vals = at::mul(vals, alpha);
}
if (vals.scalar_type() != out.scalar_type()) {
vals = vals.to(out.scalar_type());
}
alias_into_sparse(out, other._indices(), vals);
out._coalesced_(other.is_coalesced());
return out;
}
TORCH_CHECK(is_same_density(self, other),
"add: expected 'self' and 'other' to have same density, but 'self' has ",
self.sparse_dim(), " sparse dimensions while 'other' has ", other.sparse_dim(), " sparse dimensions");
Tensor t_indices_ = self._indices();
Tensor s_indices_ = other._indices();
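
The added branch mirrors the existing other._nnz() == 0 fast path: when self is empty, out becomes other scaled by alpha (cast to the output dtype), and the same-density check now runs only once both operands are non-empty. A hedged Python-level sketch of the behaviour this covers (assumes an MPS device; the comparison strategy is illustrative, not the one used by the tests):

import torch

empty = torch.sparse_coo_tensor(
    torch.zeros((2, 0), dtype=torch.int64, device="mps"),
    torch.tensor([], device="mps"),
    (2, 3),
)
other = torch.sparse_coo_tensor(
    torch.tensor([[0, 1], [2, 0]], device="mps"),
    torch.tensor([1.0, 2.0], device="mps"),
    (2, 3),
)
res = torch.add(empty, other, alpha=2.0)   # hits the new self._nnz() == 0 branch
assert res._nnz() == other._nnz()
assert torch.equal(res._indices().cpu(), other._indices().cpu())
assert torch.equal(res._values().cpu(), 2.0 * other._values().cpu())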

View File

@ -112,4 +112,6 @@ INSTANTIATE_COALESCE_WITH_POSITIONS(long);
INSTANTIATE_COALESCE_WITH_POSITIONS(char);
INSTANTIATE_COALESCE_WITH_POSITIONS(uchar);
INSTANTIATE_COALESCE_WITH_POSITIONS(short);
INSTANTIATE_COALESCE_WITH_POSITIONS(int);
INSTANTIATE_COALESCE_WITH_POSITIONS(int);
INSTANTIATE_COALESCE_WITH_POSITIONS(float2);
INSTANTIATE_COALESCE_WITH_POSITIONS(half2);
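
float2 and half2 are the Metal vector types that back complex tensor values, so the coalesce kernel can now be instantiated for complex-valued sparse MPS tensors (half2 presumably covering complex32; that mapping is an assumption). A minimal hedged sketch with torch.complex64, which needs a sufficiently recent macOS:

import torch

i = torch.tensor([[0, 0, 1], [1, 1, 2]], dtype=torch.int64, device="mps")
v = torch.tensor([1 + 1j, 2 - 1j, 3 + 0j], dtype=torch.complex64, device="mps")
s = torch.sparse_coo_tensor(i, v, (2, 3)).coalesce()   # duplicate (0, 1) entries are summed
assert s._nnz() == 2
assert torch.equal(s._values().cpu(),
                   torch.tensor([3 + 0j, 3 + 0j], dtype=torch.complex64))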

View File

@ -0,0 +1,266 @@
# shape: torch.Size([])
# nnz: 2
# sparse_dim: 0
# indices shape: torch.Size([0, 2])
# values shape: torch.Size([2])
########## torch.int32 ##########
# sparse tensor
tensor(indices=tensor([], size=(0, 2)),
values=tensor([0, 1]),
device='mps:0', size=(), nnz=2, dtype=torch.int32,
layout=torch.sparse_coo)
# _indices
tensor([], device='mps:0', size=(0, 2), dtype=torch.int64)
# _values
tensor([0, 1], device='mps:0', dtype=torch.int32)
########## torch.float32 ##########
# sparse tensor
tensor(indices=tensor([], size=(0, 2)),
values=tensor([0., 1.]),
device='mps:0', size=(), nnz=2, layout=torch.sparse_coo)
# after requires_grad_
tensor(indices=tensor([], size=(0, 2)),
values=tensor([0., 1.]),
device='mps:0', size=(), nnz=2, layout=torch.sparse_coo,
requires_grad=True)
# after addition
tensor(indices=tensor([], size=(0, 1)),
values=tensor([2.]),
device='mps:0', size=(), nnz=1, layout=torch.sparse_coo,
grad_fn=<AddBackward0>)
# _indices
tensor([], device='mps:0', size=(0, 2), dtype=torch.int64)
# _values
tensor([0., 1.], device='mps:0')
# shape: torch.Size([0])
# nnz: 10
# sparse_dim: 0
# indices shape: torch.Size([0, 10])
# values shape: torch.Size([10, 0])
########## torch.int32 ##########
# sparse tensor
tensor(indices=tensor([], size=(0, 10)),
values=tensor([], size=(10, 0)),
device='mps:0', size=(0,), nnz=10, dtype=torch.int32,
layout=torch.sparse_coo)
# _indices
tensor([], device='mps:0', size=(0, 10), dtype=torch.int64)
# _values
tensor([], device='mps:0', size=(10, 0), dtype=torch.int32)
########## torch.float32 ##########
# sparse tensor
tensor(indices=tensor([], size=(0, 10)),
values=tensor([], size=(10, 0)),
device='mps:0', size=(0,), nnz=10, layout=torch.sparse_coo)
# after requires_grad_
tensor(indices=tensor([], size=(0, 10)),
values=tensor([], size=(10, 0)),
device='mps:0', size=(0,), nnz=10, layout=torch.sparse_coo,
requires_grad=True)
# after addition
tensor(indices=tensor([], size=(0, 1)),
values=tensor([], size=(1, 0)),
device='mps:0', size=(0,), nnz=1, layout=torch.sparse_coo,
grad_fn=<AddBackward0>)
# _indices
tensor([], device='mps:0', size=(0, 10), dtype=torch.int64)
# _values
tensor([], device='mps:0', size=(10, 0))
# shape: torch.Size([2])
# nnz: 3
# sparse_dim: 0
# indices shape: torch.Size([0, 3])
# values shape: torch.Size([3, 2])
########## torch.int32 ##########
# sparse tensor
tensor(indices=tensor([], size=(0, 3)),
values=tensor([[0, 0],
[0, 1],
[1, 1]]),
device='mps:0', size=(2,), nnz=3, dtype=torch.int32,
layout=torch.sparse_coo)
# _indices
tensor([], device='mps:0', size=(0, 3), dtype=torch.int64)
# _values
tensor([[0, 0],
[0, 1],
[1, 1]], device='mps:0', dtype=torch.int32)
########## torch.float32 ##########
# sparse tensor
tensor(indices=tensor([], size=(0, 3)),
values=tensor([[0.0000, 0.3333],
[0.6667, 1.0000],
[1.3333, 1.6667]]),
device='mps:0', size=(2,), nnz=3, layout=torch.sparse_coo)
# after requires_grad_
tensor(indices=tensor([], size=(0, 3)),
values=tensor([[0.0000, 0.3333],
[0.6667, 1.0000],
[1.3333, 1.6667]]),
device='mps:0', size=(2,), nnz=3, layout=torch.sparse_coo,
requires_grad=True)
# after addition
tensor(indices=tensor([], size=(0, 1)),
values=tensor([[4.0000, 6.0000]]),
device='mps:0', size=(2,), nnz=1, layout=torch.sparse_coo,
grad_fn=<AddBackward0>)
# _indices
tensor([], device='mps:0', size=(0, 3), dtype=torch.int64)
# _values
tensor([[0.0000, 0.3333],
[0.6667, 1.0000],
[1.3333, 1.6667]], device='mps:0')
# shape: torch.Size([100, 3])
# nnz: 3
# sparse_dim: 1
# indices shape: torch.Size([1, 3])
# values shape: torch.Size([3, 3])
########## torch.int32 ##########
# sparse tensor
tensor(indices=tensor([[0, 1, 2]]),
values=tensor([[0, 0, 0],
[0, 0, 1],
[1, 1, 1]]),
device='mps:0', size=(100, 3), nnz=3, dtype=torch.int32,
layout=torch.sparse_coo)
# _indices
tensor([[0, 1, 2]], device='mps:0')
# _values
tensor([[0, 0, 0],
[0, 0, 1],
[1, 1, 1]], device='mps:0', dtype=torch.int32)
########## torch.float32 ##########
# sparse tensor
tensor(indices=tensor([[0, 1, 2]]),
values=tensor([[0.0000, 0.2222, 0.4444],
[0.6667, 0.8889, 1.1111],
[1.3333, 1.5556, 1.7778]]),
device='mps:0', size=(100, 3), nnz=3, layout=torch.sparse_coo)
# after requires_grad_
tensor(indices=tensor([[0, 1, 2]]),
values=tensor([[0.0000, 0.2222, 0.4444],
[0.6667, 0.8889, 1.1111],
[1.3333, 1.5556, 1.7778]]),
device='mps:0', size=(100, 3), nnz=3, layout=torch.sparse_coo,
requires_grad=True)
# after addition
tensor(indices=tensor([[0, 1, 2]]),
values=tensor([[0.0000, 0.4444, 0.8889],
[1.3333, 1.7778, 2.2222],
[2.6667, 3.1111, 3.5556]]),
device='mps:0', size=(100, 3), nnz=3, layout=torch.sparse_coo,
grad_fn=<AddBackward0>)
# _indices
tensor([[0, 1, 2]], device='mps:0')
# _values
tensor([[0.0000, 0.2222, 0.4444],
[0.6667, 0.8889, 1.1111],
[1.3333, 1.5556, 1.7778]], device='mps:0')
# shape: torch.Size([100, 20, 3])
# nnz: 0
# sparse_dim: 2
# indices shape: torch.Size([2, 0])
# values shape: torch.Size([0, 3])
########## torch.int32 ##########
# sparse tensor
tensor(indices=tensor([], size=(2, 0)),
values=tensor([], size=(0, 3)),
device='mps:0', size=(100, 20, 3), nnz=0, dtype=torch.int32,
layout=torch.sparse_coo)
# _indices
tensor([], device='mps:0', size=(2, 0), dtype=torch.int64)
# _values
tensor([], device='mps:0', size=(0, 3), dtype=torch.int32)
########## torch.float32 ##########
# sparse tensor
tensor(indices=tensor([], size=(2, 0)),
values=tensor([], size=(0, 3)),
device='mps:0', size=(100, 20, 3), nnz=0, layout=torch.sparse_coo)
# after requires_grad_
tensor(indices=tensor([], size=(2, 0)),
values=tensor([], size=(0, 3)),
device='mps:0', size=(100, 20, 3), nnz=0, layout=torch.sparse_coo,
requires_grad=True)
# after addition
tensor(indices=tensor([], size=(2, 0)),
values=tensor([], size=(0, 3)),
device='mps:0', size=(100, 20, 3), nnz=0, layout=torch.sparse_coo,
grad_fn=<AddBackward0>)
# _indices
tensor([], device='mps:0', size=(2, 0), dtype=torch.int64)
# _values
tensor([], device='mps:0', size=(0, 3))
# shape: torch.Size([10, 0, 3])
# nnz: 3
# sparse_dim: 0
# indices shape: torch.Size([0, 3])
# values shape: torch.Size([3, 10, 0, 3])
########## torch.int32 ##########
# sparse tensor
tensor(indices=tensor([], size=(0, 3)),
values=tensor([], size=(3, 10, 0, 3)),
device='mps:0', size=(10, 0, 3), nnz=3, dtype=torch.int32,
layout=torch.sparse_coo)
# _indices
tensor([], device='mps:0', size=(0, 3), dtype=torch.int64)
# _values
tensor([], device='mps:0', size=(3, 10, 0, 3), dtype=torch.int32)
########## torch.float32 ##########
# sparse tensor
tensor(indices=tensor([], size=(0, 3)),
values=tensor([], size=(3, 10, 0, 3)),
device='mps:0', size=(10, 0, 3), nnz=3, layout=torch.sparse_coo)
# after requires_grad_
tensor(indices=tensor([], size=(0, 3)),
values=tensor([], size=(3, 10, 0, 3)),
device='mps:0', size=(10, 0, 3), nnz=3, layout=torch.sparse_coo,
requires_grad=True)
# after addition
tensor(indices=tensor([], size=(0, 1)),
values=tensor([], size=(1, 10, 0, 3)),
device='mps:0', size=(10, 0, 3), nnz=1, layout=torch.sparse_coo,
grad_fn=<AddBackward0>)
# _indices
tensor([], device='mps:0', size=(0, 3), dtype=torch.int64)
# _values
tensor([], device='mps:0', size=(3, 10, 0, 3))
# shape: torch.Size([10, 0, 3])
# nnz: 0
# sparse_dim: 0
# indices shape: torch.Size([0, 0])
# values shape: torch.Size([0, 10, 0, 3])
########## torch.int32 ##########
# sparse tensor
tensor(indices=tensor([], size=(0, 0)),
values=tensor([], size=(0, 10, 0, 3)),
device='mps:0', size=(10, 0, 3), nnz=0, dtype=torch.int32,
layout=torch.sparse_coo)
# _indices
tensor([], device='mps:0', size=(0, 0), dtype=torch.int64)
# _values
tensor([], device='mps:0', size=(0, 10, 0, 3), dtype=torch.int32)
########## torch.float32 ##########
# sparse tensor
tensor(indices=tensor([], size=(0, 0)),
values=tensor([], size=(0, 10, 0, 3)),
device='mps:0', size=(10, 0, 3), nnz=0, layout=torch.sparse_coo)
# after requires_grad_
tensor(indices=tensor([], size=(0, 0)),
values=tensor([], size=(0, 10, 0, 3)),
device='mps:0', size=(10, 0, 3), nnz=0, layout=torch.sparse_coo,
requires_grad=True)
# after addition
tensor(indices=tensor([], size=(0, 0)),
values=tensor([], size=(0, 10, 0, 3)),
device='mps:0', size=(10, 0, 3), nnz=0, layout=torch.sparse_coo,
grad_fn=<AddBackward0>)
# _indices
tensor([], device='mps:0', size=(0, 0), dtype=torch.int64)
# _values
tensor([], device='mps:0', size=(0, 10, 0, 3))
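
These expected-output files record the repr of sparse MPS tensors for the shapes listed in the header comments. A hedged sketch of how such a printout is produced (values chosen to mirror the first block above; not the exact generator used by the test):

import torch

# mirrors the first case above: shape (), sparse_dim 0, nnz 2
indices = torch.zeros((0, 2), dtype=torch.int64, device="mps")
values = torch.arange(2, dtype=torch.float32, device="mps")
x = torch.sparse_coo_tensor(indices, values, torch.Size([]), device="mps")
print(x)             # should match the torch.float32 block at the top of this file
print(x._indices())
print(x._values())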

View File

@ -0,0 +1,265 @@
# shape: torch.Size([])
# nnz: 2
# sparse_dim: 0
# indices shape: torch.Size([0, 2])
# values shape: torch.Size([2])
########## torch.int32 ##########
# sparse tensor
tensor(indices=tensor([], size=(0, 2)),
values=tensor([0, 1]),
device='mps:0', size=(), nnz=2, dtype=torch.int32,
layout=torch.sparse_coo)
# _indices
tensor([], device='mps:0', size=(0, 2), dtype=torch.int64)
# _values
tensor([0, 1], device='mps:0', dtype=torch.int32)
########## torch.float32 ##########
# sparse tensor
tensor(indices=tensor([], size=(0, 2)),
values=tensor([0., 1.]),
device='mps:0', size=(), nnz=2, layout=torch.sparse_coo)
# after requires_grad_
tensor(indices=tensor([], size=(0, 2)),
values=tensor([0., 1.]),
device='mps:0', size=(), nnz=2, layout=torch.sparse_coo,
requires_grad=True)
# after addition
tensor(indices=tensor([], size=(0, 1)),
values=tensor([2.]),
device='mps:0', size=(), nnz=1, layout=torch.sparse_coo,
grad_fn=<AddBackward0>)
# _indices
tensor([], device='mps:0', size=(0, 2), dtype=torch.int64)
# _values
tensor([0., 1.], device='mps:0')
# shape: torch.Size([0])
# nnz: 10
# sparse_dim: 0
# indices shape: torch.Size([0, 10])
# values shape: torch.Size([10, 0])
########## torch.int32 ##########
# sparse tensor
tensor(indices=tensor([], size=(0, 10)),
values=tensor([], size=(10, 0)),
device='mps:0', size=(0,), nnz=10, dtype=torch.int32,
layout=torch.sparse_coo)
# _indices
tensor([], device='mps:0', size=(0, 10), dtype=torch.int64)
# _values
tensor([], device='mps:0', size=(10, 0), dtype=torch.int32)
########## torch.float32 ##########
# sparse tensor
tensor(indices=tensor([], size=(0, 10)),
values=tensor([], size=(10, 0)),
device='mps:0', size=(0,), nnz=10, layout=torch.sparse_coo)
# after requires_grad_
tensor(indices=tensor([], size=(0, 10)),
values=tensor([], size=(10, 0)),
device='mps:0', size=(0,), nnz=10, layout=torch.sparse_coo,
requires_grad=True)
# after addition
tensor(indices=tensor([], size=(0, 1)),
values=tensor([], size=(1, 0)),
device='mps:0', size=(0,), nnz=1, layout=torch.sparse_coo,
grad_fn=<AddBackward0>)
# _indices
tensor([], device='mps:0', size=(0, 10), dtype=torch.int64)
# _values
tensor([], device='mps:0', size=(10, 0))
# shape: torch.Size([2])
# nnz: 3
# sparse_dim: 0
# indices shape: torch.Size([0, 3])
# values shape: torch.Size([3, 2])
########## torch.int32 ##########
# sparse tensor
tensor(indices=tensor([], size=(0, 3)),
values=tensor([[0, 0],
[0, 1],
[1, 1]]),
device='mps:0', size=(2,), nnz=3, dtype=torch.int32,
layout=torch.sparse_coo)
# _indices
tensor([], device='mps:0', size=(0, 3), dtype=torch.int64)
# _values
tensor([[0, 0],
[0, 1],
[1, 1]], device='mps:0', dtype=torch.int32)
########## torch.float32 ##########
# sparse tensor
tensor(indices=tensor([], size=(0, 3)),
values=tensor([[0.0000, 0.3333],
[0.6667, 1.0000],
[1.3333, 1.6667]]),
device='mps:0', size=(2,), nnz=3, layout=torch.sparse_coo)
# after requires_grad_
tensor(indices=tensor([], size=(0, 3)),
values=tensor([[0.0000, 0.3333],
[0.6667, 1.0000],
[1.3333, 1.6667]]),
device='mps:0', size=(2,), nnz=3, layout=torch.sparse_coo,
requires_grad=True)
# after addition
tensor(indices=tensor([], size=(0, 1)),
values=tensor([[4.0000, 6.0000]]),
device='mps:0', size=(2,), nnz=1, layout=torch.sparse_coo,
grad_fn=<AddBackward0>)
# _indices
tensor([], device='mps:0', size=(0, 3), dtype=torch.int64)
# _values
tensor([[0.0000, 0.3333],
[0.6667, 1.0000],
[1.3333, 1.6667]], device='mps:0')
# shape: torch.Size([100, 3])
# nnz: 3
# sparse_dim: 1
# indices shape: torch.Size([1, 3])
# values shape: torch.Size([3, 3])
########## torch.int32 ##########
# sparse tensor
tensor(indices=tensor([[0, 1, 0]]),
values=tensor([[0, 0, 0],
[0, 0, 1],
[1, 1, 1]]),
device='mps:0', size=(100, 3), nnz=3, dtype=torch.int32,
layout=torch.sparse_coo)
# _indices
tensor([[0, 1, 0]], device='mps:0')
# _values
tensor([[0, 0, 0],
[0, 0, 1],
[1, 1, 1]], device='mps:0', dtype=torch.int32)
########## torch.float32 ##########
# sparse tensor
tensor(indices=tensor([[0, 1, 0]]),
values=tensor([[0.0000, 0.2222, 0.4444],
[0.6667, 0.8889, 1.1111],
[1.3333, 1.5556, 1.7778]]),
device='mps:0', size=(100, 3), nnz=3, layout=torch.sparse_coo)
# after requires_grad_
tensor(indices=tensor([[0, 1, 0]]),
values=tensor([[0.0000, 0.2222, 0.4444],
[0.6667, 0.8889, 1.1111],
[1.3333, 1.5556, 1.7778]]),
device='mps:0', size=(100, 3), nnz=3, layout=torch.sparse_coo,
requires_grad=True)
# after addition
tensor(indices=tensor([[0, 1]]),
values=tensor([[2.6667, 3.5556, 4.4444],
[1.3333, 1.7778, 2.2222]]),
device='mps:0', size=(100, 3), nnz=2, layout=torch.sparse_coo,
grad_fn=<AddBackward0>)
# _indices
tensor([[0, 1, 0]], device='mps:0')
# _values
tensor([[0.0000, 0.2222, 0.4444],
[0.6667, 0.8889, 1.1111],
[1.3333, 1.5556, 1.7778]], device='mps:0')
# shape: torch.Size([100, 20, 3])
# nnz: 0
# sparse_dim: 2
# indices shape: torch.Size([2, 0])
# values shape: torch.Size([0, 3])
########## torch.int32 ##########
# sparse tensor
tensor(indices=tensor([], size=(2, 0)),
values=tensor([], size=(0, 3)),
device='mps:0', size=(100, 20, 3), nnz=0, dtype=torch.int32,
layout=torch.sparse_coo)
# _indices
tensor([], device='mps:0', size=(2, 0), dtype=torch.int64)
# _values
tensor([], device='mps:0', size=(0, 3), dtype=torch.int32)
########## torch.float32 ##########
# sparse tensor
tensor(indices=tensor([], size=(2, 0)),
values=tensor([], size=(0, 3)),
device='mps:0', size=(100, 20, 3), nnz=0, layout=torch.sparse_coo)
# after requires_grad_
tensor(indices=tensor([], size=(2, 0)),
values=tensor([], size=(0, 3)),
device='mps:0', size=(100, 20, 3), nnz=0, layout=torch.sparse_coo,
requires_grad=True)
# after addition
tensor(indices=tensor([], size=(2, 0)),
values=tensor([], size=(0, 3)),
device='mps:0', size=(100, 20, 3), nnz=0, layout=torch.sparse_coo,
grad_fn=<AddBackward0>)
# _indices
tensor([], device='mps:0', size=(2, 0), dtype=torch.int64)
# _values
tensor([], device='mps:0', size=(0, 3))
# shape: torch.Size([10, 0, 3])
# nnz: 3
# sparse_dim: 0
# indices shape: torch.Size([0, 3])
# values shape: torch.Size([3, 10, 0, 3])
########## torch.int32 ##########
# sparse tensor
tensor(indices=tensor([], size=(0, 3)),
values=tensor([], size=(3, 10, 0, 3)),
device='mps:0', size=(10, 0, 3), nnz=3, dtype=torch.int32,
layout=torch.sparse_coo)
# _indices
tensor([], device='mps:0', size=(0, 3), dtype=torch.int64)
# _values
tensor([], device='mps:0', size=(3, 10, 0, 3), dtype=torch.int32)
########## torch.float32 ##########
# sparse tensor
tensor(indices=tensor([], size=(0, 3)),
values=tensor([], size=(3, 10, 0, 3)),
device='mps:0', size=(10, 0, 3), nnz=3, layout=torch.sparse_coo)
# after requires_grad_
tensor(indices=tensor([], size=(0, 3)),
values=tensor([], size=(3, 10, 0, 3)),
device='mps:0', size=(10, 0, 3), nnz=3, layout=torch.sparse_coo,
requires_grad=True)
# after addition
tensor(indices=tensor([], size=(0, 1)),
values=tensor([], size=(1, 10, 0, 3)),
device='mps:0', size=(10, 0, 3), nnz=1, layout=torch.sparse_coo,
grad_fn=<AddBackward0>)
# _indices
tensor([], device='mps:0', size=(0, 3), dtype=torch.int64)
# _values
tensor([], device='mps:0', size=(3, 10, 0, 3))
# shape: torch.Size([10, 0, 3])
# nnz: 0
# sparse_dim: 0
# indices shape: torch.Size([0, 0])
# values shape: torch.Size([0, 10, 0, 3])
########## torch.int32 ##########
# sparse tensor
tensor(indices=tensor([], size=(0, 0)),
values=tensor([], size=(0, 10, 0, 3)),
device='mps:0', size=(10, 0, 3), nnz=0, dtype=torch.int32,
layout=torch.sparse_coo)
# _indices
tensor([], device='mps:0', size=(0, 0), dtype=torch.int64)
# _values
tensor([], device='mps:0', size=(0, 10, 0, 3), dtype=torch.int32)
########## torch.float32 ##########
# sparse tensor
tensor(indices=tensor([], size=(0, 0)),
values=tensor([], size=(0, 10, 0, 3)),
device='mps:0', size=(10, 0, 3), nnz=0, layout=torch.sparse_coo)
# after requires_grad_
tensor(indices=tensor([], size=(0, 0)),
values=tensor([], size=(0, 10, 0, 3)),
device='mps:0', size=(10, 0, 3), nnz=0, layout=torch.sparse_coo,
requires_grad=True)
# after addition
tensor(indices=tensor([], size=(0, 0)),
values=tensor([], size=(0, 10, 0, 3)),
device='mps:0', size=(10, 0, 3), nnz=0, layout=torch.sparse_coo,
grad_fn=<AddBackward0>)
# _indices
tensor([], device='mps:0', size=(0, 0), dtype=torch.int64)
# _values
tensor([], device='mps:0', size=(0, 10, 0, 3))

View File

@ -12773,213 +12773,6 @@ class TestMetalLibrary(TestCaseMPS):
f"Capture file {capture_dirname} contains only metadata, i.e. {capture_listdir}")
class TestSparseMPS(TestCaseMPS):
def _get_basic_sparse_coo(self, device="mps"):
indices = torch.tensor([[0, 1], [2, 0]], dtype=torch.int64, device=device)
values = torch.tensor([1, 2], dtype=torch.float32, device=device)
size = (2, 3)
return torch.sparse_coo_tensor(indices, values, size, device=device)
def test_sparse_coo_tensor_with_dims(self):
indices = torch.zeros((2, 0), dtype=torch.int64, device="mps")
values = torch.tensor([], dtype=torch.float32, device="mps")
size = (2, 3)
t = torch.sparse_coo_tensor(indices, values, size, device="mps")
self.assertEqual(t.device.type, "mps")
self.assertEqual(t.layout, torch.sparse_coo)
def test_sparse_coo_tensor_with_dims_and_tensors(self):
indices = torch.tensor([[0, 1], [2, 0]], device="mps")
values = torch.tensor([1., 2.], device="mps")
size = (2, 3)
t = torch.sparse_coo_tensor(indices, values, size, device="mps")
self.assertEqual(t.device.type, "mps")
self.assertEqual(t.layout, torch.sparse_coo)
self.assertEqual(t._indices().cpu(), indices.cpu())
self.assertEqual(t._values().cpu(), values.cpu())
def test_nnz(self):
t = self._get_basic_sparse_coo()
self.assertEqual(t._nnz(), 2)
def test_sparse_dim(self):
t = self._get_basic_sparse_coo()
self.assertEqual(t.sparse_dim(), 2)
def test_to_sparse(self):
t = torch.tensor([[[1., 0], [2., 3.]], [[4., 0], [5., 6.]]], device="mps")
x = t.to_sparse()
t_cpu = torch.tensor([[[1., 0], [2., 3.]], [[4., 0], [5., 6.]]], device="mps")
x_cpu = t.to_sparse()
self.assertEqual(x.cpu(), x_cpu)
def test_resize(self):
indices = torch.tensor([[0, 1], [2, 0]])
values = torch.tensor([3.0, 4.0])
size = torch.Size([2, 3])
sparse = torch.sparse_coo_tensor(indices, values, size, device="mps")
sparse_cpu = torch.sparse_coo_tensor(indices, values, size, device="cpu")
sparse = sparse.sparse_resize_(torch.Size([4, 5]), sparse_dim=2, dense_dim=0)
sparse_cpu = sparse_cpu.sparse_resize_(torch.Size([4, 5]), sparse_dim=2, dense_dim=0)
self.assertEqual(sparse, sparse_cpu)
@parametrize("dtype", [torch.int8, torch.int16, torch.uint8, torch.int32, torch.int64,
torch.float32, torch.float16, torch.bfloat16, torch.bool])
def test_coalesce(self, dtype):
indices = torch.tensor([[0, 0, 1, 1], [0, 0, 2, 2]], dtype=torch.int64, device="mps")
values = torch.tensor([1., 2., 3., 4.], dtype=dtype, device="mps")
size = (2, 3)
indices_cpu = indices.cpu()
values_cpu = values.cpu()
sparse_mps = torch.sparse_coo_tensor(indices, values, size, device="mps")
sparse_cpu = torch.sparse_coo_tensor(indices_cpu, values_cpu, size, device="cpu")
coalesced_mps = sparse_mps.coalesce()
coalesced_cpu = sparse_cpu.coalesce()
self.assertTrue(coalesced_mps.is_coalesced())
self.assertTrue(coalesced_cpu.is_coalesced())
self.assertEqual(coalesced_mps._nnz(), 2)
self.assertEqual(coalesced_mps.cpu(), coalesced_cpu)
def test_already_coalesced_tensor(self):
already_coalesced = self._get_basic_sparse_coo()
result = already_coalesced.coalesce()
self.assertTrue(result.is_coalesced())
self.assertEqual(result._indices().cpu(), already_coalesced._indices().cpu())
self.assertEqual(result._values().cpu(), already_coalesced._values().cpu())
def test_coalesce_empty_sparse_tensor(self):
empty_indices = torch.zeros((2, 0), dtype=torch.int64, device="mps")
empty_values = torch.tensor([], dtype=torch.float32, device="mps")
empty_sparse = torch.sparse_coo_tensor(empty_indices, empty_values, (3, 3), device="mps")
empty_coalesced = empty_sparse.coalesce()
self.assertTrue(empty_coalesced.is_coalesced())
self.assertEqual(empty_coalesced._nnz(), 0)
def test_coalesce_large_tensor(self):
size = (1000000, 1000000)
num_elements = 1000
# 800 unique random positions
unique_indices = torch.randint(0, size[0], (2, 800), dtype=torch.int64)
# 200 duplicates by repeating some of the first 200 indices
duplicate_indices = unique_indices[:, :200]
indices = torch.cat([unique_indices, duplicate_indices], dim=1)
# shuffle indices to mix duplicates with unique entries
perm = torch.randperm(indices.size(1))
indices = indices[:, perm]
values = torch.randn(num_elements, dtype=torch.float32)
indices_mps = indices.to("mps")
values_mps = values.to("mps")
sparse_mps = torch.sparse_coo_tensor(indices_mps, values_mps, size, device="mps")
sparse_cpu = torch.sparse_coo_tensor(indices, values, size, device="cpu")
self.assertFalse(sparse_mps.is_coalesced())
coalesced_mps = sparse_mps.coalesce()
coalesced_cpu = sparse_cpu.coalesce()
self.assertTrue(coalesced_mps.is_coalesced())
self.assertTrue(coalesced_cpu.is_coalesced())
self.assertEqual(coalesced_mps._nnz(), coalesced_cpu._nnz())
self.assertEqual(coalesced_mps._indices().cpu(), coalesced_cpu._indices())
self.assertEqual(coalesced_mps._values().cpu(), coalesced_cpu._values())
def test_sparse_add(self):
# Basic dense + sparse add
dense_mps = torch.zeros((2, 3), device="mps", dtype=torch.float32)
sparse_mps = self._get_basic_sparse_coo(device="mps")
dense_cpu = dense_mps.cpu()
sparse_cpu = torch.sparse_coo_tensor(
sparse_mps._indices().cpu(), sparse_mps._values().cpu(), sparse_mps.size(), device="cpu"
)
res_mps = torch.add(dense_mps, sparse_mps)
res_cpu = torch.add(dense_cpu, sparse_cpu)
self.assertEqual(res_mps.cpu(), res_cpu)
# alpha scaling (integral alpha)
res_mps = torch.add(dense_mps, sparse_mps, alpha=2)
res_cpu = torch.add(dense_cpu, sparse_cpu, alpha=2)
self.assertEqual(res_mps.cpu(), res_cpu)
# alpha scaling (float alpha) with random dense
dense2_mps = torch.randn((2, 3), device="mps", dtype=torch.float32)
dense2_cpu = dense2_mps.cpu()
res_mps = torch.add(dense2_mps, sparse_mps, alpha=0.5)
res_cpu = torch.add(dense2_cpu, sparse_cpu, alpha=0.5)
self.assertEqual(res_mps.cpu(), res_cpu)
# nnz == 0 fast-path
empty_indices_mps = torch.zeros((2, 0), dtype=torch.int64, device="mps")
empty_values_mps = torch.tensor([], dtype=torch.float32, device="mps")
empty_sparse_mps = torch.sparse_coo_tensor(empty_indices_mps, empty_values_mps, (2, 3), device="mps")
empty_indices_cpu = empty_indices_mps.cpu()
empty_values_cpu = empty_values_mps.cpu()
empty_sparse_cpu = torch.sparse_coo_tensor(empty_indices_cpu, empty_values_cpu, (2, 3), device="cpu")
res_mps = torch.add(dense2_mps, empty_sparse_mps)
res_cpu = torch.add(dense2_cpu, empty_sparse_cpu)
self.assertEqual(res_mps.cpu(), res_cpu)
# 3D case to exercise view_cols > 1 path (values are 2D)
indices3_mps = torch.tensor([[0, 1], [2, 0]], dtype=torch.int64, device="mps")
values3_mps = torch.tensor([[1., 2., 3., 4.], [5., 6., 7., 8.]], dtype=torch.float32, device="mps")
size3 = (2, 3, 4)
sp3_mps = torch.sparse_coo_tensor(indices3_mps, values3_mps, size3, device="mps")
dense3_mps = torch.randn(size3, device="mps", dtype=torch.float32)
indices3_cpu = indices3_mps.cpu()
values3_cpu = values3_mps.cpu()
sp3_cpu = torch.sparse_coo_tensor(indices3_cpu, values3_cpu, size3, device="cpu")
dense3_cpu = dense3_mps.cpu()
res_mps = torch.add(dense3_mps, sp3_mps, alpha=1.0)
res_cpu = torch.add(dense3_cpu, sp3_cpu, alpha=1.0)
self.assertEqual(res_mps.cpu(), res_cpu)
# dtype promotion: dense float32 + sparse float16
sparse_f16_mps = torch.sparse_coo_tensor(
sparse_mps._indices(),
sparse_mps._values().to(torch.float16),
sparse_mps.size(),
device="mps",
)
sparse_f16_cpu = torch.sparse_coo_tensor(
sparse_f16_mps._indices().cpu(),
sparse_f16_mps._values().cpu(),
sparse_f16_mps.size(),
device="cpu",
)
res_mps = torch.add(dense2_mps, sparse_f16_mps, alpha=0.25)
res_cpu = torch.add(dense2_cpu, sparse_f16_cpu, alpha=0.25)
self.assertEqual(res_mps.cpu(), res_cpu)
# broadcasting not supported: mismatched size should error
bad_sparse_mps = torch.sparse_coo_tensor(
sparse_mps._indices(), sparse_mps._values(), (2, 4), device="mps"
)
with self.assertRaisesRegex(RuntimeError, "same size"):
torch.add(dense_mps, bad_sparse_mps)
# sparse + sparse with overlap (tests concatenation + coalesce + alpha)
s1_idx = torch.tensor([[0, 0, 1], [0, 0, 2]], dtype=torch.int64)
s1_val = torch.tensor([1., 2., 3.], dtype=torch.float32)
s2_idx = torch.tensor([[0, 1, 1], [0, 2, 2]], dtype=torch.int64)
s2_val = torch.tensor([4., 5., 6.], dtype=torch.float32)
s1_mps = torch.sparse_coo_tensor(s1_idx.to("mps"), s1_val.to("mps"), (2, 3), device="mps")
s2_mps = torch.sparse_coo_tensor(s2_idx.to("mps"), s2_val.to("mps"), (2, 3), device="mps")
s1_cpu = torch.sparse_coo_tensor(s1_idx, s1_val, (2, 3), device="cpu")
s2_cpu = torch.sparse_coo_tensor(s2_idx, s2_val, (2, 3), device="cpu")
sp_res_mps = torch.add(s1_mps, s2_mps, alpha=2.0).coalesce()
sp_res_cpu = torch.add(s1_cpu, s2_cpu, alpha=2.0).coalesce()
self.assertEqual(sp_res_mps.cpu(), sp_res_cpu)
# TODO: Actually instantiate that test for the "mps" device to better reflect what it is doing.
# This requires mps to be properly registered in the device generic test framework which is not the
# case right now. We can probably use `allow_mps` introduced in https://github.com/pytorch/pytorch/pull/87342
@ -12994,7 +12787,6 @@ instantiate_parametrized_tests(TestMPS)
instantiate_parametrized_tests(TestSDPA)
instantiate_parametrized_tests(TestSmoothL1Loss)
instantiate_parametrized_tests(TestMetalLibrary)
instantiate_parametrized_tests(TestSparseMPS)
if __name__ == "__main__":
run_tests()

View File

@ -21,12 +21,12 @@ from packaging import version
from torch.testing._internal.common_cuda import \
(SM53OrLater, SM80OrLater, TEST_MULTIGPU)
from torch.testing._internal.common_device_type import \
(instantiate_device_type_tests, ops, dtypes, dtypesIfCUDA, onlyCPU, onlyCUDA, precisionOverride,
deviceCountAtLeast, OpDTypes, onlyNativeDeviceTypes, skipCUDAIf, largeTensorTest)
(instantiate_device_type_tests, ops, dtypes, dtypesIfCUDA, dtypesIfMPS, onlyCPU, onlyCUDA, precisionOverride,
deviceCountAtLeast, OpDTypes, onlyNativeDeviceTypes, skipCUDAIf, expectedFailureMPS, largeTensorTest)
from torch.testing._internal.common_methods_invocations import \
(op_db, reduction_ops, sparse_unary_ufuncs, sparse_masked_reduction_ops, binary_ufuncs)
from torch.testing._internal.common_dtype import (
all_types, all_types_and_complex, all_types_and_complex_and, floating_and_complex_types,
all_types, all_types_and_complex, all_mps_types, all_types_and_complex_and, floating_and_complex_types,
floating_and_complex_types_and, integral_types, floating_types_and,
)
from torch.testing._internal.opinfo.definitions.sparse import validate_sample_input_sparse
@ -224,10 +224,12 @@ class TestSparse(TestSparseBase):
return torch.empty(*args, **kwargs).normal_()
@dtypes(torch.double)
@dtypesIfMPS(torch.float32)
def test_print_coalesced(self, device, dtype):
self._test_print(device, dtype, True)
@dtypes(torch.double)
@dtypesIfMPS(torch.float32)
def test_print_uncoalesced(self, device, dtype):
self._test_print(device, dtype, False)
@ -266,7 +268,7 @@ class TestSparse(TestSparseBase):
if values.dtype == torch.double:
dtypes.append(torch.float)
else:
dtypes.append(torch.double)
dtypes.append(torch.double if values.device != torch.device("mps:0") else torch.float32)
for dtype in dtypes:
printed.append(f"########## {dtype} ##########")
x = sp_tensor.detach().to(dtype)
@ -286,6 +288,7 @@ class TestSparse(TestSparseBase):
@coalescedonoff
@dtypes(torch.double, torch.cdouble)
@dtypesIfMPS(torch.float32, torch.complex64)
def test_basic(self, device, dtype, coalesced):
def test_shape(sparse_dims, nnz, with_size):
if isinstance(with_size, Number):
@ -320,6 +323,7 @@ class TestSparse(TestSparseBase):
@coalescedonoff
@dtypes(torch.double, torch.cdouble, torch.bfloat16)
@dtypesIfMPS(torch.float32, torch.complex64)
@precisionOverride({torch.bfloat16: 1e-2})
def test_coalesce(self, device, dtype, coalesced):
@ -382,6 +386,7 @@ class TestSparse(TestSparseBase):
sparse_matrix = sparse_matrix.coalesce()
@dtypes(torch.double)
@dtypesIfMPS(torch.float32)
@skipIfTorchDynamo("https://github.com/pytorch/pytorch/issues/89395")
def test_coalesce_reference_cycle(self, device, dtype):
# Test coalesce doesn't create autograd graph cycles (gh-52253)
@ -409,6 +414,7 @@ class TestSparse(TestSparseBase):
self.assertTrue(ref.expired())
@dtypes(torch.double)
@dtypesIfMPS(torch.float32)
def test_ctor_large_sizes(self, device, dtype):
# Test that integer overflow is detected when computing numel
# of a sparse tensor with large dimensions (gh-57416). Notice
@ -423,6 +429,7 @@ class TestSparse(TestSparseBase):
indices, values, (N + 1,) * 4, device=device))
@dtypes(torch.double, torch.cdouble)
@dtypesIfMPS(torch.float32, torch.complex64)
def test_ctor_size_checks(self, device, dtype):
indices = self.index_tensor([
[0, 0, 0],
@ -446,6 +453,7 @@ class TestSparse(TestSparseBase):
RuntimeError,
lambda: self.sparse_tensor(indices, values, torch.Size([2, 4, 2, 1])))
@expectedFailureMPS
@coalescedonoff
@dtypes(torch.double)
def test_ctor_is_coalesced_with_gradcheck(self, device, dtype, coalesced):
@ -471,6 +479,7 @@ class TestSparse(TestSparseBase):
"cannot set is_coalesced to true if indices correspond to uncoalesced COO tensor"):
torch.autograd.gradcheck(func, (t._indices(), t._values().requires_grad_(True), shape, True))
@expectedFailureMPS
@dtypes(*floating_and_complex_types_and(torch.float16, torch.bfloat16))
@unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupported triggers assertion error")
@gradcheck_semantics()
@ -536,6 +545,7 @@ class TestSparse(TestSparseBase):
@coalescedonoff
@dtypes(torch.float16, torch.bfloat16, torch.float64, torch.int, torch.cfloat, torch.cdouble)
@expectedFailureMPS # unique_dim not implemented for MPS device
def test_to_sparse(self, device, dtype, coalesced):
shape = [5, 2, 10, 4]
max_nnz = 1
@ -555,6 +565,7 @@ class TestSparse(TestSparseBase):
self.assertEqual(dim, result.sparse_dim())
@dtypes(torch.double, torch.cdouble)
@dtypesIfMPS(torch.float32, torch.complex64)
def test_sparse_bool(self, device, dtype):
a = torch.tensor([True, False], dtype=dtype, device=device).to(torch.bool)
b = a.to_sparse().to_dense()
@ -562,6 +573,7 @@ class TestSparse(TestSparseBase):
@skipIfTorchDynamo("https://github.com/pytorch/pytorch/issues/108667")
@dtypes(torch.double, torch.cdouble)
@dtypesIfMPS(torch.float32, torch.complex64)
def test_scalar(self, device, dtype):
# tensor with value
a = self.sparse_tensor(self.index_tensor([], device=device).unsqueeze(1), 12.3, [], dtype=dtype, device=device)
@ -592,6 +604,7 @@ class TestSparse(TestSparseBase):
self.assertEqual(a, a.to_dense().to_sparse())
@dtypes(torch.double, torch.cdouble)
@dtypesIfMPS(torch.float32, torch.complex64)
def test_shared(self, device, dtype):
i = self.index_tensor([[2]], device=device)
v = torch.tensor([5], dtype=dtype, device=device)
@ -607,6 +620,7 @@ class TestSparse(TestSparseBase):
i[0][0] = 0
self.assertEqual(torch.empty((3, 0), dtype=dtype, device=device), self.safeToDense(x))
@expectedFailureMPS
@dtypes(torch.double, torch.cdouble)
@unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupported triggers assertion error")
@gradcheck_semantics()
@ -656,6 +670,7 @@ class TestSparse(TestSparseBase):
test_tensor(x, res)
@dtypes(torch.double, torch.cdouble)
@dtypesIfMPS(torch.float32, torch.complex64)
def test_contig(self, device, dtype):
def test_tensor(x, exp_i, exp_v):
x = x.coalesce()
@ -737,6 +752,7 @@ class TestSparse(TestSparseBase):
test_tensor(x, exp_i, exp_v)
@dtypes(torch.double, torch.cdouble)
@dtypesIfMPS(torch.float32, torch.complex64)
def test_contig_hybrid(self, device, dtype):
def test_tensor(x, exp_i, exp_v):
x = x.coalesce()
@ -824,6 +840,7 @@ class TestSparse(TestSparseBase):
test_tensor(x, exp_i, exp_v)
@coalescedonoff
@dtypesIfMPS(torch.float32, torch.complex64)
@dtypes(torch.double, torch.cdouble)
def test_clone(self, device, dtype, coalesced):
def test_shape(sparse_dims, nnz, with_size):
@ -842,6 +859,7 @@ class TestSparse(TestSparseBase):
test_shape(3, 0, [0, 0, 100, 5, 5, 5, 0])
@coalescedonoff
@expectedFailureMPS
@dtypes(torch.double, torch.cdouble, torch.bfloat16)
@precisionOverride({torch.bfloat16: 2e-2})
def test_Sparse_to_Sparse_copy_(self, device, dtype, coalesced):
@ -944,6 +962,7 @@ class TestSparse(TestSparseBase):
@coalescedonoff
@dtypes(torch.double, torch.cdouble)
@dtypesIfMPS(torch.float32, torch.complex64)
def test_transpose(self, device, dtype, coalesced):
def test_shape(sparse_dims, nnz, with_size):
x = self._gen_sparse(sparse_dims, nnz, with_size, dtype, device, coalesced)[0]
@ -964,6 +983,7 @@ class TestSparse(TestSparseBase):
@coalescedonoff
@dtypes(torch.double, torch.cdouble)
@expectedFailureMPS
@unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupported triggers assertion error")
@gradcheck_semantics()
def test_permute(self, device, dtype, coalesced, gradcheck):
@ -1043,6 +1063,7 @@ class TestSparse(TestSparseBase):
@skipIfTorchDynamo("https://github.com/pytorch/torchdynamo/issues/1166")
@dtypes(torch.double, torch.cdouble)
@dtypesIfMPS(torch.float32, torch.complex64)
def test_t_empty(self, device, dtype):
def test_in_place(x):
shape_original = x.shape
@ -1072,6 +1093,7 @@ class TestSparse(TestSparseBase):
@coalescedonoff
@dtypes(torch.double, torch.cdouble)
@dtypesIfMPS(torch.float32, torch.complex64)
def test_add_zeros(self, device, dtype, coalesced):
def test_shape(sparse_dims, nnz, sizes):
x, _, _ = self._gen_sparse(sparse_dims, nnz, sizes, dtype, device, coalesced)
@ -1086,6 +1108,7 @@ class TestSparse(TestSparseBase):
test_shape(2, 20, [3, 17, 19, 5])
test_shape(2, 20, [3, 17, 19, 0])
@expectedFailureMPS
@dtypes(torch.double, torch.cdouble)
def test_add_sub_nnz(self, device, dtype):
# nnz should not grow unbounded (gh-34964)
@ -1098,6 +1121,7 @@ class TestSparse(TestSparseBase):
x.sub_(2 * x)
self.assertLessEqual(x._nnz(), 10)
@expectedFailureMPS
@coalescedonoff
@dtypes(torch.double, torch.cdouble)
def test_cat(self, device, dtype, coalesced):
@ -1140,6 +1164,7 @@ class TestSparse(TestSparseBase):
"Concatenating sparse tensors, but a dense tensor was found at position 1."):
torch.cat((sp, dn))
@expectedFailureMPS
@coalescedonoff
@dtypes(torch.double, torch.cdouble)
def test_unsqueeze(self, device, dtype, coalesced):
@ -1174,6 +1199,7 @@ class TestSparse(TestSparseBase):
@coalescedonoff
@dtypes(torch.double, torch.cdouble)
@dtypesIfMPS(torch.float32, torch.complex64)
def test_select(self, device, dtype, coalesced):
def test_shape(sparse_dims, nnz, sizes, select_dim, select_index, fail_message=None):
x, _, _ = self._gen_sparse(sparse_dims, nnz, sizes, dtype, device, coalesced)
@ -1219,6 +1245,7 @@ class TestSparse(TestSparseBase):
self.assertEqual(t.dtype, t[0, 0].dtype)
self.assertEqual(t.dtype, t[1, 1].dtype)
@expectedFailureMPS
@coalescedonoff
@dtypes(torch.double, torch.cdouble)
def test_index_select(self, device, dtype, coalesced):
@ -1271,18 +1298,21 @@ class TestSparse(TestSparseBase):
small_sparse_result = t_small_sparse.index_select(d, t_idx)
self.assertEqual(small_dense_result, small_sparse_result)
@expectedFailureMPS
@coalescedonoff
@dtypes(torch.double, torch.cdouble)
def test_index_select_exhaustive_index_small(self, device, dtype, coalesced):
# will trigger brute-force algo
self._test_index_select_exhaustive_index((3, 3, 4), range(3), device, dtype, coalesced)
@expectedFailureMPS
@coalescedonoff
@dtypes(torch.double, torch.cdouble)
def test_index_select_exhaustive_index_large(self, device, dtype, coalesced):
# will trigger more sophisticated algos
self._test_index_select_exhaustive_index((100, 50, 3, 3), (2, 3), device, dtype, coalesced)
@expectedFailureMPS
@coalescedonoff
@dtypes(torch.double, torch.cdouble)
def test_index_select_empty_and_non_contiguous_index(self, device, dtype, coalesced):
@ -1381,6 +1411,7 @@ class TestSparse(TestSparseBase):
"bmm sparse-dense CUDA is not yet supported in Windows, at least up to CUDA 10.1"
)
@coalescedonoff
@expectedFailureMPS
@dtypes(torch.double)
def test_bmm(self, device, dtype, coalesced):
def test_shape(num_mats, dim_i, dim_j, dim_k, nnz):
@ -1591,6 +1622,7 @@ class TestSparse(TestSparseBase):
self.assertEqual(self.safeToDense(res), self.safeToDense(true_result))
@coalescedonoff
@expectedFailureMPS
@precisionOverride({torch.bfloat16: 5e-2, torch.float16: 5e-2})
@dtypes(torch.double, torch.cdouble, torch.bfloat16, torch.float16)
def test_sparse_addmm(self, device, dtype, coalesced):
@ -1632,6 +1664,7 @@ class TestSparse(TestSparseBase):
test_shape(7, 8, 9, 20, True, (1, 1))
@coalescedonoff
@expectedFailureMPS
@dtypes(torch.double)
@unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupported triggers assertion error")
def test_sparse_mm(self, device, dtype, coalesced):
@ -1654,6 +1687,7 @@ class TestSparse(TestSparseBase):
test_shape(7, 8, 9, 20, True)
@coalescedonoff
@expectedFailureMPS
@dtypes(torch.double)
@unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupported triggers assertion error")
@gradcheck_semantics()
@ -1677,6 +1711,7 @@ class TestSparse(TestSparseBase):
# test_shape(2, 3, [2, 2, 0])
@coalescedonoff
@expectedFailureMPS
@dtypes(torch.double)
def test_dsmm(self, device, dtype, coalesced):
def test_shape(di, dj, dk, nnz):
@ -1696,6 +1731,7 @@ class TestSparse(TestSparseBase):
test_shape(1000, 100, 0, 20)
@coalescedonoff
@expectedFailureMPS
@dtypes(torch.double)
def test_hsmm(self, device, dtype, coalesced):
def test_shape(di, dj, dk, nnz):
@ -1715,6 +1751,7 @@ class TestSparse(TestSparseBase):
test_shape(1000, 100, 0, 20)
@coalescedonoff
@expectedFailureMPS
@dtypes(torch.double)
def test_spadd(self, device, dtype, coalesced):
@ -1802,6 +1839,7 @@ class TestSparse(TestSparseBase):
self.assertEqual(res_fp32, res_bf16, atol=1e-2, rtol=0)
@coalescedonoff
@expectedFailureMPS
@dtypes(torch.double, torch.cdouble)
def test_norm(self, device, dtype, coalesced):
def test_shape(sparse_dims, nnz, with_size):
@ -1830,6 +1868,7 @@ class TestSparse(TestSparseBase):
x.norm(**kwargs)
@coalescedonoff
@expectedFailureMPS
@dtypes(torch.double)
@unittest.skipIf(TEST_WITH_CROSSREF, "fallback triggers cuda device error")
def test_sparse_sum(self, device, dtype, coalesced):
@ -1894,6 +1933,7 @@ class TestSparse(TestSparseBase):
S = self._gen_sparse(sparse_dims, nnz, with_size, dtype, device, coalesced)[0]
run_tests(S.requires_grad_(True), test_dim)
@expectedFailureMPS
def _test_basic_ops_shape(self, nnz_x1, nnz_x2, shape_i, shape_v, dtype, device, coalesced):
shape = shape_i + (shape_v)
x1, _, _ = self._gen_sparse(len(shape_i), nnz_x1, shape, dtype, device, coalesced)
@ -2002,6 +2042,7 @@ class TestSparse(TestSparseBase):
_test_basic_ops_hybrid()
@dtypes(torch.double, torch.cdouble)
@dtypesIfMPS(torch.float32, torch.complex64)
def test_add_dense_sparse_mismatch(self, device, dtype):
def test_shape(dense_size, sparse_dims_shape, dense_dims_shape, sparse_size):
x = torch.zeros(dense_size, dtype=dtype, device=device)
@ -2018,6 +2059,7 @@ class TestSparse(TestSparseBase):
@skipIfTorchDynamo("Not a TorchDynamo suitable test")
@dtypes(torch.double, torch.cdouble)
@dtypesIfMPS(torch.float32, torch.complex64)
def test_add_noncontiguous(self, device, dtype):
indices = self.index_tensor([[1, 2], [0, 2]], device=device)
values = torch.tensor([1.], dtype=dtype, device=device).expand(2, 3, 4, 5)
@ -2040,6 +2082,7 @@ class TestSparse(TestSparseBase):
self.assertEqual(self.safeToDense(y2), expected)
@coalescedonoff
@expectedFailureMPS
@dtypes(torch.double, torch.cdouble)
def test_sparse_mask(self, device, dtype, coalesced):
def _test_sparse_mask_fixed():
@ -2110,6 +2153,7 @@ class TestSparse(TestSparseBase):
@coalescedonoff
@dtypes(torch.double, torch.cdouble)
@dtypesIfMPS(torch.float32, torch.complex64)
def test_sparse_mask_hybrid(self, device, dtype, coalesced):
def _test_sparse_mask_hybrid_fixed():
i = self.index_tensor([
@ -2171,6 +2215,7 @@ class TestSparse(TestSparseBase):
self._test_sparse_mask_shape(0, 0, [10, 10, 0], [2, 0], dtype, device, coalesced)
@dtypes(torch.double, torch.cdouble)
@expectedFailureMPS
@skipIfCrossRef
def test_sparse_mask_backward(self, device, dtype):
from itertools import product, repeat
@ -2205,6 +2250,7 @@ class TestSparse(TestSparseBase):
@coalescedonoff
@dtypes(torch.double, torch.cdouble)
@dtypesIfMPS(torch.float32, torch.complex64)
def test_zeros(self, device, dtype, coalesced):
def _test_zeros(nnzs, shape, out_shape_i, out_shape_v=None):
out_shape = out_shape_i + (out_shape_v or [])
@ -2229,6 +2275,7 @@ class TestSparse(TestSparseBase):
test_shape([2, 3, 4], [0, 4, 5, 6], [2, 3, 0], [9, 12])
@coalescedonoff
@expectedFailureMPS
@dtypes(torch.double, torch.cdouble)
def test_zeros_like(self, device, dtype, coalesced):
def _test_zeros_like(nnzs, template_shape_i, template_shape_v=None):
@ -2312,6 +2359,7 @@ class TestSparse(TestSparseBase):
result = torch.empty_like(dense_tensor, layout=torch.sparse_coo)
@coalescedonoff
@expectedFailureMPS
@dtypes(torch.double, torch.cdouble)
def test_empty_like(self, device, dtype, coalesced):
# tests https://github.com/pytorch/pytorch/issues/43699
@ -2368,6 +2416,7 @@ class TestSparse(TestSparseBase):
yield [dim, start, length]
@coalescedonoff
@expectedFailureMPS
@dtypes(torch.double, torch.cdouble)
def test_narrow(self, device, dtype, coalesced):
shape = [3, 3, 4, 2]
@ -2410,6 +2459,7 @@ class TestSparse(TestSparseBase):
sparse_tensor.requires_grad_()
@coalescedonoff
@dtypesIfMPS(*all_mps_types())
@dtypes(*all_types())
def test_log1p(self, device, dtype, coalesced):
if coalesced:
@ -2475,6 +2525,7 @@ class TestSparse(TestSparseBase):
@coalescedonoff
@dtypes(torch.double, torch.cdouble)
@dtypesIfMPS(torch.float32, torch.complex64)
def test_neg_negative(self, device, dtype, coalesced):
if coalesced:
@ -2556,6 +2607,7 @@ class TestSparse(TestSparseBase):
@coalescedonoff
@dtypes(*all_types())
@dtypesIfMPS(*all_mps_types())
def test_asin_arcsin(self, device, dtype, coalesced):
if coalesced:
input_coalesced = torch.sparse_coo_tensor(
@ -2601,6 +2653,7 @@ class TestSparse(TestSparseBase):
self._test_asin_arcsin(input_uncoalesced, coalesced)
@coalescedonoff
@expectedFailureMPS
@dtypes(torch.double)
def test_mv(self, device, dtype, coalesced):
def test_shape(di, dj, dk, nnz):
@ -2628,6 +2681,7 @@ class TestSparse(TestSparseBase):
res = x.mv(y)
@dtypes(*floating_and_complex_types())
@dtypesIfMPS(torch.float32, torch.bfloat16, torch.complex64)
def test_sparse_add_coalesce(self, device, dtype):
i = self.index_tensor([[1, 2, 1]], device=device)
v = torch.tensor([3, 4, 5], dtype=dtype, device=device)
@ -2705,6 +2759,7 @@ class TestSparse(TestSparseBase):
@coalescedonoff
@dtypes(torch.double, torch.cdouble)
@dtypesIfMPS(torch.float32, torch.complex64)
def test_new(self, device, dtype, coalesced):
def test_shape(sparse_dims, nnz, with_size):
x, indices, values = self._gen_sparse(sparse_dims, nnz, with_size, dtype, device, coalesced)
@ -2761,6 +2816,7 @@ class TestSparse(TestSparseBase):
self.assertEqual(True, sparse_tensor.requires_grad)
@dtypes(torch.double, torch.cdouble)
@dtypesIfMPS(torch.float32, torch.complex64)
def test_factory_size_check(self, device, dtype):
indices = self.index_tensor([[1, 2],
[0, 2]], device=device)
@ -2815,6 +2871,7 @@ class TestSparse(TestSparseBase):
self.assertEqual(tensor._indices(), expected_indices)
@dtypes(torch.double, torch.cdouble)
@dtypesIfMPS(torch.float32, torch.complex64)
def test_factory_nnz(self, device, dtype):
indices = self.index_tensor([[0]], device=device) # (sparse_dim, nnz): (1, 1)
values = torch.tensor([[1, 1], [1, 1]], dtype=dtype, device=device) # (nnz, ...): (2, 2)
@ -2829,6 +2886,7 @@ class TestSparse(TestSparseBase):
torch.sparse_coo_tensor(indices, values, sizes, dtype=dtype, device=device)
@dtypes(torch.double, torch.cdouble)
@dtypesIfMPS(torch.float32, torch.complex64)
def test_factory_nnz_zero(self, device, dtype):
def test_shape(i_shape, v_shape, size, expected_size):
if size:
@ -2850,6 +2908,7 @@ class TestSparse(TestSparseBase):
test_shape([3, 0], [0, 2, 4, 0], [1, 2, 3, 2, 4, 0], [1, 2, 3, 2, 4, 0])
@dtypes(torch.double, torch.cdouble)
@dtypesIfMPS(torch.float32, torch.complex64)
def test_factory_dense_dim(self, device, dtype):
indices = self.index_tensor([[0]], device=device)
values = torch.tensor([[[1, 1, 1], [1, 1, 1]]], dtype=dtype, device=device)
@ -3090,6 +3149,7 @@ class TestSparse(TestSparseBase):
x_dense.view(-1)[0:x_v_numel].view(x_v))
@dtypes(torch.double, torch.cdouble)
@dtypesIfMPS(torch.float32, torch.complex64)
def test_resize(self, device, dtype):
# 1. Expand the size of some dense dimensions [Supported]
self._test_resize_shape([1, 1], [1, 2, 3], [2, 2, 3],
@ -3175,6 +3235,7 @@ class TestSparse(TestSparseBase):
.is_nonzero())
@dtypes(torch.double, torch.cdouble)
@dtypesIfMPS(torch.float32, torch.complex64)
def test_change_tensor_metadata(self, device, dtype):
i = self.index_tensor([[0], [1]], device=device)
v = torch.tensor([[3, 4, 5]], dtype=dtype, device=device)
@ -3217,6 +3278,7 @@ class TestSparse(TestSparseBase):
self.assertEqual(list(t.coalesce().values().size()), [1, 3])
@coalescedonoff
@expectedFailureMPS
@dtypes(torch.double)
def test_pickle(self, device, dtype, coalesced):
import pickle
@ -3248,6 +3310,7 @@ class TestSparse(TestSparseBase):
sp_tensor_loaded = pickle.loads(serialized)
self.assertEqual(sp_tensor, sp_tensor_loaded)
@expectedFailureMPS
def test_any(self, device):
t = torch.sparse_coo_tensor(torch.tensor(([0, 0], [2, 0])), torch.tensor([False, False]), device=device)
t_any = torch.tensor(False)
@ -3265,6 +3328,7 @@ class TestSparse(TestSparseBase):
self.assertEqual(torch.isnan(t).int(), t_nan.int())
@coalescedonoff
@expectedFailureMPS
@dtypes(torch.float32, torch.float64)
def test_div_rounding_mode(self, device, dtype, coalesced):
sparse, _, _ = self._gen_sparse(2, 10, (10, 10), dtype,
@ -3285,11 +3349,13 @@ class TestSparse(TestSparseBase):
torch.div(sparse, -2, rounding_mode=mode, out=actual)
self.assertEqual(self.safeToDense(actual), expect)
@expectedFailureMPS
def test_div_by_sparse_error(self, device):
self.assertRaisesRegex(RuntimeError, 'Sparse division requires',
lambda: torch.tensor(1., device=device).to_sparse()
/ torch.tensor(1., device=device).to_sparse())
@expectedFailureMPS
def test_floor_divide_by_sparse_error(self, device):
self.assertRaisesRegex(RuntimeError, 'Sparse floor division requires',
lambda: torch.tensor(1., device=device).to_sparse()
@ -3302,6 +3368,7 @@ class TestSparse(TestSparseBase):
self.assertRaises(TypeError, lambda: t.numpy())
@coalescedonoff
@expectedFailureMPS
@dtypes(torch.double)
def test_softmax(self, device, dtype, coalesced):
import torch.nn.functional as F
@ -3614,12 +3681,14 @@ class TestSparse(TestSparseBase):
@dtypes(torch.double, torch.float)
@expectedFailureMPS
@unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupported triggers assertion error")
def test_softmax_zero_nnz(self, device, dtype):
self._check_zero_nnz_softmax_op(torch.sparse.softmax, 1, device, dtype)
self._check_zero_nnz_softmax_op(torch.sparse.softmax, 10, device, dtype)
@dtypes(torch.double, torch.float)
@expectedFailureMPS
@unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupported triggers assertion error")
def test_log_softmax_zero_nnz(self, device, dtype):
self._check_zero_nnz_softmax_op(torch.sparse.log_softmax, 1, device, dtype)
@ -3628,6 +3697,7 @@ class TestSparse(TestSparseBase):
# TODO: Check after why ROCm's cusparseXcsrgemm2Nnz function doesn't return the same nnz value as CUDA
@coalescedonoff
@dtypes(*floating_and_complex_types())
@expectedFailureMPS
@dtypesIfCUDA(*floating_types_and(*[torch.half] if SM53OrLater and not TEST_WITH_ROCM else [],
*[torch.bfloat16] if SM80OrLater and not TEST_WITH_ROCM else [],
torch.complex64,
@ -3758,6 +3828,7 @@ class TestSparse(TestSparseBase):
self.assertRaises(TypeError, assign_to)
@expectedFailureMPS
@dtypes(torch.double, torch.cdouble)
def test_full_broadcast_to(self, device, dtype):
def can_broadcast(s0, s1):
@ -3788,6 +3859,7 @@ class TestSparse(TestSparseBase):
torch._sparse_broadcast_to(s, s1)
@coalescedonoff
@expectedFailureMPS
@dtypes(torch.double, torch.cdouble)
def test_sparse_broadcast_to(self, device, dtype, coalesced):
def test(sparse_dims, nnz, with_size, new_size):
@ -3817,6 +3889,7 @@ class TestSparse(TestSparseBase):
self.skipTest(f"Test with dtype={dtype}, device={device} runs only with coalesced inputs")
@coalescedonoff
@expectedFailureMPS
# NOTE: addcmul_out is not implemented for bool.
@dtypes(*all_types_and_complex_and(torch.bfloat16, torch.float16))
@precisionOverride({torch.bfloat16: 1e-2, torch.float16: 1e-2})
@ -3868,6 +3941,7 @@ class TestSparse(TestSparseBase):
# check_autograd(x, y)
@coalescedonoff
@expectedFailureMPS
@dtypes(*all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16))
@precisionOverride({torch.bfloat16: 1e-2, torch.float16: 1e-2})
def test_sparse_dense_mul(self, device, dtype, coalesced):
@ -4053,6 +4127,7 @@ class TestSparse(TestSparseBase):
self.assertFalse(torch.sparse_coo_tensor([[0, 1], [0, 1]], [1, 2], (2, 2)).is_coalesced())
@coalescedonoff
@expectedFailureMPS
@dtypes(*all_types_and_complex_and(torch.bool))
def test_sum(self, device, dtype, coalesced):
def run_test(shape, nnz):
@ -5531,7 +5606,7 @@ instantiate_device_type_tests(TestSparseUnaryUfuncs, globals(), allow_mps=True,
instantiate_device_type_tests(TestSparseMaskedReductions, globals(), except_for='meta')
# e.g., TestSparseCPU and TestSparseCUDA
instantiate_device_type_tests(TestSparse, globals(), except_for='meta')
instantiate_device_type_tests(TestSparse, globals(), allow_mps=True, except_for='meta')
instantiate_device_type_tests(TestSparseAny, globals(), except_for='meta')
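
For context, the pattern used throughout the hunk above: @dtypes declares the default test dtypes, @dtypesIfMPS overrides them for the MPS instantiation, @expectedFailureMPS marks tests whose ops are still missing on MPS, and instantiate_device_type_tests(..., allow_mps=True) generates the per-device classes. A schematic, hedged sketch (class and test names are hypothetical):

import torch
from torch.testing._internal.common_device_type import (
    instantiate_device_type_tests, dtypes, dtypesIfMPS)
from torch.testing._internal.common_utils import TestCase, run_tests

class TestSparseDemo(TestCase):
    @dtypes(torch.double)           # default dtype for CPU/CUDA instantiations
    @dtypesIfMPS(torch.float32)     # MPS has no float64, so substitute float32
    def test_clone(self, device, dtype):
        i = torch.tensor([[0, 1], [2, 0]], device=device)
        v = torch.tensor([1.0, 2.0], dtype=dtype, device=device)
        s = torch.sparse_coo_tensor(i, v, (2, 3), device=device)
        self.assertEqual(s.clone()._values(), s._values())

    # expectedFailureMPS would be stacked on tests whose ops still raise on MPS
    # and removed once the corresponding kernel lands.

# generates TestSparseDemoCPU, TestSparseDemoCUDA and, with allow_mps, TestSparseDemoMPS
instantiate_device_type_tests(TestSparseDemo, globals(), allow_mps=True, except_for="meta")

if __name__ == "__main__":
    run_tests()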