Use schema as source of truth + support ones_like/empty_like (#149052)

This change does two important things: (a) Instead of relying on the IValue type as the source of truth, we use the schema as the source of truth. This matters because IValue types are overloaded and can be converted ambiguously or incorrectly. For example, a MemoryFormat looks like an int and would be converted to an int64_t rather than a MemoryFormat! (b) This PR expands support to many more types so that far more schemas are covered, e.g., Optional, Device, dtype, etc. The main win from this PR is the ability for aoti_torch_call_dispatcher to call TensorFactory ops like ones_like/empty_like! Pull Request resolved: https://github.com/pytorch/pytorch/pull/149052 Approved by: https://github.com/albanD
2025-10-20 21:14:14 +08:00 · 2025-03-17 16:33:38 -07:00
parent ebabd0efdd
commit 988827cdfb
7 changed files with 343 additions and 90 deletions
--- a/test/test_cpp_extensions_aot.py
+++ b/test/test_cpp_extensions_aot.py
@@ -270,22 +270,34 @@ class TestCppExtensionAOT(common.TestCase):
            curr_mem = torch.cuda.memory_allocated(device)
            self.assertEqual(curr_mem, init_mem)

-        # (3) test calling our dispatcher on ones_like
-        t = torch.rand(32, 16, device=device)
-        cpu_t = libtorch_agnostic.ops.my_abs(t)
-        self.assertEqual(cpu_t, torch.abs(t))
+        # (3a) test calling our dispatcher on easy API like abs
+        t = torch.rand(32, 16, device=device) - 0.5

        def _make_cuda_tensors(prior_mem):
            cuda_t = libtorch_agnostic.ops.my_abs(t)
            self.assertGreater(torch.cuda.memory_allocated(device), prior_mem)
            self.assertEqual(cuda_t, torch.abs(t))

-        if t.is_cuda:
-            init_mem = torch.cuda.memory_allocated(device)
-            for _ in range(3):
-                _make_cuda_tensors(init_mem)
-                curr_mem = torch.cuda.memory_allocated(device)
-                self.assertEqual(curr_mem, init_mem)
+        init_mem = torch.cuda.memory_allocated(device)
+        for _ in range(3):
+            _make_cuda_tensors(init_mem)
+            curr_mem = torch.cuda.memory_allocated(device)
+            self.assertEqual(curr_mem, init_mem)
+
+        # (3b) and on factory API like ones_like
+        cpu_t = libtorch_agnostic.ops.my_ones_like(t, "cpu")
+        self.assertEqual(cpu_t, torch.ones_like(t, device="cpu"))
+
+        def _make_cuda_tensors(prior_mem):
+            cuda_t = libtorch_agnostic.ops.my_ones_like(t, t.device)
+            self.assertGreater(torch.cuda.memory_allocated(device), prior_mem)
+            self.assertEqual(cuda_t, torch.ones_like(t, device=t.device))
+
+        init_mem = torch.cuda.memory_allocated(device)
+        for _ in range(3):
+            _make_cuda_tensors(init_mem)
+            curr_mem = torch.cuda.memory_allocated(device)
+            self.assertEqual(curr_mem, init_mem)


@torch.testing._internal.common_utils.markDynamoStrictTest