From 4d419a74610c32b1372f8802dcc61893740a23cf Mon Sep 17 00:00:00 2001
From: Mikayla Gawarecki
Date: Tue, 12 Aug 2025 17:17:47 +0000
Subject: [PATCH] Add pad and narrow to torch/csrc/stable/ops.h (#159328)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/159328
Approved by: https://github.com/janeyx99
ghstack dependencies: #159507
---
 .../libtorch_agnostic/csrc/kernel.cpp         | 37 +++++++++++++++++++
 .../libtorch_agnostic/ops.py                  | 27 ++++++++++++++
 .../test/test_libtorch_agnostic.py            | 20 ++++++++++
 .../aoti_torch/generated/c_shim_aten.h        |  2 +
 torch/csrc/stable/ops.h                       | 34 +++++++++++++++++
 torchgen/aoti/fallback_ops.py                 |  2 +
 6 files changed, 122 insertions(+)

diff --git a/test/cpp_extensions/libtorch_agnostic_extension/libtorch_agnostic/csrc/kernel.cpp b/test/cpp_extensions/libtorch_agnostic_extension/libtorch_agnostic/csrc/kernel.cpp
index 34f4729d98e9..e3dfc581179a 100644
--- a/test/cpp_extensions/libtorch_agnostic_extension/libtorch_agnostic/csrc/kernel.cpp
+++ b/test/cpp_extensions/libtorch_agnostic_extension/libtorch_agnostic/csrc/kernel.cpp
@@ -291,10 +291,43 @@ void boxed_fill_infinity(
   stack[0] = from(res);
 }
 
+Tensor my_pad(Tensor t) {
+  std::vector<int64_t> padding = {1, 2, 2, 1};
+  std::string mode = "constant";
+  double value = 0.0;
+  return pad(t, padding, mode, value);
+}
+
+void boxed_my_pad(
+    StableIValue* stack,
+    uint64_t num_args,
+    uint64_t num_outputs) {
+  auto res = my_pad(to<Tensor>(stack[0]));
+  stack[0] = from(res);
+}
+
+Tensor my_narrow(Tensor t, int64_t dim, int64_t start, int64_t length) {
+  return narrow(t, dim, start, length);
+}
+
+void boxed_my_narrow(
+    StableIValue* stack,
+    uint64_t num_args,
+    uint64_t num_outputs) {
+  auto res = my_narrow(
+      to<Tensor>(stack[0]),
+      to<int64_t>(stack[1]),
+      to<int64_t>(stack[2]),
+      to<int64_t>(stack[3]));
+  stack[0] = from(res);
+}
+
 STABLE_TORCH_LIBRARY_FRAGMENT(libtorch_agnostic, m) {
   m.def("my_transpose(Tensor t, int dim0, int dim1) -> Tensor");
   m.def("my_empty_like(Tensor t) -> Tensor");
   m.def("fill_infinity(Tensor(a!) t) -> Tensor(a!)");
+  m.def("my_pad(Tensor t) -> Tensor");
+  m.def("my_narrow(Tensor t, int dim, int start, int length) -> Tensor");
 }
 
 STABLE_TORCH_LIBRARY_IMPL(libtorch_agnostic, CompositeExplicitAutograd, m) {
@@ -303,6 +336,10 @@ STABLE_TORCH_LIBRARY_IMPL(libtorch_agnostic, CompositeExplicitAutograd, m) {
   m.impl("fill_infinity", &boxed_fill_infinity);
 }
 
+STABLE_TORCH_LIBRARY_IMPL(libtorch_agnostic, CompositeImplicitAutograd, m) {
+  m.impl("my_pad", &boxed_my_pad);
+  m.impl("my_narrow", &boxed_my_narrow);
+}
 
 Tensor my_zero_(Tensor t) {
   return zero_(t);
diff --git a/test/cpp_extensions/libtorch_agnostic_extension/libtorch_agnostic/ops.py b/test/cpp_extensions/libtorch_agnostic_extension/libtorch_agnostic/ops.py
index 04488e7d9183..817732371060 100644
--- a/test/cpp_extensions/libtorch_agnostic_extension/libtorch_agnostic/ops.py
+++ b/test/cpp_extensions/libtorch_agnostic_extension/libtorch_agnostic/ops.py
@@ -176,3 +176,30 @@ def test_default_constructor(defined) -> bool:
     Returns: bool - result of calling .defined() on the tensor
     """
     return torch.ops.libtorch_agnostic.test_default_constructor.default(defined)
+
+
+def my_pad(t) -> Tensor:
+    """
+    Pads the input tensor with hardcoded padding parameters.
+
+    Args:
+        t: Input tensor
+
+    Returns: Padded tensor with padding [1, 2, 2, 1], mode "constant", value 0.0
+    """
+    return torch.ops.libtorch_agnostic.my_pad.default(t)
+
+
+def my_narrow(t, dim, start, length) -> Tensor:
+    """
+    Returns a new tensor that is a narrowed version of the input tensor.
+
+    Args:
+        t: Input tensor
+        dim: Dimension along which to narrow
+        start: Starting position
+        length: Length of the narrowed section
+
+    Returns: Narrowed tensor
+    """
+    return torch.ops.libtorch_agnostic.my_narrow.default(t, dim, start, length)
diff --git a/test/cpp_extensions/libtorch_agnostic_extension/test/test_libtorch_agnostic.py b/test/cpp_extensions/libtorch_agnostic_extension/test/test_libtorch_agnostic.py
index e197904e8ae2..ae3c2767627f 100644
--- a/test/cpp_extensions/libtorch_agnostic_extension/test/test_libtorch_agnostic.py
+++ b/test/cpp_extensions/libtorch_agnostic_extension/test/test_libtorch_agnostic.py
@@ -232,6 +232,26 @@ if not IS_WINDOWS:
             )
             self.assertFalse(undefined_tensor_is_defined)
 
+        def test_my_pad(self, device):
+            import libtorch_agnostic
+
+            t = torch.rand(2, 3, device=device)
+            out = libtorch_agnostic.ops.my_pad(t)
+            expected = torch.nn.functional.pad(t, [1, 2, 2, 1], "constant", 0.0)
+            self.assertEqual(out, expected)
+
+        def test_my_narrow(self, device):
+            import libtorch_agnostic
+
+            t = torch.randn(2, 5, device=device)
+
+            dim0 = 0
+            start0 = 0
+            length0 = 1
+            out0 = libtorch_agnostic.ops.my_narrow(t, dim0, start0, length0)
+            expected0 = torch.narrow(t, dim0, start0, length0)
+            self.assertEqual(out0, expected0)
+
     instantiate_device_type_tests(TestLibtorchAgnostic, globals(), except_for=None)
 
 if __name__ == "__main__":
diff --git a/torch/csrc/inductor/aoti_torch/generated/c_shim_aten.h b/torch/csrc/inductor/aoti_torch/generated/c_shim_aten.h
index cc2dcdf4c75e..d5bc50750fc7 100644
--- a/torch/csrc/inductor/aoti_torch/generated/c_shim_aten.h
+++ b/torch/csrc/inductor/aoti_torch/generated/c_shim_aten.h
@@ -15,6 +15,8 @@ extern "C" {
 #endif
 
 AOTI_TORCH_EXPORT AOTITorchError aoti_torch_aten_fill__Scalar(AtenTensorHandle self, double value);
+AOTI_TORCH_EXPORT AOTITorchError aoti_torch_aten_narrow(AtenTensorHandle self, int64_t dim, int64_t start, int64_t length, AtenTensorHandle* ret0);
+AOTI_TORCH_EXPORT AOTITorchError aoti_torch_aten_pad(AtenTensorHandle self, const int64_t* pad, int64_t pad_len_, const char* mode, double* value, AtenTensorHandle* ret0);
 
 #ifdef __cplusplus
 } // extern "C"
diff --git a/torch/csrc/stable/ops.h b/torch/csrc/stable/ops.h
index c4a8a9984805..7ce25af14d3f 100644
--- a/torch/csrc/stable/ops.h
+++ b/torch/csrc/stable/ops.h
@@ -4,11 +4,15 @@
 #include <torch/csrc/inductor/aoti_torch/generated/c_shim_aten.h>
 #include <torch/csrc/stable/library.h>
 #include <torch/csrc/stable/tensor.h>
+#include <string>
+#include <vector>
 #include <optional>
 
 using torch::stable::Tensor;
 
+namespace torch::stable {
+
 // We expect this to be the stable version of the empty_like op that takes in
 // no kwargs (device, dtype, layout, memory_format). We will add kwargs
 // support in the future.
@@ -36,6 +40,34 @@ inline Tensor fill_(const Tensor& self, double value) {
   return self;
 }
 
+// We expect this to be the stable version of the narrow.default op.
+// narrow takes in a SymInt for start and length, but these are typed as
+// int64_t because SymInt is not yet header-only.
+inline Tensor narrow(Tensor& self, int64_t dim, int64_t start, int64_t length) {
+  AtenTensorHandle ret0 = nullptr;
+
+  TORCH_ERROR_CODE_CHECK(
+      aoti_torch_aten_narrow(self.get(), dim, start, length, &ret0));
+  return Tensor(ret0);
+}
+
+// We expect this to be the stable version of the pad.default op.
+// pad.default takes in a SymInt[] as the pad argument; however, pad is typed
+// as std::vector<int64_t> here because
+// (1) IntArrayRef is not yet header-only
+// (2) SymInt is not yet header-only
+inline Tensor pad(
+    const Tensor& self,
+    std::vector<int64_t> pad,
+    const std::string& mode = "constant",
+    double value = 0.0) {
+  AtenTensorHandle ret0 = nullptr;
+
+  TORCH_ERROR_CODE_CHECK(aoti_torch_aten_pad(
+      self.get(), pad.data(), pad.size(), mode.c_str(), &value, &ret0));
+  return Tensor(ret0);
+}
+
 // We expect this to be the stable version of the transpose op with identical
 // semantics to the existing transpose.int op.
 inline Tensor transpose(const Tensor& self, int64_t dim0, int64_t dim1) {
@@ -56,3 +88,5 @@ inline Tensor transpose(const Tensor& self, int64_t dim0, int64_t dim1) {
       aoti_torch_call_dispatcher("aten::zero_", "", stack.data()));
   return to<Tensor>(stack[0]);
 }
+
+} // namespace torch::stable
diff --git a/torchgen/aoti/fallback_ops.py b/torchgen/aoti/fallback_ops.py
index 3ff40412898a..be00c49d7b1f 100644
--- a/torchgen/aoti/fallback_ops.py
+++ b/torchgen/aoti/fallback_ops.py
@@ -183,4 +183,6 @@ inductor_fallback_ops: dict[str, dict[str, list[str]]] = {
 # The same BC rules apply as inductor_fallback_ops.
 aten_shimified_ops: dict[str, dict[str, list[str]]] = {
     "aten.fill_.Scalar": {},
+    "aten.pad.default": {},
+    "aten.narrow.default": {},
 }
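
Usage sketch (not part of the patch): how a libtorch-agnostic extension might compose the two new stable ops, assuming the headers and signatures added above. The helper `pad_then_narrow` and its shape comments are hypothetical, not something this PR ships.

// Minimal sketch of calling the new stable ops from extension code.
// Assumes torch/csrc/stable/ops.h from this patch is on the include path.
#include <torch/csrc/stable/ops.h>
#include <torch/csrc/stable/tensor.h>
#include <vector>

using torch::stable::Tensor;

// Hypothetical helper: pads a 2-D tensor, then keeps its first two rows.
Tensor pad_then_narrow(const Tensor& t) {
  // Same convention as torch.nn.functional.pad: {left, right, top, bottom}
  // over the last two dims, so a 2x3 input becomes 5x6 here.
  std::vector<int64_t> padding = {1, 2, 2, 1};
  Tensor padded =
      torch::stable::pad(t, padding, /*mode=*/"constant", /*value=*/0.0);
  // narrow(dim=0, start=0, length=2) keeps rows [0, 2).
  return torch::stable::narrow(padded, /*dim=*/0, /*start=*/0, /*length=*/2);
}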