From a00a99e801c14d2b09533e09a9c20a1af473d9ab Mon Sep 17 00:00:00 2001
From: David Berard <dberard@fb.com>
Date: Mon, 13 May 2024 15:30:44 -0700
Subject: [PATCH] [profiler] Report strides in json trace (#125851)

We already collect strides; we just don't report them anywhere. This change adds them to the JSON trace as an "Input Strides" entry alongside "Input Dims".

Note: strides are only reported when concrete input collection is enabled, which I think is currently not the case internally.

Differential Revision: [D57165421](https://our.internmc.facebook.com/intern/diff/D57165421)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/125851
Approved by: https://github.com/Chillee, https://github.com/aaronenyeshi
---
 .../TestProfiler.test_profiler_strides        |  0
 test/profiler/test_profiler.py                | 20 +++++++++++++++++++
 torch/csrc/autograd/profiler_kineto.cpp       | 17 ++++++++++++++--
 3 files changed, 35 insertions(+), 2 deletions(-)
 create mode 100644 test/dynamo_expected_failures/TestProfiler.test_profiler_strides
diff --git a/test/dynamo_expected_failures/TestProfiler.test_profiler_strides b/test/dynamo_expected_failures/TestProfiler.test_profiler_strides
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/test/profiler/test_profiler.py b/test/profiler/test_profiler.py
index 87df89f04d57..4c7d298ca839 100644
--- a/test/profiler/test_profiler.py
+++ b/test/profiler/test_profiler.py
@@ -1216,6 +1216,26 @@ class TestProfiler(TestCase):
                         f"Failed finding record funciont for op = {e}",
                     )
 
+    def test_profiler_strides(self):
+        torch._C._profiler._set_record_concrete_inputs_enabled_val(True)
+        base_tensor = torch.randn(1024, dtype=torch.float32)
+        a = base_tensor.as_strided((16, 16), (17, 1), 0)
+        b = base_tensor.as_strided((16, 16), (25, 2), 272)
+        with _profile(record_shapes=True) as prof:
+            c = torch.add(a, b)
+
+        with TemporaryFileName(mode="w+") as fname:
+            prof.export_chrome_trace(fname)
+            with open(fname) as f:
+                j = json.load(f)
+                op_events = [
+                    e for e in j["traceEvents"] if e.get("cat", "") == "cpu_op"
+                ]
+                for e in op_events:
+                    args = e["args"]
+                    if e["name"] == "aten::add":
+                        self.assertEqual(args["Input Strides"], [[17, 1], [25, 2], []])
+
     def test_profiler_fwd_bwd_link(self):
         with _profile(use_kineto=True) as prof:
             t1, t2 = torch.ones(1, requires_grad=True), torch.ones(
diff --git a/torch/csrc/autograd/profiler_kineto.cpp b/torch/csrc/autograd/profiler_kineto.cpp
index 0c73c8b7a72a..3b095cef2a68 100644
--- a/torch/csrc/autograd/profiler_kineto.cpp
+++ b/torch/csrc/autograd/profiler_kineto.cpp
@@ -80,16 +80,18 @@ struct OpArgData {
   std::vector<std::string> dtypes;
   std::vector<c10::IValue> concrete_inputs;
   std::vector<std::vector<int64_t>> shapes_for_kineto_event;
+  std::vector<shape> strides;
 };
 
 auto parseArgData(
     const std::vector<op_input_t>& input_shapes,
     const std::vector<op_input_t>& concrete_inputs) {
   if (input_shapes.empty()) {
-    return OpArgData{false, {}, {}, {}, {}};
+    return OpArgData{false, {}, {}, {}, {}, {}};
   }
 
   std::vector<shape> shapes(input_shapes.size());
+  std::vector<shape> strides(input_shapes.size());
   std::vector<std::vector<int64_t>> shapes_for_kineto_event(
       input_shapes.size());
 
@@ -103,14 +105,19 @@ auto parseArgData(
               shapes[i] = t.sizes_;
               shapes_for_kineto_event[i] = t.sizes_;
               dtypes[i] = std::string(scalarTypeToTypeMeta(t.dtype_).name());
+              strides[i] = t.strides_;
             },
             [&](const std::vector<TensorMetadata>& l) {
               std::vector<std::vector<int64_t>> shape;
               shape.reserve(l.size());
+              std::vector<std::vector<int64_t>> stride;
+              stride.reserve(l.size());
               for (const auto& t : l) {
                 shape.emplace_back(t.sizes_);
+                stride.emplace_back(t.strides_);
               }
               shapes[i] = shape;
+              strides[i] = stride;
               dtypes[i] = "TensorList";
             },
             [&](const c10::IValue& val) { dtypes[i] = "Scalar"; },
@@ -141,7 +148,12 @@ auto parseArgData(
   }
 
   return OpArgData{
-      true, shapes, dtypes, concrete_inputs_list, shapes_for_kineto_event};
+      true,
+      shapes,
+      dtypes,
+      concrete_inputs_list,
+      shapes_for_kineto_event,
+      strides};
 }
 
 struct MetadataBase {
@@ -236,6 +248,7 @@ struct AddGenericMetadata : public MetadataBase {
     if (arg_data.has_data) {
       if (get_record_concrete_inputs_enabled()) {
         addMetadata("Input Dims", variantShapesToStr(arg_data.shapes));
+        addMetadata("Input Strides", variantShapesToStr(arg_data.strides));
       } else {
         addMetadata(
             "Input Dims", shapesToStr(arg_data.shapes_for_kineto_event));