[Static Runtime] Add test that runs NNC fused kernels in parallel (#73256)

Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/73256 This adds a test that executes multiple Static Runtime instances in parallel when each instance includes a fusion. ghstack-source-id: 149787403 Test Plan: ``` buck run mode/dev-asan //caffe2/benchmarks/static_runtime:static_runtime_cpptest -- --gtest_filter=CpuFusion.ParallelRuntimes ``` The above test results in an error: P482317015 (when parts of the fix in D34287960 (6d33852685) are backed out) Reviewed By: mikeiovine Differential Revision: D34404127 fbshipit-source-id: 95a267e27d74584df90841fe496f909171136981 (cherry picked from commit 57d3ad9a46a24559f6d4f4097bd1b8e0b1f6b077)
2025-10-20 21:14:14 +08:00 · 2022-02-28 09:31:17 -08:00
parent ab6395fc65
commit cfd92f2d59
1 changed files with 55 additions and 0 deletions
--- a/benchmarks/static_runtime/test_cpu_fusion.cc
+++ b/benchmarks/static_runtime/test_cpu_fusion.cc
@ -1,6 +1,7 @@
 #include <gtest/gtest.h>
 #include <torch/csrc/jit/runtime/static/impl.h>
 #include <torch/torch.h>
+#include <thread>

 #include "test_utils.h"

@ -81,3 +82,57 @@ TEST(CpuFusion, FallbackGraph) {
    EXPECT_TRUE(at::allclose(expect, actual.toTensor()));
  }
 }
+
+TEST(CpuFusion, ParallelRuntimes) {
+  // Runs one StaticRuntime per thread, all built from a single StaticModule
+  // with TensorExpr fusion on, and compares against at::tanh(at::relu(a + b)).
+  const auto simple_script = R"JIT(
+    def forward(self, a, b):
+        return (a + b).relu().tanh()
+  )JIT";
+
+  Module module("module");
+  module.define(simple_script);
+
+  StaticModuleOptions options; // defaults, except for the fusion flag below.
+  options.enable_tensorexpr_fusion = true;
+
+  auto sample_lhs = at::randn({2, 3});
+  auto sample_rhs = at::ones({2, 3});
+  auto static_module = StaticModule(
+      module, /* is_frozen */ false, options, {sample_lhs, sample_rhs});
+
+  constexpr size_t kNumThreads = 2;
+  constexpr size_t kShapesPerThread = 8;
+
+  // Thread t is given the shapes (t + 10k) x (t + 10k + 1) for k = 0..7.
+  std::vector<std::vector<std::pair<int, int>>> thread_shapes(kNumThreads);
+  for (size_t t = 0; t < kNumThreads; ++t) {
+    for (size_t k = 0; k < kShapesPerThread; ++k) {
+      const size_t rows = t + 10 * k;
+      thread_shapes[t].emplace_back(rows, rows + 1);
+    }
+  }
+
+  // Each worker builds its own StaticRuntime from the shared StaticModule.
+  auto run_worker = [&](size_t tid) {
+    StaticRuntime runtime(static_module);
+    for (const auto& shape : thread_shapes[tid]) {
+      auto lhs = at::randn({shape.first, shape.second});
+      auto rhs = at::randn({shape.first, shape.second});
+      auto expected = at::tanh(at::relu(lhs + rhs));
+      auto actual = runtime({lhs, rhs}, {});
+      EXPECT_TRUE(at::allclose(expected, actual.toTensor()));
+    }
+  };
+
+  std::vector<std::thread> workers;
+  for (size_t tid = 0; tid < kNumThreads; ++tid) {
+    workers.emplace_back(run_worker, tid);
+  }
+
+  // Join every worker before the locals captured by reference are destroyed.
+  for (auto& worker : workers) {
+    worker.join();
+  }
+}