[nativert] Move DelegateExecutor to PyTorch core (#155581)

Summary:
Moves the DelegateExecutor base class to PyTorch core. It provides the extension point for backend delegation in NativeRT.
Torch Native Runtime RFC: pytorch/rfcs#72

Test Plan:
DelegateExecutor is only a virtual base class, so relying on internal CI is sufficient.

Rollback Plan:

Differential Revision: D76351984

Pull Request resolved: https://github.com/pytorch/pytorch/pull/155581
Approved by: https://github.com/zhxchen17
Author: Yiming Zhou
Date: 2025-06-12 04:33:31 +00:00
Committed by: PyTorch MergeBot
Parent: a9d5157e25
Commit: 57e4d7b5cc
3 changed files with 121 additions and 0 deletions


@@ -595,6 +595,7 @@ libtorch_nativert_sources = [
"torch/nativert/graph/GraphSignature.cpp",
"torch/nativert/graph/Serialization.cpp",
"torch/nativert/graph/TensorMeta.cpp",
"torch/nativert/executor/DelegateExecutor.cpp",
"torch/nativert/executor/Placement.cpp",
"torch/nativert/executor/ExecutionPlanner.cpp",
"torch/nativert/executor/PlacementUtils.cpp",


@@ -0,0 +1,68 @@
#include <torch/nativert/executor/DelegateExecutor.h>

#ifndef _WIN32
#include <unistd.h>
#endif
#include <sys/stat.h>

#include <c10/util/Logging.h>
#include <c10/util/string_view.h>
#include <torch/nativert/common/FileUtil.h>

namespace torch::nativert {

namespace {
char* _mkdtemp(char* outputDir) {
  // mkdtemp is not available on Windows
#ifdef _WIN32
  return nullptr;
#else
  return mkdtemp(outputDir);
#endif
}
} // namespace

std::string extractToTemporaryFolder(
    caffe2::serialize::PyTorchStreamReader& packageReader,
    const std::string& targetPath) {
  char outputDir[] = "/tmp/delegate_model_XXXXXX";
  char* tempdir = _mkdtemp(outputDir);
  TORCH_CHECK(
      tempdir != nullptr,
      "error creating temporary directory for compiled model. errno: ",
      errno);

  std::vector<std::string> allRecords = packageReader.getAllRecords();
  for (const auto& path : allRecords) {
    if (!c10::starts_with(path, targetPath) || c10::ends_with(path, "/")) {
      continue;
    }
    TORCH_CHECK(
        packageReader.hasRecord(path), path, " not present in model package");
    auto [dataPointer, dataSize] = packageReader.getRecord(path);

    std::string fileName = path.substr(path.rfind('/') + 1);
    std::string extractedFilename = std::string(outputDir) + "/" + fileName;
    VLOG(1) << "Extracting " << extractedFilename
            << " from archive path: " << path << " size: " << dataSize;

    File extracted(extractedFilename, O_CREAT | O_WRONLY, 0640);
    const auto bytesWritten = torch::nativert::writeFull(
        extracted.fd(), const_cast<void*>(dataPointer.get()), dataSize);
    TORCH_CHECK(
        bytesWritten != -1,
        "failure copying from archive path ",
        path,
        " to temporary file");
  }
  return std::string(outputDir);
}

} // namespace torch::nativert
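
For context, a backend implementation would typically call extractToTemporaryFolder() to materialize its compiled artifacts from the model package before loading them. The sketch below assumes an archive path and a record prefix that are purely illustrative; only PyTorchStreamReader and extractToTemporaryFolder come from this change.

// Illustrative sketch only: the archive path and record prefix are made up.
#include <string>

#include <caffe2/serialize/inline_container.h>
#include <torch/nativert/executor/DelegateExecutor.h>

std::string loadDelegatePayload() {
  // Open an exported model archive (hypothetical path).
  caffe2::serialize::PyTorchStreamReader reader("/path/to/model.pt2");

  // Copy every record under the (hypothetical) "extra/delegate/" prefix into
  // a fresh /tmp/delegate_model_XXXXXX directory and return that directory.
  std::string dir = torch::nativert::extractToTemporaryFolder(
      reader, /*targetPath=*/"extra/delegate/");

  // The backend can now mmap or dlopen files under `dir`.
  return dir;
}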


@@ -0,0 +1,52 @@
#pragma once

#include <memory>
#include <vector>

#include <ATen/core/Tensor.h>
#include <caffe2/serialize/inline_container.h>
#include <torch/csrc/inductor/aoti_torch/proxy_executor.h>
#include <torch/nativert/executor/Weights.h>

namespace torch::nativert {

std::string extractToTemporaryFolder(
    caffe2::serialize::PyTorchStreamReader& packageReader,
    const std::string& targetPath);

using MakeProxyExecutorFn =
    std::function<std::unique_ptr<torch::aot_inductor::ProxyExecutor>(
        const std::string&,
        bool,
        std::optional<std::unordered_map<std::string, c10::IValue>>)>;

// This is the extension point for delegation backends.
class DelegateExecutor {
 public:
  virtual ~DelegateExecutor() {}

  // The runtime calls processWeights() to pass the weights to the delegate
  // backend. Typically, a backend performs some form of validation and
  // processing, such as constant folding. The processed weights stay in the
  // inactive state until commitWeights() is called.
  //
  // Weight tensors are co-owned by the runtime and the delegate backend.
  // On the regular inference run() path, neither the runtime nor the delegate
  // backend can modify the weight tensors.
  // To support in-place weight updates, weight tensors are exposed by
  // ModelRunner::getWeights() to an external caller. The external caller can
  // then modify the weight tensors in place. Such mutations instantly affect
  // the weight tensors in the delegate backend.
  // When a weight tensor is no longer used by the delegate backend, the
  // backend must release it by decreasing its refcount. The runtime also
  // releases the refcount for a weight tensor once it is no longer active.
  // The underlying storage for a weight tensor is freed when its refcount
  // reaches 0.
  virtual void processWeights(std::shared_ptr<Weights> weights) = 0;

  // This call activates the processed weights.
  virtual void commitWeights() = 0;

  virtual std::vector<at::Tensor> run(std::vector<at::Tensor>& inputs) = 0;
};

} // namespace torch::nativert
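
For illustration, a delegation backend would subclass DelegateExecutor roughly as follows. MyBackendExecutor and its two-slot weight handling are a hypothetical sketch written against the semantics documented above, not part of this change.

// Hypothetical backend; class and member names are illustrative only.
#include <memory>
#include <utility>
#include <vector>

#include <torch/nativert/executor/DelegateExecutor.h>

namespace my_backend {

class MyBackendExecutor : public torch::nativert::DelegateExecutor {
 public:
  // Stage incoming weights; they are not visible to run() until committed.
  void processWeights(
      std::shared_ptr<torch::nativert::Weights> weights) override {
    // A real backend might validate shapes/dtypes or constant-fold here.
    pendingWeights_ = std::move(weights);
  }

  // Swap the staged weights into the active slot.
  void commitWeights() override {
    activeWeights_ = std::move(pendingWeights_);
  }

  std::vector<at::Tensor> run(std::vector<at::Tensor>& inputs) override {
    // A real backend would dispatch to its compiled artifact; this sketch
    // simply echoes the inputs back.
    return inputs;
  }

 private:
  std::shared_ptr<torch::nativert::Weights> pendingWeights_;
  std::shared_ptr<torch::nativert::Weights> activeWeights_;
};

} // namespace my_backend

The processWeights()/commitWeights() split lets new weights be staged and validated while the currently committed ones keep serving run() calls.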