[nativert] Move DelegateExecutor to PyTorch core (#155581)

Summary:
Moves the DelegateExecutor base class to PyTorch core. It provides the extension point for backend delegation in NativeRT.
Torch Native Runtime RFC: pytorch/rfcs#72

Test Plan:
DelegateExecutor is only a virtual base class, so relying on internal CI is sufficient.

Rollback Plan:

Differential Revision: D76351984

Pull Request resolved: https://github.com/pytorch/pytorch/pull/155581
Approved by: https://github.com/zhxchen17
Author: Yiming Zhou
Date: 2025-06-12 04:33:31 +00:00
Committed by: PyTorch MergeBot
Parent: a9d5157e25
Commit: 57e4d7b5cc
3 changed files with 121 additions and 0 deletions


@@ -595,6 +595,7 @@ libtorch_nativert_sources = [
"torch/nativert/graph/GraphSignature.cpp",
"torch/nativert/graph/Serialization.cpp",
"torch/nativert/graph/TensorMeta.cpp",
"torch/nativert/executor/DelegateExecutor.cpp",
"torch/nativert/executor/Placement.cpp",
"torch/nativert/executor/ExecutionPlanner.cpp",
"torch/nativert/executor/PlacementUtils.cpp",


@@ -0,0 +1,68 @@
#include <torch/nativert/executor/DelegateExecutor.h>

#ifndef _WIN32
#include <unistd.h>
#endif
#include <sys/stat.h>

#include <c10/util/Logging.h>
#include <c10/util/string_view.h>
#include <torch/nativert/common/FileUtil.h>

namespace torch::nativert {

namespace {
char* _mkdtemp(char* outputDir) {
  // mkdtemp is not available on Windows
#ifdef _WIN32
  return nullptr;
#else
  return mkdtemp(outputDir);
#endif
}
} // namespace

std::string extractToTemporaryFolder(
    caffe2::serialize::PyTorchStreamReader& packageReader,
    const std::string& targetPath) {
  char outputDir[] = "/tmp/delegate_model_XXXXXX";
  char* tempdir = _mkdtemp(outputDir);
  TORCH_CHECK(
      tempdir != nullptr,
      "error creating temporary directory for compiled model. errno: ",
      errno);

  std::vector<std::string> allRecords = packageReader.getAllRecords();
  for (const auto& path : allRecords) {
    if (!c10::starts_with(path, targetPath) || c10::ends_with(path, "/")) {
      continue;
    }
    TORCH_CHECK(
        packageReader.hasRecord(path), path, " not present in model package");
    auto [dataPointer, dataSize] = packageReader.getRecord(path);

    std::string fileName = path.substr(path.rfind('/') + 1);
    std::string extractedFilename = std::string(outputDir) + "/" + fileName;
    VLOG(1) << "Extracting " << extractedFilename
            << " from archive path: " << path << " size: " << dataSize;

    File extracted(extractedFilename, O_CREAT | O_WRONLY, 0640);
    const auto bytesWritten = torch::nativert::writeFull(
        extracted.fd(), const_cast<void*>(dataPointer.get()), dataSize);
    TORCH_CHECK(
        bytesWritten != -1,
        "failure copying from archive path ",
        path,
        " to temporary file");
  }
  return std::string(outputDir);
}

} // namespace torch::nativert
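
For context, a backend implementation would typically call extractToTemporaryFolder() to materialize its compiled artifacts from the model package before loading them. The sketch below assumes an archive path and a record prefix that are purely illustrative; only PyTorchStreamReader and extractToTemporaryFolder come from this change.

// Illustrative sketch only: the archive path and record prefix are made up.
#include <string>

#include <caffe2/serialize/inline_container.h>
#include <torch/nativert/executor/DelegateExecutor.h>

std::string loadDelegatePayload() {
  // Open an exported model archive (hypothetical path).
  caffe2::serialize::PyTorchStreamReader reader("/path/to/model.pt2");

  // Copy every record under the (hypothetical) "extra/delegate/" prefix into
  // a fresh /tmp/delegate_model_XXXXXX directory and return that directory.
  std::string dir = torch::nativert::extractToTemporaryFolder(
      reader, /*targetPath=*/"extra/delegate/");

  // The backend can now mmap or dlopen files under `dir`.
  return dir;
}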


@@ -0,0 +1,52 @@
#pragma once

#include <memory>
#include <vector>

#include <ATen/core/Tensor.h>
#include <caffe2/serialize/inline_container.h>
#include <torch/csrc/inductor/aoti_torch/proxy_executor.h>
#include <torch/nativert/executor/Weights.h>

namespace torch::nativert {

std::string extractToTemporaryFolder(
    caffe2::serialize::PyTorchStreamReader& packageReader,
    const std::string& targetPath);

using MakeProxyExecutorFn =
    std::function<std::unique_ptr<torch::aot_inductor::ProxyExecutor>(
        const std::string&,
        bool,
        std::optional<std::unordered_map<std::string, c10::IValue>>)>;

// This is the extension point for delegation backends.
class DelegateExecutor {
 public:
  virtual ~DelegateExecutor() {}

  // The runtime calls processWeights() to pass the weights to the delegate
  // backend. Typically, a backend performs some form of validation and
  // processing, such as constant folding. The processed weights stay in the
  // inactive state until commitWeights() is called.
  //
  // Weight tensors are co-owned by the runtime and the delegate backend.
  // On the regular inference run() path, neither the runtime nor the delegate
  // backend can modify the weight tensors.
  // To support in-place weight updates, weight tensors are exposed by
  // ModelRunner::getWeights() to an external caller. The external caller can
  // then modify the weight tensors in place. Such mutations instantly affect
  // the weight tensors in the delegate backend.
  // When a weight tensor is no longer used by the delegate backend, the
  // backend must release it by decreasing its refcount. The runtime also
  // releases the refcount for a weight tensor once it is no longer active.
  // The underlying storage for a weight tensor is freed when its refcount
  // reaches 0.
  virtual void processWeights(std::shared_ptr<Weights> weights) = 0;

  // This call activates the processed weights.
  virtual void commitWeights() = 0;

  virtual std::vector<at::Tensor> run(std::vector<at::Tensor>& inputs) = 0;
};

} // namespace torch::nativert
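
For illustration, a delegation backend would subclass DelegateExecutor roughly as follows. MyBackendExecutor and its two-slot weight handling are a hypothetical sketch written against the semantics documented above, not part of this change.

// Hypothetical backend; class and member names are illustrative only.
#include <memory>
#include <utility>
#include <vector>

#include <torch/nativert/executor/DelegateExecutor.h>

namespace my_backend {

class MyBackendExecutor : public torch::nativert::DelegateExecutor {
 public:
  // Stage incoming weights; they are not visible to run() until committed.
  void processWeights(
      std::shared_ptr<torch::nativert::Weights> weights) override {
    // A real backend might validate shapes/dtypes or constant-fold here.
    pendingWeights_ = std::move(weights);
  }

  // Swap the staged weights into the active slot.
  void commitWeights() override {
    activeWeights_ = std::move(pendingWeights_);
  }

  std::vector<at::Tensor> run(std::vector<at::Tensor>& inputs) override {
    // A real backend would dispatch to its compiled artifact; this sketch
    // simply echoes the inputs back.
    return inputs;
  }

 private:
  std::shared_ptr<torch::nativert::Weights> pendingWeights_;
  std::shared_ptr<torch::nativert::Weights> activeWeights_;
};

} // namespace my_backend

The processWeights()/commitWeights() split lets new weights be staged and validated while the currently committed ones keep serving run() calls.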