[nativert] Move DelegateExecutor to PyTorch core (#155581)
Summary: Moves the DelegateExecutor base class to PyTorch core. It provides the extension point for backend delegation in NativeRT.

Torch Native Runtime RFC: pytorch/rfcs#72

Test Plan: This is only a virtual base class, so relying on internal CI is sufficient.

Rollback Plan:

Differential Revision: D76351984

Pull Request resolved: https://github.com/pytorch/pytorch/pull/155581
Approved by: https://github.com/zhxchen17
commit 57e4d7b5cc
parent a9d5157e25
committed by PyTorch MergeBot
@@ -595,6 +595,7 @@ libtorch_nativert_sources = [
     "torch/nativert/graph/GraphSignature.cpp",
     "torch/nativert/graph/Serialization.cpp",
     "torch/nativert/graph/TensorMeta.cpp",
+    "torch/nativert/executor/DelegateExecutor.cpp",
     "torch/nativert/executor/Placement.cpp",
     "torch/nativert/executor/ExecutionPlanner.cpp",
     "torch/nativert/executor/PlacementUtils.cpp",
torch/nativert/executor/DelegateExecutor.cpp (new file)
#include <torch/nativert/executor/DelegateExecutor.h>

#ifndef _WIN32
#include <unistd.h>
#endif

#include <sys/stat.h>

#include <c10/util/Logging.h>

#include <c10/util/string_view.h>
#include <torch/nativert/common/FileUtil.h>

namespace torch::nativert {

namespace {
char* _mkdtemp(char* outputDir) {
  // mkdtemp is not available on Windows
#ifdef _WIN32
  return nullptr;
#else
  return mkdtemp(outputDir);
#endif
}

} // namespace

std::string extractToTemporaryFolder(
    caffe2::serialize::PyTorchStreamReader& packageReader,
    const std::string& targetPath) {
  char outputDir[] = "/tmp/delegate_model_XXXXXX";
  char* tempdir = _mkdtemp(outputDir);
  TORCH_CHECK(
      tempdir != nullptr,
      "error creating temporary directory for compiled model. errno: ",
      errno);

  std::vector<std::string> allRecords = packageReader.getAllRecords();

  for (const auto& path : allRecords) {
    if (!c10::starts_with(path, targetPath) || c10::ends_with(path, "/")) {
      continue;
    }

    TORCH_CHECK(
        packageReader.hasRecord(path), path, " not present in model package");
    auto [dataPointer, dataSize] = packageReader.getRecord(path);

    std::string fileName = path.substr(path.rfind('/') + 1);
    std::string extractedFilename = std::string(outputDir) + "/" + fileName;

    VLOG(1) << "Extracting " << extractedFilename
            << " from archive path: " << path << " size: " << dataSize;

    File extracted(extractedFilename, O_CREAT | O_WRONLY, 0640);
    const auto bytesWritten = torch::nativert::writeFull(
        extracted.fd(), const_cast<void*>(dataPointer.get()), dataSize);
    TORCH_CHECK(
        bytesWritten != -1,
        "failure copying from archive path ",
        path,
        " to temporary file");
  }

  return std::string(outputDir);
}

} // namespace torch::nativert
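For orientation, here is a minimal sketch of calling extractToTemporaryFolder. The archive name "model.pt2" and the record prefix "extra/delegate/" are hypothetical placeholders, not paths defined by this PR; the real prefix depends on how the delegate payload was packaged at export time.

// Hypothetical usage sketch; "model.pt2" and "extra/delegate/" are
// placeholder names, not defined anywhere in this diff.
#include <iostream>
#include <string>

#include <caffe2/serialize/inline_container.h>
#include <torch/nativert/executor/DelegateExecutor.h>

int main() {
  // Open the exported model package (hypothetical file name).
  caffe2::serialize::PyTorchStreamReader packageReader("model.pt2");

  // Copy every record under the prefix into a fresh temporary directory;
  // a backend can then load its compiled artifacts from the returned path.
  std::string dir = torch::nativert::extractToTemporaryFolder(
      packageReader, /*targetPath=*/"extra/delegate/");
  std::cout << "extracted to " << dir << "\n";
  return 0;
}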
torch/nativert/executor/DelegateExecutor.h (new file)
#pragma once

#include <functional>
#include <memory>
#include <optional>
#include <string>
#include <unordered_map>
#include <vector>

#include <ATen/core/Tensor.h>
#include <caffe2/serialize/inline_container.h>
#include <torch/csrc/inductor/aoti_torch/proxy_executor.h>
#include <torch/nativert/executor/Weights.h>

namespace torch::nativert {

// Extracts every record under targetPath from the package into a fresh
// temporary directory and returns that directory's path.
std::string extractToTemporaryFolder(
    caffe2::serialize::PyTorchStreamReader& packageReader,
    const std::string& targetPath);

using MakeProxyExecutorFn =
    std::function<std::unique_ptr<torch::aot_inductor::ProxyExecutor>(
        const std::string&,
        bool,
        std::optional<std::unordered_map<std::string, c10::IValue>>)>;

// This is the extension point for delegation backends.
class DelegateExecutor {
 public:
  virtual ~DelegateExecutor() {}

  // The runtime calls processWeights() to pass the weights to the delegate
  // backend. Typically, a backend performs some form of validation and
  // processing, such as constant folding. The processed weights stay in an
  // inactive state until commitWeights() is called.
  //
  // Weight tensors are co-owned by the runtime and the delegate backend.
  // On the regular inference run() path, neither the runtime nor the
  // delegate backend may modify the weight tensors.
  // To support in-place weight updates, weight tensors are exposed by
  // ModelRunner::getWeights() to an external caller. The external caller can
  // then modify the weight tensors in place; such mutation instantly affects
  // the weight tensors in the delegate backend.
  // When a weight tensor is no longer used by the delegate backend, the
  // backend must release it by decreasing its refcount. The runtime likewise
  // releases its refcount once a weight tensor is no longer active. The
  // underlying storage of a weight tensor is freed when its refcount reaches
  // zero.
  virtual void processWeights(std::shared_ptr<Weights> weights) = 0;

  // This call activates the processed weights.
  virtual void commitWeights() = 0;

  virtual std::vector<at::Tensor> run(std::vector<at::Tensor>& inputs) = 0;
};

} // namespace torch::nativert
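To illustrate the contract above, here is a minimal sketch of a backend implementing DelegateExecutor. NoopDelegateExecutor is a hypothetical class, not part of this PR; it shows only the two-phase weight protocol, where processWeights() stages a new weight set and commitWeights() swaps it in, dropping shared ownership of the previous set so its refcounts can fall to zero.

// Hypothetical example backend; illustrates the weight-update contract only.
#include <memory>
#include <utility>
#include <vector>

#include <ATen/core/Tensor.h>
#include <torch/nativert/executor/DelegateExecutor.h>

class NoopDelegateExecutor : public torch::nativert::DelegateExecutor {
 public:
  // Stage the incoming weights; they are not used for inference yet.
  void processWeights(
      std::shared_ptr<torch::nativert::Weights> weights) override {
    pendingWeights_ = std::move(weights);
  }

  // Activate the staged weights. Overwriting activeWeights_ drops this
  // backend's refcount on the previous set, allowing its storage to be
  // freed once the runtime releases it too.
  void commitWeights() override {
    activeWeights_ = std::move(pendingWeights_);
  }

  // A real backend would dispatch to compiled kernels; this one just
  // echoes its inputs.
  std::vector<at::Tensor> run(std::vector<at::Tensor>& inputs) override {
    return inputs;
  }

 private:
  std::shared_ptr<torch::nativert::Weights> pendingWeights_;
  std::shared_ptr<torch::nativert::Weights> activeWeights_;
};

The MakeProxyExecutorFn alias suggests a concrete backend is also handed a factory for torch::aot_inductor::ProxyExecutor instances; how that factory is wired into a backend is outside this diff.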