mirror of https://github.com/pytorch/pytorch.git, synced 2025-10-20 21:14:14 +08:00
Summary: att
Test Plan: ci
Rollback Plan:
Differential Revision: D76167093
Pull Request resolved: https://github.com/pytorch/pytorch/pull/155374
Approved by: https://github.com/zhxchen17
118 lines · 3.3 KiB · C++
#include <unordered_map>

#include <c10/util/Logging.h>

#include <c10/util/Enumerate.h>
#include <torch/nativert/executor/ExecutionPlanner.h>

namespace torch::nativert {

std::unique_ptr<ExecutionPlan> ExecutionPlanner::createPlan() {
  auto plan = std::make_unique<ExecutionPlan>();

  // The current implementation assumes that nodes will be executed
  // in the same order as they appear in the thrift graph.
  // In the future, we could plan a different execution order,
  // as long as it complies with the topological order.

  generateDeallocationPlan(*plan);
  return plan;
}
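
// Illustrative sketch (not part of this file): how a caller might consume the
// plan. `executeNode` and `freeValue` are hypothetical names used only to
// show the intended pairing of node execution with deallocation.
//
//   auto plan = planner.createPlan();
//   for (auto&& [i, node] : c10::enumerate(graph.nodes())) {
//     executeNode(node);
//     for (ValueId id : plan->valuesToFree[i]) {
//       freeValue(id);  // safe: no later node reads this value
//     }
//   }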

/* static */ c10::FastSet<ValueId> ExecutionPlanner::staticValues(
    const Graph& graph) {
  c10::FastSet<ValueId> staticValues;
  // Filter lastUsedBy by graph inputs:
  // parameter/buffer values should not be freed.
  // Whether to free user inputs is a policy decision; for now, we don't
  // free them.
  // TODO: It should be fine to "free" the user inputs. If the user holds a ref
  // to one, it won't be deallocated.
  for (const auto* input : graph.inputs()) {
    if (input) {
      const auto& id = input->id();
      staticValues.insert(id);
    }
  }

  // Filter lastUsedBy by graph outputs, as they still need to be returned.
  for (const auto& output : graph.outputs()) {
    const auto& id = output->id();
    staticValues.insert(id);
  }

  for (const auto& [id, _] : graph.getConstantSymIntValues()) {
    staticValues.insert(id);
  }

  for (const Node& node : graph.nodes()) {
    if (node.target() == "torch.ops.higher_order.run_const_graph") {
      // Do not free the outputs of run_const_graph, as they are newly
      // produced folded constants.
      for (const auto& output : node.outputs()) {
        staticValues.insert(output->id());
      }
    } else {
      for (const auto& input : node.inputs()) {
        if (input.value->isFolded()) {
          staticValues.insert(input.value->id());
        }
      }
    }
  }

  return staticValues;
}
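
// Illustrative example (hypothetical graph): for
//   graph(%param, %user_in) -> (%out)
// staticValues would contain %param and %user_in (graph inputs), %out (graph
// output), any constant SymInt values, and any folded-constant inputs, so
// none of these ever appear in a node's valuesToFree list.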

void ExecutionPlanner::generateDeallocationPlan(ExecutionPlan& plan) {
  const auto& nodes = graph_.nodes();
  size_t numNodes = nodes.size();

  std::unordered_map<ValueId, NodeIndex> lastUsedBy;

  // Traverse from the last node to the first node.
  // For each Value, find the last node that uses it;
  // the Value can be freed after executing that node.
  size_t nodeIdx = nodes.size() - 1;
  for (auto it = std::rbegin(nodes); it != std::rend(nodes); it++) {
    const auto& inputs = it->inputs();
    for (const auto& input : inputs) {
      const auto& id = input.value->id();
      if (lastUsedBy.find(id) == lastUsedBy.end()) {
        lastUsedBy.insert({id, nodeIdx});
      }
    }
    nodeIdx--;
  }

  std::vector<std::vector<ValueId>> valuesToFree(numNodes);

  const auto& statics = staticValues(graph_);
  for (auto& [id, nodeIndex] : lastUsedBy) {
    if (statics.find(id) == statics.end()) {
      valuesToFree[nodeIndex].push_back(id);
    }
  }

  plan.valuesToFree = std::move(valuesToFree);

  // Print the deallocation plan.
  VLOG(2) << plan;
}
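
// Worked example (illustrative only, using a hypothetical two-node graph and
// ignoring any implicit input/output nodes):
//   node 0: %2 = op_a(%0, %1)
//   node 1: %3 = op_b(%2, %1)
// The reverse scan records lastUsedBy = {%2: 1, %1: 1, %0: 0}. With the graph
// inputs %0/%1 and the output %3 held static, the resulting plan is
//   valuesToFree[0] = []  and  valuesToFree[1] = [%2].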

std::ostream& operator<<(std::ostream& out, const ExecutionPlan& plan) {
  out << "****** Deallocation Plan ******\n";
  for (auto&& [i, values] : c10::enumerate(plan.valuesToFree)) {
    out << "Node #" << i << ", valuesToFree = [";
    for (const auto& value : values) {
      out << value << ", ";
    }
    out << "]\n";
  }
  return out;
}
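
// Example output (illustrative, for the hypothetical two-node graph above;
// note the trailing ", " inside non-empty brackets, which matches the loop
// above exactly):
//   ****** Deallocation Plan ******
//   Node #0, valuesToFree = []
//   Node #1, valuesToFree = [2, ]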

} // namespace torch::nativert