mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Revert "[nativert] move layout planner algorithms to libtorch (#156508)"
This reverts commit eab45643f22e58ee12d95d8b0162d51ca0a50801.
Reverted https://github.com/pytorch/pytorch/pull/156508 on behalf of https://github.com/atalman due to [GH job link](https://github.com/pytorch/pytorch/actions/runs/15793524714/job/44524067679) [HUD commit link](eab45643f2) ([comment](https://github.com/pytorch/pytorch/pull/156508#issuecomment-2993589983))
This commit is contained in:
@ -610,8 +610,6 @@ libtorch_nativert_sources = [
|
||||
"torch/nativert/detail/ITree.cpp",
|
||||
"torch/nativert/kernels/C10Kernel.cpp",
|
||||
"torch/nativert/kernels/AutoFunctionalizeKernel.cpp",
|
||||
"torch/nativert/executor/memory/GreedyBySize.cpp",
|
||||
"torch/nativert/executor/memory/Bump.cpp",
|
||||
]
|
||||
|
||||
torch_mobile_tracer_sources = [
|
||||
|
@ -18,8 +18,6 @@ set(NATIVERT_TEST_SRCS
|
||||
${TORCH_ROOT}/torch/nativert/detail/ITree.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/ExecutionFrame.cpp
|
||||
${TORCH_ROOT}/torch/nativert/kernels/C10Kernel.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/memory/GreedyBySize.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/memory/Bump.cpp
|
||||
)
|
||||
|
||||
add_executable(test_nativert
|
||||
|
@ -1,63 +0,0 @@
|
||||
#include <c10/util/Enumerate.h>
#include <gtest/gtest.h>

#include <torch/nativert/executor/memory/Bump.h>
#include <torch/nativert/executor/memory/GreedyBySize.h>

using namespace ::testing;
using namespace torch::nativert;

// Builds the shared fixture for the layout-planner tests: eight allocation
// specs taken from figure 6 of https://arxiv.org/pdf/2001.03288, each given
// as {lifetime start, lifetime end, size in bytes}.
std::vector<AllocationSpec> create_test_allocation_specs() {
  std::vector<AllocationSpec> specs;

  // {l_start, l_end, size}
  const std::vector<std::tuple<size_t, size_t, size_t>> test_cases = {
      {0, 1, 32},
      {1, 4, 28},
      {2, 5, 36},
      {3, 5, 16},
      {4, 5, 8},
      {5, 7, 64},
      {6, 8, 10},
      {7, 8, 40},
  };

  for (const auto& [l_start, l_end, size] : test_cases) {
    specs.push_back(AllocationSpec{
        .lifetime = AllocationLifetime(l_start, l_end), .size = size});
  }

  return specs;
}
|
||||
|
||||
// figure 6 -- https://arxiv.org/pdf/2001.03288
|
||||
TEST(LayoutPlannerAlgorithmTests, TestGreedyBySize) {
|
||||
auto result = GreedyBySizeAllocationPlanner(create_test_allocation_specs());
|
||||
|
||||
EXPECT_EQ(result.total_size, 124);
|
||||
|
||||
auto& allocations = result.allocations;
|
||||
|
||||
EXPECT_EQ(allocations[0].offset, 0);
|
||||
EXPECT_EQ(allocations[1].offset, 32);
|
||||
EXPECT_EQ(allocations[2].offset, 64);
|
||||
EXPECT_EQ(allocations[3].offset, 100);
|
||||
EXPECT_EQ(allocations[4].offset, 116);
|
||||
EXPECT_EQ(allocations[5].offset, 0);
|
||||
EXPECT_EQ(allocations[6].offset, 104);
|
||||
EXPECT_EQ(allocations[7].offset, 64);
|
||||
}
|
||||
|
||||
// The bump planner ignores lifetimes and lays specs out back-to-back, so
// every offset is the running sum of the preceding sizes and total_size is
// the sum of all sizes.
TEST(LayoutPlannerAlgorithmTests, TestBump) {
  auto specs = create_test_allocation_specs();
  // pass the specs we already built instead of generating a second,
  // identical copy via create_test_allocation_specs()
  auto result = BumpAllocationPlanner(specs);

  auto& allocations = result.allocations;

  size_t offset = 0; // size_t to match AllocationSpec::size (avoid signed/unsigned mix)
  for (auto&& [i, spec] : c10::enumerate(specs)) {
    EXPECT_EQ(allocations[i].offset, offset);
    offset += spec.size;
  }

  EXPECT_EQ(result.total_size, offset);
}
|
@ -1,24 +0,0 @@
|
||||
#include <torch/nativert/executor/memory/Bump.h>

namespace torch::nativert {

// Places each spec at the current end of the buffer and bumps the end by
// the spec's size. Lifetimes are ignored, so no memory is ever reused;
// plan.total_size ends up as the sum of all spec sizes.
//
// @param allocation_specs  specs to lay out, in the order given
// @return a LayoutPlan with one Allocation per spec, in the same order
LayoutPlan BumpAllocationPlanner(
    const std::vector<AllocationSpec>& allocation_specs) {
  LayoutPlan plan;
  plan.allocations.reserve(allocation_specs.size());

  for (const auto& spec : allocation_specs) {
    // the next allocation starts where the previous ones end
    plan.allocations.push_back(Allocation{
        .size = spec.size,
        .offset = plan.total_size,
    });
    plan.total_size += spec.size;
  }

  return plan;
}

} // namespace torch::nativert
|
@ -1,13 +0,0 @@
|
||||
#pragma once

#include <torch/nativert/executor/memory/LayoutPlannerAlgorithm.h>

namespace torch::nativert {

// lay out all tensors contiguously in memory
// this doesn't take into account lifetimes,
// it literally just puts them all next to each other
LayoutPlan BumpAllocationPlanner(
    const std::vector<AllocationSpec>& allocation_specs);

} // namespace torch::nativert
|
@ -1,166 +0,0 @@
|
||||
#include <algorithm> // for std::sort / std::lower_bound / std::max (was relied on transitively)
#include <iomanip>
#include <limits>
#include <optional>

#include <c10/util/Enumerate.h>
#include <c10/util/Logging.h>
#include <c10/util/irange.h>

#include <torch/nativert/executor/memory/GreedyBySize.h>

namespace {

using namespace torch::nativert;

// we need to track the original order in which allocations were made
// since they will be re-sorted between iterations
struct GreedyAllocation : public Allocation {
  explicit GreedyAllocation(
      Allocation allocation,
      size_t allocation_idx,
      size_t input_spec_idx)
      : Allocation(allocation),
        allocation_index(allocation_idx),
        input_spec_index(input_spec_idx) {}
  // we need to maintain the allocation ordering s.t., we can look up
  // previous allocations directly from descending_allocation_specs_
  // even after allocations has been re-sorted, which happens after
  // each allocation is complete.
  //
  // i.e., this index represents the index of the spec that was used
  // to create this allocation inside descending_allocation_specs_
  // AFTER the sorting was completed.
  size_t allocation_index{0};
  // index of the spec associated with this allocation
  // in the event that the specs get re-ordered
  // in the process of creating allocations
  // e.g.,
  // allocation_specs[sX, sY, sZ]
  //                   ^   ^   ^
  //           values[vX, vY, vZ]
  //
  // means that an allocation created from sY
  // will have an input_spec_index of 1
  //
  // this allows us to return to the original
  // ordering before returning the allocations
  size_t input_spec_index{0};
};

// pairs a spec with its position in the caller-supplied vector so the
// original ordering can be restored after sorting
struct AllocationSpecWithIndex {
  const AllocationSpec* spec;
  size_t index;
};

// associate specs with their original (unsorted) index
// and then sort them in descending order by byte size
// NOTE: std::sort is not stable, so the relative order of equal-sized
// specs is unspecified — acceptable here since any such order yields a
// valid (if not byte-identical) layout.
std::vector<AllocationSpecWithIndex> prepare_allocation_specs(
    const std::vector<AllocationSpec>& allocation_specs) {
  std::vector<AllocationSpecWithIndex> specs;
  specs.reserve(allocation_specs.size());

  for (const auto i : c10::irange(allocation_specs.size())) {
    specs.push_back({&allocation_specs[i], i});
  }

  std::sort(specs.begin(), specs.end(), [](auto& lhs, auto& rhs) {
    return lhs.spec->size > rhs.spec->size;
  });

  return specs;
}

} // namespace
|
||||
|
||||
namespace torch::nativert {
|
||||
|
||||
// https://arxiv.org/pdf/2001.03288
|
||||
LayoutPlan GreedyBySizeAllocationPlanner(
|
||||
const std::vector<AllocationSpec>& allocation_specs) {
|
||||
LayoutPlan plan;
|
||||
|
||||
auto descending_allocation_specs = prepare_allocation_specs(allocation_specs);
|
||||
|
||||
std::vector<GreedyAllocation> allocations;
|
||||
allocations.reserve(allocation_specs.size());
|
||||
|
||||
auto get_next_offset = [&](const AllocationSpec& spec) -> size_t {
|
||||
size_t prev_offset = 0;
|
||||
std::optional<size_t> best_offset = std::nullopt;
|
||||
size_t smallest_gap = std::numeric_limits<size_t>::max();
|
||||
|
||||
for (const auto& alloc : allocations) {
|
||||
if (auto* allocated_spec =
|
||||
descending_allocation_specs.at(alloc.allocation_index).spec;
|
||||
allocated_spec->not_overlapping_with(spec)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (alloc.offset > prev_offset) {
|
||||
if (size_t gap = alloc.offset - prev_offset;
|
||||
gap >= spec.size && gap < smallest_gap) {
|
||||
smallest_gap = gap;
|
||||
best_offset = prev_offset;
|
||||
}
|
||||
}
|
||||
|
||||
prev_offset = std::max(prev_offset, alloc.offset + alloc.size);
|
||||
}
|
||||
|
||||
return best_offset.value_or(prev_offset);
|
||||
};
|
||||
|
||||
size_t total_allocation_size = 0;
|
||||
for (const auto&& [allocation_index, spec_with_original_index] :
|
||||
c10::enumerate(descending_allocation_specs)) {
|
||||
auto& spec = spec_with_original_index.spec;
|
||||
|
||||
auto new_allocation = GreedyAllocation(
|
||||
Allocation{.size = spec->size, .offset = get_next_offset(*spec)},
|
||||
allocation_index,
|
||||
spec_with_original_index.index);
|
||||
|
||||
total_allocation_size += new_allocation.size;
|
||||
plan.total_size =
|
||||
std::max(plan.total_size, new_allocation.offset + new_allocation.size);
|
||||
|
||||
VLOG(1) << "allocation with interval " << spec->lifetime.start << "-->"
|
||||
<< spec->lifetime.end << " placed at offset "
|
||||
<< new_allocation.offset;
|
||||
|
||||
// insert new allocation while maintaining relative-offset ordering
|
||||
// the algorithm is already quadratic because of get_next_offset
|
||||
// so this is negligible
|
||||
|
||||
auto it = std::lower_bound(
|
||||
allocations.begin(),
|
||||
allocations.end(),
|
||||
new_allocation,
|
||||
[](auto& lhs, auto& rhs) { return lhs.offset < rhs.offset; });
|
||||
allocations.insert(it, new_allocation);
|
||||
}
|
||||
|
||||
// sort allocations so their ordering is consistent with the input specs
|
||||
std::sort(allocations.begin(), allocations.end(), [](auto& lhs, auto& rhs) {
|
||||
return lhs.input_spec_index < rhs.input_spec_index;
|
||||
});
|
||||
|
||||
plan.allocations.reserve(allocations.size());
|
||||
std::move(
|
||||
allocations.begin(),
|
||||
allocations.end(),
|
||||
std::back_inserter(plan.allocations));
|
||||
|
||||
if (plan.total_size > 0) {
|
||||
VLOG(1) << std::fixed << std::setprecision(2)
|
||||
<< "greedy-by-size bytes saved over strictly increasing: "
|
||||
<< (1.0 - ((float)plan.total_size / (float)total_allocation_size)) *
|
||||
100
|
||||
<< "% (" << total_allocation_size << " - " << plan.total_size
|
||||
<< " = " << (total_allocation_size - plan.total_size) << " bytes)";
|
||||
}
|
||||
|
||||
return plan;
|
||||
}
|
||||
|
||||
} // namespace torch::nativert
|
@ -1,10 +0,0 @@
|
||||
#pragma once

#include <torch/nativert/executor/memory/LayoutPlannerAlgorithm.h>

namespace torch::nativert {

// greedy-by-size layout planner (https://arxiv.org/pdf/2001.03288):
// places specs largest-first, reusing gaps between lifetime-disjoint
// allocations; returned allocations follow the input-spec order
LayoutPlan GreedyBySizeAllocationPlanner(
    const std::vector<AllocationSpec>& allocation_specs);

} // namespace torch::nativert
|
Reference in New Issue
Block a user