[nativert] reland D76832891 remove designated initializer cpp20 (#156565)

Summary: fix the Windows build broken in https://github.com/pytorch/pytorch/pull/156508

Test Plan:
ci

Rollback Plan:

Differential Revision: D77080420

Pull Request resolved: https://github.com/pytorch/pytorch/pull/156565
Approved by: https://github.com/zhxchen17
This commit is contained in:
dolpm
2025-06-24 02:38:04 +00:00
committed by PyTorch MergeBot
parent 6a3d00aa3b
commit 9665702c64
7 changed files with 280 additions and 0 deletions

View File

@ -611,6 +611,8 @@ libtorch_nativert_sources = [
"torch/nativert/kernels/C10Kernel.cpp",
"torch/nativert/kernels/AutoFunctionalizeKernel.cpp",
"torch/nativert/kernels/HigherOrderKernel.cpp",
"torch/nativert/executor/memory/GreedyBySize.cpp",
"torch/nativert/executor/memory/Bump.cpp",
]
torch_mobile_tracer_sources = [

View File

@ -18,6 +18,8 @@ set(NATIVERT_TEST_SRCS
${TORCH_ROOT}/torch/nativert/detail/ITree.cpp
${TORCH_ROOT}/torch/nativert/executor/ExecutionFrame.cpp
${TORCH_ROOT}/torch/nativert/kernels/C10Kernel.cpp
${TORCH_ROOT}/torch/nativert/executor/memory/GreedyBySize.cpp
${TORCH_ROOT}/torch/nativert/executor/memory/Bump.cpp
)
add_executable(test_nativert

View File

@ -0,0 +1,63 @@
#include <c10/util/Enumerate.h>
#include <gtest/gtest.h>
#include <torch/nativert/executor/memory/Bump.h>
#include <torch/nativert/executor/memory/GreedyBySize.h>
using namespace ::testing;
using namespace torch::nativert;
// Builds the allocation specs from figure 6 of
// https://arxiv.org/pdf/2001.03288 -- each tuple is
// {lifetime start, lifetime end, size in bytes}.
std::vector<AllocationSpec> create_test_allocation_specs() {
  std::vector<AllocationSpec> specs;

  const std::vector<std::tuple<size_t, size_t, size_t>> test_cases = {
      {0, 1, 32},
      {1, 4, 28},
      {2, 5, 36},
      {3, 5, 16},
      {4, 5, 8},
      {5, 7, 64},
      {6, 8, 10},
      {7, 8, 40},
  };

  specs.reserve(test_cases.size());
  for (const auto& [l_start, l_end, size] : test_cases) {
    specs.push_back(AllocationSpec{AllocationLifetime(l_start, l_end), size});
  } // no trailing semicolon: the original's `};` was an empty statement
  return specs;
}
// figure 6 -- https://arxiv.org/pdf/2001.03288
TEST(LayoutPlannerAlgorithmTests, TestGreedyBySize) {
  auto result = GreedyBySizeAllocationPlanner(create_test_allocation_specs());
  EXPECT_EQ(result.total_size, 124);

  // expected offsets per input spec, in input order (figure 6)
  const std::vector<size_t> expected_offsets = {0, 32, 64, 100, 116, 0, 104, 64};

  auto& allocations = result.allocations;
  // guard the indexed accesses below: the original indexed up to [7]
  // without checking the planner actually produced 8 allocations
  ASSERT_EQ(allocations.size(), expected_offsets.size());
  for (size_t i = 0; i < expected_offsets.size(); ++i) {
    EXPECT_EQ(allocations[i].offset, expected_offsets[i]) << "spec index " << i;
  }
}
TEST(LayoutPlannerAlgorithmTests, TestBump) {
  const auto specs = create_test_allocation_specs();
  const auto plan = BumpAllocationPlanner(create_test_allocation_specs());

  // bump allocation lays specs out back-to-back, so each offset is the
  // running sum of the preceding sizes
  size_t expected_offset = 0;
  for (auto&& [idx, spec] : c10::enumerate(specs)) {
    EXPECT_EQ(plan.allocations[idx].offset, expected_offset);
    expected_offset += spec.size;
  }

  // total size is the sum of all spec sizes
  EXPECT_EQ(plan.total_size, expected_offset);
}

View File

@ -0,0 +1,24 @@
#include <torch/nativert/executor/memory/Bump.h>
namespace torch::nativert {
// Lay every spec out contiguously, one after another. Lifetimes are
// ignored entirely, so the plan's total size is simply the sum of all
// spec sizes.
LayoutPlan BumpAllocationPlanner(
    const std::vector<AllocationSpec>& allocation_specs) {
  LayoutPlan plan;
  plan.allocations.reserve(allocation_specs.size());

  size_t offset = 0;
  for (const auto& spec : allocation_specs) {
    plan.allocations.push_back(Allocation{spec.size, offset});
    offset += spec.size;
  }
  plan.total_size = offset;

  return plan;
}
} // namespace torch::nativert

View File

@ -0,0 +1,13 @@
#pragma once

#include <torch/nativert/executor/memory/LayoutPlannerAlgorithm.h>

namespace torch::nativert {

// lay out all tensors contiguously in memory
// this doesn't take into account lifetimes,
// it literally just puts them all next to each other
//
// the returned plan's total_size is the sum of all spec sizes;
// runs in O(n) over allocation_specs.
LayoutPlan BumpAllocationPlanner(
    const std::vector<AllocationSpec>& allocation_specs);

} // namespace torch::nativert

View File

@ -0,0 +1,166 @@
#include <algorithm>
#include <iomanip>
#include <limits>
#include <optional>
#include <vector>

#include <c10/util/Enumerate.h>
#include <c10/util/Logging.h>
#include <c10/util/irange.h>

#include <torch/nativert/executor/memory/GreedyBySize.h>
namespace {
using namespace torch::nativert;
// we need to track the original order in which allocations were made
// since they will be re-sorted between iterations
//
// extends Allocation (size + offset) with the two bookkeeping indices
// described below; file-local, never escapes the planner.
struct GreedyAllocation : public Allocation {
  explicit GreedyAllocation(
      Allocation allocation,
      size_t allocation_idx,
      size_t input_spec_idx)
      : Allocation(allocation),
        allocation_index(allocation_idx),
        input_spec_index(input_spec_idx) {}
  // we need to maintain the allocation ordering s.t., we can look up
  // previous allocations directly from descending_allocation_specs_
  // even after allocations has been re-sorted, which happens after
  // each allocation is complete.
  //
  // i.e., this index represents the index of the spec that was used
  // to create this allocation inside descending_allocation_specs_
  // AFTER the sorting was completed.
  size_t allocation_index{0};
  // index of the spec associated with this allocation
  // in the event that the specs get re-ordered
  // in the process of creating allocations
  // e.g.,
  // allocation_specs[sX, sY, sZ]
  //                   ^    ^   ^
  //           values[vX, vY, vZ]
  //
  // means that an allocation created from sY
  // will have an input_spec_index of 1
  //
  // this allows us to return to the original
  // ordering before returning the allocations
  size_t input_spec_index{0};
};
// pairs a spec with its original (pre-sort) position in the input vector
struct AllocationSpecWithIndex {
  const AllocationSpec* spec; // non-owning; points into the caller's vector
  size_t index; // original index into allocation_specs
};
// associate specs with their original (unsorted) index
// and then sort them in descending order by byte size
std::vector<AllocationSpecWithIndex> prepare_allocation_specs(
    const std::vector<AllocationSpec>& allocation_specs) {
  std::vector<AllocationSpecWithIndex> specs;
  specs.reserve(allocation_specs.size());

  for (const auto i : c10::irange(allocation_specs.size())) {
    specs.push_back({&allocation_specs[i], i});
  }

  // stable sort so equal-size specs keep their input order; std::sort
  // leaves ties in unspecified order, which would make the resulting
  // layout non-deterministic across stdlib implementations
  std::stable_sort(specs.begin(), specs.end(), [](auto& lhs, auto& rhs) {
    return lhs.spec->size > rhs.spec->size;
  });

  return specs;
}
} // namespace
namespace torch::nativert {
// https://arxiv.org/pdf/2001.03288
//
// greedily place each spec, considered in descending size order, at the
// lowest offset where it fits without overlapping (in memory) any
// already-placed allocation whose lifetime intersects its own.
// quadratic in the number of specs.
LayoutPlan GreedyBySizeAllocationPlanner(
    const std::vector<AllocationSpec>& allocation_specs) {
  LayoutPlan plan;

  auto descending_allocation_specs =
      prepare_allocation_specs(allocation_specs);

  // kept sorted by offset (see the std::lower_bound insertion below)
  std::vector<GreedyAllocation> allocations;
  allocations.reserve(allocation_specs.size());

  // returns the lowest offset at which `spec` can be placed: either the
  // smallest gap between lifetime-overlapping allocations that still fits
  // it, or one past the highest overlapping allocation if no gap fits
  auto get_next_offset = [&](const AllocationSpec& spec) -> size_t {
    size_t prev_offset = 0;
    std::optional<size_t> best_offset = std::nullopt;
    size_t smallest_gap = std::numeric_limits<size_t>::max();

    for (const auto& alloc : allocations) {
      // allocations whose lifetimes don't intersect can share memory,
      // so they impose no constraint on where `spec` may be placed
      if (auto* allocated_spec =
              descending_allocation_specs.at(alloc.allocation_index).spec;
          allocated_spec->not_overlapping_with(spec)) {
        continue;
      }

      if (alloc.offset > prev_offset) {
        // gap before this allocation: take the smallest gap that fits
        if (size_t gap = alloc.offset - prev_offset;
            gap >= spec.size && gap < smallest_gap) {
          smallest_gap = gap;
          best_offset = prev_offset;
        }
      }
      prev_offset = std::max(prev_offset, alloc.offset + alloc.size);
    }

    return best_offset.value_or(prev_offset);
  };

  size_t total_allocation_size = 0;
  for (const auto&& [allocation_index, spec_with_original_index] :
       c10::enumerate(descending_allocation_specs)) {
    auto& spec = spec_with_original_index.spec;

    auto new_allocation = GreedyAllocation(
        Allocation{spec->size, get_next_offset(*spec)},
        allocation_index,
        spec_with_original_index.index);

    total_allocation_size += new_allocation.size;
    plan.total_size =
        std::max(plan.total_size, new_allocation.offset + new_allocation.size);

    VLOG(1) << "allocation with interval " << spec->lifetime.start << "-->"
            << spec->lifetime.end << " placed at offset "
            << new_allocation.offset;

    // insert new allocation while maintaining relative-offset ordering
    // the algorithm is already quadratic because of get_next_offset
    // so this is negligible
    auto it = std::lower_bound(
        allocations.begin(),
        allocations.end(),
        new_allocation,
        [](auto& lhs, auto& rhs) { return lhs.offset < rhs.offset; });
    allocations.insert(it, new_allocation);
  }

  // sort allocations so their ordering is consistent with the input specs
  std::sort(allocations.begin(), allocations.end(), [](auto& lhs, auto& rhs) {
    return lhs.input_spec_index < rhs.input_spec_index;
  });

  plan.allocations.reserve(allocations.size());
  std::move(
      allocations.begin(),
      allocations.end(),
      std::back_inserter(plan.allocations));

  if (plan.total_size > 0) {
    // named casts instead of the original C-style (float) casts
    VLOG(1) << std::fixed << std::setprecision(2)
            << "greedy-by-size bytes saved over strictly increasing: "
            << (1.0 -
                (static_cast<float>(plan.total_size) /
                 static_cast<float>(total_allocation_size))) *
            100
            << "% (" << total_allocation_size << " - " << plan.total_size
            << " = " << (total_allocation_size - plan.total_size) << " bytes)";
  }

  return plan;
}
} // namespace torch::nativert

View File

@ -0,0 +1,10 @@
#pragma once

#include <torch/nativert/executor/memory/LayoutPlannerAlgorithm.h>

namespace torch::nativert {

// greedy-by-size layout planner (https://arxiv.org/pdf/2001.03288):
// considers specs in descending size order and places each at the
// lowest offset where it does not overlap, in memory, any previously
// placed allocation whose lifetime intersects its own.
LayoutPlan GreedyBySizeAllocationPlanner(
    const std::vector<AllocationSpec>& allocation_specs);

} // namespace torch::nativert