[nativert] reland D76832891 remove designated initializer cpp20 (#156565)

Summary: fix the Windows build broken in https://github.com/pytorch/pytorch/pull/156508

Test Plan:
ci

Rollback Plan:

Differential Revision: D77080420

Pull Request resolved: https://github.com/pytorch/pytorch/pull/156565
Approved by: https://github.com/zhxchen17
This commit is contained in:
dolpm
2025-06-24 02:38:04 +00:00
committed by PyTorch MergeBot
parent 6a3d00aa3b
commit 9665702c64
7 changed files with 280 additions and 0 deletions

View File

@ -611,6 +611,8 @@ libtorch_nativert_sources = [
"torch/nativert/kernels/C10Kernel.cpp",
"torch/nativert/kernels/AutoFunctionalizeKernel.cpp",
"torch/nativert/kernels/HigherOrderKernel.cpp",
"torch/nativert/executor/memory/GreedyBySize.cpp",
"torch/nativert/executor/memory/Bump.cpp",
]
torch_mobile_tracer_sources = [

View File

@ -18,6 +18,8 @@ set(NATIVERT_TEST_SRCS
${TORCH_ROOT}/torch/nativert/detail/ITree.cpp
${TORCH_ROOT}/torch/nativert/executor/ExecutionFrame.cpp
${TORCH_ROOT}/torch/nativert/kernels/C10Kernel.cpp
${TORCH_ROOT}/torch/nativert/executor/memory/GreedyBySize.cpp
${TORCH_ROOT}/torch/nativert/executor/memory/Bump.cpp
)
add_executable(test_nativert

View File

@ -0,0 +1,63 @@
#include <c10/util/Enumerate.h>
#include <gtest/gtest.h>
#include <torch/nativert/executor/memory/Bump.h>
#include <torch/nativert/executor/memory/GreedyBySize.h>
using namespace ::testing;
using namespace torch::nativert;
// Builds the allocation specs from figure 6 of
// https://arxiv.org/pdf/2001.03288 -- each tuple is
// {lifetime start, lifetime end, size in bytes}.
std::vector<AllocationSpec> create_test_allocation_specs() {
  std::vector<AllocationSpec> specs;

  const std::vector<std::tuple<size_t, size_t, size_t>> test_cases = {
      {0, 1, 32},
      {1, 4, 28},
      {2, 5, 36},
      {3, 5, 16},
      {4, 5, 8},
      {5, 7, 64},
      {6, 8, 10},
      {7, 8, 40},
  };

  specs.reserve(test_cases.size());
  for (const auto& [l_start, l_end, size] : test_cases) {
    specs.push_back(AllocationSpec{AllocationLifetime(l_start, l_end), size});
  } // no trailing semicolon: the original's `};` was an empty statement
  return specs;
}
// figure 6 -- https://arxiv.org/pdf/2001.03288
TEST(LayoutPlannerAlgorithmTests, TestGreedyBySize) {
  auto result = GreedyBySizeAllocationPlanner(create_test_allocation_specs());
  EXPECT_EQ(result.total_size, 124);

  // expected offsets per input spec, in input order (figure 6)
  const std::vector<size_t> expected_offsets = {0, 32, 64, 100, 116, 0, 104, 64};

  auto& allocations = result.allocations;
  // guard the indexed accesses below: the original indexed up to [7]
  // without checking the planner actually produced 8 allocations
  ASSERT_EQ(allocations.size(), expected_offsets.size());
  for (size_t i = 0; i < expected_offsets.size(); ++i) {
    EXPECT_EQ(allocations[i].offset, expected_offsets[i]) << "spec index " << i;
  }
}
TEST(LayoutPlannerAlgorithmTests, TestBump) {
  const auto specs = create_test_allocation_specs();
  const auto plan = BumpAllocationPlanner(create_test_allocation_specs());

  // bump allocation lays specs out back-to-back, so each offset is the
  // running sum of the preceding sizes
  size_t expected_offset = 0;
  for (auto&& [idx, spec] : c10::enumerate(specs)) {
    EXPECT_EQ(plan.allocations[idx].offset, expected_offset);
    expected_offset += spec.size;
  }

  // total size is the sum of all spec sizes
  EXPECT_EQ(plan.total_size, expected_offset);
}

View File

@ -0,0 +1,24 @@
#include <torch/nativert/executor/memory/Bump.h>
namespace torch::nativert {
// Lay every spec out contiguously, one after another. Lifetimes are
// ignored entirely, so the plan's total size is simply the sum of all
// spec sizes.
LayoutPlan BumpAllocationPlanner(
    const std::vector<AllocationSpec>& allocation_specs) {
  LayoutPlan plan;
  plan.allocations.reserve(allocation_specs.size());

  size_t offset = 0;
  for (const auto& spec : allocation_specs) {
    plan.allocations.push_back(Allocation{spec.size, offset});
    offset += spec.size;
  }
  plan.total_size = offset;

  return plan;
}
} // namespace torch::nativert

View File

@ -0,0 +1,13 @@
#pragma once

#include <torch/nativert/executor/memory/LayoutPlannerAlgorithm.h>

namespace torch::nativert {

// lay out all tensors contiguously in memory
// this doesn't take into account lifetimes,
// it literally just puts them all next to each other
//
// the returned plan's total_size is the sum of all spec sizes;
// runs in O(n) over allocation_specs.
LayoutPlan BumpAllocationPlanner(
    const std::vector<AllocationSpec>& allocation_specs);

} // namespace torch::nativert

View File

@ -0,0 +1,166 @@
#include <algorithm>
#include <iomanip>
#include <limits>
#include <optional>
#include <vector>

#include <c10/util/Enumerate.h>
#include <c10/util/Logging.h>
#include <c10/util/irange.h>

#include <torch/nativert/executor/memory/GreedyBySize.h>
namespace {
using namespace torch::nativert;
// we need to track the original order in which allocations were made
// since they will be re-sorted between iterations
//
// extends Allocation (size + offset) with the two bookkeeping indices
// described below; file-local, never escapes the planner.
struct GreedyAllocation : public Allocation {
  explicit GreedyAllocation(
      Allocation allocation,
      size_t allocation_idx,
      size_t input_spec_idx)
      : Allocation(allocation),
        allocation_index(allocation_idx),
        input_spec_index(input_spec_idx) {}
  // we need to maintain the allocation ordering s.t., we can look up
  // previous allocations directly from descending_allocation_specs_
  // even after allocations has been re-sorted, which happens after
  // each allocation is complete.
  //
  // i.e., this index represents the index of the spec that was used
  // to create this allocation inside descending_allocation_specs_
  // AFTER the sorting was completed.
  size_t allocation_index{0};
  // index of the spec associated with this allocation
  // in the event that the specs get re-ordered
  // in the process of creating allocations
  // e.g.,
  // allocation_specs[sX, sY, sZ]
  //                   ^    ^   ^
  //           values[vX, vY, vZ]
  //
  // means that an allocation created from sY
  // will have an input_spec_index of 1
  //
  // this allows us to return to the original
  // ordering before returning the allocations
  size_t input_spec_index{0};
};
// pairs a spec with its original (pre-sort) position in the input vector
struct AllocationSpecWithIndex {
  const AllocationSpec* spec; // non-owning; points into the caller's vector
  size_t index; // original index into allocation_specs
};
// associate specs with their original (unsorted) index
// and then sort them in descending order by byte size
std::vector<AllocationSpecWithIndex> prepare_allocation_specs(
    const std::vector<AllocationSpec>& allocation_specs) {
  std::vector<AllocationSpecWithIndex> specs;
  specs.reserve(allocation_specs.size());

  for (const auto i : c10::irange(allocation_specs.size())) {
    specs.push_back({&allocation_specs[i], i});
  }

  // stable sort so equal-size specs keep their input order; std::sort
  // leaves ties in unspecified order, which would make the resulting
  // layout non-deterministic across stdlib implementations
  std::stable_sort(specs.begin(), specs.end(), [](auto& lhs, auto& rhs) {
    return lhs.spec->size > rhs.spec->size;
  });

  return specs;
}
} // namespace
namespace torch::nativert {
// https://arxiv.org/pdf/2001.03288
//
// greedily place each spec, considered in descending size order, at the
// lowest offset where it fits without overlapping (in memory) any
// already-placed allocation whose lifetime intersects its own.
// quadratic in the number of specs.
LayoutPlan GreedyBySizeAllocationPlanner(
    const std::vector<AllocationSpec>& allocation_specs) {
  LayoutPlan plan;

  auto descending_allocation_specs =
      prepare_allocation_specs(allocation_specs);

  // kept sorted by offset (see the std::lower_bound insertion below)
  std::vector<GreedyAllocation> allocations;
  allocations.reserve(allocation_specs.size());

  // returns the lowest offset at which `spec` can be placed: either the
  // smallest gap between lifetime-overlapping allocations that still fits
  // it, or one past the highest overlapping allocation if no gap fits
  auto get_next_offset = [&](const AllocationSpec& spec) -> size_t {
    size_t prev_offset = 0;
    std::optional<size_t> best_offset = std::nullopt;
    size_t smallest_gap = std::numeric_limits<size_t>::max();

    for (const auto& alloc : allocations) {
      // allocations whose lifetimes don't intersect can share memory,
      // so they impose no constraint on where `spec` may be placed
      if (auto* allocated_spec =
              descending_allocation_specs.at(alloc.allocation_index).spec;
          allocated_spec->not_overlapping_with(spec)) {
        continue;
      }

      if (alloc.offset > prev_offset) {
        // gap before this allocation: take the smallest gap that fits
        if (size_t gap = alloc.offset - prev_offset;
            gap >= spec.size && gap < smallest_gap) {
          smallest_gap = gap;
          best_offset = prev_offset;
        }
      }
      prev_offset = std::max(prev_offset, alloc.offset + alloc.size);
    }

    return best_offset.value_or(prev_offset);
  };

  size_t total_allocation_size = 0;
  for (const auto&& [allocation_index, spec_with_original_index] :
       c10::enumerate(descending_allocation_specs)) {
    auto& spec = spec_with_original_index.spec;

    auto new_allocation = GreedyAllocation(
        Allocation{spec->size, get_next_offset(*spec)},
        allocation_index,
        spec_with_original_index.index);

    total_allocation_size += new_allocation.size;
    plan.total_size =
        std::max(plan.total_size, new_allocation.offset + new_allocation.size);

    VLOG(1) << "allocation with interval " << spec->lifetime.start << "-->"
            << spec->lifetime.end << " placed at offset "
            << new_allocation.offset;

    // insert new allocation while maintaining relative-offset ordering
    // the algorithm is already quadratic because of get_next_offset
    // so this is negligible
    auto it = std::lower_bound(
        allocations.begin(),
        allocations.end(),
        new_allocation,
        [](auto& lhs, auto& rhs) { return lhs.offset < rhs.offset; });
    allocations.insert(it, new_allocation);
  }

  // sort allocations so their ordering is consistent with the input specs
  std::sort(allocations.begin(), allocations.end(), [](auto& lhs, auto& rhs) {
    return lhs.input_spec_index < rhs.input_spec_index;
  });

  plan.allocations.reserve(allocations.size());
  std::move(
      allocations.begin(),
      allocations.end(),
      std::back_inserter(plan.allocations));

  if (plan.total_size > 0) {
    // named casts instead of the original C-style (float) casts
    VLOG(1) << std::fixed << std::setprecision(2)
            << "greedy-by-size bytes saved over strictly increasing: "
            << (1.0 -
                (static_cast<float>(plan.total_size) /
                 static_cast<float>(total_allocation_size))) *
            100
            << "% (" << total_allocation_size << " - " << plan.total_size
            << " = " << (total_allocation_size - plan.total_size) << " bytes)";
  }

  return plan;
}
} // namespace torch::nativert

View File

@ -0,0 +1,10 @@
#pragma once

#include <torch/nativert/executor/memory/LayoutPlannerAlgorithm.h>

namespace torch::nativert {

// greedy-by-size layout planner (https://arxiv.org/pdf/2001.03288):
// considers specs in descending size order and places each at the
// lowest offset where it does not overlap, in memory, any previously
// placed allocation whose lifetime intersects its own.
LayoutPlan GreedyBySizeAllocationPlanner(
    const std::vector<AllocationSpec>& allocation_specs);

} // namespace torch::nativert