Summary: att

Test Plan: ci - unit tests still have some unresolved deps but will move them later.

Rollback Plan:

Differential Revision: D77320950

Pull Request resolved: https://github.com/pytorch/pytorch/pull/156897
Approved by: https://github.com/zhxchen17
207 lines
5.1 KiB
C++
#pragma once

#include <torch/nativert/executor/memory/LayoutPlanner.h>
#include <torch/nativert/executor/memory/LayoutPlannerAlgorithm.h>
#include <torch/nativert/executor/memory/LayoutPlannerSettings.h>

#include <c10/core/alignment.h>
#include <c10/core/impl/alloc_cpu.h>

// for std::memset and c10::irange used below
#include <c10/util/irange.h>
#include <cstring>

namespace torch::nativert {

class ExecutionFrame;
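
// This header collects the pieces used to realize a memory layout plan at
// execution time: a contiguous byte buffer that backs planned tensors, an
// arena of placement-constructed StorageImpls, and the LayoutManager that
// wires both into an ExecutionFrame.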

// A single contiguous byte buffer; planned tensors are carved out of it at
// fixed offsets computed by the layout planner.
struct ContiguousLayoutBuffer {
 public:
  ContiguousLayoutBuffer() = default;
  ~ContiguousLayoutBuffer() {
    deallocate();
  }

  ContiguousLayoutBuffer(ContiguousLayoutBuffer&& other) = delete;
  ContiguousLayoutBuffer(const ContiguousLayoutBuffer& other) = delete;
  ContiguousLayoutBuffer operator=(ContiguousLayoutBuffer&& other) = delete;
  ContiguousLayoutBuffer& operator=(const ContiguousLayoutBuffer& other) =
      delete;

  void* get_ptr_with_offset(size_t offset) {
    void* raw_ptr = data_ptr_.get();
    TORCH_CHECK_NOTNULL(raw_ptr);
    TORCH_CHECK_LE(offset, size_);
    return reinterpret_cast<void*>(
        reinterpret_cast<uint8_t*>(raw_ptr) + offset);
  }

  size_t size() {
    return size_;
  }

  void allocate(size_t size);

  void deallocate() {
    VLOG(1) << "deallocating layout buffer of size " << size_;
    size_ = 0;
    data_ptr_ = {};
  }

  void clear(size_t size) {
    VLOG(1) << "clearing first " << size << " bytes of layout buffer of size "
            << size_;
    TORCH_CHECK_LE(size, size_);
    std::memset(data_ptr_.get(), 0, size);
  }

 private:
  // the size of the buffer in bytes
  size_t size_{0};
  // the dataptr returned by the allocator
  at::DataPtr data_ptr_{};
};
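
// Rough usage sketch (hypothetical sizes and offsets, not from this file):
// the planner assigns each managed tensor an offset into one big allocation.
//
//   ContiguousLayoutBuffer buf;
//   buf.allocate(1024);                       // one backing allocation
//   void* t0 = buf.get_ptr_with_offset(0);    // first planned tensor
//   void* t1 = buf.get_ptr_with_offset(256);  // second, at a planned offset
//   buf.clear(512);                           // zero the first 512 bytes
//   buf.deallocate();                         // also runs on destruction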

// A fixed-capacity arena of at::StorageImpl objects. Storages for planned
// tensors are placement-constructed into the arena via to_managed() and
// destroyed in bulk by deallocate().
struct ContiguousStorageImplBuffer {
  ContiguousStorageImplBuffer() = default;
  ~ContiguousStorageImplBuffer() {
    deallocate();
  }

  ContiguousStorageImplBuffer(ContiguousStorageImplBuffer&& other) = delete;
  ContiguousStorageImplBuffer(const ContiguousStorageImplBuffer& other) =
      delete;
  ContiguousStorageImplBuffer operator=(ContiguousStorageImplBuffer&& other) =
      delete;
  ContiguousStorageImplBuffer& operator=(
      const ContiguousStorageImplBuffer& other) = delete;

  void deallocate() {
    if (buffer_ == nullptr) {
      return;
    }

    // run the destructor of every placement-constructed storage before
    // releasing the raw byte array that backs the arena
    for (const size_t idx : c10::irange(size_)) {
      buffer_[idx].~StorageImpl();
    }

    delete[] reinterpret_cast<unsigned char*>(buffer_);
    buffer_ = nullptr;
    size_ = capacity_ = 0;
  }

  void allocate(size_t capacity) {
    if (size_ > 0) {
      deallocate();
    }

    capacity_ = capacity;

    // guard that the plain byte allocation below is sufficiently aligned
    // for StorageImpl
    static_assert(alignof(at::StorageImpl) <= 8);
    buffer_ = reinterpret_cast<at::StorageImpl*>(
        new unsigned char[capacity * sizeof(at::StorageImpl)]);
  }

  size_t capacity() {
    return capacity_;
  }

  size_t size() {
    return size_;
  }

  c10::StorageImpl* buffer() const {
    return buffer_;
  }

  c10::StorageImpl& at(size_t i) {
    TORCH_CHECK_LT(i, size_)
        << "requested storage index " << i << " out of bounds " << size_;
    return buffer_[i];
  }

  void reset_all() {
    for (const size_t idx : c10::irange(size_)) {
      buffer_[idx].reset();
    }
  }

  // placement-construct a storage in the arena with the same size,
  // allocator, and resizability as s
  c10::StorageImpl& to_managed(at::StorageImpl& s) {
    TORCH_CHECK_LT(size_, capacity_);
    return *(new (&buffer_[size_++]) at::StorageImpl(
        at::StorageImpl::use_byte_size_t(),
        static_cast<int64_t>(s.nbytes()),
        s.allocator(),
        s.resizable()));
  }

 private:
  size_t size_{0};
  size_t capacity_{0};
  c10::StorageImpl* buffer_{nullptr};
};
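
// Rough usage sketch (hypothetical names, for illustration only): the arena
// is sized once, then storages are placement-constructed into it as tensors
// are brought under management.
//
//   ContiguousStorageImplBuffer arena;
//   arena.allocate(/* capacity = */ num_planned_tensors);
//   at::StorageImpl& managed = arena.to_managed(unmanaged_storage);
//   ...
//   arena.reset_all();  // release each storage's data; impls stay alive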

// Lifecycle state for LayoutManager: values are collected first, then
// managed storages are set up, then the planned layout is (re)used.
enum class LayoutManagerState { WaitingForValues, AllocatingStorages, Running };

class LayoutManager {
 public:
  LayoutManager(
      LayoutPlanner& planner,
      ExecutionFrame& parent_frame,
      torch::nativert::LayoutManagerSettings settings = {});
  ~LayoutManager() = default;

  void allocate();
  void deallocate_and_plan();

 private:
#ifdef LayoutPlannerTests_TEST_FRIENDS
  LayoutPlannerTests_TEST_FRIENDS;
#endif

  // round nbytes up to a multiple of the platform allocation alignment
  static size_t get_aligned_nbytes(size_t nbytes) {
#if defined(__linux__) && !defined(__ANDROID__)
    auto alignment = c10::c10_compute_alignment(nbytes);
#else
    auto alignment = c10::gAlignment;
#endif
    return (nbytes + alignment - 1) & (~(alignment - 1));
  }
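
  // Worked example of the rounding in get_aligned_nbytes above, assuming a
  // 64-byte alignment:
  //   nbytes = 100 -> (100 + 63) & ~63 = 163 & ~63 = 128
  //   nbytes = 128 -> (128 + 63) & ~63 = 191 & ~63 = 128
  // i.e. sizes are rounded up to the next multiple of the alignment.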

  void allocate_plan(const LayoutPlan& plan);
  void ensure_managed_storages(bool allocate);

  void populate_tensor_values();
  void try_update_historical_max_nbytes();

  LayoutPlanner& planner_;
  ExecutionFrame& parent_frame_;

  // values the planner does not manage; they keep their own storages
  std::vector<c10::IValue*> unplanned_ivalues_;

  // tensors whose storages live in the contiguous layout buffer, plus the
  // largest nbytes observed locally for each
  std::vector<const at::Tensor*> planned_tensors_;
  std::vector<size_t> planned_tensors_max_nbytes_local_;

  ContiguousLayoutBuffer layout_buffer_;
  ContiguousStorageImplBuffer storage_impl_buffer_;

  LayoutManagerState state_{LayoutManagerState::WaitingForValues};
  torch::nativert::LayoutManagerSettings settings_;
};

// RAII helper that calls allocate() on construction and
// deallocate_and_plan() on destruction.
class LayoutManagerGuard {
 public:
  explicit LayoutManagerGuard(LayoutManager& manager) : manager_(manager) {
    manager_.allocate();
  }
  ~LayoutManagerGuard() {
    manager_.deallocate_and_plan();
  }

  LayoutManagerGuard(LayoutManagerGuard&& other) = delete;
  LayoutManagerGuard(const LayoutManagerGuard& other) = delete;
  LayoutManagerGuard operator=(LayoutManagerGuard&& other) = delete;
  LayoutManagerGuard& operator=(const LayoutManagerGuard& other) = delete;

  LayoutManager& manager_;
};
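
// Typical scope-based usage (a sketch; frame and manager construction
// elided, run_frame is a hypothetical caller):
//
//   void run_frame(LayoutManager& mgr) {
//     LayoutManagerGuard guard(mgr);  // allocate() happens here
//     ... execute the frame ...
//   }  // deallocate_and_plan() on scope exit, even if execution throws
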
} // namespace torch::nativert