[sigmoid] memory planner C10 deps (#151275)

Summary: perf-sensitive util functions for use in our memory planner

Test Plan: CI

Differential Revision: D73002726

Pull Request resolved: https://github.com/pytorch/pytorch/pull/151275
Approved by: https://github.com/georgiaphillips
This commit is contained in:
dolpm
2025-04-24 01:46:29 +00:00
committed by PyTorch MergeBot
parent c91acad73a
commit 4ac2ee573d
6 changed files with 42 additions and 7 deletions

View File

@ -40,6 +40,10 @@ class C10_API DataPtr {
// Arrow access: exposes the raw data pointer held by the underlying
// UniqueVoidPtr. Non-owning; the DataPtr retains ownership.
void* operator->() const {
return ptr_.get();
}
// Swap in a new data/context pointer without running the old deleter.
// Thin forwarder to UniqueVoidPtr::unsafe_reset_data_and_ctx; returns
// true on success, false if the underlying pointer refuses the reset
// (its deleter is not the no-op one, so skipping it would leak).
C10_ALWAYS_INLINE bool /* success */ unsafe_reset_data_and_ctx(
void* new_data_and_ctx) {
return ptr_.unsafe_reset_data_and_ctx(new_data_and_ctx);
}
// Drop the held pointer by delegating to the underlying UniqueVoidPtr.
void clear() {
ptr_.clear();
}

View File

@ -121,6 +121,11 @@ struct C10_API StorageImpl : public c10::intrusive_ptr_target {
size_bytes_is_heap_allocated_ = false;
}
// Fast-path byte-count update that writes directly into the SymInt's
// inline payload, bypassing SymInt assignment.
// Precondition (debug-asserted): size_bytes_ is not heap-allocated,
// i.e. it holds a plain integer rather than a symbolic node.
void unsafe_set_nbytes(size_t size_bytes) {
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!size_bytes_is_heap_allocated_);
size_bytes_.unsafe_set_data(size_bytes);
}
// General setter: takes ownership of the given SymInt byte count
// (sink parameter, moved into place).
void set_nbytes(c10::SymInt size_bytes) {
size_bytes_ = std::move(size_bytes);
}

View File

@ -224,6 +224,11 @@ class C10_API SymInt {
operator SymFloat() const;
// Directly overwrite the inline integer payload with nbytes.
// Precondition (debug-asserted): this SymInt is not heap-allocated,
// so data_ is a plain integer and can be clobbered safely.
// The size_t -> int64_t cast assumes nbytes fits in int64_t.
void unsafe_set_data(size_t nbytes) {
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!is_heap_allocated());
data_ = static_cast<int64_t>(nbytes);
}
// Don't use this. Prefer maybe_as_int instead
int64_t as_int_unchecked() const {
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!is_heap_allocated());

View File

@ -64,17 +64,11 @@ static inline bool is_thp_alloc_enabled() {
return value;
}
// Alignment for CPU allocations: page-size alignment when the THP
// (transparent huge pages) toggle is enabled, otherwise gAlignment.
// Note: nbytes is unused here — the decision depends only on the
// global THP enable flag, not on the allocation size.
inline size_t c10_compute_alignment(size_t nbytes) {
// sysconf result is cached once; thread-safe static init.
static const auto pagesize = sysconf(_SC_PAGESIZE);
// for kernels that don't provide page size, default it to 4K
const size_t thp_alignment = (pagesize < 0 ? gPagesize : pagesize);
return (is_thp_alloc_enabled() ? thp_alignment : gAlignment);
}
// Whether this allocation qualifies for THP: the global THP toggle
// must be on AND the buffer must be at least gAlloc_threshold_thp bytes.
inline bool is_thp_alloc(size_t nbytes) {
// enable thp (transparent huge pages) for larger buffers
return (is_thp_alloc_enabled() && (nbytes >= gAlloc_threshold_thp));
}
#elif !defined(__ANDROID__) && !defined(_MSC_VER)
constexpr size_t c10_compute_alignment([[maybe_unused]] size_t nbytes) {
return gAlignment;
@ -86,6 +80,15 @@ constexpr bool is_thp_alloc([[maybe_unused]] size_t nbytes) {
#endif
} // namespace
#if defined(__linux__) && !defined(__ANDROID__)
// Exported (non-inline) alignment helper: page-size alignment only when
// this particular allocation qualifies for THP (per is_thp_alloc, which
// checks both the enable flag and the size threshold); else gAlignment.
size_t c10_compute_alignment(size_t nbytes) {
// sysconf result is cached once; thread-safe static init.
static const auto pagesize = sysconf(_SC_PAGESIZE);
// for kernels that don't provide page size, default it to 4K
const size_t thp_alignment = (pagesize < 0 ? gPagesize : pagesize);
return (is_thp_alloc(nbytes) ? thp_alignment : gAlignment);
}
#endif
void* alloc_cpu(size_t nbytes) {
if (nbytes == 0) {
return nullptr;

View File

@ -1,6 +1,7 @@
#pragma once
#include <c10/macros/Export.h>
#include <c10/macros/Macros.h>
#include <cstddef>
@ -9,6 +10,10 @@ namespace c10 {
C10_API void* alloc_cpu(size_t nbytes);
C10_API void free_cpu(void* data);
#if defined(__linux__) && !defined(__ANDROID__)
C10_API size_t c10_compute_alignment(size_t nbytes);
#endif
#ifdef USE_MIMALLOC_ON_MKL
namespace mi_malloc_wrapper {
C10_API void* c10_mi_malloc(size_t size);

View File

@ -60,6 +60,19 @@ class UniqueVoidPtr {
// Raw, non-owning access to the stored data pointer.
void* get() const {
return data_;
}
// Replace both data_ and ctx_ with new_data_and_ctx WITHOUT invoking
// the current deleter. Refuses (returns false) unless the deleter is
// the no-op deleteNothing — otherwise skipping it would leak/corrupt.
bool /* success */ unsafe_reset_data_and_ctx(void* new_data_and_ctx) {
if (C10_UNLIKELY(ctx_.get_deleter() != &deleteNothing)) {
return false;
}
// seems quicker than calling the no-op deleter when we reset
// NOLINTNEXTLINE(bugprone-unused-return-value)
ctx_.release();
ctx_.reset(new_data_and_ctx);
data_ = new_data_and_ctx;
return true;
}
// Non-owning access to the context pointer managed by ctx_.
void* get_context() const {
return ctx_.get();
}