diff --git a/c10/core/Allocator.h b/c10/core/Allocator.h
index 510519a77e49..4e07c0c08f39 100644
--- a/c10/core/Allocator.h
+++ b/c10/core/Allocator.h
@@ -40,6 +40,10 @@ class C10_API DataPtr {
   void* operator->() const {
     return ptr_.get();
   }
+  C10_ALWAYS_INLINE bool /* success */ unsafe_reset_data_and_ctx(
+      void* new_data_and_ctx) {
+    return ptr_.unsafe_reset_data_and_ctx(new_data_and_ctx);
+  }
   void clear() {
     ptr_.clear();
   }
diff --git a/c10/core/StorageImpl.h b/c10/core/StorageImpl.h
index e45b4953b9c9..f34a1baed7a4 100644
--- a/c10/core/StorageImpl.h
+++ b/c10/core/StorageImpl.h
@@ -121,6 +121,11 @@ struct C10_API StorageImpl : public c10::intrusive_ptr_target {
     size_bytes_is_heap_allocated_ = false;
   }
 
+  void unsafe_set_nbytes(size_t size_bytes) {
+    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!size_bytes_is_heap_allocated_);
+    size_bytes_.unsafe_set_data(size_bytes);
+  }
+
   void set_nbytes(c10::SymInt size_bytes) {
     size_bytes_ = std::move(size_bytes);
   }
diff --git a/c10/core/SymInt.h b/c10/core/SymInt.h
index f965d909c9e1..10fc5d6508ca 100644
--- a/c10/core/SymInt.h
+++ b/c10/core/SymInt.h
@@ -224,6 +224,11 @@ class C10_API SymInt {
 
   operator SymFloat() const;
 
+  void unsafe_set_data(size_t nbytes) {
+    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!is_heap_allocated());
+    data_ = static_cast<int64_t>(nbytes);
+  }
+
   // Don't use this. Prefer maybe_as_int instead
   int64_t as_int_unchecked() const {
     TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!is_heap_allocated());
diff --git a/c10/core/impl/alloc_cpu.cpp b/c10/core/impl/alloc_cpu.cpp
index 186c70a22cfc..92c6484a20cf 100644
--- a/c10/core/impl/alloc_cpu.cpp
+++ b/c10/core/impl/alloc_cpu.cpp
@@ -64,17 +64,11 @@ static inline bool is_thp_alloc_enabled() {
   return value;
 }
 
-inline size_t c10_compute_alignment(size_t nbytes) {
-  static const auto pagesize = sysconf(_SC_PAGESIZE);
-  // for kernels that don't provide page size, default it to 4K
-  const size_t thp_alignment = (pagesize < 0 ? gPagesize : pagesize);
-  return (is_thp_alloc_enabled() ? thp_alignment : gAlignment);
-}
-
 inline bool is_thp_alloc(size_t nbytes) {
   // enable thp (transparent huge pages) for larger buffers
   return (is_thp_alloc_enabled() && (nbytes >= gAlloc_threshold_thp));
 }
+
 #elif !defined(__ANDROID__) && !defined(_MSC_VER)
 constexpr size_t c10_compute_alignment([[maybe_unused]] size_t nbytes) {
   return gAlignment;
@@ -86,6 +80,15 @@ constexpr bool is_thp_alloc([[maybe_unused]] size_t nbytes) {
 #endif
 } // namespace
 
+#if defined(__linux__) && !defined(__ANDROID__)
+size_t c10_compute_alignment(size_t nbytes) {
+  static const auto pagesize = sysconf(_SC_PAGESIZE);
+  // for kernels that don't provide page size, default it to 4K
+  const size_t thp_alignment = (pagesize < 0 ? gPagesize : pagesize);
+  return (is_thp_alloc(nbytes) ? thp_alignment : gAlignment);
+}
+#endif
+
 void* alloc_cpu(size_t nbytes) {
   if (nbytes == 0) {
     return nullptr;
diff --git a/c10/core/impl/alloc_cpu.h b/c10/core/impl/alloc_cpu.h
index 8d506acf392f..213990269316 100644
--- a/c10/core/impl/alloc_cpu.h
+++ b/c10/core/impl/alloc_cpu.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <c10/macros/Export.h>
+#include <c10/macros/Macros.h>
 
 #include <cstddef>
 
@@ -9,6 +10,10 @@ namespace c10 {
 C10_API void* alloc_cpu(size_t nbytes);
 C10_API void free_cpu(void* data);
 
+#if defined(__linux__) && !defined(__ANDROID__)
+C10_API size_t c10_compute_alignment(size_t nbytes);
+#endif
+
 #ifdef USE_MIMALLOC_ON_MKL
 namespace mi_malloc_wrapper {
 C10_API void* c10_mi_malloc(size_t size);
diff --git a/c10/util/UniqueVoidPtr.h b/c10/util/UniqueVoidPtr.h
index 175697f7f63b..fe2a3c650cdd 100644
--- a/c10/util/UniqueVoidPtr.h
+++ b/c10/util/UniqueVoidPtr.h
@@ -60,6 +60,19 @@ class UniqueVoidPtr {
   void* get() const {
     return data_;
   }
+
+  bool /* success */ unsafe_reset_data_and_ctx(void* new_data_and_ctx) {
+    if (C10_UNLIKELY(ctx_.get_deleter() != &deleteNothing)) {
+      return false;
+    }
+    // seems quicker than calling the no-op deleter when we reset
+    // NOLINTNEXTLINE(bugprone-unused-return-value)
+    ctx_.release();
+    ctx_.reset(new_data_and_ctx);
+    data_ = new_data_and_ctx;
+    return true;
+  }
+
   void* get_context() const {
     return ctx_.get();
   }