Files
pytorch/c10/util/ExclusivelyOwnedTensorTraits.h
Ben Niu 281f8f407e Combine strong and weak refcounts in intrusive_ptr in a single refcount (#163394)
Summary:
Currently, we assume that refcount_ and weakcount_ are always stored right next to each other at an 8-byte-aligned address. Based on this assumption, we load 8 bytes in intrusive_ptr::reset_ to check the values of both counts at once. However, that assumption is not guaranteed by the C++ language standard, so relying on it is essentially undefined behavior.

This change eliminates that assumption by combining refcount_ and weakcount_ into a single 64-bit count: the lower 32 bits hold refcount_ and the upper 32 bits hold weakcount_.

In addition to eliminating the undefined behavior, the change also eliminates the read of weakcount_ after decrementing refcount_ in intrusive_ptr::reset_. This claws back the performance lost in https://github.com/pytorch/pytorch/pull/162784 for non-final refcount_ decrements.

Reviewed By: yfeldblum

Differential Revision: D82869192

Pull Request resolved: https://github.com/pytorch/pytorch/pull/163394
Approved by: https://github.com/Skylion007
2025-09-22 17:53:28 +00:00

76 lines
2.2 KiB
C++

#pragma once
#include <c10/core/TensorImpl.h>
#include <c10/core/UndefinedTensorImpl.h>
#include <utility>
namespace c10 {
// Common ExclusivelyOwnedTraits implementation used by both caffe2::Tensor
// and at::TensorBase.
//
// The tensor object itself is the stored representation (repr_type), and the
// getImpl() accessors hand out pointers to that stored object.
template <typename TensorType>
struct ExclusivelyOwnedTensorTraits {
  using repr_type = TensorType;
  using pointer_type = TensorType*;
  using const_pointer_type = const TensorType*;

  // The "null" representation is a default-constructed TensorType.
  static repr_type nullRepr() {
    return TensorType();
  }

  // Builds the representation by forwarding the given arguments to a
  // TensorType constructor.
  template <typename... CtorArgs>
  static repr_type createInPlace(CtorArgs&&... ctorArgs) {
    return TensorType(std::forward<CtorArgs>(ctorArgs)...);
  }

  // Turns an rvalue tensor into the stored representation by moving it.
  static repr_type moveToRepr(TensorType&& x) {
    return std::move(x);
  }

  // Tears down the TensorImpl held by x.
  //
  // The impl pointer is released from x and, unless it is the
  // UndefinedTensorImpl singleton, deleted directly. In debug builds the
  // counts are checked first: both the refcount and the weakcount are
  // expected to be exactly 1 (or 0 for the undefined singleton).
  static void destroyOwned(TensorType& x) {
    TensorImpl* const toDestroy = x.unsafeReleaseTensorImpl();
    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
        toDestroy != nullptr, "Tensor somehow got null TensorImpl?");

    // UndefinedTensorImpl doesn't get its refcount incremented, so a count
    // of 0 is acceptable for it in the checks below.
    const bool isUndefined = toDestroy == UndefinedTensorImpl::singleton();

    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
        toDestroy->refcount() == 1 ||
        (toDestroy->refcount() == 0 && isUndefined),
        "ExclusivelyOwned<Tensor> destroyed with isUndefined ",
        isUndefined,
        " and refcount ",
        toDestroy->refcount(),
        ", expected 1 or, if isUndefined, 0!");
    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
        toDestroy->weakcount() == 1 ||
        (toDestroy->weakcount() == 0 &&
        toDestroy == UndefinedTensorImpl::singleton()),
        "ExclusivelyOwned<Tensor> destroyed with isUndefined ",
        isUndefined,
        " and weakcount ",
        toDestroy->weakcount(),
        ", expected 1 or, if isUndefined, 0!");

    // The undefined singleton is never deleted from here.
    if (isUndefined) {
      return;
    }

#ifndef NDEBUG
    // Zero the combined count before deleting; this is needed to pass the
    // debug assertions in ~intrusive_ptr_target.
    toDestroy->combined_refcount_.store(0, std::memory_order_relaxed);
#endif
    delete toDestroy;
  }

  // Hands the tensor back to the caller by moving it out of x.
  static TensorType take(TensorType& x) {
    return std::move(x);
  }

  // Mutable access: the address of the stored representation.
  static pointer_type getImpl(repr_type& x) {
    return &x;
  }

  // Read-only access: the address of the stored representation.
  static const_pointer_type getImpl(const repr_type& x) {
    return &x;
  }
};
} // namespace c10