mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 12:54:11 +08:00
Summary: Currently, we assume that refcount_ and weakcount_ are always stored right next to each other at an 8-byte-aligned address. Based on this assumption, we load 8 bytes in intrusive_ptr::reset_ to check the values of both counts. However, that assumption is not guaranteed by the C++ language standard, so relying on it is essentially undefined behavior. This change eliminates that assumption by combining refcount_ and weakcount_ into a single 64-bit count, using the lower 32 bits for refcount_ and the upper 32 bits for weakcount_. In addition to eliminating the undefined behavior, the change also eliminates the read of weakcount_ after decrementing refcount_ in intrusive_ptr::reset_. This claws back the performance lost in https://github.com/pytorch/pytorch/pull/162784 for non-final refcount_ decrements. Reviewed By: yfeldblum Differential Revision: D82869192 Pull Request resolved: https://github.com/pytorch/pytorch/pull/163394 Approved by: https://github.com/Skylion007
76 lines
2.2 KiB
C++
76 lines
2.2 KiB
C++
#pragma once
|
|
|
|
#include <c10/core/TensorImpl.h>
|
|
#include <c10/core/UndefinedTensorImpl.h>
|
|
|
|
#include <utility>
|
|
|
|
namespace c10 {
|
|
// Shared ExclusivelyOwnedTraits implementation between caffe2::Tensor and
|
|
// at::TensorBase.
|
|
template <typename TensorType>
|
|
struct ExclusivelyOwnedTensorTraits {
|
|
using repr_type = TensorType;
|
|
using pointer_type = TensorType*;
|
|
using const_pointer_type = const TensorType*;
|
|
|
|
static repr_type nullRepr() {
|
|
return TensorType();
|
|
}
|
|
|
|
template <class... Args>
|
|
static repr_type createInPlace(Args&&... args) {
|
|
return TensorType(std::forward<Args>(args)...);
|
|
}
|
|
|
|
static repr_type moveToRepr(TensorType&& x) {
|
|
return std::move(x);
|
|
}
|
|
|
|
static void destroyOwned(TensorType& x) {
|
|
TensorImpl* const toDestroy = x.unsafeReleaseTensorImpl();
|
|
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
|
|
toDestroy != nullptr, "Tensor somehow got null TensorImpl?");
|
|
// May be 0 because UndefinedTensorImpl doesn't get its refcount
|
|
// incremented.
|
|
const bool isUndefined = toDestroy == UndefinedTensorImpl::singleton();
|
|
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
|
|
toDestroy->refcount() == 1 ||
|
|
(toDestroy->refcount() == 0 && isUndefined),
|
|
"ExclusivelyOwned<Tensor> destroyed with isUndefined ",
|
|
isUndefined,
|
|
" and refcount ",
|
|
toDestroy->refcount(),
|
|
", expected 1 or, if isUndefined, 0!");
|
|
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
|
|
toDestroy->weakcount() == 1 ||
|
|
(toDestroy->weakcount() == 0 &&
|
|
toDestroy == UndefinedTensorImpl::singleton()),
|
|
"ExclusivelyOwned<Tensor> destroyed with isUndefined ",
|
|
isUndefined,
|
|
" and weakcount ",
|
|
toDestroy->weakcount(),
|
|
", expected 1 or, if isUndefined, 0!");
|
|
if (!isUndefined) {
|
|
#ifndef NDEBUG
|
|
// Needed to pass the debug assertions in ~intrusive_ptr_target.
|
|
toDestroy->combined_refcount_.store(0, std::memory_order_relaxed);
|
|
#endif
|
|
delete toDestroy;
|
|
}
|
|
}
|
|
|
|
static TensorType take(TensorType& x) {
|
|
return std::move(x);
|
|
}
|
|
|
|
static pointer_type getImpl(repr_type& x) {
|
|
return &x;
|
|
}
|
|
|
|
static const_pointer_type getImpl(const repr_type& x) {
|
|
return &x;
|
|
}
|
|
};
|
|
} // namespace c10
|