Reland: Add PyObject preservation for UntypedStorage (#103907)

This relands #97470 after #102553 reverted it. This PR attempts to fix the internal failure by avoiding an unnecessary intermediate storage buffer allocation in `c10::newStorageImplFromRefcountedDataPtr`.

Part of #91395

Pull Request resolved: https://github.com/pytorch/pytorch/pull/103907
Approved by: https://github.com/ezyang
This commit is contained in:
Kurt Mohler
2023-09-07 04:24:08 +00:00
committed by PyTorch MergeBot
parent 35974234c4
commit 56b848157c
26 changed files with 1169 additions and 260 deletions

View File

@ -26,6 +26,7 @@
#include <torch/csrc/tensor/python_tensor.h>
#include <torch/csrc/utils/pybind.h>
#include <torch/csrc/utils/pycfunction_helpers.h>
#include <torch/csrc/utils/pyobject_preservation.h>
#include <torch/csrc/utils/python_arg_parser.h>
#include <torch/csrc/utils/python_dispatch.h>
#include <torch/csrc/utils/python_strings.h>
@ -268,7 +269,8 @@ PyObject* THPVariable_Wrap(at::TensorBase var) {
}
c10::optional<PyObject*> mb_obj =
var.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(getPyInterpreter());
var.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(
getPyInterpreter(), /*ignore_hermetic_tls=*/false);
c10::impl::PyInterpreterStatus status;
if (mb_obj.has_value()) {
auto obj = *mb_obj;
@ -345,7 +347,8 @@ bool isResurrectable(THPVariable* self) {
}
// Check if this is hermetic. If it is, no resurrection.
if (tensor.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(
getPyInterpreter()) != c10::make_optional((PyObject*)self)) {
getPyInterpreter(), /*ignore_hermetic_tls=*/false) !=
c10::make_optional((PyObject*)self)) {
return false;
}
return true;
@ -369,7 +372,16 @@ static bool THPVariable_tryResurrect(THPVariable* self) {
TORCH_INTERNAL_ASSERT(
!tensor.unsafeGetTensorImpl()->pyobj_slot()->owns_pyobj());
tensor.unsafeGetTensorImpl()->pyobj_slot()->set_owns_pyobj(true);
c10::TensorImpl* tensor_impl = tensor.unsafeGetTensorImpl();
auto maybe_pyobj = tensor_impl->pyobj_slot()->check_pyobj(
getPyInterpreter(),
/*ignore_hermetic_tls=*/false);
TORCH_INTERNAL_ASSERT(
maybe_pyobj.has_value(),
"Trying to preserve a Python tensor whose PyObjectSlot does not have a PyObject");
tensor_impl->pyobj_slot()->set_owns_pyobj(true);
// Resurrect the Python object. This is something CPython does
// internally occasionally, see
@ -443,7 +455,8 @@ static int THPVariable_clear(THPVariable* self) {
if (!self->cdata.unsafeIsBorrowed() &&
tensor.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(
getPyInterpreter()) == c10::make_optional((PyObject*)self)) {
getPyInterpreter(), /*ignore_hermetic_tls=*/false) ==
c10::make_optional((PyObject*)self)) {
// TODO: empirically, on OS X this assert appears to be untrue
// In test_py_tensors_multi_async_call - ProcessGroupRpcTestWithSpawn
// distributed/rpc/test_process_group_agent.py
@ -1738,26 +1751,6 @@ PyObject* THPVariable_pynew(
END_HANDLE_TH_ERRORS
}
// Walks the member table of `type` (`type->tp_members`, with the entry count
// taken from `Py_SIZE(type)`) and releases every object reference stored in
// `self` through a writable `T_OBJECT_EX` member.
//
// For each such member the stored pointer is read at `self + offset`; if it
// is non-null, the slot is set to nullptr and then the reference is dropped.
static void clear_slots(PyTypeObject* type, PyObject* self) {
  const Py_ssize_t member_count = Py_SIZE(type);
  PyMemberDef* member = type->tp_members;
  for (Py_ssize_t idx = 0; idx < member_count; ++idx, ++member) {
    // Only writable object-valued members hold a reference to release.
    const bool is_writable_object_slot =
        member->type == T_OBJECT_EX && !(member->flags & READONLY);
    if (!is_writable_object_slot) {
      continue;
    }
    PyObject** slot = reinterpret_cast<PyObject**>(
        reinterpret_cast<char*>(self) + member->offset);
    PyObject* held = *slot;
    if (held == nullptr) {
      continue;
    }
    // The slot is nulled before the decref; presumably so that any code
    // triggered by the decref never sees a stale pointer through `self`.
    *slot = nullptr;
    Py_DECREF(held);
  }
}
// NB: this is not the tp_dealloc on THPVariable; instead, it's the dealloc
// on subclasses. It's never valid to construct a THPVariable, so it's not
// necessary to implement the dealloc for that case.
@ -1877,8 +1870,8 @@ static PyObject* THPVariable_NewWithVar(
// This function overwrites the Tensor's pyobj field without extra checks.
// Make sure it is not set; otherwise we would leak memory.
auto mb_obj =
_var.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(getPyInterpreter());
auto mb_obj = _var.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(
getPyInterpreter(), /*ignore_hermetic_tls=*/false);
// Under some circumstances, we may attempt to create a new Python
// object for a variable that already has a Python object. The most common