Files
pytorch/torch/csrc/autograd/python_variable.h
Sam Gross 6ca8cc6edf Rework PyObject preservation (#166342)
Make the PyObject preservation scheme thread-safe with free threaded (nogil) Python. The general idea is:

* Python Tensor and Storage objects always hold a strong reference to their underlying c10 object
* c10 objects hold a strong reference to their Python objects if there's at least one other reference to the c10 object

This is implemented in `intrusive_ptr`:

* The topmost bit (`kHasPyObject`) from the weakref count is now used to indicate if the `intrusive_ptr_target` has an associated PyObject. So `kHasPyObject` is one bit, the weakref count is now 31 bits and the strong refcount remains 32 bits.
* When the reference count increases from one to two and `kHasPyObject` is set, we incref the associated Python object to ensure that it's kept alive.
* When the reference count decreases from two to one (i.e., there are no C++ references to the `intrusive_ptr_target` other than from the Python object), we decref the associated Python object to break the cycle.

Other benefits:

* We can delete a lot of the copypasta from Python internal `subtype_dealloc`
* This fixes the weakref and GC bugs we had in the previous scheme. Python weakrefs on Tensors and Storages should just work as expected now.

Risks:

* Extra branch for reference count operations on `intrusive_ptr<TensorImpl>`, `intrusive_ptr<StorageImpl>`, and the generic `intrusive_ptr<intrusive_ptr_target>` even when we're not using Python.
* It's a big change
Pull Request resolved: https://github.com/pytorch/pytorch/pull/166342
Approved by: https://github.com/albanD
2025-11-10 21:47:53 +00:00

116 lines
3.5 KiB
C++

#pragma once
#include <ATen/core/Tensor.h>
#include <torch/csrc/python_headers.h>
#include <torch/csrc/utils/pythoncapi_compat.h>
#include <ATen/core/function_schema.h>
#include <pybind11/pybind11.h>
#include <torch/csrc/Exceptions.h>
#include <torch/csrc/Export.h>
#include <torch/csrc/autograd/variable.h>
#include <torch/csrc/utils/pybind.h>
namespace py = pybind11;
// Python object that backs torch.autograd.Variable
// A PyObject wrapper around an at::Tensor; this is the C-level layout of
// every `torch.Tensor` instance.
struct THPVariable {
PyObject_HEAD
// Payload
// The wrapped C++ tensor. Holding it by value keeps a strong reference to
// the underlying TensorImpl for the lifetime of the Python object.
at::Tensor cdata;
// Hooks to be run on backwards pass (corresponds to Python attr
// '_backwards_hooks', set by 'register_hook')
PyObject* backward_hooks = nullptr;
// Hooks to be run in the backwards pass after accumulate grad,
// i.e., after the .grad has been set (corresponds to Python attr
// '_post_accumulate_grad_hooks', set by 'register_post_accumulate_grad_hook')
PyObject* post_accumulate_grad_hooks = nullptr;
};
// Registers `python_tensor_class` as the Python tensor class associated with
// `device` (implementation in the .cpp; presumably used by out-of-tree
// device backends — confirm against the definition).
TORCH_PYTHON_API void registerPythonTensorClass(
const std::string& device,
PyObject* python_tensor_class);
// Activates GPU tracing support. NOTE(review): semantics live in the .cpp;
// verify before relying on this description.
TORCH_PYTHON_API void activateGPUTrace();
// The Python `torch.Tensor` type object (set during module init).
TORCH_PYTHON_API extern PyObject* THPVariableClass;
// The Python `torch.nn.Parameter` type object (set during module init).
TORCH_PYTHON_API extern PyObject* ParameterClass;
// Module initialization entry point; returns false on failure.
bool THPVariable_initModule(PyObject* module);
// Wraps a C++ tensor in a Python Tensor object, returning a new reference.
// The rvalue overload may steal `var`'s contents.
TORCH_PYTHON_API PyObject* THPVariable_Wrap(at::TensorBase&& var);
TORCH_PYTHON_API PyObject* THPVariable_Wrap(const at::TensorBase& var);
inline bool THPVariable_CheckTypeExact(PyTypeObject* tp) {
  // Check that a python object is a `Tensor`, but not a `Tensor` subclass.
  // (A subclass could have different semantics.) The one exception is
  // Parameter, which is used for Python bookkeeping but is equivalent to
  // Tensor as far as C++ is concerned.
  if (tp == (PyTypeObject*)THPVariableClass) {
    return true;
  }
  return tp == (PyTypeObject*)ParameterClass;
}
inline bool THPVariable_CheckExact(PyObject* obj) {
  // True iff obj's type is exactly Tensor or Parameter (no subclasses).
  PyTypeObject* const type = Py_TYPE(obj);
  return THPVariable_CheckTypeExact(type);
}
inline bool THPVariable_Check(PyObject* obj) {
  // If the Tensor class hasn't been registered yet, nothing is a Tensor.
  if (THPVariableClass == nullptr) {
    return false;
  }
  // Fast path: exact Tensor/Parameter type, no isinstance machinery.
  if (THPVariable_CheckExact(obj)) {
    return true;
  }
  // Slow path: full isinstance check (covers Tensor subclasses).
  const int is_instance = PyObject_IsInstance(obj, THPVariableClass);
  if (is_instance == -1) {
    throw python_error();
  }
  return is_instance != 0;
}
// Returns the C++ tensor backing a THPVariable. The reference is valid only
// as long as `var` is alive; no refcounts are touched.
inline const at::Tensor& THPVariable_Unpack(THPVariable* var) {
return var->cdata;
}
inline const at::Tensor& THPVariable_Unpack(PyObject* obj) {
  // Caller must guarantee obj is actually a THPVariable (see THPVariable_Check).
  auto* const var = reinterpret_cast<THPVariable*>(obj);
  return THPVariable_Unpack(var);
}
// Converts a stack of IValue arguments for `op` into Python (args, kwargs).
// Implementation lives in the .cpp.
std::pair<py::object, py::dict> parseIValuesToPyArgsKwargs(
const c10::OperatorHandle& op,
const std::vector<c10::IValue>& arguments);
// Pushes a Python return value `out` for `op` back onto the JIT stack;
// `msg` is presumably used for error reporting — confirm in the .cpp.
void pushPyOutToStack(
const c10::OperatorHandle& op,
torch::jit::Stack* stack,
py::object out,
const char* msg);
// Wraps a list of C++ variables into a new Python list of Tensors.
// Returns a new reference, or nullptr (with the Python error already set)
// if the list allocation fails.
inline PyObject* THPVariable_WrapList(
    const torch::autograd::variable_list& inputs) {
  PyObject* pyinput = PyList_New(static_cast<Py_ssize_t>(inputs.size()));
  // PyList_New returns nullptr on allocation failure; without this check the
  // loop below would invoke PyList_SET_ITEM on a null list (undefined
  // behavior). Propagate the failure to the caller instead.
  if (pyinput == nullptr) {
    return nullptr;
  }
  for (const auto i : c10::irange(inputs.size())) {
    // PyList_SET_ITEM steals the reference produced by THPVariable_Wrap.
    // NOTE(review): THPVariable_Wrap may itself return nullptr on error,
    // which would leave a null slot in the list — preexisting behavior.
    PyList_SET_ITEM(pyinput, i, THPVariable_Wrap(inputs[i]));
  }
  return pyinput;
}
inline torch::autograd::variable_list THPVariable_UnpackList(
    PyObject* pyresult) {
  // Converts a Python list of Tensors (or Nones) into a variable_list.
  // A None entry becomes a default-constructed (undefined) Variable.
  TORCH_CHECK(PyList_CheckExact(pyresult));
  const auto num_items = PyList_GET_SIZE(pyresult);
  torch::autograd::variable_list variables;
  variables.reserve(num_items);
  for (const auto idx : c10::irange(num_items)) {
    PyObject* const entry = PyList_GET_ITEM(pyresult, idx);
    if (Py_IsNone(entry)) {
      variables.emplace_back();
    } else {
      TORCH_INTERNAL_ASSERT_DEBUG_ONLY(THPVariable_Check(entry));
      variables.emplace_back(THPVariable_Unpack(entry));
    }
  }
  return variables;
}