Revert "[BE] Make PyObjectSlot use a global PyInterpreter (#162659)"

This reverts commit 05ee8114f818a95745c812c3cd7aa8e784e61a9a.

Reverted https://github.com/pytorch/pytorch/pull/162659 on behalf of https://github.com/jeanschmidt due to seems to have introduced errors in linting see https://github.com/pytorch/pytorch/actions/runs/17750689989/job/50444910643 ([comment](https://github.com/pytorch/pytorch/pull/162659#issuecomment-3298626136))
This commit is contained in:
PyTorch MergeBot
2025-09-16 12:52:57 +00:00
parent cef815dc2c
commit 4db203f875
10 changed files with 74 additions and 43 deletions

View File

@ -127,8 +127,6 @@ jobs:
uses: ./.github/workflows/_linux-build.yml uses: ./.github/workflows/_linux-build.yml
needs: get-label-type needs: get-label-type
with: with:
# More memory is needed to build with asan
runner: linux.2xlarge.memory
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-jammy-py3.10-clang18-asan build-environment: linux-jammy-py3.10-clang18-asan
docker-image-name: ci-image:pytorch-linux-jammy-py3-clang18-asan docker-image-name: ci-image:pytorch-linux-jammy-py3-clang18-asan

View File

@ -3269,7 +3269,7 @@ class C10_TensorImpl_Size_Check_Dummy_Class : private TensorImpl {
is_le<sizeof(autograd_meta_), 16, FieldNameEnum::autograd_meta_>(); is_le<sizeof(autograd_meta_), 16, FieldNameEnum::autograd_meta_>();
is_le<sizeof(extra_meta_), 16, FieldNameEnum::extra_meta_>(); is_le<sizeof(extra_meta_), 16, FieldNameEnum::extra_meta_>();
are_equal<sizeof(version_counter_), 8, FieldNameEnum::version_counter_>(); are_equal<sizeof(version_counter_), 8, FieldNameEnum::version_counter_>();
are_equal<sizeof(pyobj_slot_), 8, FieldNameEnum::pyobj_slot_>(); are_equal<sizeof(pyobj_slot_), 16, FieldNameEnum::pyobj_slot_>();
are_equal<sizeof(sizes_and_strides_), 88, FieldNameEnum::sizes_and_strides_>(); are_equal<sizeof(sizes_and_strides_), 88, FieldNameEnum::sizes_and_strides_>();
are_equal<sizeof(storage_offset_), 8, FieldNameEnum::storage_offset_>(); are_equal<sizeof(storage_offset_), 8, FieldNameEnum::storage_offset_>();
are_equal<sizeof(numel_), 8, FieldNameEnum::numel_>(); are_equal<sizeof(numel_), 8, FieldNameEnum::numel_>();

View File

@ -13,10 +13,11 @@ struct C10_API PyInterpreterHooksInterface {
// Get the PyInterpreter instance // Get the PyInterpreter instance
// Stub implementation throws error when Python is not available // Stub implementation throws error when Python is not available
// We return nullptr rather than throwing an error since there are bits of c10
// that expect an empty PyObjectSlot when python is not available.
virtual PyInterpreter* getPyInterpreter() const { virtual PyInterpreter* getPyInterpreter() const {
return nullptr; TORCH_CHECK(
false,
"PyTorch was compiled without Python support. "
"Cannot access Python interpreter from C++.");
} }
}; };

View File

@ -2,7 +2,7 @@
namespace c10::impl { namespace c10::impl {
PyObjectSlot::PyObjectSlot() : pyobj_(nullptr) {} PyObjectSlot::PyObjectSlot() : pyobj_interpreter_(nullptr), pyobj_(nullptr) {}
PyObjectSlot::~PyObjectSlot() { PyObjectSlot::~PyObjectSlot() {
maybe_destroy_pyobj(); maybe_destroy_pyobj();
@ -10,9 +10,9 @@ PyObjectSlot::~PyObjectSlot() {
void PyObjectSlot::maybe_destroy_pyobj() { void PyObjectSlot::maybe_destroy_pyobj() {
if (owns_pyobj()) { if (owns_pyobj()) {
TORCH_INTERNAL_ASSERT(getGlobalPyInterpreter() != nullptr); TORCH_INTERNAL_ASSERT(pyobj_interpreter_ != nullptr);
TORCH_INTERNAL_ASSERT(pyobj_ != nullptr); TORCH_INTERNAL_ASSERT(pyobj_ != nullptr);
(*getGlobalPyInterpreter()) (*pyobj_interpreter_.load(std::memory_order_acquire))
->decref(_unchecked_untagged_pyobj(), /*has_pyobj_slot*/ true); ->decref(_unchecked_untagged_pyobj(), /*has_pyobj_slot*/ true);
// NB: this destructor can only be entered when there are no // NB: this destructor can only be entered when there are no
// references to this C++ object (obviously), NOR any references // references to this C++ object (obviously), NOR any references
@ -25,7 +25,7 @@ void PyObjectSlot::maybe_destroy_pyobj() {
} }
PyInterpreter* PyObjectSlot::pyobj_interpreter() { PyInterpreter* PyObjectSlot::pyobj_interpreter() {
return getGlobalPyInterpreter(); return pyobj_interpreter_.load(std::memory_order_acquire);
} }
PyObject* PyObjectSlot::_unchecked_untagged_pyobj() const { PyObject* PyObjectSlot::_unchecked_untagged_pyobj() const {
@ -35,7 +35,7 @@ PyObject* PyObjectSlot::_unchecked_untagged_pyobj() const {
} }
PyInterpreter& PyObjectSlot::load_pyobj_interpreter() const { PyInterpreter& PyObjectSlot::load_pyobj_interpreter() const {
auto interpreter = getGlobalPyInterpreter(); auto interpreter = pyobj_interpreter_.load(std::memory_order_acquire);
if (interpreter) { if (interpreter) {
return *interpreter; return *interpreter;
} }

View File

@ -6,17 +6,10 @@
#include <c10/util/python_stub.h> #include <c10/util/python_stub.h>
#include <optional> #include <optional>
#include <atomic>
namespace c10::impl { namespace c10::impl {
// Function pointer type for getting the global interpreter
using GetPyInterpreterFn = PyInterpreter* (*)();
// Global function pointer (set by csrc initialization)
C10_API extern GetPyInterpreterFn g_get_pyinterpreter_fn;
// Helper function to get the global interpreter
C10_API PyInterpreter* getGlobalPyInterpreter();
struct C10_API PyObjectSlot { struct C10_API PyObjectSlot {
public: public:
PyObjectSlot(); PyObjectSlot();
@ -33,6 +26,8 @@ struct C10_API PyObjectSlot {
// NB: THIS FUNCTION CAN RAISE AN EXCEPTION. Make sure to clean up after // NB: THIS FUNCTION CAN RAISE AN EXCEPTION. Make sure to clean up after
// PyObject if necessary! // PyObject if necessary!
void init_pyobj(PyObject* pyobj) { void init_pyobj(PyObject* pyobj) {
pyobj_interpreter_.store(
getGlobalPyInterpreter(), std::memory_order_relaxed);
pyobj_ = pyobj; pyobj_ = pyobj;
} }
@ -60,15 +55,18 @@ struct C10_API PyObjectSlot {
// @todo alban: I'm not too sure what's going on here, we can probably delete // @todo alban: I'm not too sure what's going on here, we can probably delete
// it but it's worthwhile making sure // it but it's worthwhile making sure
std::optional<PyObject*> check_pyobj() const { std::optional<PyObject*> check_pyobj(bool ignore_hermetic_tls = false) const {
impl::PyInterpreter* interpreter = getGlobalPyInterpreter(); impl::PyInterpreter* interpreter =
if (interpreter == nullptr || pyobj_ == nullptr) { pyobj_interpreter_.load(std::memory_order_acquire);
if (interpreter == nullptr) {
return std::nullopt; return std::nullopt;
} }
if (c10::impl::HermeticPyObjectTLS::get_state()) {
if (!ignore_hermetic_tls && c10::impl::HermeticPyObjectTLS::get_state()) {
return std::nullopt; return std::nullopt;
} else {
return _unchecked_untagged_pyobj();
} }
return _unchecked_untagged_pyobj();
} }
PyInterpreter& load_pyobj_interpreter() const; PyInterpreter& load_pyobj_interpreter() const;
@ -78,6 +76,30 @@ struct C10_API PyObjectSlot {
void set_owns_pyobj(bool b); void set_owns_pyobj(bool b);
private: private:
// This field contains the interpreter tag for this object. See
// Note [Python interpreter tag] for general context
//
// Note [Memory ordering on Python interpreter tag]
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// What memory_order do we need when accessing this atomic? We don't
// need a single total modification order (as provided by
// memory_order_seq_cst) as pyobj_interpreter_ is monotonic: it can only
// transition from -1 to some positive integer and never changes afterwards.
// Because there is only one modification, it trivially already has a total
// modification order (e.g., we don't need fences or locked instructions on
// x86)
//
// In fact, one could make a reasonable argument that relaxed reads are OK,
// due to the presence of external locking (GIL) to ensure that interactions
// with other data structures are still correctly synchronized, so that
// we fall in the "Single-Location Data Structures" case as described in
// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p2055r0.pdf
// However, on x86, it doesn't matter if I use acquire or relaxed on the load
// as I get the same assembly in both cases. So I just use the more
// conservative acquire (which will impede compiler optimizations but I don't
// care)
std::atomic<PyInterpreter*> pyobj_interpreter_;
// This field contains a reference to a PyObject representing this Tensor. // This field contains a reference to a PyObject representing this Tensor.
// If pyobj is nullptr, when we transfer Tensor to Python, we allocate a new // If pyobj is nullptr, when we transfer Tensor to Python, we allocate a new
// PyObject for it and set this field. This field does not have to be // PyObject for it and set this field. This field does not have to be

View File

@ -1187,7 +1187,8 @@ int64_t _Tensor_ndim(mpy::handle h) {
mpy::handle handle_from_tensor(Arena& A, TensorRef t) { mpy::handle handle_from_tensor(Arena& A, TensorRef t) {
// fast case: tensor is live in python // fast case: tensor is live in python
std::optional<PyObject*> mb_obj = std::optional<PyObject*> mb_obj =
t->unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(); t->unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(
/*ignore_hermetic_tls=*/false);
if (mb_obj.has_value() && if (mb_obj.has_value() &&
!t->unsafeGetTensorImpl()->pyobj_slot()->owns_pyobj()) { !t->unsafeGetTensorImpl()->pyobj_slot()->owns_pyobj()) {
return *mb_obj; return *mb_obj;

View File

@ -403,9 +403,11 @@ static PyObject* THPModule_swap_tensor_impl(PyObject* _unused, PyObject* args) {
// The TensorImpls contain PyObjectSlots that have a reference to the PyObject // The TensorImpls contain PyObjectSlots that have a reference to the PyObject
// associated with the TensorImpl. Swap this field as well. // associated with the TensorImpl. Swap this field as well.
std::optional<PyObject*> mb_obj_a = std::optional<PyObject*> mb_obj_a =
a->cdata->unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(); a->cdata->unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(
/*ignore_hermetic_tls=*/false);
std::optional<PyObject*> mb_obj_b = std::optional<PyObject*> mb_obj_b =
b->cdata->unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(); b->cdata->unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(
/*ignore_hermetic_tls=*/false);
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
mb_obj_a.has_value() && mb_obj_b.has_value(), mb_obj_a.has_value() && mb_obj_b.has_value(),
"Both tensors should have PyObjects tagged by the current python interpreter"); "Both tensors should have PyObjects tagged by the current python interpreter");

View File

@ -614,7 +614,8 @@ static void set_tensor_attr_with_capsule(
const c10::TensorImpl* tensor, const c10::TensorImpl* tensor,
py::capsule& capsule, py::capsule& capsule,
const char* attr_name) { const char* attr_name) {
std::optional<PyObject*> mb_obj = tensor->pyobj_slot()->check_pyobj(); std::optional<PyObject*> mb_obj = tensor->pyobj_slot()->check_pyobj(
/*ignore_hermetic_tls=*/false);
TORCH_CHECK( TORCH_CHECK(
mb_obj.has_value(), "Tensor subclass's PyInterpreter has no value"); mb_obj.has_value(), "Tensor subclass's PyInterpreter has no value");
auto obj = mb_obj.value(); auto obj = mb_obj.value();
@ -641,7 +642,8 @@ static c10::ArrayRef<T> get_set_cached_attr(
const c10::TensorImpl* tensor, const c10::TensorImpl* tensor,
const char* base_attr_name, const char* base_attr_name,
const py::object& obj) { const py::object& obj) {
std::optional<PyObject*> mb_obj = tensor->pyobj_slot()->check_pyobj(); std::optional<PyObject*> mb_obj =
tensor->pyobj_slot()->check_pyobj(getPyInterpreter());
TORCH_CHECK( TORCH_CHECK(
mb_obj.has_value(), "Tensor subclass's PyInterpreter has no value"); mb_obj.has_value(), "Tensor subclass's PyInterpreter has no value");
auto tensor_obj = mb_obj.value(); auto tensor_obj = mb_obj.value();

View File

@ -41,8 +41,8 @@ PyObject* THPStorage_NewWithStorage(
"Creating a Storage subclass from a class that does not inherit from ", "Creating a Storage subclass from a class that does not inherit from ",
"Storage is not possible. Make sure your class inherits from Storage."); "Storage is not possible. Make sure your class inherits from Storage.");
auto maybe_pyobj = auto maybe_pyobj = _storage.unsafeGetStorageImpl()->pyobj_slot()->check_pyobj(
_storage.unsafeGetStorageImpl()->pyobj_slot()->check_pyobj(); /*ignore_hermetic_tls=*/false);
if (maybe_pyobj.has_value() && maybe_pyobj.value()) { if (maybe_pyobj.has_value() && maybe_pyobj.value()) {
TORCH_CHECK( TORCH_CHECK(
allow_preexisting_pyobj, allow_preexisting_pyobj,
@ -93,7 +93,8 @@ PyObject* THPStorage_Wrap(c10::Storage storage) {
} }
c10::impl::PyObjectSlot* pyobj_slot = storage_impl->pyobj_slot(); c10::impl::PyObjectSlot* pyobj_slot = storage_impl->pyobj_slot();
std::optional<PyObject*> maybe_pyobj = pyobj_slot->check_pyobj(); std::optional<PyObject*> maybe_pyobj = pyobj_slot->check_pyobj(
/*ignore_hermetic_tls=*/false);
if (maybe_pyobj.has_value()) { if (maybe_pyobj.has_value()) {
auto obj = *maybe_pyobj; auto obj = *maybe_pyobj;
if (obj) { if (obj) {
@ -126,8 +127,8 @@ static bool THPStorage_isPreservable(THPStorage* self) {
return false; return false;
} }
if (storage.unsafeGetStorageImpl()->pyobj_slot()->check_pyobj() != if (storage.unsafeGetStorageImpl()->pyobj_slot()->check_pyobj(
(PyObject*)self) { /*ignore_hermetic_tls=*/true) != (PyObject*)self) {
return false; return false;
} }
if (storage.use_count() <= 1) { if (storage.use_count() <= 1) {
@ -144,7 +145,8 @@ static bool THPStorage_tryPreserve(THPStorage* self) {
const auto& storage = THPStorage_Unpack(self); const auto& storage = THPStorage_Unpack(self);
c10::StorageImpl* storage_impl = storage.unsafeGetStorageImpl(); c10::StorageImpl* storage_impl = storage.unsafeGetStorageImpl();
auto maybe_pyobj = storage_impl->pyobj_slot()->check_pyobj(); auto maybe_pyobj = storage_impl->pyobj_slot()->check_pyobj(
/*ignore_hermetic_tls=*/true);
// NOTE: It is possible to just set the PyObjectSlot here, but the point is // NOTE: It is possible to just set the PyObjectSlot here, but the point is
// that we should have already set PyObjectSlot when the storage PyObject // that we should have already set PyObjectSlot when the storage PyObject
// was created. // was created.

View File

@ -265,7 +265,8 @@ PyObject* THPVariable_Wrap(const at::TensorBase& var) {
} }
std::optional<PyObject*> mb_obj = std::optional<PyObject*> mb_obj =
var.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(); var.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(
/*ignore_hermetic_tls=*/false);
if (mb_obj.has_value()) { if (mb_obj.has_value()) {
auto obj = *mb_obj; auto obj = *mb_obj;
if (obj) { if (obj) {
@ -328,8 +329,8 @@ static bool isResurrectable(THPVariable* self) {
return false; return false;
} }
// Check if this is hermetic. If it is, no resurrection. // Check if this is hermetic. If it is, no resurrection.
if (tensor.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj() != if (tensor.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(
(PyObject*)self) { /*ignore_hermetic_tls=*/false) != (PyObject*)self) {
return false; return false;
} }
return true; return true;
@ -354,7 +355,8 @@ static bool THPVariable_tryResurrect(THPVariable* self) {
!tensor.unsafeGetTensorImpl()->pyobj_slot()->owns_pyobj()); !tensor.unsafeGetTensorImpl()->pyobj_slot()->owns_pyobj());
c10::TensorImpl* tensor_impl = tensor.unsafeGetTensorImpl(); c10::TensorImpl* tensor_impl = tensor.unsafeGetTensorImpl();
auto maybe_pyobj = tensor_impl->pyobj_slot()->check_pyobj(); auto maybe_pyobj = tensor_impl->pyobj_slot()->check_pyobj(
/*ignore_hermetic_tls=*/false);
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
maybe_pyobj.has_value(), maybe_pyobj.has_value(),
@ -1932,8 +1934,8 @@ static int THPVariable_subclass_clear(THPVariable* self) {
// because Tensor asked us to (it's already destructing). // because Tensor asked us to (it's already destructing).
if (!self->cdata.unsafeIsBorrowed() && if (!self->cdata.unsafeIsBorrowed() &&
tensor.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj() == tensor.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(
(PyObject*)self) { /*ignore_hermetic_tls=*/false) == (PyObject*)self) {
// TODO: empirically, on OS X this assert appears to be untrue // TODO: empirically, on OS X this assert appears to be untrue
// In test_py_tensors_multi_async_call - ProcessGroupRpcTestWithSpawn // In test_py_tensors_multi_async_call - ProcessGroupRpcTestWithSpawn
// distributed/rpc/test_process_group_agent.py // distributed/rpc/test_process_group_agent.py
@ -2119,7 +2121,8 @@ static PyObject* THPVariable_NewWithVar(
// This function overwrite the Tensor's pyobj field without extra checks // This function overwrite the Tensor's pyobj field without extra checks
// Make sure it is not set otherwise we would leak memory // Make sure it is not set otherwise we would leak memory
auto mb_obj = _var.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(); auto mb_obj = _var.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(
/*ignore_hermetic_tls=*/false);
// Under some circumstances, we may attempt to create a new Python // Under some circumstances, we may attempt to create a new Python
// object for a variable that already has a Python object. The most common // object for a variable that already has a Python object. The most common