Revert "[BE] Make PyObjectSlot use a global PyInterpreter (#162659)"

This reverts commit 29cbcbac4215e0d9070a1b7a07ddaec9a36bbd08.

Reverted https://github.com/pytorch/pytorch/pull/162659 on behalf of https://github.com/izaitsevfb due to reverted internally, see [D83214133](https://www.internalfb.com/diff/D83214133) ([comment](https://github.com/pytorch/pytorch/pull/162659#issuecomment-3369348172))
This commit is contained in:
PyTorch MergeBot
2025-10-05 21:39:56 +00:00
parent 2c5ed6e7c0
commit 331191ce4b
10 changed files with 73 additions and 45 deletions

View File

@ -3244,7 +3244,7 @@ class C10_TensorImpl_Size_Check_Dummy_Class : private TensorImpl {
are_equal<sizeof(autograd_meta_), 4, FieldNameEnum::autograd_meta_>();
are_equal<sizeof(extra_meta_), 4, FieldNameEnum::extra_meta_>();
are_equal<sizeof(version_counter_), 4, FieldNameEnum::version_counter_>();
are_equal<sizeof(pyobj_slot_), 4, FieldNameEnum::pyobj_slot_>();
are_equal<sizeof(pyobj_slot_), 8, FieldNameEnum::pyobj_slot_>();
is_le<sizeof(sizes_and_strides_), 88, FieldNameEnum::sizes_and_strides_>();
are_equal<sizeof(storage_offset_), 8, FieldNameEnum::storage_offset_>();
are_equal<sizeof(numel_), 8, FieldNameEnum::numel_>();
@ -3269,7 +3269,7 @@ class C10_TensorImpl_Size_Check_Dummy_Class : private TensorImpl {
is_le<sizeof(autograd_meta_), 16, FieldNameEnum::autograd_meta_>();
is_le<sizeof(extra_meta_), 16, FieldNameEnum::extra_meta_>();
are_equal<sizeof(version_counter_), 8, FieldNameEnum::version_counter_>();
are_equal<sizeof(pyobj_slot_), 8, FieldNameEnum::pyobj_slot_>();
are_equal<sizeof(pyobj_slot_), 16, FieldNameEnum::pyobj_slot_>();
are_equal<sizeof(sizes_and_strides_), 88, FieldNameEnum::sizes_and_strides_>();
are_equal<sizeof(storage_offset_), 8, FieldNameEnum::storage_offset_>();
are_equal<sizeof(numel_), 8, FieldNameEnum::numel_>();

View File

@ -13,10 +13,11 @@ struct C10_API PyInterpreterHooksInterface {
// Get the PyInterpreter instance
// Stub implementation throws error when Python is not available
// We return nullptr rather than throwing an error since there are bits of c10
// that expect an empty PyObjectSlot when python is not available.
virtual PyInterpreter* getPyInterpreter() const {
return nullptr;
TORCH_CHECK(
false,
"PyTorch was compiled without Python support. "
"Cannot access Python interpreter from C++.");
}
};

View File

@ -2,7 +2,7 @@
namespace c10::impl {
PyObjectSlot::PyObjectSlot() : pyobj_(nullptr) {}
PyObjectSlot::PyObjectSlot() : pyobj_interpreter_(nullptr), pyobj_(nullptr) {}
PyObjectSlot::~PyObjectSlot() {
maybe_destroy_pyobj();
@ -10,9 +10,9 @@ PyObjectSlot::~PyObjectSlot() {
void PyObjectSlot::maybe_destroy_pyobj() {
if (owns_pyobj()) {
TORCH_INTERNAL_ASSERT(getGlobalPyInterpreter() != nullptr);
TORCH_INTERNAL_ASSERT(pyobj_interpreter_ != nullptr);
TORCH_INTERNAL_ASSERT(pyobj_ != nullptr);
(*getGlobalPyInterpreter())
(*pyobj_interpreter_.load(std::memory_order_acquire))
->decref(_unchecked_untagged_pyobj(), /*has_pyobj_slot*/ true);
// NB: this destructor can only be entered when there are no
// references to this C++ object (obviously), NOR any references
@ -25,7 +25,7 @@ void PyObjectSlot::maybe_destroy_pyobj() {
}
PyInterpreter* PyObjectSlot::pyobj_interpreter() {
return getGlobalPyInterpreter();
return pyobj_interpreter_.load(std::memory_order_acquire);
}
PyObject* PyObjectSlot::_unchecked_untagged_pyobj() const {
@ -35,7 +35,7 @@ PyObject* PyObjectSlot::_unchecked_untagged_pyobj() const {
}
PyInterpreter& PyObjectSlot::load_pyobj_interpreter() const {
auto interpreter = getGlobalPyInterpreter();
auto interpreter = pyobj_interpreter_.load(std::memory_order_acquire);
if (interpreter) {
return *interpreter;
}

View File

@ -6,17 +6,10 @@
#include <c10/util/python_stub.h>
#include <optional>
#include <atomic>
namespace c10::impl {
// Function pointer type for getting the global interpreter
using GetPyInterpreterFn = PyInterpreter* (*)();
// Global function pointer (set by csrc initialization)
C10_API extern GetPyInterpreterFn g_get_pyinterpreter_fn;
// Helper function to get the global interpreter
C10_API PyInterpreter* getGlobalPyInterpreter();
struct C10_API PyObjectSlot {
public:
PyObjectSlot();
@ -33,6 +26,8 @@ struct C10_API PyObjectSlot {
// NB: THIS FUNCTION CAN RAISE AN EXCEPTION. Make sure to clean up after
// PyObject if necessary!
void init_pyobj(PyObject* pyobj) {
pyobj_interpreter_.store(
getGlobalPyInterpreter(), std::memory_order_relaxed);
pyobj_ = pyobj;
}
@ -60,15 +55,18 @@ struct C10_API PyObjectSlot {
// @todo alban: I'm not too sure what's going on here, we can probably delete
// it but it's worthwhile making sure
std::optional<PyObject*> check_pyobj() const {
impl::PyInterpreter* interpreter = getGlobalPyInterpreter();
if (interpreter == nullptr || pyobj_ == nullptr) {
std::optional<PyObject*> check_pyobj(bool ignore_hermetic_tls = false) const {
impl::PyInterpreter* interpreter =
pyobj_interpreter_.load(std::memory_order_acquire);
if (interpreter == nullptr) {
return std::nullopt;
}
if (c10::impl::HermeticPyObjectTLS::get_state()) {
if (!ignore_hermetic_tls && c10::impl::HermeticPyObjectTLS::get_state()) {
return std::nullopt;
} else {
return _unchecked_untagged_pyobj();
}
return _unchecked_untagged_pyobj();
}
PyInterpreter& load_pyobj_interpreter() const;
@ -78,6 +76,30 @@ struct C10_API PyObjectSlot {
void set_owns_pyobj(bool b);
private:
// This field contains the interpreter tag for this object. See
// Note [Python interpreter tag] for general context
//
// Note [Memory ordering on Python interpreter tag]
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// What memory_order do we need when accessing this atomic? We don't
// need a single total modification order (as provided by
// memory_order_seq_cst) as pyobj_interpreter_ is monotonic: it can only
// transition from -1 to some positive integer and never changes afterwards.
// Because there is only one modification, it trivially already has a total
// modification order (e.g., we don't need fences or locked instructions on
// x86)
//
// In fact, one could make a reasonable argument that relaxed reads are OK,
// due to the presence of external locking (GIL) to ensure that interactions
// with other data structures are still correctly synchronized, so that
// we fall in the "Single-Location Data Structures" case as described in
// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p2055r0.pdf
// However, on x86, it doesn't matter if I use acquire or relaxed on the load
// as I get the same assembly in both cases. So I just use the more
// conservative acquire (which will impede compiler optimizations but I don't
// care)
std::atomic<PyInterpreter*> pyobj_interpreter_;
// This field contains a reference to a PyObject representing this Tensor.
// If pyobj is nullptr, when we transfer Tensor to Python, we allocate a new
// PyObject for it and set this field. This field does not have to be