From 94195a37ae4eae9c486a81b0f67725c8970f74d6 Mon Sep 17 00:00:00 2001 From: PaliC Date: Thu, 25 Sep 2025 22:11:36 +0900 Subject: [PATCH] [BE] Remove HermeticPyObjectTLS and Simplify PythonOpRegistrationTrampoline (#163464) Removes HermeticPyObjectTLS as we no longer need since torch deploy is no longer supported. PythonOpRegistrationTrampoline is also drastically simplified as and being prepped for removal in a future PR. Pull Request resolved: https://github.com/pytorch/pytorch/pull/163464 Approved by: https://github.com/albanD, https://github.com/Skylion007 --- .../core/PythonOpRegistrationTrampoline.cpp | 24 +++---- .../core/PythonOpRegistrationTrampoline.h | 10 +-- c10/core/impl/HermeticPyObjectTLS.cpp | 21 ------- c10/core/impl/HermeticPyObjectTLS.h | 62 ------------------- c10/core/impl/PyObjectSlot.h | 20 +----- cmake/prioritized_text.txt | 1 - torch/csrc/PyInterpreter.cpp | 13 ++-- torch/csrc/Storage.cpp | 13 +--- torch/csrc/autograd/python_variable.cpp | 35 +++-------- torch/csrc/utils/python_dispatch.cpp | 34 ---------- 10 files changed, 31 insertions(+), 202 deletions(-) delete mode 100644 c10/core/impl/HermeticPyObjectTLS.cpp delete mode 100644 c10/core/impl/HermeticPyObjectTLS.h diff --git a/aten/src/ATen/core/PythonOpRegistrationTrampoline.cpp b/aten/src/ATen/core/PythonOpRegistrationTrampoline.cpp index 219d774de3a5..f50b2507d914 100644 --- a/aten/src/ATen/core/PythonOpRegistrationTrampoline.cpp +++ b/aten/src/ATen/core/PythonOpRegistrationTrampoline.cpp @@ -1,32 +1,22 @@ #include +#include +// TODO: delete this namespace at::impl { -// The strategy is that all python interpreters attempt to register themselves -// as the main interpreter, but only one wins. Only that interpreter is -// allowed to interact with the C++ dispatcher. Furthermore, when we execute -// logic on that interpreter, we do so hermetically, never setting pyobj field -// on Tensor. - -std::atomic - PythonOpRegistrationTrampoline::interpreter_{nullptr}; +c10::impl::PyInterpreter* PythonOpRegistrationTrampoline::interpreter_ = nullptr; c10::impl::PyInterpreter* PythonOpRegistrationTrampoline::getInterpreter() { - return PythonOpRegistrationTrampoline::interpreter_.load(); + return c10::impl::getGlobalPyInterpreter(); } bool PythonOpRegistrationTrampoline::registerInterpreter( c10::impl::PyInterpreter* interp) { - c10::impl::PyInterpreter* expected = nullptr; - interpreter_.compare_exchange_strong(expected, interp); - if (expected != nullptr) { - // This is the second (or later) Python interpreter, which means we need - // non-trivial hermetic PyObject TLS - c10::impl::HermeticPyObjectTLS::init_state(); + if (interpreter_ != nullptr) { return false; - } else { - return true; } + interpreter_ = interp; + return true; } } // namespace at::impl diff --git a/aten/src/ATen/core/PythonOpRegistrationTrampoline.h b/aten/src/ATen/core/PythonOpRegistrationTrampoline.h index bec323c7d25b..062dbebc3ceb 100644 --- a/aten/src/ATen/core/PythonOpRegistrationTrampoline.h +++ b/aten/src/ATen/core/PythonOpRegistrationTrampoline.h @@ -2,19 +2,21 @@ #include -// TODO: this can probably live in c10 +// TODO: We can get rid of this namespace at::impl { +// Manages the single Python interpreter instance for PyTorch. class TORCH_API PythonOpRegistrationTrampoline final { - static std::atomic interpreter_; + static c10::impl::PyInterpreter* interpreter_; public: - // Returns true if you successfully registered yourself (that means - // you are in the hot seat for doing the operator registrations!) + // Register the Python interpreter. Returns true on first registration, + // false if an interpreter was already registered. static bool registerInterpreter(c10::impl::PyInterpreter*); + // Returns the registered interpreter via the global PyInterpreter hooks. // Returns nullptr if no interpreter has been registered yet. static c10::impl::PyInterpreter* getInterpreter(); }; diff --git a/c10/core/impl/HermeticPyObjectTLS.cpp b/c10/core/impl/HermeticPyObjectTLS.cpp deleted file mode 100644 index 856c63a93a92..000000000000 --- a/c10/core/impl/HermeticPyObjectTLS.cpp +++ /dev/null @@ -1,21 +0,0 @@ -#include - -namespace c10::impl { - -thread_local static std::atomic hermeticPyObjectState{false}; - -std::atomic HermeticPyObjectTLS::haveState_{false}; - -void HermeticPyObjectTLS::set_state(bool state) { - hermeticPyObjectState = state; -} - -bool HermeticPyObjectTLS::get_tls_state() { - return hermeticPyObjectState; -} - -void HermeticPyObjectTLS::init_state() { - haveState_ = true; -} - -} // namespace c10::impl diff --git a/c10/core/impl/HermeticPyObjectTLS.h b/c10/core/impl/HermeticPyObjectTLS.h deleted file mode 100644 index a973a5d2cef8..000000000000 --- a/c10/core/impl/HermeticPyObjectTLS.h +++ /dev/null @@ -1,62 +0,0 @@ -#pragma once - -#include -#include - -namespace c10::impl { - -// This TLS controls whether or not we permanently associate PyObject -// with Tensor the first time it is allocated. When hermetic PyObject -// TLS is enabled (state is true), we DO NOT save PyObjects to Tensor, -// meaning you get a distinct PyObject whenever you execute the code in -// question. -struct C10_API HermeticPyObjectTLS { - static void set_state(bool state); - static bool get_state() { - // Hypothetical fastpath if torchdeploy/multipy // codespell:ignore multipy - // isn't used. Per - // https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p2055r0.pdf - // this qualifies relaxed access because it is a single-location data - // structure (only the boolean here). - // - // Forgetting about data races for a moment, is there a logical race? - // - // - Boolean only ever transitions from false to true. So the - // critical situation is when one interpreter is already running - // when a second interpreter switches haveState from false to true. - // - // - The first interpreter is indifferent whether or not it sees - // hasState true/false; obviously false works (this is what the - // interpreter was previously using; more directly, the interpreter - // calls into itself as the handler, so being hermetic is not - // required), and true simply means serviced python operator calls will - // be hermetic; in these cases it is expected to be functionally - // equivalent. - // - // - The second interpreter MUST see hasState true (as its requests will - // be forwarded to the first interpreter), but it is assumed that there - // is a synchronization between the interpreter initialization, and - // when we actually perform operations, so it is guaranteed to see - // hasState true. - // - // QED. - // - // This fastpath is currently disabled so that we can more easily test that - // hermetic mode works correctly even on stock build of PyTorch. - if (false && !haveState_.load(std::memory_order_relaxed)) - return false; - return get_tls_state(); - } - // Call this from the multipy/torchdeploy // codespell:ignore multipy - // top level - static void init_state(); - - private: - // This only flipped once from false to true during - // torchdeploy/multipy initialization, // codespell:ignore multipy - // and never again. - static std::atomic haveState_; - static bool get_tls_state(); -}; - -} // namespace c10::impl diff --git a/c10/core/impl/PyObjectSlot.h b/c10/core/impl/PyObjectSlot.h index e7d78f8360c3..8824d33eb224 100644 --- a/c10/core/impl/PyObjectSlot.h +++ b/c10/core/impl/PyObjectSlot.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include #include @@ -42,32 +41,15 @@ struct C10_API PyObjectSlot { PyObject* _unchecked_untagged_pyobj() const; - // Test the interpreter tag. If tagged for the current interpreter, return - // a non-nullopt (but possibly null) PyObject. If (possibly) untagged, - // returns a nullopt. If it is definitely invalid, raises an error. - // - // If `ignore_hermetic_tls` is false and this function is called from a - // hermetic context (ie, `HermeticPyObjectTLS::get_state()` is true), then - // nullopt is returned. If `ignore_hermetic_tls` is true, then the hermetic - // context is ignored, allowing you to check the interpreter tag of a - // nonhermetic PyObject from within a hermetic context. This is necessary - // because there are some cases where the deallocator function of a - // nonhermetic PyObject is called from within a hermetic context, so it must - // be properly treated as a nonhermetic PyObject. - // + // Test the interpreter / PyObj as they may be null // NB: this lives in header so that we can avoid actually creating the // std::optional - // @todo alban: I'm not too sure what's going on here, we can probably delete - // it but it's worthwhile making sure std::optional check_pyobj() const { impl::PyInterpreter* interpreter = getGlobalPyInterpreter(); if (interpreter == nullptr || pyobj_ == nullptr) { return std::nullopt; } - if (c10::impl::HermeticPyObjectTLS::get_state()) { - return std::nullopt; - } return _unchecked_untagged_pyobj(); } diff --git a/cmake/prioritized_text.txt b/cmake/prioritized_text.txt index e5e36f34f98d..f7d41b0bec3e 100644 --- a/cmake/prioritized_text.txt +++ b/cmake/prioritized_text.txt @@ -153,7 +153,6 @@ _ZN3c104impl12PyObjectSlot10owns_pyobjEv _ZN3c104impl12PyObjectSlot19maybe_destroy_pyobjEv _ZN3c104impl12PyObjectSlotC1Ev _ZN3c104impl12PyObjectSlotD2Ev -_ZN3c104impl19HermeticPyObjectTLS13get_tls_stateEv _ZN3c104impl20TorchDispatchModeTLS13any_modes_setEb _ZN3c104impl23ExcludeDispatchKeyGuardC1ENS_14DispatchKeySetE _ZN3c104impl23ExcludeDispatchKeyGuardD2Ev diff --git a/torch/csrc/PyInterpreter.cpp b/torch/csrc/PyInterpreter.cpp index 993f8b8216a6..b863fd44c152 100644 --- a/torch/csrc/PyInterpreter.cpp +++ b/torch/csrc/PyInterpreter.cpp @@ -157,10 +157,10 @@ class PyInterpreterHolder { public: PyInterpreterHolder() : impl_(new c10::impl::PyInterpreter( - ConcretePyInterpreterVTable::instance())), - is_main_interpreter_( - at::impl::PythonOpRegistrationTrampoline::registerInterpreter( - impl_)) {} + ConcretePyInterpreterVTable::instance())) { + // Register the single interpreter + at::impl::PythonOpRegistrationTrampoline::registerInterpreter(impl_); + } PyInterpreterHolder(const PyInterpreterHolder&) = delete; PyInterpreterHolder(PyInterpreterHolder&&) = delete; PyInterpreterHolder& operator=(const PyInterpreterHolder&) = delete; @@ -174,13 +174,14 @@ class PyInterpreterHolder { c10::impl::PyInterpreter* get() const noexcept { return impl_; } + // In single-interpreter mode, this is always true + // TODO: delete this bool is_main_interpreter() const noexcept { - return is_main_interpreter_; + return true; } private: c10::impl::PyInterpreter* impl_; - bool is_main_interpreter_; }; py::object torchDispatchFromTensorImpl( diff --git a/torch/csrc/Storage.cpp b/torch/csrc/Storage.cpp index f6638bbd10c1..f8fe81c52e84 100644 --- a/torch/csrc/Storage.cpp +++ b/torch/csrc/Storage.cpp @@ -74,13 +74,9 @@ PyObject* THPStorage_NewWithStorage( s->cdata = c10::MaybeOwned::owned(std::move(_storage)); - if (!c10::impl::HermeticPyObjectTLS::get_state()) { - s->is_hermetic = false; - const auto& storage = THPStorage_Unpack(s); - storage.unsafeGetStorageImpl()->pyobj_slot()->init_pyobj(obj); - } else { - s->is_hermetic = true; - } + s->is_hermetic = false; + const auto& storage = THPStorage_Unpack(s); + storage.unsafeGetStorageImpl()->pyobj_slot()->init_pyobj(obj); return obj; } @@ -88,9 +84,6 @@ PyObject* THPStorage_NewWithStorage( // Wraps the c10::Storage with a storage PyObject PyObject* THPStorage_Wrap(c10::Storage storage) { c10::StorageImpl* storage_impl = storage.unsafeGetStorageImpl(); - if (c10::impl::HermeticPyObjectTLS::get_state()) { - return THPStorage_NewWithStorage(THPStorageClass, std::move(storage)); - } c10::impl::PyObjectSlot* pyobj_slot = storage_impl->pyobj_slot(); std::optional maybe_pyobj = pyobj_slot->check_pyobj(); diff --git a/torch/csrc/autograd/python_variable.cpp b/torch/csrc/autograd/python_variable.cpp index 50b845adb637..066afff18dce 100644 --- a/torch/csrc/autograd/python_variable.cpp +++ b/torch/csrc/autograd/python_variable.cpp @@ -1,7 +1,6 @@ #include #include #include -#include #include #include #include @@ -260,10 +259,6 @@ PyObject* THPVariable_Wrap(const at::TensorBase& var) { Py_RETURN_NONE; } - if (c10::impl::HermeticPyObjectTLS::get_state()) { - return THPVariable_NewWithVar((PyTypeObject*)THPVariableClass, var); - } - std::optional mb_obj = var.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(); if (mb_obj.has_value()) { @@ -379,7 +374,6 @@ static bool THPVariable_tryResurrect(THPVariable* self) { // Flip THPVariable to be non-owning // (near use-after-free miss here: fresh MaybeOwned is created breaking // reference on Tensor in struct BEFORE we overwrite the old one) - TORCH_INTERNAL_ASSERT(!c10::impl::HermeticPyObjectTLS::get_state()); self->cdata = MaybeOwned::borrowed(tensor); // NB: At this point, tensor *could* be dead (e.g., some other C++ thread @@ -2474,28 +2468,13 @@ static PyObject* THPVariable_NewWithVar( auto v = (THPVariable*)obj; // TODO: named constructor to avoid default initialization new (&v->cdata) MaybeOwned(); - if (c10::impl::HermeticPyObjectTLS::get_state()) { - // Do NOT initialize pyobj field on the tensor, you own the C++ - v->cdata = MaybeOwned::owned(Variable(_var)); - TORCH_INTERNAL_ASSERT( - !check_has_torch_dispatch(obj), - "While HermeticPyObject was enabled, we attempted to create a tensor " - "subclass with __torch_dispatch__. This violates the invariant that " - "operations in HermeticPyObject have equivalent C++ implementations. " - "If your operator registered from Python operator registration isn't " - "doing anything strange, there may be an internal PyTorch bug involving " - "not appropriately disabling TorchDispatchMode before executing " - "Python op registration."); - } else { - // Normal codepath - v->cdata = MaybeOwned::owned(Variable(_var)); - const auto& var = THPVariable_Unpack(v); - var.unsafeGetTensorImpl()->pyobj_slot()->init_pyobj(obj); - if (has_torch_dispatch_if_known.has_value() - ? *has_torch_dispatch_if_known - : check_has_torch_dispatch(obj)) { - var.unsafeGetTensorImpl()->set_python_dispatch(true); - } + v->cdata = MaybeOwned::owned(Variable(_var)); + const auto& var = THPVariable_Unpack(v); + var.unsafeGetTensorImpl()->pyobj_slot()->init_pyobj(obj); + if (has_torch_dispatch_if_known.has_value() + ? *has_torch_dispatch_if_known + : check_has_torch_dispatch(obj)) { + var.unsafeGetTensorImpl()->set_python_dispatch(true); } } return obj; diff --git a/torch/csrc/utils/python_dispatch.cpp b/torch/csrc/utils/python_dispatch.cpp index 9d6eb35c7178..4dad66f9fb06 100644 --- a/torch/csrc/utils/python_dispatch.cpp +++ b/torch/csrc/utils/python_dispatch.cpp @@ -81,40 +81,6 @@ inline static torch::CppFunction dispatch_str(const char* key, Func&& raw_f) { } } -struct EnableHermeticPyObject { - EnableHermeticPyObject() - : old_(c10::impl::HermeticPyObjectTLS::get_state()), - old_excluded_python_( - c10::impl::tls_is_dispatch_key_excluded(at::DispatchKey::Python)), - old_python_( - c10::impl::tls_is_dispatch_key_included(at::DispatchKey::Python)), - old_python_snapshot_(c10::impl::tls_is_dispatch_key_included( - at::DispatchKey::PythonTLSSnapshot)) { - c10::impl::HermeticPyObjectTLS::set_state(true); - c10::impl::tls_set_dispatch_key_excluded(at::DispatchKey::Python, true); - c10::impl::tls_set_dispatch_key_included(at::DispatchKey::Python, false); - c10::impl::tls_set_dispatch_key_included( - at::DispatchKey::PythonTLSSnapshot, false); - } - ~EnableHermeticPyObject() { - c10::impl::HermeticPyObjectTLS::set_state(old_); - c10::impl::tls_set_dispatch_key_excluded( - at::DispatchKey::Python, old_excluded_python_); - c10::impl::tls_set_dispatch_key_included( - at::DispatchKey::Python, old_python_); - c10::impl::tls_set_dispatch_key_included( - at::DispatchKey::PythonTLSSnapshot, old_python_snapshot_); - } - EnableHermeticPyObject(const EnableHermeticPyObject&) = delete; - EnableHermeticPyObject(EnableHermeticPyObject&&) = delete; - EnableHermeticPyObject& operator=(const EnableHermeticPyObject&) = delete; - EnableHermeticPyObject& operator=(EnableHermeticPyObject&&) = delete; - bool old_; - bool old_excluded_python_; - bool old_python_; - bool old_python_snapshot_; -}; - class PythonKernelHolder : public c10::OperatorKernel { c10::SafePyObject func_; c10::DispatchKey dispatch_key_;