From 94195a37ae4eae9c486a81b0f67725c8970f74d6 Mon Sep 17 00:00:00 2001
From: PaliC <sahancpal@gmail.com>
Date: Thu, 25 Sep 2025 22:11:36 +0900
Subject: [PATCH] [BE] Remove HermeticPyObjectTLS and Simplify
 PythonOpRegistrationTrampoline (#163464)

Removes HermeticPyObjectTLS as we no longer need since torch deploy is no longer supported. PythonOpRegistrationTrampoline is also drastically simplified as and being prepped for removal in a future PR.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/163464
Approved by: https://github.com/albanD, https://github.com/Skylion007
---
 .../core/PythonOpRegistrationTrampoline.cpp   | 24 +++----
 .../core/PythonOpRegistrationTrampoline.h     | 10 +--
 c10/core/impl/HermeticPyObjectTLS.cpp         | 21 -------
 c10/core/impl/HermeticPyObjectTLS.h           | 62 -------------------
 c10/core/impl/PyObjectSlot.h                  | 20 +-----
 cmake/prioritized_text.txt                    |  1 -
 torch/csrc/PyInterpreter.cpp                  | 13 ++--
 torch/csrc/Storage.cpp                        | 13 +---
 torch/csrc/autograd/python_variable.cpp       | 35 +++--------
 torch/csrc/utils/python_dispatch.cpp          | 34 ----------
 10 files changed, 31 insertions(+), 202 deletions(-)
 delete mode 100644 c10/core/impl/HermeticPyObjectTLS.cpp
 delete mode 100644 c10/core/impl/HermeticPyObjectTLS.h

diff --git a/aten/src/ATen/core/PythonOpRegistrationTrampoline.cpp b/aten/src/ATen/core/PythonOpRegistrationTrampoline.cpp
index 219d774de3a5..f50b2507d914 100644
--- a/aten/src/ATen/core/PythonOpRegistrationTrampoline.cpp
+++ b/aten/src/ATen/core/PythonOpRegistrationTrampoline.cpp
@@ -1,32 +1,22 @@
 #include <ATen/core/PythonOpRegistrationTrampoline.h>
+#include <c10/core/impl/PyInterpreterHooks.h>
 
+// TODO: delete this
 namespace at::impl {
 
-// The strategy is that all python interpreters attempt to register themselves
-// as the main interpreter, but only one wins.  Only that interpreter is
-// allowed to interact with the C++ dispatcher.  Furthermore, when we execute
-// logic on that interpreter, we do so hermetically, never setting pyobj field
-// on Tensor.
-
-std::atomic<c10::impl::PyInterpreter*>
-    PythonOpRegistrationTrampoline::interpreter_{nullptr};
+c10::impl::PyInterpreter* PythonOpRegistrationTrampoline::interpreter_ = nullptr;
 
 c10::impl::PyInterpreter* PythonOpRegistrationTrampoline::getInterpreter() {
-  return PythonOpRegistrationTrampoline::interpreter_.load();
+  return c10::impl::getGlobalPyInterpreter();
 }
 
 bool PythonOpRegistrationTrampoline::registerInterpreter(
     c10::impl::PyInterpreter* interp) {
-  c10::impl::PyInterpreter* expected = nullptr;
-  interpreter_.compare_exchange_strong(expected, interp);
-  if (expected != nullptr) {
-    // This is the second (or later) Python interpreter, which means we need
-    // non-trivial hermetic PyObject TLS
-    c10::impl::HermeticPyObjectTLS::init_state();
+  if (interpreter_ != nullptr) {
     return false;
-  } else {
-    return true;
   }
+  interpreter_ = interp;
+  return true;
 }
 
 } // namespace at::impl
diff --git a/aten/src/ATen/core/PythonOpRegistrationTrampoline.h b/aten/src/ATen/core/PythonOpRegistrationTrampoline.h
index bec323c7d25b..062dbebc3ceb 100644
--- a/aten/src/ATen/core/PythonOpRegistrationTrampoline.h
+++ b/aten/src/ATen/core/PythonOpRegistrationTrampoline.h
@@ -2,19 +2,21 @@
 
 #include <ATen/core/dispatch/Dispatcher.h>
 
-// TODO: this can probably live in c10
+// TODO: We can get rid of this
 
 
 namespace at::impl {
 
+// Manages the single Python interpreter instance for PyTorch.
 class TORCH_API PythonOpRegistrationTrampoline final {
-  static std::atomic<c10::impl::PyInterpreter*> interpreter_;
+  static c10::impl::PyInterpreter* interpreter_;
 
 public:
-  //  Returns true if you successfully registered yourself (that means
-  //  you are in the hot seat for doing the operator registrations!)
+  // Register the Python interpreter. Returns true on first registration,
+  // false if an interpreter was already registered.
   static bool registerInterpreter(c10::impl::PyInterpreter*);
 
+  // Returns the registered interpreter via the global PyInterpreter hooks.
   // Returns nullptr if no interpreter has been registered yet.
   static c10::impl::PyInterpreter* getInterpreter();
 };
diff --git a/c10/core/impl/HermeticPyObjectTLS.cpp b/c10/core/impl/HermeticPyObjectTLS.cpp
deleted file mode 100644
index 856c63a93a92..000000000000
--- a/c10/core/impl/HermeticPyObjectTLS.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-#include <c10/core/impl/HermeticPyObjectTLS.h>
-
-namespace c10::impl {
-
-thread_local static std::atomic<bool> hermeticPyObjectState{false};
-
-std::atomic<bool> HermeticPyObjectTLS::haveState_{false};
-
-void HermeticPyObjectTLS::set_state(bool state) {
-  hermeticPyObjectState = state;
-}
-
-bool HermeticPyObjectTLS::get_tls_state() {
-  return hermeticPyObjectState;
-}
-
-void HermeticPyObjectTLS::init_state() {
-  haveState_ = true;
-}
-
-} // namespace c10::impl
diff --git a/c10/core/impl/HermeticPyObjectTLS.h b/c10/core/impl/HermeticPyObjectTLS.h
deleted file mode 100644
index a973a5d2cef8..000000000000
--- a/c10/core/impl/HermeticPyObjectTLS.h
+++ /dev/null
@@ -1,62 +0,0 @@
-#pragma once
-
-#include <c10/macros/Export.h>
-#include <atomic>
-
-namespace c10::impl {
-
-// This TLS controls whether or not we permanently associate PyObject
-// with Tensor the first time it is allocated.  When hermetic PyObject
-// TLS is enabled (state is true), we DO NOT save PyObjects to Tensor,
-// meaning you get a distinct PyObject whenever you execute the code in
-// question.
-struct C10_API HermeticPyObjectTLS {
-  static void set_state(bool state);
-  static bool get_state() {
-    // Hypothetical fastpath if torchdeploy/multipy // codespell:ignore multipy
-    // isn't used. Per
-    // https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p2055r0.pdf
-    // this qualifies relaxed access because it is a single-location data
-    // structure (only the boolean here).
-    //
-    // Forgetting about data races for a moment, is there a logical race?
-    //
-    //  - Boolean only ever transitions from false to true.  So the
-    //    critical situation is when one interpreter is already running
-    //    when a second interpreter switches haveState from false to true.
-    //
-    //  - The first interpreter is indifferent whether or not it sees
-    //    hasState true/false; obviously false works (this is what the
-    //    interpreter was previously using; more directly, the interpreter
-    //    calls into itself as the handler, so being hermetic is not
-    //    required), and true simply means serviced python operator calls will
-    //    be hermetic; in these cases it is expected to be functionally
-    //    equivalent.
-    //
-    //  - The second interpreter MUST see hasState true (as its requests will
-    //    be forwarded to the first interpreter), but it is assumed that there
-    //    is a synchronization between the interpreter initialization, and
-    //    when we actually perform operations, so it is guaranteed to see
-    //    hasState true.
-    //
-    // QED.
-    //
-    // This fastpath is currently disabled so that we can more easily test that
-    // hermetic mode works correctly even on stock build of PyTorch.
-    if (false && !haveState_.load(std::memory_order_relaxed))
-      return false;
-    return get_tls_state();
-  }
-  // Call this from the multipy/torchdeploy // codespell:ignore multipy
-  // top level
-  static void init_state();
-
- private:
-  // This only flipped once from false to true during
-  // torchdeploy/multipy initialization, // codespell:ignore multipy
-  // and never again.
-  static std::atomic<bool> haveState_;
-  static bool get_tls_state();
-};
-
-} // namespace c10::impl
diff --git a/c10/core/impl/PyObjectSlot.h b/c10/core/impl/PyObjectSlot.h
index e7d78f8360c3..8824d33eb224 100644
--- a/c10/core/impl/PyObjectSlot.h
+++ b/c10/core/impl/PyObjectSlot.h
@@ -1,6 +1,5 @@
 #pragma once
 
-#include <c10/core/impl/HermeticPyObjectTLS.h>
 #include <c10/core/impl/PyInterpreter.h>
 #include <c10/core/impl/PyInterpreterHooks.h>
 #include <c10/util/python_stub.h>
@@ -42,32 +41,15 @@ struct C10_API PyObjectSlot {
 
   PyObject* _unchecked_untagged_pyobj() const;
 
-  // Test the interpreter tag.  If tagged for the current interpreter, return
-  // a non-nullopt (but possibly null) PyObject.  If (possibly) untagged,
-  // returns a nullopt.  If it is definitely invalid, raises an error.
-  //
-  // If `ignore_hermetic_tls` is false and this function is called from a
-  // hermetic context (ie, `HermeticPyObjectTLS::get_state()` is true), then
-  // nullopt is returned. If `ignore_hermetic_tls` is true, then the hermetic
-  // context is ignored, allowing you to check the interpreter tag of a
-  // nonhermetic PyObject from within a hermetic context. This is necessary
-  // because there are some cases where the deallocator function of a
-  // nonhermetic PyObject is called from within a hermetic context, so it must
-  // be properly treated as a nonhermetic PyObject.
-  //
+  // Test the interpreter / PyObj as they may be null
   // NB: this lives in header so that we can avoid actually creating the
   // std::optional
 
-  // @todo alban: I'm not too sure what's going on here, we can probably delete
-  // it but it's worthwhile making sure
   std::optional<PyObject*> check_pyobj() const {
     impl::PyInterpreter* interpreter = getGlobalPyInterpreter();
     if (interpreter == nullptr || pyobj_ == nullptr) {
       return std::nullopt;
     }
-    if (c10::impl::HermeticPyObjectTLS::get_state()) {
-      return std::nullopt;
-    }
     return _unchecked_untagged_pyobj();
   }
 
diff --git a/cmake/prioritized_text.txt b/cmake/prioritized_text.txt
index e5e36f34f98d..f7d41b0bec3e 100644
--- a/cmake/prioritized_text.txt
+++ b/cmake/prioritized_text.txt
@@ -153,7 +153,6 @@ _ZN3c104impl12PyObjectSlot10owns_pyobjEv
 _ZN3c104impl12PyObjectSlot19maybe_destroy_pyobjEv
 _ZN3c104impl12PyObjectSlotC1Ev
 _ZN3c104impl12PyObjectSlotD2Ev
-_ZN3c104impl19HermeticPyObjectTLS13get_tls_stateEv
 _ZN3c104impl20TorchDispatchModeTLS13any_modes_setEb
 _ZN3c104impl23ExcludeDispatchKeyGuardC1ENS_14DispatchKeySetE
 _ZN3c104impl23ExcludeDispatchKeyGuardD2Ev
diff --git a/torch/csrc/PyInterpreter.cpp b/torch/csrc/PyInterpreter.cpp
index 993f8b8216a6..b863fd44c152 100644
--- a/torch/csrc/PyInterpreter.cpp
+++ b/torch/csrc/PyInterpreter.cpp
@@ -157,10 +157,10 @@ class PyInterpreterHolder {
  public:
   PyInterpreterHolder()
       : impl_(new c10::impl::PyInterpreter(
-            ConcretePyInterpreterVTable::instance())),
-        is_main_interpreter_(
-            at::impl::PythonOpRegistrationTrampoline::registerInterpreter(
-                impl_)) {}
+            ConcretePyInterpreterVTable::instance())) {
+    // Register the single interpreter
+    at::impl::PythonOpRegistrationTrampoline::registerInterpreter(impl_);
+  }
   PyInterpreterHolder(const PyInterpreterHolder&) = delete;
   PyInterpreterHolder(PyInterpreterHolder&&) = delete;
   PyInterpreterHolder& operator=(const PyInterpreterHolder&) = delete;
@@ -174,13 +174,14 @@ class PyInterpreterHolder {
   c10::impl::PyInterpreter* get() const noexcept {
     return impl_;
   }
+  // In single-interpreter mode, this is always true
+  // TODO: delete this
   bool is_main_interpreter() const noexcept {
-    return is_main_interpreter_;
+    return true;
   }
 
  private:
   c10::impl::PyInterpreter* impl_;
-  bool is_main_interpreter_;
 };
 
 py::object torchDispatchFromTensorImpl(
diff --git a/torch/csrc/Storage.cpp b/torch/csrc/Storage.cpp
index f6638bbd10c1..f8fe81c52e84 100644
--- a/torch/csrc/Storage.cpp
+++ b/torch/csrc/Storage.cpp
@@ -74,13 +74,9 @@ PyObject* THPStorage_NewWithStorage(
 
   s->cdata = c10::MaybeOwned<c10::Storage>::owned(std::move(_storage));
 
-  if (!c10::impl::HermeticPyObjectTLS::get_state()) {
-    s->is_hermetic = false;
-    const auto& storage = THPStorage_Unpack(s);
-    storage.unsafeGetStorageImpl()->pyobj_slot()->init_pyobj(obj);
-  } else {
-    s->is_hermetic = true;
-  }
+  s->is_hermetic = false;
+  const auto& storage = THPStorage_Unpack(s);
+  storage.unsafeGetStorageImpl()->pyobj_slot()->init_pyobj(obj);
 
   return obj;
 }
@@ -88,9 +84,6 @@ PyObject* THPStorage_NewWithStorage(
 // Wraps the c10::Storage with a storage PyObject
 PyObject* THPStorage_Wrap(c10::Storage storage) {
   c10::StorageImpl* storage_impl = storage.unsafeGetStorageImpl();
-  if (c10::impl::HermeticPyObjectTLS::get_state()) {
-    return THPStorage_NewWithStorage(THPStorageClass, std::move(storage));
-  }
   c10::impl::PyObjectSlot* pyobj_slot = storage_impl->pyobj_slot();
 
   std::optional<PyObject*> maybe_pyobj = pyobj_slot->check_pyobj();
diff --git a/torch/csrc/autograd/python_variable.cpp b/torch/csrc/autograd/python_variable.cpp
index 50b845adb637..066afff18dce 100644
--- a/torch/csrc/autograd/python_variable.cpp
+++ b/torch/csrc/autograd/python_variable.cpp
@@ -1,7 +1,6 @@
 #include <ATen/NamedTensorUtils.h>
 #include <c10/core/DeviceType.h>
 #include <c10/core/impl/GPUTrace.h>
-#include <c10/core/impl/HermeticPyObjectTLS.h>
 #include <c10/core/impl/PythonDispatcherTLS.h>
 #include <c10/util/irange.h>
 #include <pybind11/pytypes.h>
@@ -260,10 +259,6 @@ PyObject* THPVariable_Wrap(const at::TensorBase& var) {
     Py_RETURN_NONE;
   }
 
-  if (c10::impl::HermeticPyObjectTLS::get_state()) {
-    return THPVariable_NewWithVar((PyTypeObject*)THPVariableClass, var);
-  }
-
   std::optional<PyObject*> mb_obj =
       var.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj();
   if (mb_obj.has_value()) {
@@ -379,7 +374,6 @@ static bool THPVariable_tryResurrect(THPVariable* self) {
   // Flip THPVariable to be non-owning
   // (near use-after-free miss here: fresh MaybeOwned is created breaking
   // reference on Tensor in struct BEFORE we overwrite the old one)
-  TORCH_INTERNAL_ASSERT(!c10::impl::HermeticPyObjectTLS::get_state());
   self->cdata = MaybeOwned<Variable>::borrowed(tensor);
 
   // NB: At this point, tensor *could* be dead (e.g., some other C++ thread
@@ -2474,28 +2468,13 @@ static PyObject* THPVariable_NewWithVar(
     auto v = (THPVariable*)obj;
     // TODO: named constructor to avoid default initialization
     new (&v->cdata) MaybeOwned<Variable>();
-    if (c10::impl::HermeticPyObjectTLS::get_state()) {
-      // Do NOT initialize pyobj field on the tensor, you own the C++
-      v->cdata = MaybeOwned<Variable>::owned(Variable(_var));
-      TORCH_INTERNAL_ASSERT(
-          !check_has_torch_dispatch(obj),
-          "While HermeticPyObject was enabled, we attempted to create a tensor "
-          "subclass with __torch_dispatch__.  This violates the invariant that "
-          "operations in HermeticPyObject have equivalent C++ implementations. "
-          "If your operator registered from Python operator registration isn't "
-          "doing anything strange, there may be an internal PyTorch bug involving "
-          "not appropriately disabling TorchDispatchMode before executing "
-          "Python op registration.");
-    } else {
-      // Normal codepath
-      v->cdata = MaybeOwned<Variable>::owned(Variable(_var));
-      const auto& var = THPVariable_Unpack(v);
-      var.unsafeGetTensorImpl()->pyobj_slot()->init_pyobj(obj);
-      if (has_torch_dispatch_if_known.has_value()
-              ? *has_torch_dispatch_if_known
-              : check_has_torch_dispatch(obj)) {
-        var.unsafeGetTensorImpl()->set_python_dispatch(true);
-      }
+    v->cdata = MaybeOwned<Variable>::owned(Variable(_var));
+    const auto& var = THPVariable_Unpack(v);
+    var.unsafeGetTensorImpl()->pyobj_slot()->init_pyobj(obj);
+    if (has_torch_dispatch_if_known.has_value()
+            ? *has_torch_dispatch_if_known
+            : check_has_torch_dispatch(obj)) {
+      var.unsafeGetTensorImpl()->set_python_dispatch(true);
     }
   }
   return obj;
diff --git a/torch/csrc/utils/python_dispatch.cpp b/torch/csrc/utils/python_dispatch.cpp
index 9d6eb35c7178..4dad66f9fb06 100644
--- a/torch/csrc/utils/python_dispatch.cpp
+++ b/torch/csrc/utils/python_dispatch.cpp
@@ -81,40 +81,6 @@ inline static torch::CppFunction dispatch_str(const char* key, Func&& raw_f) {
   }
 }
 
-struct EnableHermeticPyObject {
-  EnableHermeticPyObject()
-      : old_(c10::impl::HermeticPyObjectTLS::get_state()),
-        old_excluded_python_(
-            c10::impl::tls_is_dispatch_key_excluded(at::DispatchKey::Python)),
-        old_python_(
-            c10::impl::tls_is_dispatch_key_included(at::DispatchKey::Python)),
-        old_python_snapshot_(c10::impl::tls_is_dispatch_key_included(
-            at::DispatchKey::PythonTLSSnapshot)) {
-    c10::impl::HermeticPyObjectTLS::set_state(true);
-    c10::impl::tls_set_dispatch_key_excluded(at::DispatchKey::Python, true);
-    c10::impl::tls_set_dispatch_key_included(at::DispatchKey::Python, false);
-    c10::impl::tls_set_dispatch_key_included(
-        at::DispatchKey::PythonTLSSnapshot, false);
-  }
-  ~EnableHermeticPyObject() {
-    c10::impl::HermeticPyObjectTLS::set_state(old_);
-    c10::impl::tls_set_dispatch_key_excluded(
-        at::DispatchKey::Python, old_excluded_python_);
-    c10::impl::tls_set_dispatch_key_included(
-        at::DispatchKey::Python, old_python_);
-    c10::impl::tls_set_dispatch_key_included(
-        at::DispatchKey::PythonTLSSnapshot, old_python_snapshot_);
-  }
-  EnableHermeticPyObject(const EnableHermeticPyObject&) = delete;
-  EnableHermeticPyObject(EnableHermeticPyObject&&) = delete;
-  EnableHermeticPyObject& operator=(const EnableHermeticPyObject&) = delete;
-  EnableHermeticPyObject& operator=(EnableHermeticPyObject&&) = delete;
-  bool old_;
-  bool old_excluded_python_;
-  bool old_python_;
-  bool old_python_snapshot_;
-};
-
 class PythonKernelHolder : public c10::OperatorKernel {
   c10::SafePyObject func_;
   c10::DispatchKey dispatch_key_;