[BE] Make PyObjectSlot use a global PyInterpreter and remove PyInterpreterStatus (#158427)

This PR is a bit more involved, but it effectively works to drastically simplify PyObjectSlot and PyInterpreter.

1) For PyObjectSlot we now use a global PyInterpreter, since there is only one. From here we change all of the call sites to rely on this assumption.
2) We also remove the "tags" of the PyInterpreter by removing `PyInterpreterStatus`.

For the reviewer: unfortunately `functorch/csrc/dim/dim.cpp` needed to be linted, so there is an unreadable amount of changes there. Fortunately, the only actual change in that file is the following, which just removes `getPyInterpreter()` from the `check_pyobj` call.

```
mpy::handle handle_from_tensor(Arena& A, TensorRef t) {
-  // fast case: tensor is live in python
-  std::optional<PyObject*> mb_obj =
-      t->unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(getPyInterpreter(), /*ignore_hermetic_tls=*/false);
-  if (mb_obj.has_value() && !t->unsafeGetTensorImpl()->pyobj_slot()->owns_pyobj()) {
-    return *mb_obj;
-  }
-  return A.autorelease(mpy::object::checked_steal(THPVariable_Wrap(*t)));
-}
-}
+  // fast case: tensor is live in python
+  std::optional<PyObject*> mb_obj =
+      t->unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(
+          /*ignore_hermetic_tls=*/false);
+  if (mb_obj.has_value() &&
+      !t->unsafeGetTensorImpl()->pyobj_slot()->owns_pyobj()) {
+    return *mb_obj;
+  }
+  return A.autorelease(mpy::object::checked_steal(THPVariable_Wrap(*t)));
+}
```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/158427
Approved by: https://github.com/albanD
2025-10-20 12:54:11 +08:00 · 2025-07-24 11:22:56 -07:00
parent 435edbcb5d
commit 1b99c1859c
18 changed files with 3224 additions and 2846 deletions
--- a/build_variables.bzl
+++ b/build_variables.bzl
@ -865,6 +865,7 @@ libtorch_python_core_sources = [
    "torch/csrc/QScheme.cpp",
    "torch/csrc/Module.cpp",
    "torch/csrc/PyInterpreter.cpp",
+    "torch/csrc/PyInterpreterHooks.cpp",
    "torch/csrc/python_dimname.cpp",
    "torch/csrc/Size.cpp",
    "torch/csrc/Storage.cpp",
--- a/c10/core/impl/PyInterpreter.h
+++ b/c10/core/impl/PyInterpreter.h
@ -240,24 +240,4 @@ struct C10_API PyInterpreter {
  void disarm() noexcept;
 };

-// PyInterpreterStatus describes what the state of its interpreter tag
-// is, relative to the thread currently holding the GIL.
-enum class PyInterpreterStatus {
-  // We just allocated the Tensor, it hasn't escaped to other threads,
-  // we know that it definitely hasn't been tagged to be associated
-  // with an interpreter.
-  DEFINITELY_UNINITIALIZED,
-  // We queried the interpreter field and it looked uninitialized.  But
-  // another thread may have raced with us to tag it with some other
-  // interpreter id.  So we will have to do a CEX to make sure we can
-  // actually nab it.
-  MAYBE_UNINITIALIZED,
-  // We queried the interpreter field and it was tagged to belong to us.
-  // This means we have sole write access (as we hold the GIL for this
-  // interpreter)
-  TAGGED_BY_US,
-  // Someone else tagged this.  We can't use this TensorImpl from Python.
-  TAGGED_BY_OTHER,
-};
-
 } // namespace c10::impl
--- a/c10/core/impl/PyInterpreterHooks.cpp
+++ b/c10/core/impl/PyInterpreterHooks.cpp
@ -0,0 +1,32 @@
+#include <c10/core/impl/PyInterpreterHooks.h>
+
+namespace c10::impl {
+
+// Define the registry
+C10_DEFINE_REGISTRY(
+    PyInterpreterHooksRegistry,
+    PyInterpreterHooksInterface,
+    PyInterpreterHooksArgs)
+
+const PyInterpreterHooksInterface& getPyInterpreterHooks() {
+  auto create_impl = [] { // factory, run once by the static initializer below
+#if !defined C10_MOBILE
+    auto hooks = PyInterpreterHooksRegistry()->Create(
+        "PyInterpreterHooks", PyInterpreterHooksArgs{});
+    if (hooks) { // prefer the implementation created from the registry
+      return hooks;
+    }
+#endif // registry lookup above is compiled out when C10_MOBILE is defined
+    // Fall back to the stub base class, whose getPyInterpreter() always throws
+    return std::make_unique<PyInterpreterHooksInterface>();
+  };
+  static auto hooks = create_impl(); // built on first call, reused afterwards
+  return *hooks;
+}
+
+// Returns the PyInterpreter reported by the hooks from getPyInterpreterHooks()
+PyInterpreter* getGlobalPyInterpreter() {
+  return getPyInterpreterHooks().getPyInterpreter();
+}
+
+} // namespace c10::impl
--- a/c10/core/impl/PyInterpreterHooks.h
+++ b/c10/core/impl/PyInterpreterHooks.h
@ -0,0 +1,39 @@
+#pragma once
+
+#include <c10/core/impl/PyInterpreter.h>
+#include <c10/macros/Export.h>
+#include <c10/util/Registry.h>
+#include <memory>
+
+namespace c10::impl {
+
+// Minimal hooks interface through which c10 code obtains the PyInterpreter
+struct C10_API PyInterpreterHooksInterface {
+  virtual ~PyInterpreterHooksInterface() = default;
+
+  // Get the PyInterpreter instance.
+  // Base-class stub: unconditionally fails the TORCH_CHECK below.
+  virtual PyInterpreter* getPyInterpreter() const {
+    TORCH_CHECK(
+        false,
+        "PyTorch was compiled without Python support. "
+        "Cannot access Python interpreter from C++.");
+  }
+};
+
+struct C10_API PyInterpreterHooksArgs{}; // empty args type passed to Create()
+
+C10_DECLARE_REGISTRY(
+    PyInterpreterHooksRegistry,
+    PyInterpreterHooksInterface,
+    PyInterpreterHooksArgs);
+
+#define REGISTER_PYTHON_HOOKS(clsname) \
+  C10_REGISTER_CLASS(PyInterpreterHooksRegistry, clsname, clsname)
+
+// Get the global PyInterpreter hooks instance
+C10_API const PyInterpreterHooksInterface& getPyInterpreterHooks();
+
+C10_API PyInterpreter* getGlobalPyInterpreter();
+
+} // namespace c10::impl
--- a/c10/core/impl/PyObjectSlot.cpp
+++ b/c10/core/impl/PyObjectSlot.cpp
@ -34,11 +34,6 @@ PyObject* PyObjectSlot::_unchecked_untagged_pyobj() const {
      reinterpret_cast<uintptr_t>(pyobj_) & ~0x1ULL);
 }

-void PyObjectSlot::unchecked_clear_pyobj(PyInterpreter* interpreter) {
-  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(interpreter == pyobj_interpreter_.load());
-  pyobj_ = nullptr;
-}
-
 PyInterpreter& PyObjectSlot::load_pyobj_interpreter() const {
  auto interpreter = pyobj_interpreter_.load(std::memory_order_acquire);
  if (interpreter) {
--- a/c10/core/impl/PyObjectSlot.h
+++ b/c10/core/impl/PyObjectSlot.h
@ -2,6 +2,7 @@

 #include <c10/core/impl/HermeticPyObjectTLS.h>
 #include <c10/core/impl/PyInterpreter.h>
+#include <c10/core/impl/PyInterpreterHooks.h>
 #include <c10/util/python_stub.h>
 #include <optional>

@ -24,11 +25,9 @@ struct C10_API PyObjectSlot {
  //
  // NB: THIS FUNCTION CAN RAISE AN EXCEPTION.  Make sure to clean up after
  // PyObject if necessary!
-  void init_pyobj(
-      PyInterpreter* self_interpreter,
-      PyObject* pyobj,
-      PyInterpreterStatus status) {
-    pyobj_interpreter_.store(self_interpreter, std::memory_order_relaxed);
+  void init_pyobj(PyObject* pyobj) {
+    pyobj_interpreter_.store(
+        getGlobalPyInterpreter(), std::memory_order_relaxed);
    pyobj_ = pyobj;
  }

@ -53,9 +52,10 @@ struct C10_API PyObjectSlot {
  //
  // NB: this lives in header so that we can avoid actually creating the
  // std::optional
-  std::optional<PyObject*> check_pyobj(
-      PyInterpreter* self_interpreter,
-      bool ignore_hermetic_tls = false) const {
+
+  // @todo alban: I'm not too sure what's going on here; we can probably delete
+  // it, but it's worth verifying first.
+  std::optional<PyObject*> check_pyobj(bool ignore_hermetic_tls = false) const {
    impl::PyInterpreter* interpreter =
        pyobj_interpreter_.load(std::memory_order_acquire);
    if (interpreter == nullptr) {
@ -69,10 +69,6 @@ struct C10_API PyObjectSlot {
    }
  }

-  // Clear the PyObject field for an interpreter, in situations where we
-  // statically know the tensor is tagged with our interpreter.
-  void unchecked_clear_pyobj(PyInterpreter* interpreter);
-
  PyInterpreter& load_pyobj_interpreter() const;

  bool owns_pyobj();
--- a/functorch/csrc/dim/dim.cpp
+++ b/functorch/csrc/dim/dim.cpp
--- a/torch/_dynamo/trace_rules.py
+++ b/torch/_dynamo/trace_rules.py
@ -583,7 +583,6 @@ torch_c_binding_in_graph_functions = dict.fromkeys(
        "torch._C._dispatch_has_kernel",
        "torch._C._dispatch_is_alias_key",
        "torch._C._dispatch_is_included_in_alias",
-        "torch._C._dispatch_is_main_interpreter",
        "torch._C._dispatch_isTensorSubclassLike",
        "torch._C._dispatch_key_for_device",
        "torch._C._dispatch_key_name",
--- a/torch/csrc/Module.cpp
+++ b/torch/csrc/Module.cpp
@ -409,10 +409,10 @@ static PyObject* THPModule_swap_tensor_impl(PyObject* _unused, PyObject* args) {
  // associated with the TensorImpl. Swap this field as well.
  std::optional<PyObject*> mb_obj_a =
      a->cdata->unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(
-          getPyInterpreter(), /*ignore_hermetic_tls=*/false);
+          /*ignore_hermetic_tls=*/false);
  std::optional<PyObject*> mb_obj_b =
      b->cdata->unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(
-          getPyInterpreter(), /*ignore_hermetic_tls=*/false);
+          /*ignore_hermetic_tls=*/false);
  TORCH_INTERNAL_ASSERT(
      mb_obj_a.has_value() && mb_obj_b.has_value(),
      "Both tensors should have PyObjects tagged by the current python interpreter");
@ -422,10 +422,8 @@ static PyObject* THPModule_swap_tensor_impl(PyObject* _unused, PyObject* args) {
  a->cdata = b->cdata;
  b->cdata = tmp;

-  a->cdata->unsafeGetTensorImpl()->pyobj_slot()->init_pyobj(
-      getPyInterpreter(), a_, c10::impl::PyInterpreterStatus::TAGGED_BY_US);
-  b->cdata->unsafeGetTensorImpl()->pyobj_slot()->init_pyobj(
-      getPyInterpreter(), b_, c10::impl::PyInterpreterStatus::TAGGED_BY_US);
+  a->cdata->unsafeGetTensorImpl()->pyobj_slot()->init_pyobj(a_);
+  b->cdata->unsafeGetTensorImpl()->pyobj_slot()->init_pyobj(b_);

  Py_RETURN_NONE;
  END_HANDLE_TH_ERRORS
--- a/torch/csrc/PyInterpreter.cpp
+++ b/torch/csrc/PyInterpreter.cpp
@ -586,7 +586,7 @@ static void set_tensor_attr_with_capsule(
    py::capsule& capsule,
    const char* attr_name) {
  std::optional<PyObject*> mb_obj = tensor->pyobj_slot()->check_pyobj(
-      getPyInterpreter(), /*ignore_hermetic_tls=*/false);
+      /*ignore_hermetic_tls=*/false);
  TORCH_CHECK(
      mb_obj.has_value(), "Tensor subclass's PyInterpreter has no value");
  auto obj = mb_obj.value();
@ -987,7 +987,3 @@ py::handle getTorchApiFunction(const c10::OperatorHandle& op) {
 c10::impl::PyInterpreter* getPyInterpreter() {
  return torch::detail::self_interpreter.get();
 }
-
-bool isMainPyInterpreter() {
-  return torch::detail::self_interpreter.is_main_interpreter();
-}
--- a/torch/csrc/PyInterpreter.h
+++ b/torch/csrc/PyInterpreter.h
@ -10,4 +10,4 @@ TORCH_PYTHON_API py::handle getTorchApiFunction(const c10::OperatorHandle& op);

 // TODO: Move these to a proper namespace
 TORCH_PYTHON_API c10::impl::PyInterpreter* getPyInterpreter();
-TORCH_PYTHON_API bool isMainPyInterpreter();
+TORCH_PYTHON_API void initializeGlobalPyInterpreter();
--- a/torch/csrc/PyInterpreterHooks.h
+++ b/torch/csrc/PyInterpreterHooks.h
@ -0,0 +1,15 @@
+#pragma once
+
+#include <c10/core/impl/PyInterpreterHooks.h>
+
+namespace torch::detail {
+
+// Concrete implementation of c10::impl::PyInterpreterHooksInterface
+class PyInterpreterHooks : public c10::impl::PyInterpreterHooksInterface {
+ public:
+  explicit PyInterpreterHooks(c10::impl::PyInterpreterHooksArgs);
+
+  c10::impl::PyInterpreter* getPyInterpreter() const override;
+};
+
+} // namespace torch::detail
--- a/torch/csrc/Storage.cpp
+++ b/torch/csrc/Storage.cpp
@ -35,7 +35,6 @@ PyTypeObject* THPStorageClass = nullptr;
 PyObject* THPStorage_NewWithStorage(
    PyTypeObject* type,
    c10::Storage _storage,
-    c10::impl::PyInterpreterStatus status,
    bool allow_preexisting_pyobj) {
  TORCH_CHECK(
      PyType_IsSubtype(type, &THPStorageType),
@ -43,7 +42,7 @@ PyObject* THPStorage_NewWithStorage(
      "Storage is not possible. Make sure your class inherits from Storage.");

  auto maybe_pyobj = _storage.unsafeGetStorageImpl()->pyobj_slot()->check_pyobj(
-      getPyInterpreter(), /*ignore_hermetic_tls=*/false);
+      /*ignore_hermetic_tls=*/false);
  if (maybe_pyobj.has_value() && maybe_pyobj.value()) {
    TORCH_CHECK(
        allow_preexisting_pyobj,
@ -78,8 +77,7 @@ PyObject* THPStorage_NewWithStorage(
  if (!c10::impl::HermeticPyObjectTLS::get_state()) {
    s->is_hermetic = false;
    const auto& storage = THPStorage_Unpack(s);
-    storage.unsafeGetStorageImpl()->pyobj_slot()->init_pyobj(
-        getPyInterpreter(), obj, status);
+    storage.unsafeGetStorageImpl()->pyobj_slot()->init_pyobj(obj);
  } else {
    s->is_hermetic = true;
  }
@ -91,17 +89,12 @@ PyObject* THPStorage_NewWithStorage(
 PyObject* THPStorage_Wrap(c10::Storage storage) {
  c10::StorageImpl* storage_impl = storage.unsafeGetStorageImpl();
  if (c10::impl::HermeticPyObjectTLS::get_state()) {
-    return THPStorage_NewWithStorage(
-        THPStorageClass,
-        std::move(storage),
-        c10::impl::PyInterpreterStatus::DEFINITELY_UNINITIALIZED);
+    return THPStorage_NewWithStorage(THPStorageClass, std::move(storage));
  }
  c10::impl::PyObjectSlot* pyobj_slot = storage_impl->pyobj_slot();

  std::optional<PyObject*> maybe_pyobj = pyobj_slot->check_pyobj(
-      getPyInterpreter(), /*ignore_hermetic_tls=*/false);
-  c10::impl::PyInterpreterStatus status =
-      c10::impl::PyInterpreterStatus::TAGGED_BY_US;
+      /*ignore_hermetic_tls=*/false);
  if (maybe_pyobj.has_value()) {
    auto obj = *maybe_pyobj;
    if (obj) {
@ -120,15 +113,8 @@ PyObject* THPStorage_Wrap(c10::Storage storage) {
        return obj;
      }
    }
-    status = c10::impl::PyInterpreterStatus::TAGGED_BY_US;
-  } else {
-    if (storage.use_count() <= 1) {
-      status = c10::impl::PyInterpreterStatus::DEFINITELY_UNINITIALIZED;
-    } else {
-      status = c10::impl::PyInterpreterStatus::MAYBE_UNINITIALIZED;
-    }
  }
-  return THPStorage_NewWithStorage(THPStorageClass, std::move(storage), status);
+  return THPStorage_NewWithStorage(THPStorageClass, std::move(storage));
 }

 static bool THPStorage_isPreservable(THPStorage* self) {
@ -142,8 +128,7 @@ static bool THPStorage_isPreservable(THPStorage* self) {
  }

  if (storage.unsafeGetStorageImpl()->pyobj_slot()->check_pyobj(
-          getPyInterpreter(), /*ignore_hermetic_tls=*/true) !=
-      (PyObject*)self) {
+          /*ignore_hermetic_tls=*/true) != (PyObject*)self) {
    return false;
  }
  if (storage.use_count() <= 1) {
@ -161,11 +146,10 @@ static bool THPStorage_tryPreserve(THPStorage* self) {
  c10::StorageImpl* storage_impl = storage.unsafeGetStorageImpl();

  auto maybe_pyobj = storage_impl->pyobj_slot()->check_pyobj(
-      getPyInterpreter(),
      /*ignore_hermetic_tls=*/true);
  // NOTE: It is possible to just set the PyObjectSlot here, but the point is
-  // that we should have already set PyObjectSlot when the storage PyObject was
-  // created.
+  // that we should have already set PyObjectSlot when the storage PyObject
+  // was created.
  TORCH_INTERNAL_ASSERT(
      maybe_pyobj.has_value(),
      "Trying to preserve a Python storage whose PyObjectSlot does not have a PyObject");
@ -373,8 +357,7 @@ static PyObject* THPStorage_pynew(
            at::DataPtr(),
            allocator,
            /*resizable=*/true,
-            device_opt),
-        c10::impl::PyInterpreterStatus::DEFINITELY_UNINITIALIZED);
+            device_opt));

    // torch.Storage(size, *, ...)
  } else if (r.idx == 1) {
@ -387,8 +370,7 @@ static PyObject* THPStorage_pynew(
            at::DataPtr(),
            allocator,
            /*resizable=*/true,
-            device_opt),
-        c10::impl::PyInterpreterStatus::DEFINITELY_UNINITIALIZED);
+            device_opt));

    // torch.Storage(sequence, *, ...)
  } else if (r.idx == 2) {
@ -412,8 +394,7 @@ static PyObject* THPStorage_pynew(
            at::DataPtr(),
            allocator,
            /*resizable=*/true,
-            device_opt),
-        c10::impl::PyInterpreterStatus::DEFINITELY_UNINITIALIZED);
+            device_opt));
    THPObjectPtr item;
    try {
      const auto& storage = THPStorage_Unpack(self);
@ -509,10 +490,8 @@ static PyObject* THPStorage_get(THPStorage* self, PyObject* index) {
        /* resizable */ false,
        device_opt);

-    PyObject* _ret = THPStorage_NewWithStorage(
-        Py_TYPE(self),
-        std::move(new_storage_impl),
-        c10::impl::PyInterpreterStatus::DEFINITELY_UNINITIALIZED);
+    PyObject* _ret =
+        THPStorage_NewWithStorage(Py_TYPE(self), std::move(new_storage_impl));

    return _ret;
  }
--- a/torch/csrc/Storage.h
+++ b/torch/csrc/Storage.h
@ -19,7 +19,6 @@ TORCH_PYTHON_API PyObject* THPStorage_Wrap(c10::Storage storage);
 TORCH_PYTHON_API PyObject* THPStorage_NewWithStorage(
    PyTypeObject* type,
    c10::Storage _storage,
-    c10::impl::PyInterpreterStatus status,
    bool allow_preexisting_pyobj = false);
 TORCH_PYTHON_API extern PyTypeObject* THPStorageClass;

--- a/torch/csrc/StorageMethods.cpp
+++ b/torch/csrc/StorageMethods.cpp
@ -390,10 +390,7 @@ static PyObject* THPStorage_fromFile(
    storage->set_nbytes(actual_nbytes);
  }

-  return THPStorage_NewWithStorage(
-      THPStorageClass,
-      std::move(storage),
-      c10::impl::PyInterpreterStatus::TAGGED_BY_US);
+  return THPStorage_NewWithStorage(THPStorageClass, std::move(storage));
  END_HANDLE_TH_ERRORS
 }

--- a/torch/csrc/StorageSharing.cpp
+++ b/torch/csrc/StorageSharing.cpp
@ -86,8 +86,7 @@ static PyObject* THPStorage_pyNewFilenameStorage(
          THManagedMapAllocator::makeDataPtr(
              "", handle.c_str(), flags, static_cast<size_t>(size)),
          /*allocator=*/nullptr,
-          /*resizable=*/false),
-      c10::impl::PyInterpreterStatus::TAGGED_BY_US);
+          /*resizable=*/false));
  END_HANDLE_TH_ERRORS
 }

@ -182,8 +181,7 @@ static PyObject* THPStorage_newSharedFilename(
          THManagedMapAllocator::makeDataPtr(
              manager_handle, object_handle, flags, size),
          /*allocator=*/nullptr,
-          /*resizable=*/false),
-      c10::impl::PyInterpreterStatus::TAGGED_BY_US);
+          /*resizable=*/false));
  END_HANDLE_TH_ERRORS
 }

@ -197,9 +195,7 @@ static PyObject* THPStorage_pyNewFdStorage(PyObject* _unused, PyObject* args) {
    return nullptr;
  }
  return THPStorage_NewWithStorage(
-      THPStorageClass,
-      at::new_shm_fd_storage(size),
-      c10::impl::PyInterpreterStatus::TAGGED_BY_US);
+      THPStorageClass, at::new_shm_fd_storage(size));
  END_HANDLE_TH_ERRORS
 }

@ -278,8 +274,7 @@ static PyObject* THPStorage_newSharedFd(PyObject* _unused, PyObject* args) {
          at::MapAllocator::makeDataPtr(
              at::WITH_FD, "", fd, flags, size, nullptr),
          /*allocator=*/nullptr,
-          /*resizable=*/false),
-      c10::impl::PyInterpreterStatus::TAGGED_BY_US);
+          /*resizable=*/false));
  END_HANDLE_TH_ERRORS
 }

@ -560,10 +555,7 @@ static PyObject* THPStorage_newSharedCuda(PyObject* _unused, PyObject* args) {
  base->set_resizable(false);
  base->set_received_cuda(true);

-  return THPStorage_NewWithStorage(
-      THPStorageClass,
-      std::move(base),
-      c10::impl::PyInterpreterStatus::TAGGED_BY_US);
+  return THPStorage_NewWithStorage(THPStorageClass, std::move(base));
 #else
  TORCH_CHECK(false, "CUDA is not available");
 #endif
--- a/torch/csrc/autograd/python_variable.cpp
+++ b/torch/csrc/autograd/python_variable.cpp
@ -209,7 +209,6 @@ PyObject* ParameterClass = nullptr;
 static PyObject* THPVariable_NewWithVar(
    PyTypeObject* type,
    const at::TensorBase& _var,
-    c10::impl::PyInterpreterStatus status,
    bool allow_preexisting_pyobj = false);

 // clang-tidy gets confused by static const
@ -261,16 +260,12 @@ PyObject* THPVariable_Wrap(const at::TensorBase& var) {
  }

  if (c10::impl::HermeticPyObjectTLS::get_state()) {
-    return THPVariable_NewWithVar(
-        (PyTypeObject*)THPVariableClass,
-        var,
-        c10::impl::PyInterpreterStatus::DEFINITELY_UNINITIALIZED);
+    return THPVariable_NewWithVar((PyTypeObject*)THPVariableClass, var);
  }

  std::optional<PyObject*> mb_obj =
      var.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(
-          getPyInterpreter(), /*ignore_hermetic_tls=*/false);
-  c10::impl::PyInterpreterStatus status{};
+          /*ignore_hermetic_tls=*/false);
  if (mb_obj.has_value()) {
    auto obj = *mb_obj;
    if (obj) {
@ -295,27 +290,17 @@ PyObject* THPVariable_Wrap(const at::TensorBase& var) {
    // (https://github.com/pytorch/pytorch/pull/56017).  Prior to this PR
    // being a thing, the PyObject field will get cleared when all references
    // to the Python object are removed.
-    status = c10::impl::PyInterpreterStatus::TAGGED_BY_US;
-  } else {
-    // Assumption: if a Tensor has been shared across threads, this induces
-    // a refcount bump.  Therefore, if the use count 1, we are the sole thread
-    // with access to this tensor and no race is possible.
-    if (var.use_count() <= 1) {
-      status = c10::impl::PyInterpreterStatus::DEFINITELY_UNINITIALIZED;
-    } else {
-      status = c10::impl::PyInterpreterStatus::MAYBE_UNINITIALIZED;
-    }
  }

  if (C10_LIKELY(var.device().type() != c10::kXLA)) {
-    return THPVariable_NewWithVar((PyTypeObject*)THPVariableClass, var, status);
+    return THPVariable_NewWithVar((PyTypeObject*)THPVariableClass, var);
  }

  if (auto clazz = getPythonTensorClass(var.device())) {
-    return THPVariable_NewWithVar((PyTypeObject*)clazz, var, status);
+    return THPVariable_NewWithVar((PyTypeObject*)clazz, var);
  }

-  return THPVariable_NewWithVar((PyTypeObject*)THPVariableClass, var, status);
+  return THPVariable_NewWithVar((PyTypeObject*)THPVariableClass, var);
 }

 static bool isResurrectable(THPVariable* self) {
@ -344,8 +329,7 @@ static bool isResurrectable(THPVariable* self) {
  }
  // Check if this is hermetic. If it is, no resurrection.
  if (tensor.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(
-          getPyInterpreter(), /*ignore_hermetic_tls=*/false) !=
-      (PyObject*)self) {
+          /*ignore_hermetic_tls=*/false) != (PyObject*)self) {
    return false;
  }
  return true;
@ -371,7 +355,6 @@ static bool THPVariable_tryResurrect(THPVariable* self) {

  c10::TensorImpl* tensor_impl = tensor.unsafeGetTensorImpl();
  auto maybe_pyobj = tensor_impl->pyobj_slot()->check_pyobj(
-      getPyInterpreter(),
      /*ignore_hermetic_tls=*/false);

  TORCH_INTERNAL_ASSERT(
@ -587,10 +570,7 @@ static PyObject* THPVariable_as_subclass(
  // stack
  torch_dispatch_mode::StashTorchDispatchStackGuard td_g;
  c10::impl::DisablePythonDispatcher dpd_g;
-  return THPVariable_NewWithVar(
-      (PyTypeObject*)cls,
-      self.alias(),
-      c10::impl::PyInterpreterStatus::DEFINITELY_UNINITIALIZED);
+  return THPVariable_NewWithVar((PyTypeObject*)cls, self.alias());
  END_HANDLE_TH_ERRORS
 }

@ -642,10 +622,7 @@ static PyObject* THPVariable_make_subclass(
    data.unsafeGetTensorImpl()->_change_backend_component_keys(r.device(6));
  }

-  return THPVariable_NewWithVar(
-      (PyTypeObject*)cls,
-      data,
-      c10::impl::PyInterpreterStatus::DEFINITELY_UNINITIALIZED);
+  return THPVariable_NewWithVar((PyTypeObject*)cls, data);
  END_HANDLE_TH_ERRORS
 }

@ -790,10 +767,7 @@ static PyObject* THPVariable_make_wrapper_subclass(
    tensor.unsafeGetTensorImpl()->set_python_custom_layout(true);
  }

-  return THPVariable_NewWithVar(
-      (PyTypeObject*)cls,
-      tensor,
-      c10::impl::PyInterpreterStatus::DEFINITELY_UNINITIALIZED);
+  return THPVariable_NewWithVar((PyTypeObject*)cls, tensor);
  END_HANDLE_TH_ERRORS
 }

@ -1821,7 +1795,6 @@ PyObject* THPVariable_pynew(
  return THPVariable_NewWithVar(
      type,
      tensor,
-      c10::impl::PyInterpreterStatus::MAYBE_UNINITIALIZED,
      /*allow_preexisting_pyobj=*/true);
  END_HANDLE_TH_ERRORS
 }
@ -1874,8 +1847,7 @@ static int THPVariable_subclass_clear(THPVariable* self) {

    if (!self->cdata.unsafeIsBorrowed() &&
        tensor.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(
-            getPyInterpreter(), /*ignore_hermetic_tls=*/false) ==
-            (PyObject*)self) {
+            /*ignore_hermetic_tls=*/false) == (PyObject*)self) {
      // TODO: empirically, on OS X this assert appears to be untrue
      // In test_py_tensors_multi_async_call - ProcessGroupRpcTestWithSpawn
      // distributed/rpc/test_process_group_agent.py
@ -2047,17 +2019,10 @@ static void THPVariable_subclass_dealloc(PyObject* self) {
  Py_DECREF(type);
 }

-// Creates a new Python object for a Variable.  The status parameter
-// specifies what the interpreter tag status on the object is; for
-// example, if you ran check_pyobj, the return optional of this object
-// tells you if the tensor was already tagged or not so you can pass
-// TAGGED_BY_US or MAYBE_UNINITIALIZED; in other cases, you know where
-// var came from and can directly assert that it's DEFINITELY_UNINITIALIZED.
-// It's ALWAYS safe (albeit slower) to call this with MAYBE_UNINITIALIZED.
+// Creates a new Python object for a Variable.
 static PyObject* THPVariable_NewWithVar(
    PyTypeObject* type,
    const at::TensorBase& _var,
-    c10::impl::PyInterpreterStatus status,
    bool allow_preexisting_pyobj) {
  // Make sure that the reinterpret into a THPVariable* will be valid
  TORCH_CHECK(
@ -2068,7 +2033,7 @@ static PyObject* THPVariable_NewWithVar(
  // This function overwrite the Tensor's pyobj field without extra checks
  // Make sure it is not set otherwise we would leak memory
  auto mb_obj = _var.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(
-      getPyInterpreter(), /*ignore_hermetic_tls=*/false);
+      /*ignore_hermetic_tls=*/false);

  // Under some circumstances, we may attempt to create a new Python
  // object for a variable that already has a Python object.  The most common
@ -2150,8 +2115,7 @@ static PyObject* THPVariable_NewWithVar(
      // Normal codepath
      v->cdata = MaybeOwned<Variable>::owned(Variable(_var));
      const auto& var = THPVariable_Unpack(v);
-      var.unsafeGetTensorImpl()->pyobj_slot()->init_pyobj(
-          getPyInterpreter(), obj, status);
+      var.unsafeGetTensorImpl()->pyobj_slot()->init_pyobj(obj);
      if (check_has_torch_dispatch(obj)) {
        var.unsafeGetTensorImpl()->set_python_dispatch(true);
      }
--- a/torch/csrc/utils/python_dispatch.cpp
+++ b/torch/csrc/utils/python_dispatch.cpp
@ -209,12 +209,10 @@ class PythonKernelHolder : public c10::OperatorKernel {
  }
 };

+// @todo sahanp: AFAICT only REGISTER is used in the codebase. This can be
+// removed / simplified.
 static torch::_RegisterOrVerify register_or_verify() {
-  if (isMainPyInterpreter()) {
-    return torch::_RegisterOrVerify::REGISTER;
-  } else {
-    return torch::_RegisterOrVerify::VERIFY;
-  }
+  return torch::_RegisterOrVerify::REGISTER;
 }

 static py::object ophandle_call_boxed(
@ -287,7 +285,6 @@ void initDispatchBindings(PyObject* module) {
      .def(
          "reset",
          [](const py::object& self) {
-            TORCH_INTERNAL_ASSERT(isMainPyInterpreter());
            self.cast<torch::Library&>().reset();
            return;
          },
@ -297,7 +294,6 @@ void initDispatchBindings(PyObject* module) {
      .def(
          "def_",
          [](py::object self, const char* schema, const char* alias) {
-            TORCH_INTERNAL_ASSERT(isMainPyInterpreter());
            self.cast<torch::Library&>().def(
                torch::schema(schema, parseAliasAnalysisKind(alias)));
            return self;
@ -311,7 +307,6 @@ void initDispatchBindings(PyObject* module) {
      .def(
          "def_legacy",
          [](py::object self, const char* schema) {
-            TORCH_INTERNAL_ASSERT(isMainPyInterpreter());
            self.cast<torch::Library&>().def(torch::jit::parseSchema(schema));
            return self;
          },
@ -331,7 +326,6 @@ void initDispatchBindings(PyObject* module) {
             const char* name,
             const char* dispatch,
             const char* debug) {
-            TORCH_INTERNAL_ASSERT(isMainPyInterpreter());
            self.cast<torch::Library&>().def(
                name, dispatch_str(dispatch, [](const at::Tensor& a) {
                        return a;
@ -349,7 +343,6 @@ void initDispatchBindings(PyObject* module) {
             const char* dispatch,
             const char* alias,
             const char* debug) {
-            TORCH_INTERNAL_ASSERT(isMainPyInterpreter());
            self.cast<torch::Library&>().def(
                torch::schema(schema, parseAliasAnalysisKind(alias)),
                dispatch_str(dispatch, [](const at::Tensor& a) {
@ -370,7 +363,6 @@ void initDispatchBindings(PyObject* module) {
             const char* name,
             const char* dispatch,
             const char* debug) {
-            TORCH_INTERNAL_ASSERT(isMainPyInterpreter());
            self.cast<torch::Library&>().impl(
                name, dispatch_str(dispatch, [](const at::Tensor& a) {
                        return a;
@ -465,7 +457,6 @@ void initDispatchBindings(PyObject* module) {
      .def(
          "fallback_fallthrough",
          [](py::object self, const char* dispatch) {
-            TORCH_INTERNAL_ASSERT(isMainPyInterpreter());
            self.cast<torch::Library&>().fallback(
                dispatch_str(dispatch, CppFunction::makeFallthrough()));
            return self;
@ -480,7 +471,6 @@ void initDispatchBindings(PyObject* module) {
             bool with_keyset) {
            HANDLE_TH_ERRORS
            auto& lib = self.cast<torch::Library&>();
-            TORCH_INTERNAL_ASSERT(isMainPyInterpreter());
            if (func.is(py::module::import("torch.library")
                            .attr("fallthrough_kernel"))) {
              lib.fallback(
@ -913,8 +903,6 @@ void initDispatchBindings(PyObject* module) {
        handle.setReportErrorCallback_(std::move(callback_obj));
      });

-  m.def(
-      "_dispatch_is_main_interpreter", []() { return isMainPyInterpreter(); });
  m.def("_dispatch_pystub", [](const char* name, const char* overload) {
    return c10::Dispatcher::singleton().getPyStub(
        c10::OperatorName(name, overload));