[BE] Remove HermeticPyObjectTLS and Simplify PythonOpRegistrationTrampoline (#163464)

Removes HermeticPyObjectTLS as we no longer need since torch deploy is no longer supported. PythonOpRegistrationTrampoline is also drastically simplified as and being prepped for removal in a future PR.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/163464
Approved by: https://github.com/albanD, https://github.com/Skylion007
This commit is contained in:
PaliC
2025-09-25 22:11:36 +09:00
committed by PyTorch MergeBot
parent c58e096cd0
commit 94195a37ae
10 changed files with 31 additions and 202 deletions

View File

@ -1,32 +1,22 @@
#include <ATen/core/PythonOpRegistrationTrampoline.h>
#include <c10/core/impl/PyInterpreterHooks.h>
// TODO: delete this
namespace at::impl {
// The strategy is that all python interpreters attempt to register themselves
// as the main interpreter, but only one wins. Only that interpreter is
// allowed to interact with the C++ dispatcher. Furthermore, when we execute
// logic on that interpreter, we do so hermetically, never setting pyobj field
// on Tensor.
std::atomic<c10::impl::PyInterpreter*>
PythonOpRegistrationTrampoline::interpreter_{nullptr};
c10::impl::PyInterpreter* PythonOpRegistrationTrampoline::interpreter_ = nullptr;
c10::impl::PyInterpreter* PythonOpRegistrationTrampoline::getInterpreter() {
return PythonOpRegistrationTrampoline::interpreter_.load();
return c10::impl::getGlobalPyInterpreter();
}
bool PythonOpRegistrationTrampoline::registerInterpreter(
c10::impl::PyInterpreter* interp) {
c10::impl::PyInterpreter* expected = nullptr;
interpreter_.compare_exchange_strong(expected, interp);
if (expected != nullptr) {
// This is the second (or later) Python interpreter, which means we need
// non-trivial hermetic PyObject TLS
c10::impl::HermeticPyObjectTLS::init_state();
if (interpreter_ != nullptr) {
return false;
} else {
return true;
}
interpreter_ = interp;
return true;
}
} // namespace at::impl

View File

@ -2,19 +2,21 @@
#include <ATen/core/dispatch/Dispatcher.h>
// TODO: this can probably live in c10
// TODO: We can get rid of this
namespace at::impl {
// Manages the single Python interpreter instance for PyTorch.
class TORCH_API PythonOpRegistrationTrampoline final {
static std::atomic<c10::impl::PyInterpreter*> interpreter_;
static c10::impl::PyInterpreter* interpreter_;
public:
// Returns true if you successfully registered yourself (that means
// you are in the hot seat for doing the operator registrations!)
// Register the Python interpreter. Returns true on first registration,
// false if an interpreter was already registered.
static bool registerInterpreter(c10::impl::PyInterpreter*);
// Returns the registered interpreter via the global PyInterpreter hooks.
// Returns nullptr if no interpreter has been registered yet.
static c10::impl::PyInterpreter* getInterpreter();
};

View File

@ -1,21 +0,0 @@
#include <c10/core/impl/HermeticPyObjectTLS.h>
namespace c10::impl {
thread_local static std::atomic<bool> hermeticPyObjectState{false};
std::atomic<bool> HermeticPyObjectTLS::haveState_{false};
void HermeticPyObjectTLS::set_state(bool state) {
hermeticPyObjectState = state;
}
bool HermeticPyObjectTLS::get_tls_state() {
return hermeticPyObjectState;
}
void HermeticPyObjectTLS::init_state() {
haveState_ = true;
}
} // namespace c10::impl

View File

@ -1,62 +0,0 @@
#pragma once
#include <c10/macros/Export.h>
#include <atomic>
namespace c10::impl {
// This TLS controls whether or not we permanently associate PyObject
// with Tensor the first time it is allocated. When hermetic PyObject
// TLS is enabled (state is true), we DO NOT save PyObjects to Tensor,
// meaning you get a distinct PyObject whenever you execute the code in
// question.
struct C10_API HermeticPyObjectTLS {
static void set_state(bool state);
static bool get_state() {
// Hypothetical fastpath if torchdeploy/multipy // codespell:ignore multipy
// isn't used. Per
// https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p2055r0.pdf
// this qualifies relaxed access because it is a single-location data
// structure (only the boolean here).
//
// Forgetting about data races for a moment, is there a logical race?
//
// - Boolean only ever transitions from false to true. So the
// critical situation is when one interpreter is already running
// when a second interpreter switches haveState from false to true.
//
// - The first interpreter is indifferent whether or not it sees
// hasState true/false; obviously false works (this is what the
// interpreter was previously using; more directly, the interpreter
// calls into itself as the handler, so being hermetic is not
// required), and true simply means serviced python operator calls will
// be hermetic; in these cases it is expected to be functionally
// equivalent.
//
// - The second interpreter MUST see hasState true (as its requests will
// be forwarded to the first interpreter), but it is assumed that there
// is a synchronization between the interpreter initialization, and
// when we actually perform operations, so it is guaranteed to see
// hasState true.
//
// QED.
//
// This fastpath is currently disabled so that we can more easily test that
// hermetic mode works correctly even on stock build of PyTorch.
if (false && !haveState_.load(std::memory_order_relaxed))
return false;
return get_tls_state();
}
// Call this from the multipy/torchdeploy // codespell:ignore multipy
// top level
static void init_state();
private:
// This only flipped once from false to true during
// torchdeploy/multipy initialization, // codespell:ignore multipy
// and never again.
static std::atomic<bool> haveState_;
static bool get_tls_state();
};
} // namespace c10::impl

View File

@ -1,6 +1,5 @@
#pragma once
#include <c10/core/impl/HermeticPyObjectTLS.h>
#include <c10/core/impl/PyInterpreter.h>
#include <c10/core/impl/PyInterpreterHooks.h>
#include <c10/util/python_stub.h>
@ -42,32 +41,15 @@ struct C10_API PyObjectSlot {
PyObject* _unchecked_untagged_pyobj() const;
// Test the interpreter tag. If tagged for the current interpreter, return
// a non-nullopt (but possibly null) PyObject. If (possibly) untagged,
// returns a nullopt. If it is definitely invalid, raises an error.
//
// If `ignore_hermetic_tls` is false and this function is called from a
// hermetic context (ie, `HermeticPyObjectTLS::get_state()` is true), then
// nullopt is returned. If `ignore_hermetic_tls` is true, then the hermetic
// context is ignored, allowing you to check the interpreter tag of a
// nonhermetic PyObject from within a hermetic context. This is necessary
// because there are some cases where the deallocator function of a
// nonhermetic PyObject is called from within a hermetic context, so it must
// be properly treated as a nonhermetic PyObject.
//
// Test the interpreter / PyObj as they may be null
// NB: this lives in header so that we can avoid actually creating the
// std::optional
// @todo alban: I'm not too sure what's going on here, we can probably delete
// it but it's worthwhile making sure
std::optional<PyObject*> check_pyobj() const {
impl::PyInterpreter* interpreter = getGlobalPyInterpreter();
if (interpreter == nullptr || pyobj_ == nullptr) {
return std::nullopt;
}
if (c10::impl::HermeticPyObjectTLS::get_state()) {
return std::nullopt;
}
return _unchecked_untagged_pyobj();
}

View File

@ -153,7 +153,6 @@ _ZN3c104impl12PyObjectSlot10owns_pyobjEv
_ZN3c104impl12PyObjectSlot19maybe_destroy_pyobjEv
_ZN3c104impl12PyObjectSlotC1Ev
_ZN3c104impl12PyObjectSlotD2Ev
_ZN3c104impl19HermeticPyObjectTLS13get_tls_stateEv
_ZN3c104impl20TorchDispatchModeTLS13any_modes_setEb
_ZN3c104impl23ExcludeDispatchKeyGuardC1ENS_14DispatchKeySetE
_ZN3c104impl23ExcludeDispatchKeyGuardD2Ev

View File

@ -157,10 +157,10 @@ class PyInterpreterHolder {
public:
PyInterpreterHolder()
: impl_(new c10::impl::PyInterpreter(
ConcretePyInterpreterVTable::instance())),
is_main_interpreter_(
at::impl::PythonOpRegistrationTrampoline::registerInterpreter(
impl_)) {}
ConcretePyInterpreterVTable::instance())) {
// Register the single interpreter
at::impl::PythonOpRegistrationTrampoline::registerInterpreter(impl_);
}
PyInterpreterHolder(const PyInterpreterHolder&) = delete;
PyInterpreterHolder(PyInterpreterHolder&&) = delete;
PyInterpreterHolder& operator=(const PyInterpreterHolder&) = delete;
@ -174,13 +174,14 @@ class PyInterpreterHolder {
c10::impl::PyInterpreter* get() const noexcept {
return impl_;
}
// In single-interpreter mode, this is always true
// TODO: delete this
bool is_main_interpreter() const noexcept {
return is_main_interpreter_;
return true;
}
private:
c10::impl::PyInterpreter* impl_;
bool is_main_interpreter_;
};
py::object torchDispatchFromTensorImpl(

View File

@ -74,13 +74,9 @@ PyObject* THPStorage_NewWithStorage(
s->cdata = c10::MaybeOwned<c10::Storage>::owned(std::move(_storage));
if (!c10::impl::HermeticPyObjectTLS::get_state()) {
s->is_hermetic = false;
const auto& storage = THPStorage_Unpack(s);
storage.unsafeGetStorageImpl()->pyobj_slot()->init_pyobj(obj);
} else {
s->is_hermetic = true;
}
s->is_hermetic = false;
const auto& storage = THPStorage_Unpack(s);
storage.unsafeGetStorageImpl()->pyobj_slot()->init_pyobj(obj);
return obj;
}
@ -88,9 +84,6 @@ PyObject* THPStorage_NewWithStorage(
// Wraps the c10::Storage with a storage PyObject
PyObject* THPStorage_Wrap(c10::Storage storage) {
c10::StorageImpl* storage_impl = storage.unsafeGetStorageImpl();
if (c10::impl::HermeticPyObjectTLS::get_state()) {
return THPStorage_NewWithStorage(THPStorageClass, std::move(storage));
}
c10::impl::PyObjectSlot* pyobj_slot = storage_impl->pyobj_slot();
std::optional<PyObject*> maybe_pyobj = pyobj_slot->check_pyobj();

View File

@ -1,7 +1,6 @@
#include <ATen/NamedTensorUtils.h>
#include <c10/core/DeviceType.h>
#include <c10/core/impl/GPUTrace.h>
#include <c10/core/impl/HermeticPyObjectTLS.h>
#include <c10/core/impl/PythonDispatcherTLS.h>
#include <c10/util/irange.h>
#include <pybind11/pytypes.h>
@ -260,10 +259,6 @@ PyObject* THPVariable_Wrap(const at::TensorBase& var) {
Py_RETURN_NONE;
}
if (c10::impl::HermeticPyObjectTLS::get_state()) {
return THPVariable_NewWithVar((PyTypeObject*)THPVariableClass, var);
}
std::optional<PyObject*> mb_obj =
var.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj();
if (mb_obj.has_value()) {
@ -379,7 +374,6 @@ static bool THPVariable_tryResurrect(THPVariable* self) {
// Flip THPVariable to be non-owning
// (near use-after-free miss here: fresh MaybeOwned is created breaking
// reference on Tensor in struct BEFORE we overwrite the old one)
TORCH_INTERNAL_ASSERT(!c10::impl::HermeticPyObjectTLS::get_state());
self->cdata = MaybeOwned<Variable>::borrowed(tensor);
// NB: At this point, tensor *could* be dead (e.g., some other C++ thread
@ -2474,28 +2468,13 @@ static PyObject* THPVariable_NewWithVar(
auto v = (THPVariable*)obj;
// TODO: named constructor to avoid default initialization
new (&v->cdata) MaybeOwned<Variable>();
if (c10::impl::HermeticPyObjectTLS::get_state()) {
// Do NOT initialize pyobj field on the tensor, you own the C++
v->cdata = MaybeOwned<Variable>::owned(Variable(_var));
TORCH_INTERNAL_ASSERT(
!check_has_torch_dispatch(obj),
"While HermeticPyObject was enabled, we attempted to create a tensor "
"subclass with __torch_dispatch__. This violates the invariant that "
"operations in HermeticPyObject have equivalent C++ implementations. "
"If your operator registered from Python operator registration isn't "
"doing anything strange, there may be an internal PyTorch bug involving "
"not appropriately disabling TorchDispatchMode before executing "
"Python op registration.");
} else {
// Normal codepath
v->cdata = MaybeOwned<Variable>::owned(Variable(_var));
const auto& var = THPVariable_Unpack(v);
var.unsafeGetTensorImpl()->pyobj_slot()->init_pyobj(obj);
if (has_torch_dispatch_if_known.has_value()
? *has_torch_dispatch_if_known
: check_has_torch_dispatch(obj)) {
var.unsafeGetTensorImpl()->set_python_dispatch(true);
}
v->cdata = MaybeOwned<Variable>::owned(Variable(_var));
const auto& var = THPVariable_Unpack(v);
var.unsafeGetTensorImpl()->pyobj_slot()->init_pyobj(obj);
if (has_torch_dispatch_if_known.has_value()
? *has_torch_dispatch_if_known
: check_has_torch_dispatch(obj)) {
var.unsafeGetTensorImpl()->set_python_dispatch(true);
}
}
return obj;

View File

@ -81,40 +81,6 @@ inline static torch::CppFunction dispatch_str(const char* key, Func&& raw_f) {
}
}
struct EnableHermeticPyObject {
EnableHermeticPyObject()
: old_(c10::impl::HermeticPyObjectTLS::get_state()),
old_excluded_python_(
c10::impl::tls_is_dispatch_key_excluded(at::DispatchKey::Python)),
old_python_(
c10::impl::tls_is_dispatch_key_included(at::DispatchKey::Python)),
old_python_snapshot_(c10::impl::tls_is_dispatch_key_included(
at::DispatchKey::PythonTLSSnapshot)) {
c10::impl::HermeticPyObjectTLS::set_state(true);
c10::impl::tls_set_dispatch_key_excluded(at::DispatchKey::Python, true);
c10::impl::tls_set_dispatch_key_included(at::DispatchKey::Python, false);
c10::impl::tls_set_dispatch_key_included(
at::DispatchKey::PythonTLSSnapshot, false);
}
~EnableHermeticPyObject() {
c10::impl::HermeticPyObjectTLS::set_state(old_);
c10::impl::tls_set_dispatch_key_excluded(
at::DispatchKey::Python, old_excluded_python_);
c10::impl::tls_set_dispatch_key_included(
at::DispatchKey::Python, old_python_);
c10::impl::tls_set_dispatch_key_included(
at::DispatchKey::PythonTLSSnapshot, old_python_snapshot_);
}
EnableHermeticPyObject(const EnableHermeticPyObject&) = delete;
EnableHermeticPyObject(EnableHermeticPyObject&&) = delete;
EnableHermeticPyObject& operator=(const EnableHermeticPyObject&) = delete;
EnableHermeticPyObject& operator=(EnableHermeticPyObject&&) = delete;
bool old_;
bool old_excluded_python_;
bool old_python_;
bool old_python_snapshot_;
};
class PythonKernelHolder : public c10::OperatorKernel {
c10::SafePyObject func_;
c10::DispatchKey dispatch_key_;