mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
[BE] Remove HermeticPyObjectTLS and Simplify PythonOpRegistrationTrampoline (#163464)
Removes HermeticPyObjectTLS as we no longer need since torch deploy is no longer supported. PythonOpRegistrationTrampoline is also drastically simplified as and being prepped for removal in a future PR. Pull Request resolved: https://github.com/pytorch/pytorch/pull/163464 Approved by: https://github.com/albanD, https://github.com/Skylion007
This commit is contained in:
@ -1,32 +1,22 @@
|
||||
#include <ATen/core/PythonOpRegistrationTrampoline.h>
|
||||
#include <c10/core/impl/PyInterpreterHooks.h>
|
||||
|
||||
// TODO: delete this
|
||||
namespace at::impl {
|
||||
|
||||
// The strategy is that all python interpreters attempt to register themselves
|
||||
// as the main interpreter, but only one wins. Only that interpreter is
|
||||
// allowed to interact with the C++ dispatcher. Furthermore, when we execute
|
||||
// logic on that interpreter, we do so hermetically, never setting pyobj field
|
||||
// on Tensor.
|
||||
|
||||
std::atomic<c10::impl::PyInterpreter*>
|
||||
PythonOpRegistrationTrampoline::interpreter_{nullptr};
|
||||
c10::impl::PyInterpreter* PythonOpRegistrationTrampoline::interpreter_ = nullptr;
|
||||
|
||||
c10::impl::PyInterpreter* PythonOpRegistrationTrampoline::getInterpreter() {
|
||||
return PythonOpRegistrationTrampoline::interpreter_.load();
|
||||
return c10::impl::getGlobalPyInterpreter();
|
||||
}
|
||||
|
||||
bool PythonOpRegistrationTrampoline::registerInterpreter(
|
||||
c10::impl::PyInterpreter* interp) {
|
||||
c10::impl::PyInterpreter* expected = nullptr;
|
||||
interpreter_.compare_exchange_strong(expected, interp);
|
||||
if (expected != nullptr) {
|
||||
// This is the second (or later) Python interpreter, which means we need
|
||||
// non-trivial hermetic PyObject TLS
|
||||
c10::impl::HermeticPyObjectTLS::init_state();
|
||||
if (interpreter_ != nullptr) {
|
||||
return false;
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
interpreter_ = interp;
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace at::impl
|
||||
|
@ -2,19 +2,21 @@
|
||||
|
||||
#include <ATen/core/dispatch/Dispatcher.h>
|
||||
|
||||
// TODO: this can probably live in c10
|
||||
// TODO: We can get rid of this
|
||||
|
||||
|
||||
namespace at::impl {
|
||||
|
||||
// Manages the single Python interpreter instance for PyTorch.
|
||||
class TORCH_API PythonOpRegistrationTrampoline final {
|
||||
static std::atomic<c10::impl::PyInterpreter*> interpreter_;
|
||||
static c10::impl::PyInterpreter* interpreter_;
|
||||
|
||||
public:
|
||||
// Returns true if you successfully registered yourself (that means
|
||||
// you are in the hot seat for doing the operator registrations!)
|
||||
// Register the Python interpreter. Returns true on first registration,
|
||||
// false if an interpreter was already registered.
|
||||
static bool registerInterpreter(c10::impl::PyInterpreter*);
|
||||
|
||||
// Returns the registered interpreter via the global PyInterpreter hooks.
|
||||
// Returns nullptr if no interpreter has been registered yet.
|
||||
static c10::impl::PyInterpreter* getInterpreter();
|
||||
};
|
||||
|
@ -1,21 +0,0 @@
|
||||
#include <c10/core/impl/HermeticPyObjectTLS.h>
|
||||
|
||||
namespace c10::impl {
|
||||
|
||||
thread_local static std::atomic<bool> hermeticPyObjectState{false};
|
||||
|
||||
std::atomic<bool> HermeticPyObjectTLS::haveState_{false};
|
||||
|
||||
void HermeticPyObjectTLS::set_state(bool state) {
|
||||
hermeticPyObjectState = state;
|
||||
}
|
||||
|
||||
bool HermeticPyObjectTLS::get_tls_state() {
|
||||
return hermeticPyObjectState;
|
||||
}
|
||||
|
||||
void HermeticPyObjectTLS::init_state() {
|
||||
haveState_ = true;
|
||||
}
|
||||
|
||||
} // namespace c10::impl
|
@ -1,62 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <c10/macros/Export.h>
|
||||
#include <atomic>
|
||||
|
||||
namespace c10::impl {
|
||||
|
||||
// This TLS controls whether or not we permanently associate PyObject
|
||||
// with Tensor the first time it is allocated. When hermetic PyObject
|
||||
// TLS is enabled (state is true), we DO NOT save PyObjects to Tensor,
|
||||
// meaning you get a distinct PyObject whenever you execute the code in
|
||||
// question.
|
||||
struct C10_API HermeticPyObjectTLS {
|
||||
static void set_state(bool state);
|
||||
static bool get_state() {
|
||||
// Hypothetical fastpath if torchdeploy/multipy // codespell:ignore multipy
|
||||
// isn't used. Per
|
||||
// https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p2055r0.pdf
|
||||
// this qualifies relaxed access because it is a single-location data
|
||||
// structure (only the boolean here).
|
||||
//
|
||||
// Forgetting about data races for a moment, is there a logical race?
|
||||
//
|
||||
// - Boolean only ever transitions from false to true. So the
|
||||
// critical situation is when one interpreter is already running
|
||||
// when a second interpreter switches haveState from false to true.
|
||||
//
|
||||
// - The first interpreter is indifferent whether or not it sees
|
||||
// hasState true/false; obviously false works (this is what the
|
||||
// interpreter was previously using; more directly, the interpreter
|
||||
// calls into itself as the handler, so being hermetic is not
|
||||
// required), and true simply means serviced python operator calls will
|
||||
// be hermetic; in these cases it is expected to be functionally
|
||||
// equivalent.
|
||||
//
|
||||
// - The second interpreter MUST see hasState true (as its requests will
|
||||
// be forwarded to the first interpreter), but it is assumed that there
|
||||
// is a synchronization between the interpreter initialization, and
|
||||
// when we actually perform operations, so it is guaranteed to see
|
||||
// hasState true.
|
||||
//
|
||||
// QED.
|
||||
//
|
||||
// This fastpath is currently disabled so that we can more easily test that
|
||||
// hermetic mode works correctly even on stock build of PyTorch.
|
||||
if (false && !haveState_.load(std::memory_order_relaxed))
|
||||
return false;
|
||||
return get_tls_state();
|
||||
}
|
||||
// Call this from the multipy/torchdeploy // codespell:ignore multipy
|
||||
// top level
|
||||
static void init_state();
|
||||
|
||||
private:
|
||||
// This only flipped once from false to true during
|
||||
// torchdeploy/multipy initialization, // codespell:ignore multipy
|
||||
// and never again.
|
||||
static std::atomic<bool> haveState_;
|
||||
static bool get_tls_state();
|
||||
};
|
||||
|
||||
} // namespace c10::impl
|
@ -1,6 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
#include <c10/core/impl/HermeticPyObjectTLS.h>
|
||||
#include <c10/core/impl/PyInterpreter.h>
|
||||
#include <c10/core/impl/PyInterpreterHooks.h>
|
||||
#include <c10/util/python_stub.h>
|
||||
@ -42,32 +41,15 @@ struct C10_API PyObjectSlot {
|
||||
|
||||
PyObject* _unchecked_untagged_pyobj() const;
|
||||
|
||||
// Test the interpreter tag. If tagged for the current interpreter, return
|
||||
// a non-nullopt (but possibly null) PyObject. If (possibly) untagged,
|
||||
// returns a nullopt. If it is definitely invalid, raises an error.
|
||||
//
|
||||
// If `ignore_hermetic_tls` is false and this function is called from a
|
||||
// hermetic context (ie, `HermeticPyObjectTLS::get_state()` is true), then
|
||||
// nullopt is returned. If `ignore_hermetic_tls` is true, then the hermetic
|
||||
// context is ignored, allowing you to check the interpreter tag of a
|
||||
// nonhermetic PyObject from within a hermetic context. This is necessary
|
||||
// because there are some cases where the deallocator function of a
|
||||
// nonhermetic PyObject is called from within a hermetic context, so it must
|
||||
// be properly treated as a nonhermetic PyObject.
|
||||
//
|
||||
// Test the interpreter / PyObj as they may be null
|
||||
// NB: this lives in header so that we can avoid actually creating the
|
||||
// std::optional
|
||||
|
||||
// @todo alban: I'm not too sure what's going on here, we can probably delete
|
||||
// it but it's worthwhile making sure
|
||||
std::optional<PyObject*> check_pyobj() const {
|
||||
impl::PyInterpreter* interpreter = getGlobalPyInterpreter();
|
||||
if (interpreter == nullptr || pyobj_ == nullptr) {
|
||||
return std::nullopt;
|
||||
}
|
||||
if (c10::impl::HermeticPyObjectTLS::get_state()) {
|
||||
return std::nullopt;
|
||||
}
|
||||
return _unchecked_untagged_pyobj();
|
||||
}
|
||||
|
||||
|
@ -153,7 +153,6 @@ _ZN3c104impl12PyObjectSlot10owns_pyobjEv
|
||||
_ZN3c104impl12PyObjectSlot19maybe_destroy_pyobjEv
|
||||
_ZN3c104impl12PyObjectSlotC1Ev
|
||||
_ZN3c104impl12PyObjectSlotD2Ev
|
||||
_ZN3c104impl19HermeticPyObjectTLS13get_tls_stateEv
|
||||
_ZN3c104impl20TorchDispatchModeTLS13any_modes_setEb
|
||||
_ZN3c104impl23ExcludeDispatchKeyGuardC1ENS_14DispatchKeySetE
|
||||
_ZN3c104impl23ExcludeDispatchKeyGuardD2Ev
|
||||
|
@ -157,10 +157,10 @@ class PyInterpreterHolder {
|
||||
public:
|
||||
PyInterpreterHolder()
|
||||
: impl_(new c10::impl::PyInterpreter(
|
||||
ConcretePyInterpreterVTable::instance())),
|
||||
is_main_interpreter_(
|
||||
at::impl::PythonOpRegistrationTrampoline::registerInterpreter(
|
||||
impl_)) {}
|
||||
ConcretePyInterpreterVTable::instance())) {
|
||||
// Register the single interpreter
|
||||
at::impl::PythonOpRegistrationTrampoline::registerInterpreter(impl_);
|
||||
}
|
||||
PyInterpreterHolder(const PyInterpreterHolder&) = delete;
|
||||
PyInterpreterHolder(PyInterpreterHolder&&) = delete;
|
||||
PyInterpreterHolder& operator=(const PyInterpreterHolder&) = delete;
|
||||
@ -174,13 +174,14 @@ class PyInterpreterHolder {
|
||||
c10::impl::PyInterpreter* get() const noexcept {
|
||||
return impl_;
|
||||
}
|
||||
// In single-interpreter mode, this is always true
|
||||
// TODO: delete this
|
||||
bool is_main_interpreter() const noexcept {
|
||||
return is_main_interpreter_;
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
c10::impl::PyInterpreter* impl_;
|
||||
bool is_main_interpreter_;
|
||||
};
|
||||
|
||||
py::object torchDispatchFromTensorImpl(
|
||||
|
@ -74,13 +74,9 @@ PyObject* THPStorage_NewWithStorage(
|
||||
|
||||
s->cdata = c10::MaybeOwned<c10::Storage>::owned(std::move(_storage));
|
||||
|
||||
if (!c10::impl::HermeticPyObjectTLS::get_state()) {
|
||||
s->is_hermetic = false;
|
||||
const auto& storage = THPStorage_Unpack(s);
|
||||
storage.unsafeGetStorageImpl()->pyobj_slot()->init_pyobj(obj);
|
||||
} else {
|
||||
s->is_hermetic = true;
|
||||
}
|
||||
s->is_hermetic = false;
|
||||
const auto& storage = THPStorage_Unpack(s);
|
||||
storage.unsafeGetStorageImpl()->pyobj_slot()->init_pyobj(obj);
|
||||
|
||||
return obj;
|
||||
}
|
||||
@ -88,9 +84,6 @@ PyObject* THPStorage_NewWithStorage(
|
||||
// Wraps the c10::Storage with a storage PyObject
|
||||
PyObject* THPStorage_Wrap(c10::Storage storage) {
|
||||
c10::StorageImpl* storage_impl = storage.unsafeGetStorageImpl();
|
||||
if (c10::impl::HermeticPyObjectTLS::get_state()) {
|
||||
return THPStorage_NewWithStorage(THPStorageClass, std::move(storage));
|
||||
}
|
||||
c10::impl::PyObjectSlot* pyobj_slot = storage_impl->pyobj_slot();
|
||||
|
||||
std::optional<PyObject*> maybe_pyobj = pyobj_slot->check_pyobj();
|
||||
|
@ -1,7 +1,6 @@
|
||||
#include <ATen/NamedTensorUtils.h>
|
||||
#include <c10/core/DeviceType.h>
|
||||
#include <c10/core/impl/GPUTrace.h>
|
||||
#include <c10/core/impl/HermeticPyObjectTLS.h>
|
||||
#include <c10/core/impl/PythonDispatcherTLS.h>
|
||||
#include <c10/util/irange.h>
|
||||
#include <pybind11/pytypes.h>
|
||||
@ -260,10 +259,6 @@ PyObject* THPVariable_Wrap(const at::TensorBase& var) {
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
if (c10::impl::HermeticPyObjectTLS::get_state()) {
|
||||
return THPVariable_NewWithVar((PyTypeObject*)THPVariableClass, var);
|
||||
}
|
||||
|
||||
std::optional<PyObject*> mb_obj =
|
||||
var.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj();
|
||||
if (mb_obj.has_value()) {
|
||||
@ -379,7 +374,6 @@ static bool THPVariable_tryResurrect(THPVariable* self) {
|
||||
// Flip THPVariable to be non-owning
|
||||
// (near use-after-free miss here: fresh MaybeOwned is created breaking
|
||||
// reference on Tensor in struct BEFORE we overwrite the old one)
|
||||
TORCH_INTERNAL_ASSERT(!c10::impl::HermeticPyObjectTLS::get_state());
|
||||
self->cdata = MaybeOwned<Variable>::borrowed(tensor);
|
||||
|
||||
// NB: At this point, tensor *could* be dead (e.g., some other C++ thread
|
||||
@ -2474,28 +2468,13 @@ static PyObject* THPVariable_NewWithVar(
|
||||
auto v = (THPVariable*)obj;
|
||||
// TODO: named constructor to avoid default initialization
|
||||
new (&v->cdata) MaybeOwned<Variable>();
|
||||
if (c10::impl::HermeticPyObjectTLS::get_state()) {
|
||||
// Do NOT initialize pyobj field on the tensor, you own the C++
|
||||
v->cdata = MaybeOwned<Variable>::owned(Variable(_var));
|
||||
TORCH_INTERNAL_ASSERT(
|
||||
!check_has_torch_dispatch(obj),
|
||||
"While HermeticPyObject was enabled, we attempted to create a tensor "
|
||||
"subclass with __torch_dispatch__. This violates the invariant that "
|
||||
"operations in HermeticPyObject have equivalent C++ implementations. "
|
||||
"If your operator registered from Python operator registration isn't "
|
||||
"doing anything strange, there may be an internal PyTorch bug involving "
|
||||
"not appropriately disabling TorchDispatchMode before executing "
|
||||
"Python op registration.");
|
||||
} else {
|
||||
// Normal codepath
|
||||
v->cdata = MaybeOwned<Variable>::owned(Variable(_var));
|
||||
const auto& var = THPVariable_Unpack(v);
|
||||
var.unsafeGetTensorImpl()->pyobj_slot()->init_pyobj(obj);
|
||||
if (has_torch_dispatch_if_known.has_value()
|
||||
? *has_torch_dispatch_if_known
|
||||
: check_has_torch_dispatch(obj)) {
|
||||
var.unsafeGetTensorImpl()->set_python_dispatch(true);
|
||||
}
|
||||
v->cdata = MaybeOwned<Variable>::owned(Variable(_var));
|
||||
const auto& var = THPVariable_Unpack(v);
|
||||
var.unsafeGetTensorImpl()->pyobj_slot()->init_pyobj(obj);
|
||||
if (has_torch_dispatch_if_known.has_value()
|
||||
? *has_torch_dispatch_if_known
|
||||
: check_has_torch_dispatch(obj)) {
|
||||
var.unsafeGetTensorImpl()->set_python_dispatch(true);
|
||||
}
|
||||
}
|
||||
return obj;
|
||||
|
@ -81,40 +81,6 @@ inline static torch::CppFunction dispatch_str(const char* key, Func&& raw_f) {
|
||||
}
|
||||
}
|
||||
|
||||
struct EnableHermeticPyObject {
|
||||
EnableHermeticPyObject()
|
||||
: old_(c10::impl::HermeticPyObjectTLS::get_state()),
|
||||
old_excluded_python_(
|
||||
c10::impl::tls_is_dispatch_key_excluded(at::DispatchKey::Python)),
|
||||
old_python_(
|
||||
c10::impl::tls_is_dispatch_key_included(at::DispatchKey::Python)),
|
||||
old_python_snapshot_(c10::impl::tls_is_dispatch_key_included(
|
||||
at::DispatchKey::PythonTLSSnapshot)) {
|
||||
c10::impl::HermeticPyObjectTLS::set_state(true);
|
||||
c10::impl::tls_set_dispatch_key_excluded(at::DispatchKey::Python, true);
|
||||
c10::impl::tls_set_dispatch_key_included(at::DispatchKey::Python, false);
|
||||
c10::impl::tls_set_dispatch_key_included(
|
||||
at::DispatchKey::PythonTLSSnapshot, false);
|
||||
}
|
||||
~EnableHermeticPyObject() {
|
||||
c10::impl::HermeticPyObjectTLS::set_state(old_);
|
||||
c10::impl::tls_set_dispatch_key_excluded(
|
||||
at::DispatchKey::Python, old_excluded_python_);
|
||||
c10::impl::tls_set_dispatch_key_included(
|
||||
at::DispatchKey::Python, old_python_);
|
||||
c10::impl::tls_set_dispatch_key_included(
|
||||
at::DispatchKey::PythonTLSSnapshot, old_python_snapshot_);
|
||||
}
|
||||
EnableHermeticPyObject(const EnableHermeticPyObject&) = delete;
|
||||
EnableHermeticPyObject(EnableHermeticPyObject&&) = delete;
|
||||
EnableHermeticPyObject& operator=(const EnableHermeticPyObject&) = delete;
|
||||
EnableHermeticPyObject& operator=(EnableHermeticPyObject&&) = delete;
|
||||
bool old_;
|
||||
bool old_excluded_python_;
|
||||
bool old_python_;
|
||||
bool old_python_snapshot_;
|
||||
};
|
||||
|
||||
class PythonKernelHolder : public c10::OperatorKernel {
|
||||
c10::SafePyObject func_;
|
||||
c10::DispatchKey dispatch_key_;
|
||||
|
Reference in New Issue
Block a user