Mirror of https://github.com/pytorch/pytorch.git (synced 2025-10-20 21:14:14 +08:00)
[BE] Make PyObjectSlot use a global PyInterpreter (#162659)
This PR gets rid of the pyobj_interpreter_ variable from PyObjectSlot, saving a word per slot in the process. I'm going to ask @huydhn for review as well, since there are some changes to CI. Testing: imported internally, and the previously failing Android build now seems to work.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/162659
Approved by: https://github.com/albanD, https://github.com/huydhn
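The space saving can be pictured with a minimal, self-contained C++ sketch. The structs below (OldPyObjectSlot, NewPyObjectSlot, PyObjectStub) are illustrative stand-ins, not the real c10 classes; only the member names follow the diff.

// Sketch only: why dropping the per-slot interpreter pointer saves a word.
#include <atomic>
#include <cstdio>

struct PyInterpreter;       // opaque stand-in for c10::impl::PyInterpreter
using PyObjectStub = void;  // stand-in for CPython's PyObject

struct OldPyObjectSlot {
  std::atomic<PyInterpreter*> pyobj_interpreter_{nullptr};  // removed by this PR
  PyObjectStub* pyobj_{nullptr};
};

struct NewPyObjectSlot {
  PyObjectStub* pyobj_{nullptr};  // interpreter now comes from a global accessor
};

int main() {
  // On a typical 64-bit build this prints "16 vs 8": one pointer-sized word
  // saved per TensorImpl/StorageImpl slot.
  std::printf("%zu vs %zu\n", sizeof(OldPyObjectSlot), sizeof(NewPyObjectSlot));
  return 0;
}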
.github/workflows/pull.yml (2 changed lines)
@@ -127,6 +127,8 @@ jobs:
     uses: ./.github/workflows/_linux-build.yml
     needs: get-label-type
     with:
+      # More memory is needed to build with asan
+      runner: linux.2xlarge.memory
       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
       build-environment: linux-jammy-py3.10-clang18-asan
       docker-image-name: ci-image:pytorch-linux-jammy-py3-clang18-asan
.github/workflows/slow.yml (2 changed lines)
@@ -140,6 +140,8 @@ jobs:
     uses: ./.github/workflows/_linux-build.yml
     needs: get-label-type
     with:
+      # More memory is needed to build with asan
+      runner: linux.2xlarge.memory
       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
       build-environment: linux-jammy-py3.10-clang18-asan
       docker-image-name: ci-image:pytorch-linux-jammy-py3-clang18-asan
@@ -3244,7 +3244,7 @@ class C10_TensorImpl_Size_Check_Dummy_Class : private TensorImpl {
     are_equal<sizeof(autograd_meta_), 4, FieldNameEnum::autograd_meta_>();
     are_equal<sizeof(extra_meta_), 4, FieldNameEnum::extra_meta_>();
     are_equal<sizeof(version_counter_), 4, FieldNameEnum::version_counter_>();
-    are_equal<sizeof(pyobj_slot_), 8, FieldNameEnum::pyobj_slot_>();
+    are_equal<sizeof(pyobj_slot_), 4, FieldNameEnum::pyobj_slot_>();
     is_le<sizeof(sizes_and_strides_), 88, FieldNameEnum::sizes_and_strides_>();
     are_equal<sizeof(storage_offset_), 8, FieldNameEnum::storage_offset_>();
     are_equal<sizeof(numel_), 8, FieldNameEnum::numel_>();
@@ -3269,7 +3269,7 @@ class C10_TensorImpl_Size_Check_Dummy_Class : private TensorImpl {
     is_le<sizeof(autograd_meta_), 16, FieldNameEnum::autograd_meta_>();
     is_le<sizeof(extra_meta_), 16, FieldNameEnum::extra_meta_>();
     are_equal<sizeof(version_counter_), 8, FieldNameEnum::version_counter_>();
-    are_equal<sizeof(pyobj_slot_), 16, FieldNameEnum::pyobj_slot_>();
+    are_equal<sizeof(pyobj_slot_), 8, FieldNameEnum::pyobj_slot_>();
     are_equal<sizeof(sizes_and_strides_), 88, FieldNameEnum::sizes_and_strides_>();
     are_equal<sizeof(storage_offset_), 8, FieldNameEnum::storage_offset_>();
     are_equal<sizeof(numel_), 8, FieldNameEnum::numel_>();
@@ -13,11 +13,10 @@ struct C10_API PyInterpreterHooksInterface {
 
   // Get the PyInterpreter instance
-  // Stub implementation throws error when Python is not available
+  // We return nullptr rather than throwing an error since there are bits of c10
+  // that expect an empty PyObjectSlot when python is not available.
   virtual PyInterpreter* getPyInterpreter() const {
-    TORCH_CHECK(
-        false,
-        "PyTorch was compiled without Python support. "
-        "Cannot access Python interpreter from C++.");
+    return nullptr;
   }
 };
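Because the stub now reports "no Python" by returning nullptr instead of throwing, c10-side callers are expected to null-check the interpreter. A minimal sketch of such a caller, assuming only the getGlobalPyInterpreter() helper declared later in this PR; the include path and the function name below are assumptions, not part of the diff.

// Sketch only: tolerating a missing Python interpreter on the c10 side.
#include <c10/core/impl/PyObjectSlot.h>  // assumed location of getGlobalPyInterpreter()

bool python_is_available() {
  // Old stub: getPyInterpreter() would TORCH_CHECK(false, ...) and throw.
  // New stub: nullptr simply means "no Python", so an empty PyObjectSlot
  // stays empty and ordinary control flow handles a C++-only build.
  return c10::impl::getGlobalPyInterpreter() != nullptr;
}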
@@ -2,7 +2,7 @@
 
 namespace c10::impl {
 
-PyObjectSlot::PyObjectSlot() : pyobj_interpreter_(nullptr), pyobj_(nullptr) {}
+PyObjectSlot::PyObjectSlot() : pyobj_(nullptr) {}
 
 PyObjectSlot::~PyObjectSlot() {
   maybe_destroy_pyobj();
@@ -10,9 +10,9 @@ PyObjectSlot::~PyObjectSlot() {
 
 void PyObjectSlot::maybe_destroy_pyobj() {
   if (owns_pyobj()) {
-    TORCH_INTERNAL_ASSERT(pyobj_interpreter_ != nullptr);
+    TORCH_INTERNAL_ASSERT(getGlobalPyInterpreter() != nullptr);
     TORCH_INTERNAL_ASSERT(pyobj_ != nullptr);
-    (*pyobj_interpreter_.load(std::memory_order_acquire))
+    (*getGlobalPyInterpreter())
         ->decref(_unchecked_untagged_pyobj(), /*has_pyobj_slot*/ true);
     // NB: this destructor can only be entered when there are no
     // references to this C++ object (obviously), NOR any references
@@ -25,7 +25,7 @@ void PyObjectSlot::maybe_destroy_pyobj() {
 }
 
 PyInterpreter* PyObjectSlot::pyobj_interpreter() {
-  return pyobj_interpreter_.load(std::memory_order_acquire);
+  return getGlobalPyInterpreter();
 }
 
 PyObject* PyObjectSlot::_unchecked_untagged_pyobj() const {
@@ -35,7 +35,7 @@ PyObject* PyObjectSlot::_unchecked_untagged_pyobj() const {
 }
 
 PyInterpreter& PyObjectSlot::load_pyobj_interpreter() const {
-  auto interpreter = pyobj_interpreter_.load(std::memory_order_acquire);
+  auto interpreter = getGlobalPyInterpreter();
   if (interpreter) {
     return *interpreter;
   }
@@ -6,10 +6,17 @@
 #include <c10/util/python_stub.h>
 #include <optional>
 
 #include <atomic>
 
 namespace c10::impl {
 
+// Function pointer type for getting the global interpreter
+using GetPyInterpreterFn = PyInterpreter* (*)();
+
+// Global function pointer (set by csrc initialization)
+C10_API extern GetPyInterpreterFn g_get_pyinterpreter_fn;
+
+// Helper function to get the global interpreter
+C10_API PyInterpreter* getGlobalPyInterpreter();
+
 struct C10_API PyObjectSlot {
  public:
  PyObjectSlot();
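The header only declares g_get_pyinterpreter_fn and getGlobalPyInterpreter(); their definitions are not part of this excerpt. One plausible wiring, written here purely as an assumption about how the pieces fit together, is for the helper to dispatch through the function pointer that torch/csrc installs during initialization:

// Sketch only: a possible definition consistent with the declarations above.
// The actual PyObjectSlot.cpp / torch/csrc wiring may differ.
#include <c10/core/impl/PyObjectSlot.h>  // assumed header carrying the declarations

namespace c10::impl {

GetPyInterpreterFn g_get_pyinterpreter_fn = nullptr;

PyInterpreter* getGlobalPyInterpreter() {
  // Before torch/csrc initialization, or in a Python-free build, nothing has
  // installed the hook, so report "no interpreter" rather than failing.
  if (g_get_pyinterpreter_fn == nullptr) {
    return nullptr;
  }
  return g_get_pyinterpreter_fn();
}

} // namespace c10::impl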
@@ -26,8 +33,6 @@ struct C10_API PyObjectSlot {
   // NB: THIS FUNCTION CAN RAISE AN EXCEPTION. Make sure to clean up after
   // PyObject if necessary!
   void init_pyobj(PyObject* pyobj) {
-    pyobj_interpreter_.store(
-        getGlobalPyInterpreter(), std::memory_order_relaxed);
     pyobj_ = pyobj;
   }
 
@@ -55,19 +60,16 @@ struct C10_API PyObjectSlot {
 
   // @todo alban: I'm not too sure what's going on here, we can probably delete
   // it but it's worthwhile making sure
-  std::optional<PyObject*> check_pyobj(bool ignore_hermetic_tls = false) const {
-    impl::PyInterpreter* interpreter =
-        pyobj_interpreter_.load(std::memory_order_acquire);
-    if (interpreter == nullptr) {
+  std::optional<PyObject*> check_pyobj() const {
+    impl::PyInterpreter* interpreter = getGlobalPyInterpreter();
+    if (interpreter == nullptr || pyobj_ == nullptr) {
       return std::nullopt;
     }
 
-    if (!ignore_hermetic_tls && c10::impl::HermeticPyObjectTLS::get_state()) {
+    if (c10::impl::HermeticPyObjectTLS::get_state()) {
       return std::nullopt;
-    } else {
-      return _unchecked_untagged_pyobj();
-    }
+    }
+    return _unchecked_untagged_pyobj();
   }
 
   PyInterpreter& load_pyobj_interpreter() const;
@@ -76,30 +78,6 @@ struct C10_API PyObjectSlot {
   void set_owns_pyobj(bool b);
 
  private:
-  // This field contains the interpreter tag for this object. See
-  // Note [Python interpreter tag] for general context
-  //
-  // Note [Memory ordering on Python interpreter tag]
-  // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-  // What memory_order do we need when accessing this atomic? We don't
-  // need a single total modification order (as provided by
-  // memory_order_seq_cst) as pyobj_interpreter_ is monotonic: it can only
-  // transition from -1 to some positive integer and never changes afterwards.
-  // Because there is only one modification, it trivially already has a total
-  // modification order (e.g., we don't need fences or locked instructions on
-  // x86)
-  //
-  // In fact, one could make a reasonable argument that relaxed reads are OK,
-  // due to the presence of external locking (GIL) to ensure that interactions
-  // with other data structures are still correctly synchronized, so that
-  // we fall in the "Single-Location Data Structures" case as described in
-  // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p2055r0.pdf
-  // However, on x86, it doesn't matter if I use acquire or relaxed on the load
-  // as I get the same assembly in both cases. So I just use the more
-  // conservative acquire (which will impede compiler optimizations but I don't
-  // care)
-  std::atomic<PyInterpreter*> pyobj_interpreter_;
-
   // This field contains a reference to a PyObject representing this Tensor.
   // If pyobj is nullptr, when we transfer Tensor to Python, we allocate a new
   // PyObject for it and set this field. This field does not have to be
@@ -404,11 +404,9 @@ static PyObject* THPModule_swap_tensor_impl(PyObject* _unused, PyObject* args) {
   // The TensorImpls contain PyObjectSlots that have a reference to the PyObject
   // associated with the TensorImpl. Swap this field as well.
   std::optional<PyObject*> mb_obj_a =
-      a->cdata->unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(
-          /*ignore_hermetic_tls=*/false);
+      a->cdata->unsafeGetTensorImpl()->pyobj_slot()->check_pyobj();
   std::optional<PyObject*> mb_obj_b =
-      b->cdata->unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(
-          /*ignore_hermetic_tls=*/false);
+      b->cdata->unsafeGetTensorImpl()->pyobj_slot()->check_pyobj();
   TORCH_INTERNAL_ASSERT(
       mb_obj_a.has_value() && mb_obj_b.has_value(),
       "Both tensors should have PyObjects tagged by the current python interpreter");
@@ -614,8 +614,7 @@ static void set_tensor_attr_with_capsule(
     const c10::TensorImpl* tensor,
     py::capsule& capsule,
     const char* attr_name) {
-  std::optional<PyObject*> mb_obj = tensor->pyobj_slot()->check_pyobj(
-      /*ignore_hermetic_tls=*/false);
+  std::optional<PyObject*> mb_obj = tensor->pyobj_slot()->check_pyobj();
   TORCH_CHECK(
       mb_obj.has_value(), "Tensor subclass's PyInterpreter has no value");
   auto obj = mb_obj.value();
@@ -642,8 +641,7 @@ static c10::ArrayRef<T> get_set_cached_attr(
     const c10::TensorImpl* tensor,
     const char* base_attr_name,
     const py::object& obj) {
-  std::optional<PyObject*> mb_obj =
-      tensor->pyobj_slot()->check_pyobj(getPyInterpreter());
+  std::optional<PyObject*> mb_obj = tensor->pyobj_slot()->check_pyobj();
   TORCH_CHECK(
       mb_obj.has_value(), "Tensor subclass's PyInterpreter has no value");
   auto tensor_obj = mb_obj.value();
@@ -41,8 +41,8 @@ PyObject* THPStorage_NewWithStorage(
       "Creating a Storage subclass from a class that does not inherit from ",
       "Storage is not possible. Make sure your class inherits from Storage.");
 
-  auto maybe_pyobj = _storage.unsafeGetStorageImpl()->pyobj_slot()->check_pyobj(
-      /*ignore_hermetic_tls=*/false);
+  auto maybe_pyobj =
+      _storage.unsafeGetStorageImpl()->pyobj_slot()->check_pyobj();
   if (maybe_pyobj.has_value() && maybe_pyobj.value()) {
     TORCH_CHECK(
         allow_preexisting_pyobj,
@@ -93,8 +93,7 @@ PyObject* THPStorage_Wrap(c10::Storage storage) {
   }
   c10::impl::PyObjectSlot* pyobj_slot = storage_impl->pyobj_slot();
 
-  std::optional<PyObject*> maybe_pyobj = pyobj_slot->check_pyobj(
-      /*ignore_hermetic_tls=*/false);
+  std::optional<PyObject*> maybe_pyobj = pyobj_slot->check_pyobj();
   if (maybe_pyobj.has_value()) {
     auto obj = *maybe_pyobj;
     if (obj) {
@@ -127,8 +126,8 @@ static bool THPStorage_isPreservable(THPStorage* self) {
     return false;
   }
 
-  if (storage.unsafeGetStorageImpl()->pyobj_slot()->check_pyobj(
-          /*ignore_hermetic_tls=*/true) != (PyObject*)self) {
+  if (storage.unsafeGetStorageImpl()->pyobj_slot()->check_pyobj() !=
+      (PyObject*)self) {
     return false;
   }
   if (storage.use_count() <= 1) {
@@ -145,8 +144,7 @@ static bool THPStorage_tryPreserve(THPStorage* self) {
   const auto& storage = THPStorage_Unpack(self);
   c10::StorageImpl* storage_impl = storage.unsafeGetStorageImpl();
 
-  auto maybe_pyobj = storage_impl->pyobj_slot()->check_pyobj(
-      /*ignore_hermetic_tls=*/true);
+  auto maybe_pyobj = storage_impl->pyobj_slot()->check_pyobj();
   // NOTE: It is possible to just set the PyObjectSlot here, but the point is
   // that we should have already set PyObjectSlot when the storage PyObject
   // was created.
@@ -265,8 +265,7 @@ PyObject* THPVariable_Wrap(const at::TensorBase& var) {
   }
 
   std::optional<PyObject*> mb_obj =
-      var.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(
-          /*ignore_hermetic_tls=*/false);
+      var.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj();
   if (mb_obj.has_value()) {
     auto obj = *mb_obj;
     if (obj) {
@@ -329,8 +328,8 @@ static bool isResurrectable(THPVariable* self) {
     return false;
   }
   // Check if this is hermetic. If it is, no resurrection.
-  if (tensor.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(
-          /*ignore_hermetic_tls=*/false) != (PyObject*)self) {
+  if (tensor.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj() !=
+      (PyObject*)self) {
     return false;
   }
   return true;
@@ -355,8 +354,7 @@ static bool THPVariable_tryResurrect(THPVariable* self) {
       !tensor.unsafeGetTensorImpl()->pyobj_slot()->owns_pyobj());
 
   c10::TensorImpl* tensor_impl = tensor.unsafeGetTensorImpl();
-  auto maybe_pyobj = tensor_impl->pyobj_slot()->check_pyobj(
-      /*ignore_hermetic_tls=*/false);
+  auto maybe_pyobj = tensor_impl->pyobj_slot()->check_pyobj();
 
   TORCH_INTERNAL_ASSERT(
       maybe_pyobj.has_value(),
@@ -2223,8 +2221,8 @@ static int THPVariable_subclass_clear(THPVariable* self) {
   // because Tensor asked us to (it's already destructing).
 
   if (!self->cdata.unsafeIsBorrowed() &&
-      tensor.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(
-          /*ignore_hermetic_tls=*/false) == (PyObject*)self) {
+      tensor.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj() ==
+          (PyObject*)self) {
     // TODO: empirically, on OS X this assert appears to be untrue
     // In test_py_tensors_multi_async_call - ProcessGroupRpcTestWithSpawn
     // distributed/rpc/test_process_group_agent.py
@@ -2410,8 +2408,7 @@ static PyObject* THPVariable_NewWithVar(
 
   // This function overwrite the Tensor's pyobj field without extra checks
   // Make sure it is not set otherwise we would leak memory
-  auto mb_obj = _var.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(
-      /*ignore_hermetic_tls=*/false);
+  auto mb_obj = _var.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj();
 
   // Under some circumstances, we may attempt to create a new Python
   // object for a variable that already has a Python object. The most common