Files
pytorch/c10/core/impl/PyInterpreter.cpp
Laith Sakka 189a054cfb Remove guard_size_oblivious from default contiguity python check, and add aten.sym_is_contiguous. [attempt2] (#160869)
[relanding again after fixing internal build]
Summary:
This might cause some new DDEs at call sites that do not use is_contiguous_or_false() or sym_is_contiguous(),
but we want to surface those call sites so they can be handled properly, by explicitly calling is_contiguous_or_false() rather than is_contiguous() where that is appropriate.
I had to fix one issue after removing the implicit size-oblivious reasoning; here is the context.

In https://github.com/pytorch/pytorch/pull/157472 we defined sym_is_contiguous to be the function that computes contiguity for dynamic shapes in C++. It returns a symbolic expression representing contiguity and is guaranteed not to throw a DDE.

When callers use is_contiguous(), we do sym_is_contiguous().guard_bool().
When callers use is_contiguous_or_false(), we do sym_is_contiguous().guard_or_false().
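
In C++ terms, the contrast is roughly the following (a sketch only; the helper name is made up and the guard argument lists are approximated):
```
#include <c10/core/TensorImpl.h>

// Illustrative helper (not a real PyTorch function) showing the two lowerings
// described above.
void contiguity_queries(const c10::TensorImpl* t, at::MemoryFormat fmt) {
  // is_contiguous(): commits to a concrete answer; guarding can raise a
  // data-dependent error (DDE) if the expression involves unbacked symbols.
  bool strict = t->sym_is_contiguous(fmt).guard_bool(__FILE__, __LINE__);

  // is_contiguous_or_false(): same symbolic expression, but an unknown answer
  // becomes false instead of a guard, so it never raises a DDE.
  bool relaxed = t->sym_is_contiguous(fmt).guard_or_false(__FILE__, __LINE__);

  (void)strict;
  (void)relaxed;
}
```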

One path that was not handled well was this:
```
c10::SymBool TensorImpl::sym_is_contiguous_custom(
    at::MemoryFormat memory_format) const {
  if (C10_UNLIKELY(matches_python_custom(SizesStridesPolicy::CustomStrides))) {
    return pyobj_slot_.load_pyobj_interpreter()->is_contiguous(
        this, memory_format);
  }

  return sym_is_contiguous_default(memory_format);
}
```
Namely, if sym_is_contiguous_custom is called while matches_python_custom(SizesStridesPolicy::CustomStrides) returns true, we used to call is_contiguous(this, memory_format).

That call went through load_pyobj_interpreter() and ended up in the Python is_contiguous() implementation, which relied on implicit size-oblivious reasoning.
Once that implicit reasoning is removed, the right thing to do is
return pyobj_slot_.load_pyobj_interpreter()->sym_is_contiguous(this, memory_format);
otherwise we would get a DDE even when the caller is using sym_is_contiguous.
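
Concretely, the fixed version of the path above looks roughly like this (reconstructed from the snippet and the line quoted above; the actual diff may differ slightly):
```
c10::SymBool TensorImpl::sym_is_contiguous_custom(
    at::MemoryFormat memory_format) const {
  if (C10_UNLIKELY(matches_python_custom(SizesStridesPolicy::CustomStrides))) {
    // Ask Python for the symbolic answer rather than a plain bool, so a caller
    // that uses sym_is_contiguous never hits a DDE on this path.
    return pyobj_slot_.load_pyobj_interpreter()->sym_is_contiguous(
        this, memory_format);
  }

  return sym_is_contiguous_default(memory_format);
}
```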

So I had to define sym_is_contiguous for the PyInterpreter, and then override it for nested tensors.

Approved by: https://github.com/ezyang

Test Plan:
contbuild & OSS CI, see e444cd24d4

Rollback Plan:

Differential Revision: D80435179

Pull Request resolved: https://github.com/pytorch/pytorch/pull/160869
Approved by: https://github.com/ezyang
2025-09-08 22:59:13 +00:00

153 lines
4.7 KiB
C++

#include <c10/core/SymIntArrayRef.h>
#include <c10/core/TensorImpl.h>
#include <c10/core/impl/PyInterpreter.h>
C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-parameter")
namespace c10::impl {
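// Fallback vtable installed by PyInterpreter::disarm() once the Python
// interpreter that owned a tensor's PyObject is gone: incref/decref and the
// GPU trace hooks become no-ops, while most other calls fail loudly via the
// PANIC macro below.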
struct NoopPyInterpreterVTable final : public PyInterpreterVTable {
  std::string name() const override {
    return "<unloaded interpreter>";
  }

  void incref(PyObject* pyobj) const override {} // do nothing

  void decref(PyObject* pyobj, bool has_pyobj_slot) const override {
  } // do nothing

#define PANIC(m)              \
  TORCH_INTERNAL_ASSERT(      \
      0,                      \
      "attempted to call " #m \
      " on a Tensor with nontrivial PyObject after corresponding interpreter died")

  c10::intrusive_ptr<TensorImpl> detach(const TensorImpl* self) const override {
    PANIC(detach);
  }

  void dispatch(const c10::OperatorHandle& op, torch::jit::Stack* stack)
      const override {
    PANIC(dispatch);
  }

  void reportErrorCallback(PyObject* callback, DispatchKey key) const override {
    PANIC(reportErrorCallback);
  }

  void python_op_registration_trampoline(
      const c10::OperatorHandle& op,
      c10::DispatchKey,
      c10::DispatchKeySet keyset,
      torch::jit::Stack* stack,
      bool with_keyset,
      bool with_op) const override {
    PANIC(python_op_registration_trampoline);
  }

  void throw_abstract_impl_not_imported_error(
      std::string opname,
      const char* pymodule,
      const char* context) const override {
    PANIC(throw_abstract_impl_not_imported_error);
  }

  void python_dispatcher(
      const c10::OperatorHandle& op,
      c10::DispatchKeySet,
      torch::jit::Stack* stack) const override {
    PANIC(python_dispatcher);
  }

  bool is_contiguous(const TensorImpl* self, at::MemoryFormat) const override {
    PANIC(is_contiguous);
  }
  c10::SymBool sym_is_contiguous(const TensorImpl* self, at::MemoryFormat)
      const override {
    PANIC(sym_is_contiguous);
  }
  bool is_strides_like(const TensorImpl* self, at::MemoryFormat)
      const override {
    PANIC(is_strides_like);
  }
  bool is_non_overlapping_and_dense(const TensorImpl* self) const override {
    PANIC(is_non_overlapping_and_dense);
  }
  c10::Device device(const TensorImpl* self) const override {
    PANIC(device);
  }
  int64_t dim(const TensorImpl* self) const override {
    PANIC(dim);
  }
  c10::IntArrayRef strides(const TensorImpl* self) const override {
    PANIC(strides);
  }
  c10::IntArrayRef sizes(const TensorImpl* self) const override {
    PANIC(sizes);
  }
  c10::SymIntArrayRef sym_sizes(const TensorImpl* self) const override {
    PANIC(sym_sizes);
  }
  c10::Layout layout(const TensorImpl* self) const override {
    PANIC(layout);
  }
  int64_t numel(const TensorImpl* self) const override {
    PANIC(numel);
  }
  c10::SymInt sym_numel(const TensorImpl* self) const override {
    PANIC(sym_numel);
  }
  c10::SymIntArrayRef sym_strides(const TensorImpl* self) const override {
    PANIC(sym_strides);
  }
  c10::SymInt sym_storage_offset(const TensorImpl* self) const override {
    PANIC(sym_storage_offset);
  }

  // Just swallow the event, don't do anything
  void trace_gpu_event_creation(c10::DeviceType device_type, uintptr_t event)
      const override {}
  void trace_gpu_event_deletion(c10::DeviceType device_type, uintptr_t event)
      const override {}
  void trace_gpu_event_record(
      c10::DeviceType device_type,
      uintptr_t event,
      uintptr_t stream) const override {}
  void trace_gpu_event_wait(
      c10::DeviceType device_type,
      uintptr_t event,
      uintptr_t stream) const override {}
  void trace_gpu_memory_allocation(c10::DeviceType device_type, uintptr_t ptr)
      const override {}
  void trace_gpu_memory_deallocation(c10::DeviceType device_type, uintptr_t ptr)
      const override {}
  void trace_gpu_stream_creation(c10::DeviceType device_type, uintptr_t stream)
      const override {}
  void trace_gpu_device_synchronization(
      c10::DeviceType device_type) const override {}
  void trace_gpu_stream_synchronization(
      c10::DeviceType device_type,
      uintptr_t stream) const override {}
  void trace_gpu_event_synchronization(
      c10::DeviceType device_type,
      uintptr_t event) const override {}

  void reset_backward_hooks(const TensorImpl* self) const override {
    PANIC(reset_backward_hooks);
  }
};
// Construct this in global scope instead of within `disarm`,
// where it would only be initialized the first time `disarm` is called.
// This increases the likelihood that `noop_vtable` lives longer than
// any object that refers to it.
// If `noop_vtable` went out of scope first, other objects would hold a
// dangling reference to it.
static NoopPyInterpreterVTable noop_vtable;
void PyInterpreter::disarm() noexcept {
  vtable_ = &noop_vtable;
}
} // namespace c10::impl
C10_DIAGNOSTIC_POP()