mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Based on the [conversation](https://github.com/pytorch/pytorch/issues/121791), we plan to drop "highest, high, medium" as the representation of fp32 internal computation data types and instead name the algorithm directly.

### Design Choice: Directly use algorithm names like "TF32", "BF16"

#### Pros
- The names are more informative: "tf32" says more than a generic "high".
- Easier to extend to new algorithms like `tf32x3`.

#### Cons
- "HIGHEST, HIGH, MEDIUM" indicated the relative precision between algorithms. However, we can add documentation discussing the relative precision of the named algorithms.

### We provide a layered structure for backends/operators ('f32' is short for 'fp32_precision')

### We provide the following fp32 compute precisions that can be set:
- **"ieee"**: Not allowed to use any other internal computation data type.
- **"tf32"**: Allowed to use tf32 as an internal computation data type.
- **"bf16"**: Allowed to use bf16 as an internal computation data type.
- **"none"**: Precision is not set, and the setting can be overridden by the parent node.

### Overriding Precision Settings
A child node is overridden by its parent node if the child is set to the default. The current default settings are:
```
backend = generic, op = all, precision setting = none
backend = cuda, op = all, precision setting = none
backend = cuda, op = conv, precision setting = tf32
backend = cuda, op = rnn, precision setting = tf32
backend = cuda, op = matmul, precision setting = none
backend = mkldnn, op = all, precision setting = none
backend = mkldnn, op = conv, precision setting = none
backend = mkldnn, op = rnn, precision setting = none
backend = mkldnn, op = matmul, precision setting = none
```
- If the user sets `torch.backends.mkldnn.fp32_precision="bf16"`, its child nodes `torch.backends.mkldnn.matmul.fp32_precision` / `torch.backends.mkldnn.conv.fp32_precision` / `torch.backends.mkldnn.rnn.fp32_precision` will also be overridden to "bf16".
- If the user sets `torch.backends.fp32_precision="bf16"`, `torch.backends.mkldnn.fp32_precision` and its child nodes will also be overridden to "bf16".

### Backward Compatibility
Since the new API allows more fine-grained control, there can be conflicts. For example, the previous `torch.backends.cudnn.allow_tf32` is not expressive enough to represent the combined state `torch.backends.cudnn.rnn.fp32_precision="ieee"` and `torch.backends.cudnn.conv.fp32_precision="tf32"`. Therefore, our goals for backward compatibility are:
- If the user only uses the previous APIs, they work as before.
- If the user uses the **new** API to move to a state that is **un-representable** by the old API and then reads the state through the **old** API, we raise a RuntimeError and point the user to the documentation.

### Test Plan
```
python test/test_cuda.py -k test_fp32_precision_with_tf32
python test/test_cuda.py -k test_fp32_precision_with_float32_matmul_precision
python test/test_cuda.py -k test_invalid_status_for_legacy_api
python test/test_mkldnn.py -k test_mlkdnn_get_set
python test/test_mkldnn.py -k test_generic_precision
python test/test_mkldnn.py -k test_invalid
python test/test_mkldnn.py -k test_default_use_parent
```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/125888
Approved by: https://github.com/jgong5, https://github.com/albanD

Co-authored-by: Jiang, Yanbing <yanbing.jiang@intel.com>
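To make the override and backward-compatibility rules concrete, here is a hedged sketch of the intended usage (attribute names are taken from the description above; the exact error text is an assumption):

```
import torch

# Per-operator control: TF32 for cuDNN convolutions, strict IEEE for RNNs.
torch.backends.cudnn.conv.fp32_precision = "tf32"
torch.backends.cudnn.rnn.fp32_precision = "ieee"

# Backend-level control: children left at "none" inherit this, so mkldnn
# matmul/conv/rnn all become "bf16".
torch.backends.mkldnn.fp32_precision = "bf16"

# The mixed cuDNN state above cannot be expressed as a single boolean, so
# reading it through the legacy flag raises instead of guessing.
try:
    _ = torch.backends.cudnn.allow_tf32
except RuntimeError as e:
    print(e)  # points at the fp32_precision documentation
```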
6385 lines
211 KiB
C++
#include <ATen/PythonTorchFunctionTLS.h>
#include <ATen/autocast_mode.h>
#include <c10/core/SafePyObject.h>
#include <c10/core/impl/PyInterpreter.h>
#define PY_SSIZE_T_CLEAN
#include <ATen/EmptyTensor.h>
#include <ATen/SparseCsrTensorUtils.h>
#include <c10/util/flat_hash_map.h>
#include <torch/csrc/autograd/grad_mode.h>
#include <torch/csrc/autograd/utils/wrap_outputs.h>
#include <torch/csrc/dynamo/guards.h>
#include <torch/csrc/inductor/inductor_ops.h>
#include <torch/csrc/utils/disable_torch_function.h>
#include <torch/csrc/utils/python_arg_parser.h>
#include <torch/csrc/utils/python_compat.h>
#include <torch/csrc/utils/python_numbers.h>
#include <torch/csrc/utils/python_symnode.h>
#include <torch/csrc/utils/pythoncapi_compat.h>
#include <torch/extension.h>

#include <torch/csrc/dynamo/debug_macros.h>

#include <nlohmann/json.hpp>

#ifdef USE_CUDA
#include <ATen/cuda/EmptyTensor.h>
#endif

#ifdef USE_XPU
#include <ATen/xpu/EmptyTensor.h>
#endif

#include <chrono>
#include <sstream>
#include <tuple>
#include <utility>

// Certain CPython data structures are defined in `.c` files in earlier Python
// versions, e.g., for TupleIteratorGetItemAccessor, we need a fast way to
// retrieve the underlying tuple and access the item. Before Python 3.12
// version, the data structure is in tupleobject.c file -
// https://github.com/python/cpython/blob/9afc6d102d16080535325f645849cd84eb04d57d/Objects/tupleobject.c#L1058-L1062
//
// To handle the older python versions, we manually copy the struct here and
// manually cast it to this new struct. For newer versions, the struct is
// included in the header file.
#if IS_PYTHON_3_12_PLUS

#define Py_BUILD_CORE
#include <internal/pycore_range.h> // _PyRangeIterObject
#include <internal/pycore_tuple.h> // _PyTupleIterObject
#undef Py_BUILD_CORE

#else

// Manually create _PyTupleIterObject struct
typedef struct {
  PyObject_HEAD
  Py_ssize_t it_index;
  PyTupleObject* it_seq; /* Set to NULL when iterator is exhausted */
} _PyTupleIterObject;

// Copied from CPython, and given a unified name for different Python versions.
// https://github.com/python/cpython/blob/7f71003b222ad398713514c2b55d34dc05dba6bc/Objects/rangeobject.c#L765-L771
typedef struct {
  PyObject_HEAD
  // NOTE for Python 3.12+, `index` is removed, and `start` is updated in place
  // instead, upon each `next(...)` call. See
  // https://github.com/python/cpython/pull/27986
  long index;
  long start;
  long step;
  long len;
} _PyRangeIterObject;

#endif // IS_PYTHON_3_12_PLUS

namespace torch::dynamo {

// Macro to skip addition of duplicate guards like EQUALS_MATCH
#define SKIP_IF_GUARD_ALREADY_PRESENT(name) \
  if (self.is_leaf_guard_present(name)) {   \
    return;                                 \
  }                                         \
  self.insert_leaf_guard(name);

TensorCheck::TensorCheck(
    const LocalState& state,
    PyTypeObject* pt,
    const at::Tensor& v,
    c10::DispatchKeySet dispatch_key_set,
    std::vector<std::optional<c10::SymInt>> dynamic_dims_sizes,
    std::vector<std::optional<c10::SymInt>> dynamic_dims_strides)
    : pytype(pt),
      dispatch_key_(state.apply(dispatch_key_set).raw_repr()),
      dtype_(v.dtype().toScalarType()),
      device_index_(v.device().index()),
      requires_grad_(v.requires_grad()),
      sizes_(std::move(dynamic_dims_sizes)),
      strides_(std::move(dynamic_dims_strides)),
      dim_(static_cast<int64_t>(sizes_.size())) {
  // TODO(voz): In cases where sizes_ and strides_ are fully dynamic, should
  // we just treat this as optional?
}

TensorCheck::TensorCheck(
    const LocalState& state,
    PyTypeObject* pt,
    c10::DispatchKeySet dispatch_key_set,
    at::ScalarType dtype,
    at::DeviceIndex device_index,
    bool requires_grad,
    std::vector<std::optional<c10::SymInt>> dynamic_dims_sizes,
    std::vector<std::optional<c10::SymInt>> dynamic_dims_strides)
    : pytype(pt),
      dispatch_key_(state.apply(dispatch_key_set).raw_repr()),
      dtype_(dtype),
      device_index_(device_index),
      requires_grad_(requires_grad),
      sizes_(std::move(dynamic_dims_sizes)),
      strides_(std::move(dynamic_dims_strides)),
      dim_(static_cast<int64_t>(sizes_.size())) {}

// See note in guards.py [Note - On Export Tensor Guards]
// Logic parallel to here must be maintained in python
bool TensorCheck::check(const LocalState& state, const at::Tensor& v) {
  // In terms of a sparse_csr tensor, it does not support strides information
  c10::SymIntArrayRef sym_strides(std::vector<SymInt>(v.ndimension(), -1));
  bool does_not_support_stride = v.layout() == c10::kSparseCsr ||
      v.layout() == c10::kSparseCsc || v.layout() == c10::kSparseBsc ||
      v.layout() == c10::kSparseBsr;
  if (!does_not_support_stride) {
    sym_strides = v.sym_strides();
  }

  return check(
      state,
      v.key_set(),
      v.dtype().toScalarType(),
      v.device(),
      v.sym_sizes(),
      sym_strides,
      v.requires_grad());
}

bool TensorCheck::check(
    const LocalState& state,
    const c10::DispatchKeySet& dispatch_key_set,
    const at::ScalarType& dtype,
    const c10::Device& device,
    const c10::SymIntArrayRef& sym_sizes,
    const c10::SymIntArrayRef& sym_strides,
    const bool& requires_grad) {
  if (dispatch_key_ != state.apply(dispatch_key_set).raw_repr() ||
      dtype_ != dtype || device_index_ != device.index() ||
      requires_grad_ != requires_grad) {
    return false;
  }

  auto ndim = sym_sizes.size();
  if (ndim != static_cast<size_t>(dim_)) {
    return false;
  }

  const auto& sizes = sym_sizes;
  const auto& strides = sym_strides;
  for (auto i : c10::irange(ndim)) {
    auto known_size = sizes_[i];
    auto known_stride = strides_[i];
    if (known_size.has_value()) {
      if (known_size.value() != sizes[i]) {
        return false;
      }
    }
    if (known_stride.has_value()) {
      if (known_stride.value() != strides[i]) {
        return false;
      }
    }
  }
  return true;
}

std::string TensorCheck::check_verbose(
    const LocalState& state,
    const at::Tensor& v,
    const std::string& tensor_name) {
  std::stringstream fail_reason;
  fail_reason << "tensor '" << tensor_name << "' ";
  if (dispatch_key_ != state.apply(v.key_set()).raw_repr()) {
    // return fmt::format("tensor dispatch key mismatch. expected {}, actual
    // {}", dispatch_key_, state.apply(v.key_set()).raw_repr());
    fail_reason << "dispatch key set mismatch. expected "
                << c10::DispatchKeySet(c10::DispatchKeySet::RAW, dispatch_key_)
                << ", actual " << state.apply(v.key_set());
    return fail_reason.str();
  } else if (dtype_ != v.dtype().toScalarType()) {
    // return fmt::format("tensor dtype mismatch. expected {}, actual {}",
    // dtype_, v.dtype().toScalarType());
    fail_reason << "dtype mismatch. expected " << dtype_ << ", actual "
                << v.dtype().toScalarType();
    return fail_reason.str();
  } else if (device_index_ != v.device().index()) {
    fail_reason << "Tensor device index mismatch. Expected device index to be "
                << device_index_ << ", actual " << v.device().index();
    return fail_reason.str();
  } else if (requires_grad_ != v.requires_grad()) {
    // return fmt::format("tensor requires_grad mismatch. expected {}",
    // requires_grad_);
    fail_reason << "requires_grad mismatch. expected requires_grad="
                << requires_grad_;
    return fail_reason.str();
  }
  auto ndim = v.ndimension();
  if (ndim != dim_) {
    // return fmt::format("tensor rank mismatch. expected {}, actual {}",
    // sizes_.size(), ndim);
    fail_reason << "rank mismatch. expected " << sizes_.size() << ", actual "
                << ndim;
    return fail_reason.str();
  }
  const auto& sizes = v.sym_sizes();
  for (auto i : c10::irange(ndim)) {
    auto known_size = sizes_[i];
    if (known_size.has_value() && (known_size.value() != sizes[i])) {
      fail_reason << "size mismatch at index " << i << ". expected "
                  << known_size.value() << ", actual " << sizes[i];
      return fail_reason.str();
    }
  }
  const bool supports_stride =
      !v.is_sparse() && !at::sparse_csr::is_sparse_compressed(v);
  if (supports_stride) {
    const auto& strides = v.sym_strides();
    for (auto i : c10::irange(ndim)) {
      auto known_stride = strides_[i];
      if (known_stride.has_value() && known_stride.value() != strides[i]) {
        fail_reason << "stride mismatch at index " << i << ". expected "
                    << known_stride.value() << ", actual " << strides[i];
        return fail_reason.str();
      }
    }
  }
  return "";
}

namespace {

typedef std::vector<TensorCheck> ChecksList;

typedef struct {
  PyObject_HEAD
  ChecksList* checks;
} TensorGuards;

static void TensorGuards_dealloc(TensorGuards* self) {
  if (self->checks != nullptr) {
    delete self->checks;
    self->checks = nullptr;
  }
  Py_TYPE(self)->tp_free((PyObject*)self);
}

static PyObject* TensorGuards_new(
    PyTypeObject* type,
    PyObject* args,
    PyObject* kwds) {
  TensorGuards* self = (TensorGuards*)type->tp_alloc(type, 0);
  if (self != nullptr) {
    self->checks = new ChecksList();
  }
  return (PyObject*)self;
}

static std::vector<std::optional<c10::SymInt>> wrapIntegersInOptional(
    const c10::SymIntArrayRef& intArray) {
  std::vector<std::optional<c10::SymInt>> optVec(intArray.size());
  std::transform(
      intArray.begin(),
      intArray.end(),
      optVec.begin(),
      [](const c10::SymInt& value) { return value; });
  return optVec;
}

static std::vector<std::optional<c10::SymInt>> pyListToVecOptInt(
    PyObject* pyList) {
  std::vector<std::optional<c10::SymInt>> vec;
  Py_ssize_t size = PyList_Size(pyList);
  for (Py_ssize_t i = 0; i < size; i++) {
    PyObject* item = PyList_GetItem(pyList, i);
    auto handle = py::handle(item);
    if (item == Py_None) {
      vec.emplace_back(std::nullopt);
    } else if (torch::is_symint(handle)) {
      vec.emplace_back(py::cast<c10::SymInt>(handle));
    } else {
      int64_t value = PyLong_AsLongLong(item);
      if (value == -1 && PyErr_Occurred()) {
        PyErr_SetString(
            PyExc_TypeError,
            "Size or stride list item is not a valid integer.");
        TORCH_CHECK(false, "Size or stride list item is not a valid integer.");
      }
      vec.emplace_back(c10::SymInt(value));
    }
  }
  return vec;
}

static std::vector<std::vector<std::optional<c10::SymInt>>> get_dynamic_dims(
    PyObject* dynamic_dims_py) {
  std::vector<std::vector<std::optional<c10::SymInt>>> per_tensor_dynamic_dims;
  if (dynamic_dims_py != Py_None) {
    Py_ssize_t size = PyList_Size(dynamic_dims_py);
    for (Py_ssize_t i = 0; i < size; i++) {
      PyObject* py_list = PyList_GetItem(dynamic_dims_py, i);
      std::vector<std::optional<c10::SymInt>> vec = pyListToVecOptInt(py_list);
      per_tensor_dynamic_dims.push_back(std::move(vec));
    }
  }
  return per_tensor_dynamic_dims;
}

static int TensorGuards_init(
    TensorGuards* self,
    PyObject* args,
    PyObject* kwds) {
  if (!PyTuple_CheckExact(args)) {
    PyErr_SetString(PyExc_TypeError, "expected tuple()");
    return -1;
  }
  // Top level structure is List[List[Union[int, None]]]
  PyObject* dynamic_dims_sizes_py =
      PyDict_GetItemString(kwds, "dynamic_dims_sizes");
  if (dynamic_dims_sizes_py == nullptr) {
    PyErr_SetString(PyExc_TypeError, "missing dynamic_dims_sizes=...");
    return -1;
  }
  PyObject* dynamic_dims_strides_py =
      PyDict_GetItemString(kwds, "dynamic_dims_strides");
  if (dynamic_dims_strides_py == nullptr) {
    PyErr_SetString(PyExc_TypeError, "missing dynamic_dims_strides=...");
    return -1;
  }

  // dynamic_dims_strides/sizes_py is None when dynamic_shapes=False - this is
  // an optimization to avoid invoking .size()/.stride() in python needlessly
  std::vector<std::vector<std::optional<c10::SymInt>>>
      per_tensor_dynamic_dims_sizes = get_dynamic_dims(dynamic_dims_sizes_py);
  std::vector<std::vector<std::optional<c10::SymInt>>>
      per_tensor_dynamic_dims_strides =
          get_dynamic_dims(dynamic_dims_strides_py);

  auto& checks = *self->checks;
  auto len = PyTuple_GET_SIZE(args);
  checks.reserve(len);
  LocalState state;

  for (auto i : c10::irange(len)) {
    PyObject* item = PyTuple_GET_ITEM(args, i);
    if (!THPVariable_CheckExact(item) && !THPVariable_Check(item)) {
      PyErr_SetString(PyExc_TypeError, "expected Tensor()");
      return -1;
    }
    auto tensor = THPVariable_Unpack(item);
    std::vector<std::optional<c10::SymInt>> tensor_dims_size =
        per_tensor_dynamic_dims_sizes.empty()
        ? wrapIntegersInOptional(tensor.sym_sizes())
        : per_tensor_dynamic_dims_sizes[i];
    std::vector<std::optional<c10::SymInt>> tensor_dims_stride =
        per_tensor_dynamic_dims_strides.empty()
        ? wrapIntegersInOptional(tensor.sym_strides())
        : per_tensor_dynamic_dims_strides[i];

    checks.emplace_back(
        state,
        Py_TYPE(item),
        std::move(tensor),
        tensor.key_set(),
        std::move(tensor_dims_size),
        std::move(tensor_dims_stride));
  }
  return 0;
}

PyObject* TensorGuards_check(
    TensorGuards* self,
    PyObject* args,
    PyObject* kwargs) {
  if (!PyTuple_CheckExact(args)) {
    PyErr_SetString(PyExc_TypeError, "expected tuple()");
    return nullptr;
  }
  auto& checks = *self->checks;
  auto len = PyTuple_GET_SIZE(args);

  // kwargs is just ignored here

  if (static_cast<decltype(len)>(checks.size()) != len) {
    PyErr_SetString(PyExc_TypeError, "wrong length");
    return nullptr;
  }

  LocalState state;
  // Note - all the tensors that make it to guards must be unique. Dynamo
  // builder handles guarding for positive aliases (X is Y). However, we do not
  // create guards for negative alias (X is not Y) as that is an N^2
  // relationship. Instead, we rely on the uniqueness upstream to verify, at
  // check_fn time (this function).
  ska::flat_hash_map<PyObject*, std::nullptr_t> unique_tensors;
  for (auto i : c10::irange(len)) {
    PyObject* item = PyTuple_GET_ITEM(args, i);

    if (Py_TYPE(item) != checks[i].pytype) {
      Py_RETURN_FALSE;
    }
    auto insertion = unique_tensors.insert({item, nullptr});
    if (!insertion.second) {
      // Violates uniqueness
      Py_RETURN_FALSE;
    }
    if (!checks[i].check(state, THPVariable_Unpack(item))) {
      Py_RETURN_FALSE;
    }
  }

  Py_RETURN_TRUE;
}

PyObject* TensorGuards_check_verbose(
    TensorGuards* self,
    PyObject* args,
    PyObject* kwargs) {
  if (!PyTuple_CheckExact(args)) {
    PyErr_SetString(PyExc_TypeError, "expected tuple()");
    return nullptr;
  }
  auto& checks = *self->checks;
  auto len = PyTuple_GET_SIZE(args);

  if (static_cast<decltype(len)>(checks.size()) != len) {
    PyErr_SetString(PyExc_TypeError, "wrong length");
    return nullptr;
  }

  PyObject* tensor_check_names_py =
      PyDict_GetItemString(kwargs, "tensor_check_names");
  if (tensor_check_names_py == nullptr) {
    PyErr_SetString(PyExc_TypeError, "missing tensor_check_names kwarg");
    return nullptr;
  }

  if (!PyList_Check(tensor_check_names_py)) {
    PyErr_SetString(PyExc_TypeError, "tensor_check_names kwarg must be a list");
    return nullptr;
  }

  auto names_size = PyList_Size(tensor_check_names_py);
  if (names_size != static_cast<decltype(names_size)>(checks.size())) {
    PyErr_SetString(
        PyExc_TypeError,
        "tensor_check_names should be the same size as # tensors");
    return nullptr;
  }

  std::vector<std::string> tensor_check_names;
  tensor_check_names.reserve(names_size);
  for (auto i : c10::irange(names_size)) {
    PyObject* value = PyList_GetItem(tensor_check_names_py, i);
    if (!PyUnicode_Check(value)) {
      PyErr_SetString(
          PyExc_TypeError, "tensor_check_names must only contain strings");
      return nullptr;
    }
    tensor_check_names.emplace_back(PyUnicode_AsUTF8(value));
  }

  LocalState state;
  ska::flat_hash_map<PyObject*, std::nullptr_t> unique_tensors;
  for (auto i : c10::irange(len)) {
    PyObject* item = PyTuple_GET_ITEM(args, i);
    if (Py_TYPE(item) != checks[i].pytype) {
      std::stringstream fail_reason;
      PyObject* type_str = PyObject_Str(PyObject_Type(item));
      fail_reason << "expected type of '" << tensor_check_names[i]
                  << "' to be a tensor type, ";
      if (!type_str) {
        fail_reason << "but found a different type";
      } else {
        fail_reason << "but found " << PyUnicode_AsUTF8(type_str);
      }
      return Py_BuildValue("s", fail_reason.str().c_str());
    }

    auto insertion = unique_tensors.insert({item, nullptr});
    if (!insertion.second) {
      std::stringstream fail_reason;
      fail_reason << "Duplicate tensor found where not expected! ";
      fail_reason << tensor_check_names[i]
                  << " should not alias to anything, but is aliased";
      return Py_BuildValue("s", fail_reason.str().c_str());
    }
    std::string fail_reason = checks[i].check_verbose(
        state, THPVariable_Unpack(item), tensor_check_names[i]);
    if (!fail_reason.empty()) {
      return Py_BuildValue("s", fail_reason.c_str());
    }
  }

  Py_RETURN_TRUE;
}

// NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
static PyMethodDef TensorGuards_methods[] = {
    {"check",
     (PyCFunction)(void*)TensorGuards_check,
     METH_VARARGS | METH_KEYWORDS,
     ""},
    {"check_verbose",
     (PyCFunction)(void*)TensorGuards_check_verbose,
     METH_VARARGS | METH_KEYWORDS,
     "verbose fail reasons for failed checks"},
    {nullptr} /* Sentinel */
};
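
// Rough Python-side usage sketch (illustrative; the keyword names mirror
// TensorGuards_init/TensorGuards_check_verbose above, and the exact import
// path is an assumption):
//
//   from torch._C._dynamo.guards import TensorGuards
//   guards = TensorGuards(t1, t2,
//                         dynamic_dims_sizes=None,
//                         dynamic_dims_strides=None)
//   guards.check(t1, t2)  # True while type/dtype/device/shape/strides match
//   guards.check_verbose(t1, t2, tensor_check_names=["t1", "t2"])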

static PyTypeObject TensorGuardsType = {PyVarObject_HEAD_INIT(nullptr, 0)};

struct AutocastState {
  static constexpr auto& DEVICES = at::autocast::_AUTOCAST_SUPPORTED_DEVICES;
  std::array<bool, DEVICES.size()> enabled{};
  std::array<at::ScalarType, DEVICES.size()> dtype{};
  bool cache_enabled;

  AutocastState() {
    for (size_t i = 0; i < DEVICES.size(); i++) {
      enabled[i] = at::autocast::is_autocast_enabled(DEVICES[i]);
      dtype[i] = at::autocast::get_autocast_dtype(DEVICES[i]);
    }
    cache_enabled = at::autocast::is_autocast_cache_enabled();
  }

  bool operator==(const AutocastState& o) const {
    for (size_t i = 0; i < DEVICES.size(); i++) {
      // If autocast is disabled in both states, skip the autocast_dtype
      // comparison.
      if (enabled[i] == false && o.enabled[i] == false) {
        continue;
      }
      if (enabled[i] != o.enabled[i] || dtype[i] != o.dtype[i]) {
        return false;
      }
    }
    if (cache_enabled != o.cache_enabled) {
      return false;
    }
    return true;
  }

  template <typename T>
  friend void to_json(T& json_j, const AutocastState& json_t) {
    json_j["enabled"] = json_t.enabled;
    json_j["dtype"] = json_t.dtype;
    json_j["cached_enabled"] = json_t.cache_enabled;
  }

  template <typename T>
  friend void from_json(const T& json_j, AutocastState& json_t) {
    json_t.enabled = json_j.at("enabled");
    json_t.dtype = json_j.at("dtype");
    json_t.cache_enabled = json_j.at("cached_enabled");
  }
};

// TODO (janimesh) - Remove the PyObject_HEAD part when C++ guard manager is
// merged.
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
struct GlobalStateGuard {
  PyObject_HEAD

  inline void init() {
    auto& ctx = at::globalContext();
    _grad_mode = at::GradMode::is_enabled();
    _autocast_state = AutocastState();
    // The below two flags disambiguate
    // if torch function disabled state is
    // 1) enabled, 2) all disabled, 3) subclasses disabled
    // we guard on the stack separately
    _torch_function = torch::torch_function_enabled();
    _torch_function_all_disabled = at::impl::torch_function_all_disabled();
    _deterministic_algorithms = ctx.deterministicAlgorithms();
    _deterministic_algorithms_warn_only = ctx.deterministicAlgorithmsWarnOnly();
    _allow_tf32 = ctx.float32Precision("cuda", "matmul") == "tf32";
    _allow_fp16_reduce = ctx.allowFP16ReductionCuBLAS();
    _allow_bf16_reduce = ctx.allowBF16ReductionCuBLAS();
    _num_threads = at::get_num_threads();
    _default_dtype = at::get_default_dtype();
  }

  inline bool check() const {
    auto& ctx = at::globalContext();
    return (_grad_mode == at::GradMode::is_enabled() &&
            _autocast_state == AutocastState() &&
            _torch_function == torch::torch_function_enabled() &&
            _torch_function_all_disabled ==
                at::impl::torch_function_all_disabled() &&
            _deterministic_algorithms == ctx.deterministicAlgorithms() &&
            _deterministic_algorithms_warn_only ==
                ctx.deterministicAlgorithmsWarnOnly() &&
            _allow_tf32 == (ctx.float32Precision("cuda", "matmul") == "tf32") &&
            _allow_fp16_reduce == ctx.allowFP16ReductionCuBLAS() &&
            _allow_bf16_reduce == ctx.allowBF16ReductionCuBLAS() &&
            _num_threads == at::get_num_threads()) &&
        _default_dtype == at::get_default_dtype();
  }

  inline std::string reason() const {
    std::ostringstream os;
    auto& ctx = at::globalContext();
    if (_grad_mode != at::GradMode::is_enabled())
      os << "grad_mode ";
    if (!(_autocast_state == AutocastState()))
      os << "autocast ";
    if (_torch_function != torch::torch_function_enabled())
      os << "torch_function ";
    if (_deterministic_algorithms != ctx.deterministicAlgorithms())
      os << "deterministic_algorithms ";
    if (_deterministic_algorithms_warn_only !=
        ctx.deterministicAlgorithmsWarnOnly())
      os << "deterministic_algorithms_warn_only ";
    if (_allow_tf32 != (ctx.float32Precision("cuda", "matmul") == "tf32"))
      os << "allow_tf32 ";
    if (_allow_fp16_reduce != ctx.allowFP16ReductionCuBLAS())
      os << "allow_fp16_reduce ";
    if (_allow_bf16_reduce != ctx.allowBF16ReductionCuBLAS())
      os << "allow_bf16_reduce ";
    if (_num_threads != at::get_num_threads())
      os << "num_threads ";
    if (_default_dtype != at::get_default_dtype())
      os << "default_dtype ";
    return os.str();
  }

  template <typename T>
  friend void to_json(T& json_j, const GlobalStateGuard& json_t) {
    json_j["grad_mode"] = json_t._grad_mode;
    json_j["autocast_state"] = json_t._autocast_state;
    json_j["torch_function"] = json_t._torch_function;
    json_j["torch_function_all_disabled"] = json_t._torch_function_all_disabled;
    json_j["deterministic_algorithms"] = json_t._deterministic_algorithms;
    json_j["deterministic_algorithms_warn_only"] =
        json_t._deterministic_algorithms_warn_only;
    json_j["allow_tf32"] = json_t._allow_tf32;
    json_j["allow_fp16_reduce"] = json_t._allow_fp16_reduce;
    json_j["allow_bf16_reduce"] = json_t._allow_bf16_reduce;
    json_j["num_threads"] = json_t._num_threads;
    json_j["default_dtype"] = json_t._default_dtype.toScalarType();
  }

  template <typename T>
  friend void from_json(const T& json_j, GlobalStateGuard& json_t) {
    json_t._grad_mode = json_j.at("grad_mode");
    json_t._autocast_state = json_j.at("autocast_state");
    json_t._torch_function = json_j.at("torch_function");
    json_t._torch_function_all_disabled =
        json_j.at("torch_function_all_disabled");
    json_t._deterministic_algorithms = json_j.at("deterministic_algorithms");
    json_t._deterministic_algorithms_warn_only =
        json_j.at("deterministic_algorithms_warn_only");
    json_t._allow_tf32 = json_j.at("allow_tf32");
    json_t._allow_fp16_reduce = json_j.at("allow_fp16_reduce");
    json_t._allow_bf16_reduce = json_j.at("allow_bf16_reduce");
    json_t._num_threads = json_j.at("num_threads");
    json_t._default_dtype =
        caffe2::TypeMeta::fromScalarType(json_j.at("default_dtype"));
  }

  bool _grad_mode;
  AutocastState _autocast_state;
  bool _torch_function;
  bool _torch_function_all_disabled;
  bool _deterministic_algorithms;
  bool _deterministic_algorithms_warn_only;
  bool _allow_tf32;
  bool _allow_fp16_reduce;
  bool _allow_bf16_reduce;
  int _num_threads;
  caffe2::TypeMeta _default_dtype;
  // TODO(jansel): we should guard on more state as inductor starts using it
};
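
// Round-trip sketch (illustrative): the to_json/from_json hooks above let
// nlohmann::json serialize the guard state, which is exactly what the
// dump()/load() Python methods below expose.
//
//   GlobalStateGuard g;
//   g.init();
//   std::string s = nlohmann::json(g).dump();  // serialize
//   nlohmann::json::parse(s).get_to(g);        // restore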

int GlobalStateGuard_init(
    GlobalStateGuard* self,
    PyObject* args,
    PyObject* kwargs) {
  self->init();
  return 0;
}

PyObject* GlobalStateGuard_check(
    GlobalStateGuard* self,
    PyObject* args,
    PyObject* kwargs) {
  if (self->check()) {
    Py_RETURN_TRUE;
  } else {
    Py_RETURN_FALSE;
  }
}

PyObject* GlobalStateGuard_reason(
    GlobalStateGuard* self,
    PyObject* args,
    PyObject* kwargs) {
  return PyUnicode_FromString(self->reason().c_str());
}

PyObject* GlobalStateGuard_dump(
    GlobalStateGuard* self,
    PyObject* args,
    PyObject* kwargs) {
  return PyUnicode_FromString(nlohmann::json(*self).dump().c_str());
}

PyObject* GlobalStateGuard_load(
    GlobalStateGuard* self,
    PyObject* args,
    PyObject* kwargs) {
  char* json;
  if (!PyArg_ParseTuple(args, "s", &json)) {
    throw std::runtime_error("Cannot parse as json string.");
  }
  nlohmann::json::parse(json).get_to(*self);
  Py_RETURN_NONE;
}

// NOLINTNEXTLINE(*array*)
static PyMethodDef GlobalStateGuard_methods[] = {
    {"check",
     (PyCFunction)(void*)GlobalStateGuard_check,
     METH_NOARGS,
     "Return true if global state was the same as at creation time"},
    {"reason",
     (PyCFunction)(void*)GlobalStateGuard_reason,
     METH_NOARGS,
     "Return string reason for guard check failing"},
    {"dump",
     (PyCFunction)(void*)GlobalStateGuard_dump,
     METH_NOARGS,
     "Return serialized json format"},
    {"load",
     (PyCFunction)(void*)GlobalStateGuard_load,
     METH_VARARGS,
     "Parse serialized json format"},
    {nullptr}};

static PyTypeObject GlobalStateGuardType = {PyVarObject_HEAD_INIT(nullptr, 0)};

static PyObject* check_type_id(PyObject* dummy, PyObject* args) {
  // faster `lambda obj, expected: id(type(obj)) == expected`
  PyObject* obj = nullptr;
  unsigned long long expected = 0;
  if (!PyArg_ParseTuple(args, "OK", &obj, &expected)) {
    return nullptr;
  }
  // NOLINTNEXTLINE(performance-no-int-to-ptr)
  if (Py_TYPE(obj) == (void*)expected) {
    Py_RETURN_TRUE;
  } else {
    Py_RETURN_FALSE;
  }
}

static PyObject* check_obj_id(PyObject* dummy, PyObject* args) {
  // faster `lambda obj, expected: id(obj) == expected`
  PyObject* obj = nullptr;
  unsigned long long expected = 0;
  if (!PyArg_ParseTuple(args, "OK", &obj, &expected)) {
    return nullptr;
  }
  // NOLINTNEXTLINE(performance-no-int-to-ptr)
  if (obj == (void*)expected) {
    Py_RETURN_TRUE;
  } else {
    Py_RETURN_FALSE;
  }
}

#if IS_PYTHON_3_12_PLUS

static std::unordered_map<PyObject*, uint64_t> dict_version_map;
static int dict_version_watcher_id;
static uint64_t global_dict_version_id = 1;
static int dict_version_watch_callback(
    PyDict_WatchEvent event,
    PyObject* dict,
    PyObject* key,
    PyObject* new_value) noexcept {
  if (event == PyDict_EVENT_DEALLOCATED) {
    dict_version_map.erase(dict);
  } else if (event != PyDict_EVENT_CLONED) {
    dict_version_map[dict] = global_dict_version_id++;
  }
  return 0;
}

#endif

static uint64_t get_dict_version_unchecked(PyObject* dict) {
#if IS_PYTHON_3_12_PLUS

  if (PyDict_Watch(dict_version_watcher_id, dict)) {
    throw std::runtime_error("failed to add version watcher to dict!");
  }
  if (!dict_version_map.count(dict)) {
    dict_version_map[dict] = global_dict_version_id++;
  }
  return dict_version_map[dict];

#else

  return ((PyDictObject*)dict)->ma_version_tag;

#endif
}
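
// Note: CPython deprecated the internal ma_version_tag field in 3.12
// (PEP 699), which is why the 3.12+ branch above synthesizes its own
// monotonically increasing version counter out of dict watchers instead
// of reading the tag directly.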

static PyObject* dict_version(PyObject* dummy, PyObject* args) {
  // Retrieves the version of a dictionary.
  PyObject* obj = nullptr;
  if (!PyArg_ParseTuple(args, "O", &obj)) {
    return nullptr;
  }
  if (!PyDict_Check(obj)) {
    return nullptr;
  }
  return THPUtils_packUInt64(get_dict_version_unchecked(obj));
}

static PyObject* assert_size_stride(PyObject* dummy, PyObject* args) {
  /*
  Assert that a given tensor has a given size/stride, but ignore strides
  of size==1 dimensions. Implemented in C++ as this is on the hot path.
  */
  PyObject* item = nullptr;
  PyObject* size = nullptr;
  PyObject* stride = nullptr;
  const char* op_name = nullptr;

  if (!PyArg_ParseTuple(args, "OOO|s", &item, &size, &stride, &op_name)) {
    return nullptr;
  }
  if (!THPVariable_CheckExact(item) && !THPVariable_Check(item)) {
    std::stringstream msg;
    msg << "expected Tensor()";
    if (op_name) {
      msg << " for op: " << op_name;
    }
    PyErr_SetString(PyExc_TypeError, msg.str().c_str());
    return nullptr;
  }
  if (!PyTuple_CheckExact(size) || !PyTuple_CheckExact(stride)) {
    std::stringstream msg;
    msg << "expected tuple()";
    if (op_name) {
      msg << " for op: " << op_name;
    }
    PyErr_SetString(PyExc_TypeError, msg.str().c_str());
    return nullptr;
  }
  at::Tensor tensor = THPVariable_Unpack(item);
  int64_t ndim = tensor.ndimension();
  if (PyTuple_GET_SIZE(size) != ndim || PyTuple_GET_SIZE(stride) != ndim) {
    std::stringstream msg;
    msg << "wrong number of dimensions " << ndim;
    if (op_name) {
      msg << " for op: " << op_name;
    }
    PyErr_SetString(PyExc_AssertionError, msg.str().c_str());
    return nullptr;
  }

  // We may add the size/stride assert at compile time due to unbacked symint,
  // but at runtime, the tensor can be empty.
  if (tensor.numel() == 0) {
    Py_RETURN_TRUE;
  }

  std::stringstream msg;
  int num_errors = 0;
  for (auto i : c10::irange(ndim)) {
    int64_t want_size = THPUtils_unpackLong(PyTuple_GET_ITEM(size, i));
    int64_t want_stride = THPUtils_unpackLong(PyTuple_GET_ITEM(stride, i));
    int64_t actual_size = tensor.size(i);
    int64_t actual_stride = tensor.stride(i);
    if (want_size != actual_size ||
        // ignore stride differences when size is 1
        (want_stride != actual_stride && actual_size > 1)) {
      if (num_errors > 0)
        msg << "; ";
      msg << "expected size " << actual_size << "==" << want_size << ", stride "
          << actual_stride << "==" << want_stride << " at dim=" << i;
      num_errors++;
    }
  }

  if (num_errors) {
    if (op_name) {
      msg << "\nError in op: " << op_name;
    }
    msg << "\nThis error most often comes from an incorrect fake (aka meta) kernel for a custom op.";
    msg << "\nUse torch.library.opcheck to test your custom op.";
    msg << "\nSee https://pytorch.org/docs/stable/library.html#torch.library.opcheck";
    PyErr_SetString(PyExc_AssertionError, msg.str().c_str());
    return nullptr;
  }

  Py_RETURN_TRUE;
}
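
// Rough Python-side usage (illustrative; mirrors the "OOO|s" argument
// signature parsed above, with op_name optional):
//
//   from torch._C._dynamo.guards import assert_size_stride
//   t = torch.empty(2, 3)
//   assert_size_stride(t, (2, 3), (3, 1))            # returns True
//   assert_size_stride(t, (2, 3), (1, 2), "my_op")   # raises AssertionError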

static PyObject* assert_alignment(PyObject* dummy, PyObject* args) {
  /*
   * Asserts that a given tensor meets a certain alignment.
   * This is the C++ version of torch._inductor.utils.tensor_is_aligned
   */
  PyObject* item = nullptr;
  unsigned long alignment = 0;
  const char* op_name = nullptr;

  if (!PyArg_ParseTuple(args, "Ok|s", &item, &alignment, &op_name)) {
    return nullptr;
  }
  if (!THPVariable_CheckExact(item) && !THPVariable_Check(item)) {
    std::stringstream msg;
    msg << "expected Tensor()";
    if (op_name) {
      msg << " for op: " << op_name;
    }
    PyErr_SetString(PyExc_TypeError, msg.str().c_str());
    return nullptr;
  }
  if (alignment == 0) {
    std::stringstream msg;
    msg << "alignment cannot be 0";
    if (op_name) {
      msg << " in op: " << op_name;
    }
    PyErr_SetString(PyExc_AssertionError, msg.str().c_str());
    return nullptr;
  }

  at::Tensor tensor = THPVariable_Unpack(item);

  int64_t storage_offset = tensor.storage_offset();
  size_t itemsize = tensor.itemsize();
  if (storage_offset * itemsize % alignment != 0) {
    std::stringstream msg;
    if (op_name) {
      msg << "\nError in op: " << op_name;
    }
    msg << "\nExpect the tensor to be " << alignment
        << " bytes aligned. Fail due to storage_offset=" << storage_offset
        << " itemsize=" << itemsize;
    PyErr_SetString(PyExc_AssertionError, msg.str().c_str());
    return nullptr;
  }

  Py_RETURN_TRUE;
}

template <typename T>
static void unwrap_size_tuple(PyObject* obj, T& output) {
  TORCH_CHECK(PyTuple_CheckExact(obj));
  size_t len = PyTuple_GET_SIZE(obj);
  output.reserve(len);
  for (size_t i = 0; i < len; ++i) {
    auto result = PyLong_AsSsize_t(PyTuple_GET_ITEM(obj, i));
    TORCH_CHECK(result >= 0);
    output.emplace_back(result);
  }
}

template <typename T>
static void _parse_empty_strided_args(
    PyObject* args,
    T& sizes,
    T& strides,
    at::ScalarType& dtype) {
  TORCH_CHECK(PyTuple_CheckExact(args));
  TORCH_CHECK(PyTuple_GET_SIZE(args) == 3);
  // note PyTuple_GET_ITEM returns a borrowed ref, so no need for refcounts
  unwrap_size_tuple(PyTuple_GET_ITEM(args, 0), sizes);
  unwrap_size_tuple(PyTuple_GET_ITEM(args, 1), strides);
  PyObject* py_dtype = PyTuple_GET_ITEM(args, 2);
  TORCH_CHECK(THPDtype_Check(py_dtype));
  dtype = reinterpret_cast<THPDtype*>(py_dtype)->scalar_type;
}

static PyObject* _empty_strided_device(
    PyObject* dummy,
    PyObject* args,
    c10::DeviceType device_type) {
  HANDLE_TH_ERRORS;
  at::SmallVector<int64_t, 8> sizes;
  at::SmallVector<int64_t, 8> strides;
  at::ScalarType dtype{at::ScalarType::Undefined};
  _parse_empty_strided_args(args, sizes, strides, dtype);
  if (device_type == c10::DeviceType::CPU) {
    return THPVariable_Wrap(
        at::detail::empty_strided_cpu(sizes, strides, dtype));
  }
#ifdef USE_CUDA
  else if (device_type == c10::DeviceType::CUDA) {
    return THPVariable_Wrap(at::detail::empty_strided_cuda(
        sizes, strides, dtype, c10::DeviceType::CUDA));
  }
#endif
#ifdef USE_XPU
  else if (device_type == c10::DeviceType::XPU) {
    return THPVariable_Wrap(at::detail::empty_strided_xpu(
        sizes, strides, dtype, c10::DeviceType::XPU));
  }
#endif
  else {
    TORCH_CHECK(
        false, "PyTorch compiled without support for the specified device.");
  }

  END_HANDLE_TH_ERRORS;
}

static PyObject* _empty_strided_cpu(PyObject* dummy, PyObject* args) {
  // at::empty_strided is surprisingly slow. This is a lower-overhead
  // version that saves ~2us on every allocation.
  return _empty_strided_device(dummy, args, c10::DeviceType::CPU);
}

static PyObject* _empty_strided_cuda(PyObject* dummy, PyObject* args) {
  // at::empty_strided is surprisingly slow. This is lower-overhead.
  return _empty_strided_device(dummy, args, c10::DeviceType::CUDA);
}

static PyObject* _empty_strided_xpu(PyObject* dummy, PyObject* args) {
  // at::empty_strided is surprisingly slow. This is lower-overhead.
  return _empty_strided_device(dummy, args, c10::DeviceType::XPU);
}

static PyObject* _reinterpret_tensor(PyObject* dummy, PyObject* args) {
  HANDLE_TH_ERRORS;
  static PythonArgParser parser(
      {"_reinterpret_tensor(Tensor base, IntArrayRef sizes, IntArrayRef strides, int64_t offset_increment=0)"},
      /*traceable=*/true);

  ParsedArgs<4> parsed_args;
  auto r = parser.parse(args, /*kwargs=*/nullptr, parsed_args);

  Tensor self = r.tensor(0);
  auto sizes = r.intlist(1);
  auto strides = r.intlist(2);
  auto offset_increment = r.toInt64(3);

  auto res = torch::inductor::_reinterpret_tensor(
      self, sizes, strides, offset_increment);
  return torch::autograd::utils::wrap(res);

  END_HANDLE_TH_ERRORS;
}

// NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
static PyMethodDef _methods[] = {
    {"check_type_id", check_type_id, METH_VARARGS, nullptr},
    {"check_obj_id", check_obj_id, METH_VARARGS, nullptr},
    {"assert_size_stride", assert_size_stride, METH_VARARGS, nullptr},
    {"assert_alignment", assert_alignment, METH_VARARGS, nullptr},
    {"dict_version", dict_version, METH_VARARGS, nullptr},
    {"_empty_strided_cpu", _empty_strided_cpu, METH_VARARGS, nullptr},
    {"_empty_strided_cuda", _empty_strided_cuda, METH_VARARGS, nullptr},
    {"_empty_strided_xpu", _empty_strided_xpu, METH_VARARGS, nullptr},
    {"_reinterpret_tensor", _reinterpret_tensor, METH_VARARGS, nullptr},
    {nullptr, nullptr, 0, nullptr}};

static struct PyModuleDef _module = {
    PyModuleDef_HEAD_INIT,
    "torch._C._dynamo.guards",
    "Module containing checks on tensors",
    -1,
    _methods};

std::string get_exception_message() {
  PyObject *ptype = nullptr, *pvalue = nullptr, *ptraceback = nullptr;
  PyErr_Fetch(&ptype, &pvalue, &ptraceback);

  PyObject* exc_message_pyobj = PyObject_Str(pvalue);
  std::string exc_message = PyUnicode_AsUTF8(exc_message_pyobj);

  Py_DECREF(exc_message_pyobj);
  Py_XDECREF(ptype);
  Py_XDECREF(pvalue);
  Py_XDECREF(ptraceback);
  return exc_message;
}

bool is_immutable_object(py::handle example_value) {
  py::object config_module = py::module_::import("torch._dynamo.config");

  bool is_tensor_immutable =
      config_module.attr("skip_tensor_guards_with_matching_dict_tags")
          .cast<bool>();

  if (PyTuple_Check(example_value.ptr())) {
    // Check that each element is immutable
    for (Py_ssize_t i = 0; i < PyTuple_Size(example_value.ptr()); ++i) {
      if (!is_immutable_object(
              py::handle(PyTuple_GetItem(example_value.ptr(), i)))) {
        return false;
      }
    }
    return true;
  }

  return PyLong_Check(example_value.ptr()) ||
      PyFloat_Check(example_value.ptr()) || PyBool_Check(example_value.ptr()) ||
      PyUnicode_Check(example_value.ptr()) ||
      (is_tensor_immutable && THPVariable_Check(example_value.ptr()));
}

bool is_parameter(py::handle tensor) {
  py::object parameter = py::module::import("torch.nn").attr("Parameter");
  return py::isinstance(tensor, parameter);
}

/**
 * Dispatches metadata functions to the methods that return integer values,
 * i.e. used whenever static shapes are being used.
 *
 * These are used by the tensor storage overlapping check. Even though their
 * symbolic counterpart does work whenever static shapes are being used, the
 * introduced overhead might significantly worsen the performance.
 */
struct StaticMeta {
  static int64_t numel(const Tensor& t) {
    return t.numel();
  }

  static int64_t storage_offset(const Tensor& t) {
    return t.storage_offset();
  }

  static int64_t size(const Tensor& t, int64_t i) {
    return t.size(i);
  }

  static int64_t stride(const Tensor& t, int64_t i) {
    return t.stride(i);
  }
};

/**
 * Dispatches metadata functions to the methods that return c10::SymInt
 * values, i.e. used whenever dynamic shapes are being used.
 */
struct DynamicMeta {
  static SymInt numel(const Tensor& t) {
    return t.sym_numel();
  }

  static SymInt storage_offset(const Tensor& t) {
    return t.sym_storage_offset();
  }

  static SymInt size(const Tensor& t, int64_t i) {
    return t.sym_size(i);
  }

  static SymInt stride(const Tensor& t, int64_t i) {
    return t.sym_stride(i);
  }
};
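
// The "*Meta" structs act as a compile-time strategy: callers pick either
// the cheap concrete accessors or the SymInt ones per call site, e.g.
// (illustrative)
//
//   tensors_definitely_do_not_overlap<StaticMeta>(x, y);   // static shapes
//   tensors_definitely_do_not_overlap<DynamicMeta>(x, y);  // dynamic shapes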

/**
 * Assumption: x and y are known to share a storage, and we are trying to
 * determine if their memory is actually completely disjoint, based on
 * sizes/strides/storage_offset
 *
 * "Meta" should be one of the "*Meta" classes above. They dictate which
 * version of the metadata functions we should be using (symbolic vs.
 * concrete). Even though they have the same apparent behavior, the symbolic
 * version introduces a bit of overhead. Such an overhead might end up
 * becoming relevant if it's run enough times.
 */
template <class Meta>
bool tensors_definitely_do_not_overlap(const Tensor& x, const Tensor& y) {
  if (x.is_same(y)) {
    return false;
  }
  if (Meta::numel(x) == 0 || Meta::numel(y) == 0) {
    return true;
  }

  // Make x always on the left
  if (Meta::storage_offset(x) > Meta::storage_offset(y)) {
    return tensors_definitely_do_not_overlap<Meta>(y, x);
  }

  // Short-circuit in the "obvious" overlapping case: both tensors are
  // contiguous
  if (x.is_contiguous() && y.is_contiguous()) {
    if (Meta::storage_offset(x) + Meta::numel(x) > Meta::storage_offset(y)) {
      // definitely overlap
      return false;
    } else {
      // definitely no overlap
      return true;
    }
  }

  // Short-circuit: if last memory address of x is < start of y, then not
  // overlapping.
  auto x_last = Meta::storage_offset(x);
  for (int64_t i = 0; i < x.dim(); i++) {
    x_last += (Meta::size(x, i) - 1) * Meta::stride(x, i);
  }
  if (x_last < Meta::storage_offset(y)) {
    return true;
  }

  if (x.dim() == 2 && y.dim() == 2 && Meta::stride(x, 1) == 1 &&
      Meta::stride(y, 1) == 1) {
    // This case is needed for the shampoo optimizer.
    // All tensors are 2d (non-contiguous), have the same outer stride, and
    // have an inner stride of 1 (so rows are contiguous)
    if (Meta::stride(x, 0) == Meta::stride(y, 0)) {
      auto offset_delta = Meta::storage_offset(y) - Meta::storage_offset(x);
      if (offset_delta < Meta::size(x, 1)) {
        // definitely overlaps (row 0 of y overlaps with row 0 of x)
        // Example:
        //   base = torch.arange(32).reshape(4, 8)
        //   x = base.narrow(1, 0, 4)
        //     x: size=(4, 4), stride=(8, 1), offset=0
        //   y = base.narrow(1, 3, 4)
        //     y: size=(4, 4), stride=(8, 1), offset=3
        return false;
      }
      auto x_total_elems_covered =
          Meta::stride(x, 0) * (Meta::size(x, 0) - 1) + Meta::size(x, 1);
      if (x_total_elems_covered <= offset_delta) {
        // definitely does not overlap (last byte of x is before start of y)
        // Example:
        //   x: size=(4, 4), stride=(8, 1), offset=0 (last byte is 27)
        //   y: size=(4, 4), stride=(8, 1), offset=28 (start byte is 28)
        return true;
      }
      // At this point, we want to check if the 0th row of y
      // overlaps with **some** row of x.
      // We can check this by shifting y backward by the shared stride,
      // repeatedly, until the first row of y is before the first row of x.
      // Then we can check if these rows overlap. We can accomplish this by
      // modding our offset by the stride.
      auto offset_delta_mod = offset_delta % Meta::stride(x, 0);
      // Example:
      //   0  1  2  3
      //   9 10 11 12
      //  18 19 20 21
      //  27 28 29 30
      //   x: size=(4, 4), stride=(9, 1), offset=0
      //   y: size=(4, 4), stride=(9, 1), offset=22 (this would not overlap)
      //   y: size=(4, 4), stride=(9, 1), offset=23 (this would not overlap)
      //   y: size=(4, 4), stride=(9, 1), offset=24 (this would overlap)
      //   y: size=(4, 4), stride=(9, 1), offset=25 (this would overlap)
      // If the interval [modded_offset, modded_offset + y_size] falls
      // entirely within the gap before the next row of x, the tensors do
      // not overlap.
      if (offset_delta_mod + Meta::size(y, 1) <= Meta::stride(x, 0)) {
        return true;
      }
    }
  }
  return false;
}

/**
 * Computes the indices of the tensors that might overlap.
 *
 * Checks which of the given tensors have overlapping storages with ANY of
 * the other tensors.
 *
 * So, for example, if tensor 1 overlaps with tensor 2, and tensor 3 with
 * tensor 4, all of them will be in the output of this function. Even if
 * tensor 1 and 4 don't overlap.
 */
template <class Meta>
std::unordered_set<int64_t> compute_overlapping_tensors(
    const std::vector<Tensor>& tensors) {
  std::unordered_set<int64_t> aliased_tensor_indices;
  for (int64_t i = 0; i < static_cast<int64_t>(tensors.size()); i++) {
    const auto& tensor_i = tensors[i];
    for (int64_t j = 0; j < i; j++) {
      if (!tensors_definitely_do_not_overlap<Meta>(tensor_i, tensors[j])) {
        aliased_tensor_indices.insert(i);
        aliased_tensor_indices.insert(j);
      }
    }
  }
  return aliased_tensor_indices;
}
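
// Worked example (illustrative): given tensors [a, b, c] where only a and b
// share memory, the pairwise loop above inserts both of their indices, so
// the result is {0, 1}; c stays out because it definitely does not overlap
// with either of the others.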

/**
 * Checks whether the storage overlapping relation is preserved.
 *
 * At this point, `non_overlapping` represents the tensors that should not
 * have overlapping storages. Similarly, `overlapping` represents the tensors
 * that should have overlapping storage in some way (or that we can't be
 * sure).
 *
 * This function checks whether the assumption above is true or not.
 */
bool check_overlapping(
    const std::vector<Tensor>& overlapping,
    const std::vector<Tensor>& non_overlapping) {
  // Merge the tensor lists.
  std::vector<Tensor> tensors;
  tensors.reserve(overlapping.size() + non_overlapping.size());
  tensors.insert(tensors.end(), overlapping.begin(), overlapping.end());
  tensors.insert(tensors.end(), non_overlapping.begin(), non_overlapping.end());
  // Check what is the current storage overlapping relation.
  auto indices = compute_overlapping_tensors<StaticMeta>(tensors);
  // Check that the set of indices of tensors that might overlap is equal to
  // the indices of the first `overlapping.size()` tensors. That's because
  // `overlapping` tensors were in the beginning of `tensors` list.
  auto range = c10::irange(overlapping.size());
  return indices.size() == overlapping.size() &&
      std::all_of(range.begin(), range.end(), [&](int64_t i) {
        return indices.count(i) == 1;
      });
}

/**
 * Class responsible for collecting and checking the storage overlap relations.
 *
 * The way GuardManager is implemented, when STORAGE_OVERLAPPING guard check is
 * run on a given tensor, we don't know if it is an overlapping or
 * non-overlapping tensor. There's no order to which GuardManager runs the
 * guard check so that we can split it in 2.
 *
 * Since we are only interested in the classification of each tensor (not
 * necessarily the order), we can just issue 2 STORAGE_OVERLAPPING guards
 * representing the overlapping tensors and the non-overlapping ones.
 *
 * In order to collect the information from both guards (so that we can call
 * `check_overlapping` function correctly), we need this class which stores
 * both kinds of tensors, and knows when it has collected each one of them.
 */
class StorageOverlapChecker {
 public:
  StorageOverlapChecker(
      size_t expected_overlapping,
      size_t expected_non_overlapping)
      : _expected_overlapping(expected_overlapping),
        _expected_non_overlapping(expected_non_overlapping) {}

  /**
   * Adds a tensor to the corresponding storage, based on whether it should be
   * an `overlapping` tensor or not.
   */
  void add(PyObject* obj, bool overlapping) {
    // Just check that `obj` is actually a tensor, so that we can keep it
    // alive by incrementing its ref-count.
    TORCH_CHECK(THPVariable_CheckExact(obj) || THPVariable_Check(obj));
    Py_INCREF(obj);
    _get(overlapping).push_back(obj);
  }

  void reset(bool overlapping) {
    auto& vec = _get(overlapping);
    for (auto item : vec) {
      Py_DECREF(item);
    }
    vec.clear();
  }

  /**
   * Maybe checks the storage overlapping relation.
   *
   * Before actually calling `check_overlapping` function, this function makes
   * sure it has collected all expected tensors.
   */
  bool maybe_check() {
    TORCH_CHECK(_expected_overlapping >= _overlapping.size());
    TORCH_CHECK(_expected_non_overlapping >= _non_overlapping.size());
    if (_expected_overlapping == _overlapping.size() &&
        _expected_non_overlapping == _non_overlapping.size()) {
      // Transform each list of PyObject* into an actual list of Tensors.
      auto overlapping_tensors =
          _tensors_from(_overlapping, _expected_overlapping);
      auto non_overlapping_tensors =
          _tensors_from(_non_overlapping, _expected_non_overlapping);
      return check_overlapping(overlapping_tensors, non_overlapping_tensors);
    } else {
      // If we haven't collected them all yet, keep on running.
      return true;
    }
  }

 private:
  /**
   * Returns a reference to the container that corresponds to the given
   * overlapping relation.
   */
  std::vector<PyObject*>& _get(bool overlapping) {
    return overlapping ? _overlapping : _non_overlapping;
  }

  /**
   * Transforms a given list of PyObject* into a list of Tensor.
   */
  std::vector<Tensor> _tensors_from(
      const std::vector<PyObject*>& objects,
      size_t size) {
    std::vector<Tensor> tensors;
    tensors.reserve(size);
    std::transform(
        objects.begin(),
        objects.end(),
        std::back_inserter(tensors),
        [=](PyObject* obj) { return THPVariable_Unpack(obj); });
    return tensors;
  }

  // Expected number of possibly overlapping tensors.
  size_t _expected_overlapping;
  // Expected number of non-overlapping tensors.
  size_t _expected_non_overlapping;
  // Collected possibly overlapping tensors.
  std::vector<PyObject*> _overlapping;
  // Collected non-overlapping tensors.
  std::vector<PyObject*> _non_overlapping;
};

/**
 * Stores relevant guard debug information, e.g., failure str for a LeafGuard
 * failure. The data structure is also accessible in Python.
 */

class GuardDebugInfo {
 public:
  GuardDebugInfo(
      bool result,
      py::list verbose_code_parts,
      int num_guards_executed)
      : result(result),
        verbose_code_parts(std::move(verbose_code_parts)),
        num_guards_executed(num_guards_executed) {}

  // This constructor is used when guard succeeds.
  GuardDebugInfo(bool result, int num_guards_executed)
      : result(result), num_guards_executed(num_guards_executed) {}

  GuardDebugInfo(
      bool result,
      const std::string& failed_reason,
      int num_guards_executed)
      : GuardDebugInfo(result, num_guards_executed) {
    verbose_code_parts.append(failed_reason);
  }

  std::string to_string() {
    std::stringstream ss;
    ss << "GuardDebugInfo(\n"
       << "result=" << result << ",\n"
       << "verbose_code_parts=" << verbose_code_parts << ",\n"
       << "num_guards_executed=" << num_guards_executed << ")\n";
    return ss.str();
  }

  // Whether the guard passed or failed.
  bool result;

  // This is a list of verbose_code_parts for the failed guard. When there are
  // more than one verbose_code_parts, then recompilation reasoning infra on
  // the Python side can iterate over this list and eval each string to
  // pinpoint the exact code part that failed.
  py::list verbose_code_parts;

  // Total number of executed guards so far. This is helpful in debugging if
  // shuffling is working.
  int num_guards_executed;
};

class GuardManager;
class RootGuardManager;
class DictGuardManager;

/**
 * Base class for the leaf guard in the GuardManager hierarchy.
 */
class LeafGuard {
 public:
  // Most guards do not need root guard manager.
  LeafGuard(py::object verbose_code_parts)
      : _verbose_code_parts(std::move(verbose_code_parts)) {}

  // Guards like TENSOR_MATCH require root_guard_manager to access local_state
  // shared across all leaf guards.
  LeafGuard(RootGuardManager* root_guard_manager, py::object verbose_code_parts)
      : _root_guard_manager(root_guard_manager),
        _verbose_code_parts(std::move(verbose_code_parts)) {}

  // check function could be called from python. This is useful for debugging
  // purpose.
  bool check(py::handle value) {
    return check_nopybind(value.ptr());
  }

  GuardDebugInfo check_verbose(py::handle value) {
    return check_verbose_nopybind(value.ptr());
  }

  virtual GuardDebugInfo check_verbose_nopybind(
      PyObject* value) { // borrowed ref
    bool result = check_nopybind(value);
    if (!result) {
      return GuardDebugInfo(result, _verbose_code_parts, 0);
    }
    return GuardDebugInfo(true, 0);
  }

  py::list verbose_code_parts() {
    return _verbose_code_parts;
  }

  // This is on the hot path and avoids any refcounting code from pybind. This
  // is not exposed to Python and can only be called from C++.
  virtual bool check_nopybind(PyObject* value) = 0;
  virtual bool check_nopybind(FrameLocalsMapping* map) {
    // throw std::runtime_error("fallback to python");
    // Could fallback to running check on the Python dict (lazily constructed)
    return check_nopybind((PyObject*)map->to_dict());
  }

  virtual ~LeafGuard() = default;

 protected:
  // RootGuardManager has state that is common across all guards like
  // LocalState.
  RootGuardManager* _root_guard_manager{nullptr};

 private:
  // This is set while constructing the leaf guard. This is used for
  // identifying the cause of recompilation.
  py::list _verbose_code_parts;
};

/**
|
|
* Represents a leaf guard that accepts the python guard check function. We
|
|
* would like to have most of the guards in C++ (to avoid a Python function
|
|
* call). But, it will take some time to reach that goal. Also, there might be
|
|
* cases where its too tedious to write an equivalent C++ guard.
|
|
*
|
|
* LAMBDA_GUARD allows us to gradually move to C++. We can start from all
|
|
* guards of type PythonLambaGuard and incrementally move expensive guards to
|
|
* C++.
|
|
*/
|
|
class LAMBDA_GUARD : public LeafGuard {
|
|
public:
|
|
LAMBDA_GUARD(py::object guard_check_fn, py::object verbose_code_parts)
|
|
: LeafGuard(std::move(verbose_code_parts)) {
|
|
if (py::isinstance<py::function>(guard_check_fn)) {
|
|
_guard_check_fn = py::cast<py::function>(std::move(guard_check_fn));
|
|
} else {
|
|
throw py::type_error("LAMBDA_GUARD expects (callable, str)");
|
|
}
|
|
}
|
|
|
|
// Runs the lambda function with the current f_locals value.
|
|
bool check_nopybind(PyObject* value) override { // borrowed ref
|
|
PyObject* x = PyObject_CallOneArg(_guard_check_fn.ptr(), value); // new ref
|
|
if (x == nullptr) {
|
|
// An exception is caught in the lambda function.
|
|
PyErr_Clear();
|
|
return false;
|
|
}
|
|
bool result = PyObject_IsTrue(x);
|
|
Py_DECREF(x);
|
|
return result;
|
|
}
|
|
|
|
GuardDebugInfo check_verbose_nopybind(PyObject* value) override {
|
|
PyObject* x = PyObject_CallOneArg(_guard_check_fn.ptr(), value); // new ref
|
|
if (x == nullptr) {
|
|
// An exception is caught in the lambda function.
|
|
std::string exc_message = get_exception_message();
|
|
PyErr_Clear();
|
|
return GuardDebugInfo(false, exc_message, 0);
|
|
}
|
|
bool result = PyObject_IsTrue(x);
|
|
Py_DECREF(x);
|
|
if (result) {
|
|
return GuardDebugInfo(true, 0);
|
|
}
|
|
return GuardDebugInfo(false, verbose_code_parts(), 0);
|
|
}
|
|
|
|
private:
|
|
// The user provided lambda function for check_fn.
|
|
py::function _guard_check_fn;
|
|
};
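
// Hedged usage sketch from the Python side (names here are illustrative; the
// exact binding signature lives in the pybind registrations elsewhere in this
// file, and the GuardManager docs below show a similar pattern):
//
//   >> mgr.add_lambda_guard(
//   >>     lambda x: x.shape[0] > 2,
//   >>     ["x.shape[0] > 2"])  # verbose_code_parts for failure reporting
//
// A falsy return value or an exception raised inside the lambda makes the
// guard fail.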

class TYPE_MATCH : public LeafGuard {
 public:
  // type_id = id(type(obj))
  TYPE_MATCH(py::object type_id, py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)),
        _expected(py::cast<intptr_t>(std::move(type_id))) {}

  bool check_nopybind(PyObject* value) override { // borrowed ref
    // NOLINTNEXTLINE(performance-no-int-to-ptr)
    return Py_TYPE(value) == (void*)_expected;
  }

 private:
  // id of the type of the original object.
  intptr_t _expected;
};

class ID_MATCH : public LeafGuard {
 public:
  // obj_id = id(obj)
  ID_MATCH(py::object obj_id, py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)),
        _expected(py::cast<intptr_t>(std::move(obj_id))) {}

  bool check_nopybind(PyObject* value) override { // borrowed ref
    // NOLINTNEXTLINE(performance-no-int-to-ptr)
    return value == (void*)_expected;
  }

 private:
  // id of the original object.
  intptr_t _expected;
};

class NONE_MATCH : public LeafGuard {
 public:
  NONE_MATCH(py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)) {}

  bool check_nopybind(PyObject* value) override { // borrowed ref
    return value == Py_None;
  }
};

class TRUE_MATCH : public LeafGuard {
 public:
  TRUE_MATCH(py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)) {}

  bool check_nopybind(PyObject* value) override { // borrowed ref
    return value == Py_True;
  }
};

class FALSE_MATCH : public LeafGuard {
 public:
  FALSE_MATCH(py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)) {}

  bool check_nopybind(PyObject* value) override { // borrowed ref
    return value == Py_False;
  }
};

class EQUALS_MATCH : public LeafGuard {
 public:
  EQUALS_MATCH(py::object value, py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)),
        _value(value),
        _value_type(Py_TYPE(value.ptr())) {}

  bool check_nopybind(PyObject* value) override { // borrowed ref
    // Fast path - pointer equality check. Pointer equality checks are ok
    // because objects guarded with EQUALS_MATCH are immutable.
    if (value != _value.ptr()) {
      // Check type
      if (Py_TYPE(value) != _value_type) {
        return false;
      }
      int result = PyObject_RichCompareBool(value, _value.ptr(), Py_EQ);
      // Check for exception
      if (result == -1) {
        PyErr_Clear();
        return false;
      }
      return result;
    }
    return true;
  }

 private:
  // value to compare against. This is py::object so that we hold on to the
  // original value and prevent garbage collection. We run EQUALS_MATCH only on
  // selected objects which do not have high memory footprint, so holding on to
  // these objects is ok.
  py::object _value;

  // Type of the value
  PyTypeObject* _value_type;
};

class RANGE_ITERATOR_MATCH : public LeafGuard {
 public:
  RANGE_ITERATOR_MATCH(
      py::object start,
      py::object stop,
      py::object step,
      py::object type_id,
      py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)),
        _type_id(py::cast<intptr_t>(std::move(type_id))) {
    PyObject* start_obj = start.ptr();
    PyObject* stop_obj = stop.ptr();
    PyObject* step_obj = step.ptr();
    _start = THPUtils_unpackLong(start_obj);
    _stop = THPUtils_unpackLong(stop_obj);
    _step = THPUtils_unpackLong(step_obj);
    TORCH_CHECK(
        !PyErr_Occurred(), "values of start/stop/step must fit in a long type");
  }

  bool check_nopybind(PyObject* value) override { // borrowed ref
    // Do a type match first.
    // NOLINTNEXTLINE(performance-no-int-to-ptr)
    if (Py_TYPE(value) != (void*)_type_id) {
      return false;
    }
    _PyRangeIterObject* iter = (_PyRangeIterObject*)value;

#if IS_PYTHON_3_12_PLUS
    long start = iter->start;
#else
    long start = iter->start + iter->index * iter->step;
#endif // IS_PYTHON_3_12_PLUS

    long stop = iter->start + iter->len * iter->step;
    return start == _start && stop == _stop && iter->step == _step;
  }

 private:
  intptr_t _type_id;
  // Normalized representation of a range iterator.
  long _start;
  long _stop;
  long _step;
};
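
// Worked example of the normalization above (illustrative, assuming CPython's
// pre-3.12 rangeiterobject layout where `index` counts consumed items and
// `len` is the total length): iter(range(0, 10, 2)) advanced twice has
// start == 0, index == 2, step == 2, len == 5, so the guard compares
// start = 0 + 2 * 2 = 4 and stop = 0 + 5 * 2 = 10, i.e. the iterator state
// is treated as equivalent to iter(range(4, 10, 2)).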

class TUPLE_ITERATOR_LEN : public LeafGuard {
 public:
  TUPLE_ITERATOR_LEN(
      py::object length,
      py::object type_id,
      py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)),
        _length(py::cast<Py_ssize_t>(std::move(length))),
        _type_id(py::cast<intptr_t>(std::move(type_id))) {}

  bool check_nopybind(PyObject* value) override { // borrowed ref
    // Do a type match first.
    // NOLINTNEXTLINE(performance-no-int-to-ptr)
    if (Py_TYPE(value) != (void*)_type_id) {
      return false;
    }
    _PyTupleIterObject* it = (_PyTupleIterObject*)value;
    Py_ssize_t length = 0;
    if (it->it_seq)
      length = PyTuple_GET_SIZE(it->it_seq) - it->it_index;
    return length == _length;
  }

 private:
  // Remaining length of the guarded tuple iterator.
  Py_ssize_t _length;
  intptr_t _type_id;
};

class LENGTH_CHECK : public LeafGuard {
 public:
  LENGTH_CHECK(py::object value, py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)),
        _length(py::cast<Py_ssize_t>(std::move(value))) {}

  bool check_nopybind(PyObject* value) override { // borrowed ref
    // PySequence_Length returns -1 if the object is not a sequence. So, we
    // don't have to test for PySequence_Check.
    return PySequence_Length(value) == _length;
  }

 private:
  // Length of the guarded sequence.
  Py_ssize_t _length;
};

class DICT_LENGTH : public LeafGuard {
 public:
  DICT_LENGTH(py::object value, py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)),
        _length(py::cast<Py_ssize_t>(std::move(value))) {}

  bool check_nopybind(PyObject* value) override { // borrowed ref
    return PyDict_Check(value) && PyDict_Size(value) == _length;
  }

 private:
  // Length of the guarded dict
  Py_ssize_t _length;
};

class NOT_NONE : public LeafGuard {
 public:
  NOT_NONE(py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)) {}

  bool check_nopybind(PyObject* value) override { // borrowed ref
    return value != Py_None;
  }
};

class MAPPING_KEYS_MATCH : public LeafGuard {
 public:
  MAPPING_KEYS_MATCH(py::object value, py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)) {
    // This is ok to stash in the state because we only support
    // MappingProxyType objects with constant keys. So, the mem overhead is
    // negligible.
    _keys = py::list(value.attr("keys")());
  }

  bool check_nopybind(PyObject* value) override { // borrowed ref
    PyObject* keys = PyMapping_Keys(value); // new ref
    if (keys == nullptr) {
      // Not a mapping; clear the error and fail the guard.
      PyErr_Clear();
      return false;
    }
    int result = PyObject_RichCompareBool(keys, _keys.ptr(), Py_EQ);
    Py_DECREF(keys);
    if (result == -1) {
      // The comparison raised; treat it as a guard failure.
      PyErr_Clear();
      return false;
    }
    return result;
  }

 private:
  py::object _keys;
};

class DEFAULT_DEVICE : public LeafGuard {
 public:
  DEFAULT_DEVICE(py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)) {
    py::handle device_module = py::module::import("torch.utils._device");
    // Save the dict using py::object
    _utils_device_dict = device_module.attr("__dict__");
    _device = _utils_device_dict["CURRENT_DEVICE"];
  }

  template <typename T>
  bool check_nopybind_template(T* value) { // borrowed ref
    // Create a static interned string. An interned string is faster than
    // creating a new string every time. Even though it's a new reference, we
    // don't decref it. Interned strings are used for things like variable
    // names and are leaked by design.
    static PyObject* current_device_str =
        PyUnicode_InternFromString("CURRENT_DEVICE");
    PyObject* device = PyDict_GetItem(
        _utils_device_dict.ptr(), current_device_str); // borrowed ref
    if (device != _device.ptr()) {
      int result = PyObject_RichCompareBool(device, _device.ptr(), Py_EQ);
      if (result == -1) {
        PyErr_Clear();
        return false;
      }
      return result;
    }
    return true;
  }

  bool check_nopybind(PyObject* value) override {
    return check_nopybind_template(value);
  }

  bool check_nopybind(FrameLocalsMapping* value) override {
    return check_nopybind_template(value);
  }

 private:
  // Save the current device and the module dict during the guard construction.
  py::object _utils_device_dict;
  py::object _device;
};

class GLOBAL_STATE : public LeafGuard {
 public:
  GLOBAL_STATE(py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)) {
    _guard = std::make_unique<GlobalStateGuard>();
    _guard->init();
  }

  bool check_nopybind(PyObject* value) override { // borrowed ref
    // Ignore value arg, this is just to satisfy the interface.
    return _guard->check();
  }

  bool check_nopybind(FrameLocalsMapping* value) override {
    // Ignore value arg, this is just to satisfy the interface.
    return _guard->check();
  }

  GuardDebugInfo check_verbose_nopybind(PyObject* value) override {
    if (!_guard->check()) {
      return GuardDebugInfo(
          false, "GLOBAL_STATE changed: " + _guard->reason(), 0);
    }
    return GuardDebugInfo(true, 1);
  }

 private:
  std::unique_ptr<GlobalStateGuard> _guard;
};

// Checks that an attr is absent in the object. We don't need the opposite
// HASATTR guard because we can just rely on GetAttrGuardAccessor to act as a
// HASATTR guard.
class NO_HASATTR : public LeafGuard {
 public:
  NO_HASATTR(py::object attr_name, py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)),
        _attr_name(std::move(attr_name)) {}

  bool check_nopybind(PyObject* value) override { // borrowed ref
    return PyObject_HasAttr(value, _attr_name.ptr()) == 0;
  }

 private:
  py::object _attr_name;
};

// Checks that a dict contains or does not contain a key. This happens for
// the PythonSysModulesVariable tracker.
// TODO(janimesh) - Check if we can use DictGuardManager. The downside could be
// a large number of keys for the sys module, so DICT_CONTAINS might still end
// up being faster.
class DICT_CONTAINS : public LeafGuard {
 public:
  DICT_CONTAINS(bool contains, py::object key, py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)),
        _contains(contains ? 1 : 0),
        _key(std::move(key)) {}

  bool check_nopybind(PyObject* value) override { // borrowed ref
    if (!PyDict_Check(value)) {
      // A non-dict contains no keys, so only the "does not contain" form of
      // the guard can pass here.
      return 0 == _contains;
    }
    int result = PyDict_Contains(value, _key.ptr());
    if (result == -1) {
      // PyDict_Contains raised (e.g., unhashable key); fail the guard.
      PyErr_Clear();
      return false;
    }
    return result == _contains;
  }

 private:
  int _contains;
  py::object _key;
};
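
// Illustrative semantics (hypothetical values): DICT_CONTAINS(true, "foo")
// passes only for dicts where "foo" in d; DICT_CONTAINS(false, "foo") passes
// when the key is absent. E.g., with d = {"foo": 1}, the first form passes
// and the second fails.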

/**
 * Relational guards compare more than one value. We implement relational
 * guards by capturing some state in the guard object. For example, for tensor
 * aliasing guards - tensor X is not tensor Y - we construct one leaf guard
 * and install it as a leaf of two guard managers (one for X and another for
 * Y). Therefore, this guard is run twice. In the first invocation, it saves
 * the first value (state) and returns True. In the second invocation, it
 * compares the saved value with the new value and returns True if they do
 * not alias.
 *
 * We have to be careful about resetting in case the other guards fail and we
 * have some state in the relational guard. This is done by the virtual method
 * reset_state(). This is called by the RootGuardManager before it exits.
 *
 */
class RelationalGuard : public LeafGuard {
 public:
  RelationalGuard(py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)) {}

  // Reset the relational guard state on guard failure. This is called by the
  // guard manager.
  virtual void reset_state() = 0;
};

/**
 * Checks that object x is object y.
 */
class OBJECT_ALIASING : public RelationalGuard {
 public:
  OBJECT_ALIASING(py::object verbose_code_parts)
      : RelationalGuard(std::move(verbose_code_parts)) {}

  bool check_nopybind(PyObject* value) override { // borrowed ref
    if (_is_first_call) {
      _first_tensor = value;
      _is_first_call = false;
      return true;
    }
    return _first_tensor == value;
  }

  void reset_state() final {
    _is_first_call = true;
  }

 private:
  bool _is_first_call{true};
  PyObject* _first_tensor{nullptr};
};
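
// Worked example of the two-invocation protocol (illustrative sources):
// suppose the same OBJECT_ALIASING guard instance is installed on the
// managers for L['x'] and L['y'] because Dynamo traced them as the same
// object. During a check, the first invocation stashes the pointer for
// L['x'] and returns true; the second returns ptr(L['y']) == ptr(L['x']).
// reset_state() then clears the stash so the next frame check starts fresh.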

/**
 * Checks that none of the tensors alias.
 */
class NO_TENSOR_ALIASING : public RelationalGuard {
 public:
  NO_TENSOR_ALIASING(
      const py::list& tensor_names,
      py::object verbose_code_parts)
      : RelationalGuard(std::move(verbose_code_parts)),
        _tensor_names(tensor_names) {
    _unique_tensors.reserve(tensor_names.size());
  }

  bool check_nopybind(PyObject* value) override { // borrowed ref
    auto insertion = _unique_tensors.insert({value, nullptr});
    if (!insertion.second) {
      // No need to clear _unique_tensors, reset_state will do it.
      return false;
    }
    return true;
  }

  GuardDebugInfo check_verbose_nopybind(PyObject* value) override {
    bool result = check_nopybind(value);

    if (!result) {
      return GuardDebugInfo(
          false, "Duplicate tensor found where not expected!", 0);
    }
    return GuardDebugInfo(true, 1);
  }

  void reset_state() final {
    _unique_tensors.clear();
  }

 private:
  py::list _tensor_names;
  ska::flat_hash_map<PyObject*, std::nullptr_t> _unique_tensors;
};

/**
 * Checks the storage overlapping relation of input tensors.
 *
 * This guard is always installed in pairs: one for the possibly overlapping
 * tensors, and another one for the non-overlapping tensors. This is so we can
 * correctly identify the given tensor in the check method as one of the 2
 * classes mentioned above.
 *
 * In the end, the one responsible for storing and checking is the
 * `StorageOverlapChecker` class.
 */
class STORAGE_OVERLAPPING : public RelationalGuard {
 public:
  STORAGE_OVERLAPPING(
      bool overlapping,
      std::shared_ptr<StorageOverlapChecker> checker,
      py::object verbose_code_parts)
      : RelationalGuard(std::move(verbose_code_parts)),
        _overlapping(overlapping),
        _checker(std::move(checker)) {}

  bool check_nopybind(PyObject* value) override {
    _checker->add(value, _overlapping);
    return _checker->maybe_check();
  }

  void reset_state() final {
    _checker->reset(_overlapping);
  }

 private:
  // Flag that indicates which kind of tensor this guard is collecting:
  // 1. Possibly overlapping tensors; or
  // 2. Non-overlapping tensors.
  bool _overlapping;
  // Actual checker for this guard.
  std::shared_ptr<StorageOverlapChecker> _checker;
};

/**
 * Symbolic shape guard. Each instance is installed on several (index, value)
 * sources; once all of the expected int/float arguments have been collected,
 * it calls a compiled C function that evaluates the shape expression.
 */
class SYMBOLIC_SHAPE_GUARD : public RelationalGuard {
 public:
  SYMBOLIC_SHAPE_GUARD(
      py::int_ nargs_int,
      py::int_ nargs_float,
      py::int_ py_addr,
      py::object py_addr_keep_alive,
      py::object verbose_code_parts)
      : RelationalGuard(std::move(verbose_code_parts)),
        _py_addr_keep_alive(std::move(py_addr_keep_alive)) {
    _nargs_int = PyLong_AsSize_t(nargs_int.ptr());
    _nargs_float = PyLong_AsSize_t(nargs_float.ptr());
    _nargs = _nargs_int + _nargs_float;
    if (PyErr_Occurred()) {
      throw py::value_error(
          "SYMBOLIC_SHAPE_GUARD expected a non-negative number of arguments.");
    }
    uintptr_t addr = PyLong_AsUnsignedLongLong(py_addr.ptr());
    if (PyErr_Occurred()) {
      throw py::value_error(
          "SYMBOLIC_SHAPE_GUARD expected an address to a C function.");
    }
    _guard_check_fn = reinterpret_cast<int8_t (*)(int64_t*, double*)>(addr);
    _args_int = std::vector<int64_t>(_nargs_int);
    _args_float = std::vector<double>(_nargs_float);
  }

  bool check_nopybind(PyObject* value) override {
    // We know that these arguments came from
    // IndexedSource(TensorPropertyGuard), and therefore there is no need to
    // check that the value is a Tuple[int, int].
    PyObject* py_idx = PyTuple_GET_ITEM(value, 0);
    PyObject* py_val = PyTuple_GET_ITEM(value, 1);
    size_t iarg = PyLong_AsSize_t(py_idx);
    if (iarg < _nargs_int) {
      if (!PyLong_Check(py_val)) {
        return false;
      }
      _args_int[iarg] = PyLong_AsLongLong(py_val);
    } else {
      if (!PyFloat_Check(py_val)) {
        return false;
      }
      _args_float[iarg - _nargs_int] = PyFloat_AS_DOUBLE(py_val);
    }
    _args_seen++;

    if (_args_seen == _nargs) {
      _args_seen = 0;
      return _guard_check_fn(_args_int.data(), _args_float.data());
    } else {
      // We don't have all the values yet. Return true until we get all.
      return true;
    }
  }

  GuardDebugInfo check_verbose_nopybind(PyObject* value) override {
    if (!PyTuple_Check(value)) {
      return GuardDebugInfo(false, "Non tuple found!", 0);
    } else if (PyTuple_Size(value) != 2) {
      return GuardDebugInfo(false, "Tuple of size not 2 found!", 0);
    } else {
      PyObject* py_idx = PyTuple_GET_ITEM(value, 0);
      PyObject* py_val = PyTuple_GET_ITEM(value, 1);
      if (!PyLong_Check(py_idx)) {
        return GuardDebugInfo(false, "Non integer index found!", 0);
      }
      size_t iarg = PyLong_AsSize_t(py_idx);
      if (iarg >= _nargs) {
        return GuardDebugInfo(false, "Index out of bounds!", 0);
      } else if (iarg < _nargs_int && !PyLong_Check(py_val)) {
        return GuardDebugInfo(false, "Non integer found!", 0);
      } else if (iarg >= _nargs_int && !PyFloat_Check(py_val)) {
        return GuardDebugInfo(false, "Non float found!", 0);
      }
    }
    bool result = check_nopybind(value);

    if (!result) {
      std::string msg = "\"Shape guard failed with values: ";
      for (auto v : _args_int) {
        msg += std::to_string(v) + ",";
      }
      for (auto v : _args_float) {
        msg += std::to_string(v) + ",";
      }
      msg.pop_back();
      msg += "\"";
      auto msgs = py::list();
      for (auto code_part : verbose_code_parts()) {
        msgs.append(code_part);
      }
      msgs.append(msg);
      return GuardDebugInfo(false, msgs, 0);
    }
    return GuardDebugInfo(true, 1);
  }

  void reset_state() final {
    _args_seen = 0;
  }

 private:
  py::object _py_addr_keep_alive;
  size_t _args_seen{0}, _nargs_float, _nargs_int, _nargs;
  std::vector<int64_t> _args_int;
  std::vector<double> _args_float;
  std::function<int8_t(int64_t*, double*)> _guard_check_fn;
};
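
// Hedged sketch of the compiled function this guard dispatches to (the real
// function is generated elsewhere and only its address is passed in; the
// signature below is implied by the reinterpret_cast above, and the guard
// expression is a made-up example):
//
//   extern "C" int8_t guard_fn(int64_t* int_args, double* float_args) {
//     // e.g. for a guard like "L['x'].size()[0] == L['y'].size()[0]"
//     return int_args[0] == int_args[1];
//   }
//
// Each (index, value) tuple fills one slot of the int/float buffers; once all
// _nargs slots have been seen, the function is invoked once with both
// buffers.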

class DYNAMIC_INDICES : public LeafGuard {
  // C++ equivalent of
  //  code.append(
  //      f"(({tensor_name}._dynamo_dynamic_indices.issubset({value._dynamo_dynamic_indices}))
  //      if hasattr({tensor_name}, '_dynamo_dynamic_indices') else True)"  #
  //      noqa: B950
  //  )
 public:
  DYNAMIC_INDICES(py::set dynamic_indices, py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)),
        _dynamic_indices(std::move(dynamic_indices)) {}

  bool check_nopybind(PyObject* value) override { // borrowed ref
    // Make an interned string
    static PyObject* dynamic_indices_str =
        PyUnicode_InternFromString("_dynamo_dynamic_indices");
    PyObject* indices = PyObject_GetAttr(value, dynamic_indices_str); // new ref
    if (indices == nullptr) {
      // Attr absent. Clear exception.
      PyErr_Clear();
      // This is true deliberately. If hasattr fails, we return true.
      return true;
    }

    static PyObject* issubset_str = PyUnicode_InternFromString("issubset");
    PyObject* call_result = PyObject_CallMethodObjArgs(
        indices, issubset_str, _dynamic_indices.ptr(), nullptr); // new ref
    if (call_result == nullptr) {
      // The issubset call raised; fail the guard.
      PyErr_Clear();
      Py_DECREF(indices);
      return false;
    }
    bool result = PyObject_IsTrue(call_result);
    Py_DECREF(call_result);
    Py_DECREF(indices);
    return result;
  }

 private:
  py::set _dynamic_indices;
};

class DICT_VERSION : public LeafGuard {
 public:
  DICT_VERSION(py::object value, py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)) {
    if (!PyDict_Check(value.ptr())) {
      throw py::type_error("DICT_VERSION expects a dict");
    }
    _tag = get_dict_version_unchecked(value.ptr());
  }

  bool check_nopybind(PyObject* value) override { // borrowed ref
    return PyDict_Check(value) && get_dict_version_unchecked(value) == _tag;
  }

  // Saved dict version.
  uint64_t _tag;
};
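
// Illustrative behavior (assuming get_dict_version_unchecked reads CPython's
// per-dict version tag, which is bumped on mutation): if the guard was built
// on d = {"a": 1}, then running d["a"] = 2 followed by d["a"] = 1 restores
// the contents but not the version, so the guard fails. DICT_VERSION is
// therefore a strict "not mutated since compile" check, not a value
// comparison.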

// GuardManager can be a pointer to DictGuardManager, but at this point the
// compiler does not know that DictGuardManager is a derived class of
// GuardManager (no way to define inheritance relationships in forward
// declarations), so we forward declare a factory function and define it when
// both DictGuardManager and GuardManager are fully defined.
std::unique_ptr<GuardManager> make_guard_manager(
    RootGuardManager* root,
    std::string source,
    py::handle example_value,
    py::handle guard_manager_enum);

GuardManager* clone_guard_manager(
    GuardManager* from,
    RootGuardManager* root,
    const py::function& clone_filter_fn);
void add_relational_guard_resetter_to_cloned_root(
    RootGuardManager* root,
    std::shared_ptr<RelationalGuard> guard);

/**
 * Base class representing a pair of accessor and the associated guard
 * manager. The accessor defines how to access the child value from the
 * py::object given to the parent check function.
 *
 * GuardAccessors can be considered equivalent to the name() method of Source
 * objects in guards.py. In Python, the name() method returns a str which we
 * can then eval in f_locals and f_globals to retrieve the actual py object.
 * GuardAccessor serves the same purpose. The minor difference is that
 * GuardManager is a tree structure, so a GuardAccessor just has to retrieve
 * the value at the next level in this tree and pass it to the child
 * GuardAccessor.
 *
 * GuardAccessor also owns the GuardManager associated with the retrieved
 * value from the GuardAccessor.
 */
class GuardAccessor {
 public:
  GuardAccessor(
      RootGuardManager* root,
      py::object accessor_key,
      std::string source,
      py::handle example_value,
      py::handle guard_manager_enum);

  // Return by reference as GuardAccessor owns the GuardManager.
  std::unique_ptr<GuardManager>& get_guard_manager() {
    return _guard_manager;
  }

  bool matches_key(const py::handle& key) const {
    return _accessor_key.equal(key);
  }

  std::string get_source() {
    return _source;
  }

  // matches_dict_tag is used by the DictGetItemGuardAccessor to skip the guard
  // subtree on immutable dict getitems.
  virtual bool check_nopybind(PyObject* obj, bool matches_dict_tag = false) = 0;
  virtual bool check_nopybind(FrameLocalsMapping* map, bool matches_dict_tag) {
    // Fall back to running the check on the (lazily constructed) Python dict.
    return check_nopybind((PyObject*)map->to_dict(), matches_dict_tag);
  }
  virtual GuardDebugInfo check_verbose_nopybind(PyObject* obj) = 0;
  virtual std::string repr() const = 0;

  virtual ~GuardAccessor() = default;

  // Cloning related functions
  GuardAccessor(GuardManager* guard_manager, GuardAccessor* from);

  virtual GuardAccessor* clone(
      RootGuardManager* cloned_root,
      const py::function& clone_filter_fn) = 0;

  void clone_visitor(GuardAccessor* to) {
    to->_source = this->_source;
    to->_accessor_key = this->_accessor_key;
  }

  template <typename DerivedGuardAccessor>
  GuardAccessor* clone_common(
      RootGuardManager* cloned_root,
      const py::function& clone_filter_fn) {
    GuardManager* cloned_mgr = clone_guard_manager(
        get_guard_manager().get(), cloned_root, clone_filter_fn);
    if (cloned_mgr == nullptr) {
      return nullptr;
    }
    DerivedGuardAccessor* cloned_accessor =
        new DerivedGuardAccessor(cloned_mgr, (DerivedGuardAccessor*)this);
    return cloned_accessor;
  }

 protected:
  // Guard manager corresponding to the retrieved value from the
  // GuardAccessor.
  std::unique_ptr<GuardManager> _guard_manager;
  // accessor key could be py::str for getattr, getitem or py::function for
  // lambda accessor. It is a py::object because we need to keep these accessor
  // keys alive.
  py::object _accessor_key;

  // A string that can be eval'd on f_locals or f_globals to access the
  // variable value. Only used for debugging.
  std::string _source;
};

/**
 * GuardManager encapsulates all the guards related to a particular
 * py::object. It is a tree structure and consists of 1) Leaf guards - Guards
 * that are run on the user given object 2) Accessors - Guard accessors (like
 * getattr, getitem) to access the next value in the tree hierarchy. The
 * accessor object also holds the child GuardManager.
 *
 * Let's look at an example to understand how it works.
 * class Pair:
 *     int x = 1;
 *     int y = 2;
 *
 * At compile time
 * >> guard_manager = GuardManager()
 * >> guard_manager.x.add_lambda_guard(
 *        lambda x: isinstance(x, Pair),
 *        lambda x: f"expected Pair, found {type(x)}"
 *    )
 * >> guard_manager.x.add_lambda_guard(lambda x: x == 1, lambda x: f"found
 * {x}, expected 1")
 * >> guard_manager.y.add_lambda_guard(lambda x: x == 2, lambda x: f"found
 * {x}, expected 2")
 *
 * At runtime
 * >> guard_manager.check(Pair())
 *
 * At compile time we build the tree structure. When we do `guard_manager.x`,
 * it creates an AttrGuardAccessorNode, initializes a child guard manager with
 * this accessor node, and adds it as a child. When we do
 * `guard_manager.x.add_lambda_guard`, we call add_lambda_guard on the newly
 * created guard manager and register a new leaf guard on it.
 *
 * At runtime, the accessor node has an important function of providing a way
 * to access the value for the child guard. In the above example,
 * guard_manager.x adds an AttrGuardAccessorNode with attr_name x. When the
 * check function is called, the parent GuardManager calls getattr(value, "x")
 * on its value passed to the check function to call the check function of the
 * child guard manager.
 *
 * Performance optimization for fail fast - An optimization for runtime here is
 * to sort the execution of child guards depending on the failure count. This
 * ensures that we run the guards that are statistically more prone to fail
 * first. This can improve the cache lookup time when we have multiple cache
 * entries.
 */

// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions)
class GuardManager {
 public:
  GuardManager() = delete;
  GuardManager(RootGuardManager* root, std::string source)
      : _root(root), _source(std::move(source)), _is_dict(false) {}

  GuardManager(
      RootGuardManager* root,
      std::string source,
      py::handle example_value)
      : _root(root),
        _source(std::move(source)),
        _is_dict(py::isinstance<py::dict>(example_value)) {
    if (_is_dict) {
      _dict_tag = get_dict_version_unchecked(example_value.ptr());
    }
  }

  GuardManager(const GuardManager& m) = delete;
  GuardManager& operator=(const GuardManager&) = delete;
  virtual ~GuardManager() = default;

  RootGuardManager* get_root() {
    return _root;
  }

  std::string get_source() {
    return _source;
  }

  virtual void add_leaf_guard(std::shared_ptr<LeafGuard> leaf_guard) {
    _leaf_guards.emplace_back(std::move(leaf_guard));
  }
 public:
  // For cloning
  GuardManager(RootGuardManager* root, std::string source, bool is_dict)
      : _root(root), _source(std::move(source)), _is_dict(is_dict) {}

  void clone_common(
      RootGuardManager* cloned_root,
      GuardManager* cloned_mgr,
      const py::function& clone_filter_fn) {
    for (const auto& guard : _leaf_guards) {
      cloned_mgr->_leaf_guards.emplace_back(guard);
      if (std::shared_ptr<RelationalGuard> relational_guard =
              std::dynamic_pointer_cast<RelationalGuard>(guard)) {
        add_relational_guard_resetter_to_cloned_root(
            cloned_root, relational_guard);
      }
    }

    for (const auto& accessor : _accessors) {
      GuardAccessor* cloned_accessor =
          accessor->clone(cloned_root, clone_filter_fn);
      if (cloned_accessor != nullptr) {
        cloned_mgr->_accessors.emplace_back(
            std::unique_ptr<GuardAccessor>(cloned_accessor));
      }
    }
  }

  virtual GuardManager* clone(
      RootGuardManager* cloned_root,
      const py::function& clone_filter_fn) {
    if (!py::cast<bool>(clone_filter_fn(this))) {
      return nullptr;
    }
    GuardManager* cloned_mgr = new GuardManager(cloned_root, _source, _is_dict);
    clone_common(cloned_root, cloned_mgr, clone_filter_fn);
    return cloned_mgr;
  }

  /**
   * Adds a new guard manager with the appropriate Accessor. If the accessor
   * is already present, we just return the guard manager.
   */
  template <typename GuardAccessorT>
  GuardManager* get_child_manager(
      const py::object& accessor_key,
      const std::string& source,
      py::handle example_value,
      py::handle guard_manager_enum) {
    // accessor_key type depends on the GuardAccessorT
    // for example for GetAttrGuardAccessor - py::str name

    // Return the manager if the guard accessor exists
    for (const auto& accessor : _accessors) {
      if (accessor->matches_key(accessor_key) &&
          source == accessor->get_source()) {
        return accessor->get_guard_manager().get();
      }
    }

    // Construct a new guard accessor
    _accessors.emplace_back(std::make_unique<GuardAccessorT>(
        _root,
        std::move(accessor_key),
        source,
        example_value,
        guard_manager_enum));
    return _accessors.back()->get_guard_manager().get();
  }

  // Runs the leaf guards check and then the child managers' check functions.
  //
  // NB: There is some code DUPLICATION between this and the check_verbose
  // function. This is intentional. The check function is on the hot path and
  // is kept very simple. The purpose of the check_verbose function is to get
  // guard failure reasoning to understand recompilations. check_verbose does
  // not change the state of the guard, e.g., it does not shuffle the guards
  // and does not change the fail count. For simplicity, we duplicate the code
  // here.
  template <typename T>
  bool check_nopybind_template(T* value) { // borrowed ref
    if (!this->check_leaf_guards_nopybind(value)) {
      return false;
    }

    return this->check_accessors_nopybind(value);
  }

  virtual bool check_nopybind(PyObject* value) {
    return check_nopybind_template(value);
  }

  virtual bool check_nopybind(FrameLocalsMapping* value) {
    return check_nopybind_template(value);
  }

  template <typename T>
  bool check_leaf_guards_nopybind(T* value) {
    // Iterate over leaf guards
    for (const auto& guard : _leaf_guards) {
      if (!guard->check_nopybind(value)) { // early exit
        _fail_count += 1;
        // no need of sorting, just return.
        return false;
      }
    }

    return true;
  }

  template <typename T>
  bool check_accessors_nopybind(T* value) {
    bool matches_dict_tag = false;
    uint64_t new_tag = 0;
    if constexpr (std::is_same_v<T, PyObject>) {
      if (_is_dict) {
        // Check if the dict tag matches. If it does, propagate to the child
        // accessors. This will pass to the child manager via
        // DictGetItemGuardManager.
        // Relational Guards need to keep state, so do not send
        // matches_dict_tag to avoid early exits when the dict_tag matches and
        // the object is immutable.
        new_tag = get_dict_version_unchecked(value);
        matches_dict_tag = (new_tag == _dict_tag);
      }
    }

    // Iterate over accessors.
    bool result = true;
    bool failed_on_first = true;
    for (const auto& accessor : _accessors) {
      if (!accessor->check_nopybind(value, matches_dict_tag)) { // early exit
        _fail_count += 1;
        result = false;
        // need to sort, so break the loop.
        break;
      }
      failed_on_first = false;
    }

    // failed_on_first is just an optimization to avoid sorting if we are
    // failing on the first accessor itself. This is helpful when we have
    // already sorted the guards once, and don't need to sort again.
    if (!result && !failed_on_first) {
      // Inplace sort the child guards by fail count. This moves the guard
      // with the higher fail count earlier in the queue, and enables fail
      // fast for the next check.

      // An alternate implementation was to use a priority queue directly on
      // _accessors, but it was rejected because of the complexity of popping
      // and creating a new pq on each run_guards. Moreover, this sort happens
      // on the unhappy path, when a guard check fails. So, it's probably ok.
      std::sort(
          _accessors.begin(),
          _accessors.end(),
          [](const std::unique_ptr<GuardAccessor>& a,
             const std::unique_ptr<GuardAccessor>& b) {
            return a->get_guard_manager()->fail_count() >
                b->get_guard_manager()->fail_count();
          });
    }

    if (_is_dict && result) {
      // If result is true, reset the _dict_tag. This is useful if there is a
      // mutation on the dict but it does not change the attr values (like
      // swapping).
      _dict_tag = new_tag;
    }

    return result;
  }
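
  // Worked example of the fail-fast sort above (illustrative numbers):
  // suppose the accessors' child managers currently have fail counts
  // [0, 5, 2]. After a failure past the first accessor, the sort reorders
  // them to fail counts [5, 2, 0], so the historically flakiest subtree runs
  // first on the next cache lookup.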

  // This function has some code duplication with the check function. This is
  // deliberate, to keep the check function simple and fast.
  virtual GuardDebugInfo check_verbose_nopybind(
      PyObject* value) { // borrowed ref
    int num_guards_executed = 0;

    const GuardDebugInfo& debug_info =
        check_leaf_guards_verbose_nopybind(value, num_guards_executed);
    if (!debug_info.result) {
      return debug_info;
    }

    return check_accessors_verbose_nopybind(value, num_guards_executed);
  }

  GuardDebugInfo check_leaf_guards_verbose_nopybind(
      PyObject* value,
      int& num_guards_executed) {
    // Iterate over leaf guards
    for (const auto& guard : _leaf_guards) {
      const GuardDebugInfo& debug_info = guard->check_verbose_nopybind(value);
      num_guards_executed++;
      if (!debug_info.result) {
        return GuardDebugInfo(
            false, debug_info.verbose_code_parts, num_guards_executed);
      }
    }

    return GuardDebugInfo(true, num_guards_executed);
  }

  GuardDebugInfo check_accessors_verbose_nopybind(
      PyObject* value,
      int& num_guards_executed) {
    // Iterate over accessors
    for (const auto& accessor : _accessors) {
      const GuardDebugInfo& debug_info =
          accessor->check_verbose_nopybind(value);
      num_guards_executed += debug_info.num_guards_executed;
      if (!debug_info.result) {
        return GuardDebugInfo(
            false, debug_info.verbose_code_parts, num_guards_executed);
      }
    }

    return GuardDebugInfo(true, num_guards_executed);
  }

  bool has_no_accessors() {
    return _accessors.empty();
  }

  int64_t fail_count() const {
    return _fail_count;
  }

  // DEBUG function - Returning raw pointers because we can't return unique_ptr
  // and pybind does not accept a unique_ptr reference return type.
  virtual std::vector<GuardAccessor*> get_accessors() const {
    std::vector<GuardAccessor*> ret;
    ret.reserve(_accessors.size());
    for (const auto& accessor : _accessors) {
      ret.emplace_back(accessor.get());
    }
    return ret;
  }

  // DEBUG function - Returning raw pointers because we can't return unique_ptr
  // and pybind does not accept a unique_ptr reference return type.
  virtual std::vector<GuardManager*> get_child_managers() {
    std::vector<GuardManager*> ret;
    ret.reserve(_accessors.size());
    for (const auto& accessor : _accessors) {
      ret.emplace_back(accessor->get_guard_manager().get());
    }
    return ret;
  }

  // DEBUG function - Returning raw pointers because we can't return unique_ptr
  // and pybind does not accept a unique_ptr reference return type.
  std::vector<LeafGuard*> get_leaf_guards() const {
    std::vector<LeafGuard*> ret;
    ret.reserve(_leaf_guards.size());
    for (const auto& guard : _leaf_guards) {
      ret.push_back(guard.get());
    }
    return ret;
  }

  bool is_leaf_guard_present(const std::string& guard_name) {
    return _inserted_leaf_guards.find(guard_name) !=
        _inserted_leaf_guards.end();
  }

  void insert_leaf_guard(const std::string& guard_name) {
    _inserted_leaf_guards.insert(guard_name);
  }

  void add_permitted_leaf_guard(std::shared_ptr<LeafGuard> leaf_guard) {
    // Selectively called for permitted guards. This is used by
    // DictGuardManager, which overrides add_leaf_guard to throw a runtime
    // error.
    GuardManager::add_leaf_guard(std::move(leaf_guard));
  }

 protected:
  // Keeps a count of how many times this guard manager's check function
  // returns False. This is used for the sorting optimization.
  int64_t _fail_count{0};

 private:
  // Root of the guard manager. This is used to install the relational guard
  // resetters.
  RootGuardManager* _root;

  // A string that can be used to eval on f_locals or f_globals to get the
  // value. This is used only to pass on debugging information.
  std::string _source;

  // A set of which leaf guards are inserted. This is used to prevent
  // duplicate guards like TYPE_MATCH.
  std::unordered_set<std::string> _inserted_leaf_guards;

  // Leaf guards are the terminal guards on this object, e.g., a type check on
  // a list. These guards have to be run before any children are run.
  //
  // These leaf guards are not shufflable. In almost all cases, these guards
  // will have an order, e.g., a type(x) is int guard and an x == 5 guard. We
  // also expect very few leaf guards per GuardManager node.
  //
  // NB: Why are leaf guards shared_ptr? This is primarily to enable relational
  // guards like `tensor X is not tensor Y`. These guards require multiple
  // values. We handle it by creating one guard object that holds state, and
  // this guard is installed in many guard managers, hence a shared_ptr.
  std::vector<std::shared_ptr<LeafGuard>> _leaf_guards;

  // GuardAccessor nodes to access the child guards. These guards are
  // shufflable. On a guard failure, they are sorted based on their fail count
  // to enable fail fast for the next check.
  std::vector<std::unique_ptr<GuardAccessor>> _accessors;

  bool _is_dict;
  uint64_t _dict_tag{0};
};

GuardAccessor::GuardAccessor(
    RootGuardManager* root,
    py::object accessor_key,
    std::string source,
    py::handle example_value,
    py::handle guard_manager_enum)
    : _guard_manager(
          make_guard_manager(root, source, example_value, guard_manager_enum)),
      _accessor_key(std::move(accessor_key)),
      _source(std::move(source)) {}

// Cloning related functions
GuardAccessor::GuardAccessor(GuardManager* guard_manager, GuardAccessor* from)
    : _guard_manager(std::unique_ptr<GuardManager>(guard_manager)) {
  from->clone_visitor(this);
}

/**
 Note on [Ownership with cloning] - GuardManagers have the facility to clone
 themselves. This is useful for cloning a subset of the guard manager tree
 into a diff guard manager.

 As far as ownership goes, the model is exactly the same as before. We have
 unique_ptr for GuardAccessors and GuardManagers. So, any state required for
 the accessors and managers is copied over using constructors and
 clone_visitor functions. The main thing to notice is the leaf guards. The
 leaf guards are represented using shared_ptr, and they are shared (not
 cloned) with the cloned managers.

 So for leaf guard state to be released, both the original and cloned managers
 have to be destructed.
 */

/**
 * RootGuardManager is the root of the guard tree. This is primarily
 * constructed to hold the relational guard pointers so that we can reset the
 * state of those guards on guard failure. All the other important
 * implementation is in the GuardManager class.
 */

class RootGuardManager : public GuardManager {
 public:
  // This is the root node; its _root member points to itself.
  RootGuardManager() : GuardManager(this, "L") {}

  // Adds the relational guard resetter
  void add_relational_guard_resetter(
      std::shared_ptr<RelationalGuard> relational_guard) {
    _relational_guard_resetters.emplace_back(std::move(relational_guard));
  }

  // Python visible API to check guard function.
  bool check(py::handle value) {
    return check_nopybind(value.ptr());
  }

  // Python visible API to check_verbose guard function.
  GuardDebugInfo check_verbose(py::handle value) {
    return check_verbose_nopybind(value.ptr());
  }

  // Fast check function.
  template <typename T>
  bool check_nopybind_template(T* value) { // borrowed ref
    // Check [Note on GIL interaction with mutex lock] for details on why we
    // need the mutex and its interactions with the GIL.
    PyThreadState* _save = nullptr;
    Py_UNBLOCK_THREADS; // ; is added to avoid clang-formatting
    std::lock_guard<std::mutex> lock_guard(_lock);
    Py_BLOCK_THREADS; // ; is added to avoid clang-formatting

    // Get the local state. This will be used for TENSOR_MATCH guards.
    if (_init_local_state) {
      LocalState state;
      _local_state = state;
    }

    if (!GuardManager::check_leaf_guards_nopybind(value)) {
      _reset_relational_guard_state();
      return false;
    }

    // Run accessor guards without TorchFunction enabled. Dynamo should only
    // be adding guards on values without torch function at this point,
    // because if there was a torch function, we should've traced through it.
    const at::impl::TorchFunctionDisabledState old_state =
        at::impl::PythonTorchFunctionTLS::get_disabled_state();
    at::impl::PythonTorchFunctionTLS::set_disabled_state(
        at::impl::TorchFunctionDisabledState::ALL_DISABLED);

    if (!GuardManager::check_accessors_nopybind(value)) {
      at::impl::PythonTorchFunctionTLS::set_disabled_state(old_state);
      _reset_relational_guard_state();
      return false;
    }

    // Iterate over epilogue leaf guards.
    for (const auto& guard : _epilogue_lambda_guards) {
      if (!guard->check_nopybind(value)) { // early exit
        at::impl::PythonTorchFunctionTLS::set_disabled_state(old_state);
        _reset_relational_guard_state();
        return false;
      }
    }

    at::impl::PythonTorchFunctionTLS::set_disabled_state(old_state);
    _reset_relational_guard_state();
    return true;
  }

  bool check_nopybind(PyObject* value) override {
    return check_nopybind_template(value);
  }

  bool check_nopybind(FrameLocalsMapping* value) override {
    return check_nopybind_template(value);
  }

  // Fast check_verbose function.
  GuardDebugInfo check_verbose_nopybind(
      PyObject* value) override { // borrowed ref
    // Check [Note on GIL interaction with mutex lock] for details on why we
    // need the mutex and its interactions with the GIL.
    PyThreadState* _save = nullptr;
    Py_UNBLOCK_THREADS; // ; is added to avoid clang-formatting
    std::lock_guard<std::mutex> lock_guard(_lock);
    Py_BLOCK_THREADS; // ; is added to avoid clang-formatting

    // Get the local state. This will be used for TENSOR_MATCH guards.
    if (_init_local_state) {
      LocalState state;
      _local_state = state;
    }

    int num_guards_executed = 0;

    // Run leaf guards. This includes the GlobalStateGuard and the Torch
    // Function Mode stack guard, which require Torch Function to be in its
    // unmodified state.
    const GuardDebugInfo& debug_info_leaf =
        GuardManager::check_leaf_guards_verbose_nopybind(
            value, num_guards_executed);

    if (!debug_info_leaf.result) {
      _reset_relational_guard_state();
      return debug_info_leaf;
    }

    const at::impl::TorchFunctionDisabledState old_state =
        at::impl::PythonTorchFunctionTLS::get_disabled_state();
    at::impl::PythonTorchFunctionTLS::set_disabled_state(
        at::impl::TorchFunctionDisabledState::ALL_DISABLED);
    const GuardDebugInfo& debug_info_accessors =
        GuardManager::check_accessors_verbose_nopybind(
            value, num_guards_executed);

    if (!debug_info_accessors.result) {
      at::impl::PythonTorchFunctionTLS::set_disabled_state(old_state);
      _reset_relational_guard_state();
      return debug_info_accessors;
    }

    // Iterate over epilogue leaf guards
    for (const auto& guard : _epilogue_lambda_guards) {
      const GuardDebugInfo& tmp_debug_info =
          guard->check_verbose_nopybind(value);
      num_guards_executed++;
      if (!tmp_debug_info.result) {
        at::impl::PythonTorchFunctionTLS::set_disabled_state(old_state);
        _reset_relational_guard_state();
        return GuardDebugInfo(
            false, tmp_debug_info.verbose_code_parts, num_guards_executed);
      }
    }
    at::impl::PythonTorchFunctionTLS::set_disabled_state(old_state);
    _reset_relational_guard_state();
    return GuardDebugInfo(true, num_guards_executed);
  }

  void add_epilogue_lambda_guard(std::unique_ptr<LeafGuard> leaf_guard) {
    _epilogue_lambda_guards.emplace_back(std::move(leaf_guard));
  }

  void set_init_local_state_flag() {
    _init_local_state = true;
  }

  // See note on [Ownership with cloning]
  RootGuardManager* clone_manager(const py::function& clone_filter_fn) {
    // Use clone_filter_fn
    if (!py::cast<bool>(clone_filter_fn(this))) {
      return nullptr;
    }
    RootGuardManager* cloned_root = new RootGuardManager();
    clone_common(cloned_root, cloned_root, clone_filter_fn);
    for (const auto& guard : _epilogue_lambda_guards) {
      cloned_root->_epilogue_lambda_guards.emplace_back(guard);
    }
    return cloned_root;
  }

  // DEBUG function - Returning raw pointers because we can't return unique_ptr
  // and pybind does not accept a unique_ptr reference return type.
  std::vector<LeafGuard*> get_epilogue_lambda_guards() const {
    std::vector<LeafGuard*> ret;
    ret.reserve(_epilogue_lambda_guards.size());
    for (const auto& guard : _epilogue_lambda_guards) {
      ret.push_back(guard.get());
    }
    return ret;
  }

 private:
  // Reset the state of all the relational guards on failure.
  void _reset_relational_guard_state() {
    for (auto& guard : _relational_guard_resetters) {
      guard->reset_state();
    }
  }

 public:
  // Local state for TENSOR_MATCH guards.
  LocalState _local_state;

 private:
  // All the relational guards under this guard manager. We only use these
  // when the guard evaluates to False. This ensures that guard state is reset
  // on guard failure so that the next invocation is clean.
  std::vector<std::shared_ptr<RelationalGuard>> _relational_guard_resetters;

  // These guards are lambda guards, i.e., the guards that lack a C++
  // implementation. For simplicity, we add these guards at the root. They
  // MUST be run after all other guard managers have finished, to ensure that
  // the epilogue guards do not step on some nonexistent getattr or getitem.
  // NB - shared_ptr is used to share the epilogue guards with the cloned
  // guard manager.
  std::vector<std::shared_ptr<LeafGuard>> _epilogue_lambda_guards;

  // [Note on GIL interaction with mutex lock]
  // We use std::mutex to prevent multiple threads from running
  // check/check_verbose simultaneously. This is to prevent a race condition
  // due to state changes in RelationalGuard.
  //
  // However, we also need to be careful about GIL interaction with the mutex.
  // There is a chance of deadlock
  //
  //    Thread 1: has GIL, waiting for lock
  //    Thread 2: has lock, waiting for GIL
  //
  // This can happen when Thread 2 earlier acquired the mutex lock, started
  // running the critical section of the check function, and then called some
  // python function (like LAMBDA_GUARD) and reached the CPython codebase that
  // checks if it should release the GIL (typically happens after every few
  // bytecode instructions). Thread 2 here can decide to release the GIL.
  // Thread 1 can acquire the GIL and reach the mutex, where it will wait
  // forever.
  //
  // To avoid this, each thread releases the GIL before acquiring the mutex,
  // and then acquires the GIL again after acquiring the mutex lock, by using
  // Py_BLOCK_THREADS and Py_UNBLOCK_THREADS. This avoids the deadlock.
  std::mutex _lock;

  // We init LocalState only when this flag is set. This flag is set during
  // TENSOR_MATCH guard init.
  bool _init_local_state = false;
};

/*
 * Dicts are common in python code. Therefore, we handle guards for dicts
 * differently and use PyDict_* APIs, which are faster than PyObject_* APIs
 * because there are no ref count increments/decrements.
 *
 * DictGuardManager relies on the order of dict.keys(). It keeps track of the
 * indices of dict.keys() to access the key, value pair.
 */
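// Worked example (illustrative): for d = {"a": 1, "b": 2, "c": 3} where only
// d["a"] and d["c"] were guarded, _indices would be [0, 2]. The check below
// walks the dict once with PyDict_Next, runs the key/value managers at
// positions 0 and 2, and skips position 1 entirely.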
typedef std::pair<std::unique_ptr<GuardManager>, std::unique_ptr<GuardManager>>
    KeyValueManager;
class DictGuardManager : public GuardManager {
 public:
  DictGuardManager(
      RootGuardManager* root,
      std::string source,
      py::handle example_value)
      : GuardManager(root, std::move(source)),
        _size(PyDict_Size(example_value.ptr())),
        _expected_type(Py_TYPE(example_value.ptr())),
        _is_exact_dict_type(PyDict_CheckExact(example_value.ptr())) {}

  GuardManager* get_key_manager(
      py::object key_index,
      std::string source,
      py::handle example_value,
      py::handle guard_manager_enum) {
    KeyValueManager& key_value_manager =
        _get_index_manager(std::move(key_index));
    if (!key_value_manager.first) {
      key_value_manager.first = make_guard_manager(
          this->get_root(),
          std::move(source),
          example_value,
          guard_manager_enum);
    }
    return key_value_manager.first.get();
  }

  GuardManager* get_value_manager(
      py::object key_index,
      std::string source,
      py::handle example_value,
      py::handle guard_manager_enum) {
    KeyValueManager& key_value_manager =
        _get_index_manager(std::move(key_index));
    if (!key_value_manager.second) {
      key_value_manager.second = make_guard_manager(
          this->get_root(),
          std::move(source),
          example_value,
          guard_manager_enum);
    }
    return key_value_manager.second.get();
  }

  bool check_nopybind(PyObject* obj) override { // borrowed ref
    // TODO(janimesh) - Implement a fast-path using dict versions.

    if (Py_TYPE(obj) != _expected_type) {
      _fail_count += 1;
      return false;
    }

    if (PyDict_Size(obj) != _size) {
      _fail_count += 1;
      return false;
    }

    // Early return
    if (_size == 0) {
      return true;
    }

    // Invokes the base class's check_nopybind method. We permit a limited set
    // of leaf guards and accessors within the DictGuardManager framework.
    // Integrating certain guards or accessors directly within the
    // DictGuardManager can be challenging. For instance, `type(dict_object)`
    // as an accessor is permissible, which otherwise would be hard to
    // integrate directly into DictGuardManager. Similarly, incorporating
    // guards such as DICT_CONTAINS and DICT_VERSION as leaf guards offers a
    // simpler solution than embedding these functionalities within the
    // DictGuardManager itself.
    if (!GuardManager::check_nopybind(obj)) {
      _fail_count += 1;
      // No need to shuffle the child guards, just return.
      return false;
    }

    PyObject *key = nullptr, *value = nullptr;
    Py_ssize_t pos = 0;

    // Points to an element in the _indices vector.
    size_t index_pointer = 0;
    // Points to the key index in the dict
    Py_ssize_t dict_pointer = 0;

    while (index_pointer < _indices.size() &&
           PyDict_Next(obj, &pos, &key, &value)) {
      // Skip if dict_pointer is not a saved index.
      if (dict_pointer == _indices[index_pointer]) {
        index_pointer += 1;
        KeyValueManager& key_value_manager = _key_value_managers[dict_pointer];
        std::unique_ptr<GuardManager>& key_manager = key_value_manager.first;
        if (key_manager && !key_manager->check_nopybind(key)) {
          return false;
        }
        std::unique_ptr<GuardManager>& value_manager = key_value_manager.second;
        if (value_manager && !value_manager->check_nopybind(value)) {
          return false;
        }
      }
      dict_pointer += 1;
    }
    return true;
  }

  GuardDebugInfo check_verbose_nopybind(
      PyObject* obj) override { // borrowed ref
    if (Py_TYPE(obj) != _expected_type) {
      return GuardDebugInfo(false, "TYPE_MISMATCH(" + get_source() + ")", 0);
    }

    if (PyDict_Size(obj) != _size) {
      return GuardDebugInfo(
          false, "len(" + get_source() + ") != " + std::to_string(_size), 0);
    }

    // Early return
    if (_size == 0) {
      return GuardDebugInfo(true, 0);
    }

    // Invokes the base class's check_verbose_nopybind method. We permit a
    // limited set of leaf guards and accessors within the DictGuardManager
    // framework. Integrating certain guards or accessors directly within the
    // DictGuardManager can be challenging. For instance, `type(dict_object)`
    // as an accessor is permissible, which otherwise would be hard to
    // integrate directly into DictGuardManager. Similarly, incorporating
    // guards such as DICT_CONTAINS and DICT_VERSION as leaf guards offers a
    // simpler solution than embedding these functionalities within the
    // DictGuardManager itself.
    GuardDebugInfo debug_info = GuardManager::check_verbose_nopybind(obj);
    if (!debug_info.result) {
      return debug_info;
    }

    PyObject *key = nullptr, *value = nullptr;
    Py_ssize_t pos = 0;

    // Points to an element in the _indices vector.
    size_t index_pointer = 0;
    Py_ssize_t dict_pointer = 0;

    int num_guards_executed = 0;
    while (index_pointer < _indices.size() &&
           PyDict_Next(obj, &pos, &key, &value)) {
      // Skip entries whose position is not a saved index.
      if (dict_pointer == _indices[index_pointer]) {
        index_pointer += 1;
        KeyValueManager& key_value_manager = _key_value_managers[dict_pointer];
        std::unique_ptr<GuardManager>& key_manager = key_value_manager.first;
        if (key_manager) {
          GuardDebugInfo debug_info = key_manager->check_verbose_nopybind(key);
          num_guards_executed += debug_info.num_guards_executed;
          if (!debug_info.result) {
            return GuardDebugInfo(
                false, debug_info.verbose_code_parts, num_guards_executed);
          }
        }
        std::unique_ptr<GuardManager>& value_manager = key_value_manager.second;
        if (value_manager) {
          GuardDebugInfo debug_info =
              value_manager->check_verbose_nopybind(value);
          num_guards_executed += debug_info.num_guards_executed;
          if (!debug_info.result) {
            return GuardDebugInfo(
                false, debug_info.verbose_code_parts, num_guards_executed);
          }
        }
      }
      dict_pointer += 1;
    }
    return GuardDebugInfo(true, num_guards_executed);
  }

  void skip_adding_guard(const py::object& a, const py::object& b) {
    // The `add_leaf_guard` method in `DictGuardManager` is overridden to block
    // the addition of leaf guards. However, this is too strict. The Python
    // side of guard management frequently adds TYPE_MATCH and DICT_LENGTH on
    // DictGuardManager. We could refactor the Python side to never call these
    // guards on dict objects, but that results in messy code. Instead, we just
    // override these two guards to not go through the add_leaf_guard code path
    // and skip adding guards. This keeps the Python side simple.
  }

  void fail_on_get_child_manager(
      const py::object& a,
      const std::string& source,
      const py::object& b) {
    throw std::runtime_error("Cannot add an accessor to DictGuardManager");
  }

  void add_leaf_guard(std::shared_ptr<LeafGuard> leaf_guard) override {
    // If you are calling this, you probably want to go through a key, value
    // child manager and then add a leaf guard on them. DictGuardManager
    // already has TYPE_MATCH and LENGTH_CHECK built in.
    throw std::runtime_error("DictGuardManager does not support a leaf_guard");
  }

  // Debug helper - Returning raw pointers because we can't return unique_ptr
  // and pybind does not accept a unique_ptr reference return type.
  std::unordered_map<Py_ssize_t, std::pair<GuardManager*, GuardManager*>>
  get_key_value_managers() {
    std::unordered_map<Py_ssize_t, std::pair<GuardManager*, GuardManager*>> ret;
    for (auto index : _indices) {
      ret[index] = std::make_pair(
          _key_value_managers[index].first.get(),
          _key_value_managers[index].second.get());
    }
    return ret;
  }

  bool is_exact_dict_type() {
    return _is_exact_dict_type;
  }

 public: // cloning functions
  DictGuardManager(
      RootGuardManager* cloned_root,
      std::string source,
      Py_ssize_t size,
      PyTypeObject* expected_type,
      bool is_exact_dict_type,
      std::vector<Py_ssize_t> indices)
      : GuardManager(cloned_root, std::move(source), true),
        _size(size),
        _expected_type(expected_type),
        _is_exact_dict_type(is_exact_dict_type),
        _indices(std::move(indices)) {}

  template <typename T>
  GuardManager* clone_dict_guard_manager(
      RootGuardManager* cloned_root,
      const py::function& clone_filter_fn) {
    if (!py::cast<bool>(clone_filter_fn(this))) {
      return nullptr;
    }
    T* cloned_mgr = new T(
        cloned_root,
        get_source(),
        _size,
        _expected_type,
        _is_exact_dict_type,
        _indices);

    clone_common(cloned_root, cloned_mgr, clone_filter_fn);
    for (auto index : _indices) {
      KeyValueManager& key_value_manager = _key_value_managers[index];
      std::unique_ptr<GuardManager>& key_manager = key_value_manager.first;
      std::unique_ptr<GuardManager>& value_manager = key_value_manager.second;

      cloned_mgr->_key_value_managers[index] = std::make_pair(nullptr, nullptr);

      if (key_manager) {
        GuardManager* cloned_key_manager =
            key_manager->clone(cloned_root, clone_filter_fn);
        if (cloned_key_manager) {
          cloned_mgr->_key_value_managers[index].first =
              std::unique_ptr<GuardManager>(cloned_key_manager);
        }
      }

      if (value_manager) {
        GuardManager* cloned_value_manager =
            value_manager->clone(cloned_root, clone_filter_fn);
        if (cloned_value_manager) {
          cloned_mgr->_key_value_managers[index].second =
              std::unique_ptr<GuardManager>(cloned_value_manager);
        }
      }
    }
    return cloned_mgr;
  }

  GuardManager* clone(
      RootGuardManager* cloned_root,
      const py::function& clone_filter_fn) override {
    return clone_dict_guard_manager<DictGuardManager>(
        cloned_root, clone_filter_fn);
  }

 private:
  /**
   * Returns the KeyValueManager for the given dict index, creating and
   * registering a new (empty) one if it is not already present.
   */
  KeyValueManager& _get_index_manager(py::object key_index) {
    // Check if the index manager is already present.
    Py_ssize_t index = py::cast<Py_ssize_t>(std::move(key_index));
    auto it = _key_value_managers.find(index);
    if (it != _key_value_managers.end()) {
      return it->second;
    }
    _indices.push_back(index);
    // Always keep the _indices array sorted
    std::sort(_indices.begin(), _indices.end());
    _key_value_managers[index] = std::make_pair(nullptr, nullptr);
    return _key_value_managers[index];
  }

 protected:
  Py_ssize_t _size;
  // DictGuardManager supports both exact dict type and non-exact dict type.
  // Therefore, we have to compare the type to early exit.
  PyTypeObject* _expected_type;
  bool _is_exact_dict_type; // Useful to check getattr_manager validity.
  std::vector<Py_ssize_t> _indices;
  std::unordered_map<Py_ssize_t, KeyValueManager> _key_value_managers;
};

GuardManager* clone_guard_manager(
    GuardManager* from,
    RootGuardManager* cloned_root,
    const py::function& clone_filter_fn) {
  return from->clone(cloned_root, clone_filter_fn);
}

void add_relational_guard_resetter_to_cloned_root(
    RootGuardManager* root,
    std::shared_ptr<RelationalGuard> guard) {
  root->add_relational_guard_resetter(std::move(guard));
}

std::unique_ptr<GuardManager> make_guard_manager(
    RootGuardManager* root,
    std::string source,
    py::handle example_value,
    py::handle guard_manager_enum) {
#if IS_PYBIND_2_13_PLUS
  using threeobjects = std::tuple<py::object, py::object, py::object>;
  PYBIND11_CONSTINIT static py::gil_safe_call_once_and_store<threeobjects>
      storage;

  auto& [guard_manager_enum_class, base_guard_manager_enum, dict_guard_manager_enum] =
      storage
          .call_once_and_store_result([]() -> threeobjects {
            py::object guard_manager_enum_class =
                py::module_::import("torch._dynamo.guards")
                    .attr("GuardManagerType");
            return {
                guard_manager_enum_class,
                guard_manager_enum_class.attr("GUARD_MANAGER"),
                guard_manager_enum_class.attr("DICT_GUARD_MANAGER")};
          })
          .get_stored();
#else
  static py::object guard_manager_enum_class =
      py::module_::import("torch._dynamo.guards").attr("GuardManagerType");
  static py::object base_guard_manager_enum =
      guard_manager_enum_class.attr("GUARD_MANAGER");
  static py::object dict_guard_manager_enum =
      guard_manager_enum_class.attr("DICT_GUARD_MANAGER");
#endif
  if (py::isinstance<py::dict>(example_value)) {
    if (guard_manager_enum.is(base_guard_manager_enum)) {
      // For dicts that don't need to guard on keys, we can just rely on the
      // base GuardManager.
      return std::make_unique<GuardManager>(
          root, std::move(source), example_value);
    } else if (guard_manager_enum.is(dict_guard_manager_enum)) {
      return std::make_unique<DictGuardManager>(
          root, std::move(source), example_value);
    } else {
      throw py::type_error("Invalid guard manager enum");
    }
  }
  return std::make_unique<GuardManager>(root, std::move(source));
}
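
// Dispatch summary for make_guard_manager (an editorial sketch of the logic
// above, not normative):
//
//   example_value | guard_manager_enum  | result
//   --------------+---------------------+----------------------------------
//   dict          | GUARD_MANAGER       | GuardManager (keys not guarded)
//   dict          | DICT_GUARD_MANAGER  | DictGuardManager
//   dict          | anything else       | py::type_error
//   non-dict      | (ignored)           | GuardManager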

class TORCH_FUNCTION_MODE_STACK : public LeafGuard {
 public:
  TORCH_FUNCTION_MODE_STACK(
      const py::list& initial_stack,
      py::object verbose_code_parts)
      : LeafGuard(std::move(verbose_code_parts)) {
    Py_ssize_t len = PyList_Size(initial_stack.ptr());
    for (Py_ssize_t idx = 0; idx < len; idx++) {
      PyObject* mode = PyList_GetItem(initial_stack.ptr(), idx); // borrowed ref
      auto type = Py_TYPE(mode);
      this->_ref_stack.push_back(type);
    }
  }

  template <typename T>
  bool check_nopybind_template(T* value) {
    // Ignore value arg, only used to satisfy the interface
    const size_t len = (size_t)at::impl::PythonTorchFunctionTLS::stack_len();
    const size_t ref_stack_size = this->_ref_stack.size();

    if (len != ref_stack_size) {
      return false;
    }

    for (int64_t idx = 0; (size_t)idx < len; idx++) {
      std::shared_ptr<c10::SafePyObject> mode =
          at::impl::PythonTorchFunctionTLS::get_stack_at(idx);

      PyTypeObject* mode_type = Py_TYPE(mode->ptr(getPyInterpreter()));
      if (mode_type != _ref_stack.at(idx)) {
        return false;
      }
    }

    return true;
  }

  bool check_nopybind(PyObject* value) override {
    return check_nopybind_template(value);
  }

  bool check_nopybind(FrameLocalsMapping* value) override {
    return check_nopybind_template(value);
  }

 private:
  std::vector<PyTypeObject*> _ref_stack;
};
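
// What TORCH_FUNCTION_MODE_STACK verifies, in rough Python terms (an
// illustrative sketch): the guard records [type(m) for m in initial_stack]
// at construction, and later passes only if the live torch-function mode
// stack has the same length and the same type at every position. Mode
// *instances* may differ between runs; only their types are compared.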

class DISPATCH_KEY_SET_MATCH : public LeafGuard {
 public:
  DISPATCH_KEY_SET_MATCH(
      RootGuardManager* root_guard_manager,
      py::object value,
      py::object verbose_code_parts)
      : LeafGuard(root_guard_manager, std::move(verbose_code_parts)) {
    root_guard_manager->set_init_local_state_flag();
    c10::DispatchKeySet value_ = value.cast<c10::DispatchKeySet>();
    raw_repr = _root_guard_manager->_local_state.apply(value_).raw_repr();
  }

  bool check_nopybind(PyObject* value) override { // borrowed ref
    py::handle handle = py::handle(value);
    c10::DispatchKeySet value_ = handle.cast<c10::DispatchKeySet>();
    return raw_repr ==
        _root_guard_manager->_local_state.apply(value_).raw_repr();
  }

 private:
  uint64_t raw_repr;
};

class TENSOR_MATCH : public LeafGuard {
 public:
  TENSOR_MATCH(
      RootGuardManager* root_guard_manager,
      py::object value,
      py::object dynamic_dims_sizes_py,
      py::object dynamic_dims_strides_py,
      py::object tensor_name,
      py::object verbose_code_parts,
      py::object pytype,
      py::object dispatch_keys)
      : LeafGuard(root_guard_manager, std::move(verbose_code_parts)),
        _tensor_name(py::cast<std::string>(std::move(tensor_name))) {
    root_guard_manager->set_init_local_state_flag();
    PyObject* item = value.ptr();
    if (!THPVariable_CheckExact(item) && !THPVariable_Check(item)) {
      PyErr_SetString(PyExc_TypeError, "expected Tensor()");
      return;
    }
    if (!PyType_Check(pytype.ptr())) {
      PyErr_SetString(PyExc_TypeError, "expected type object");
      return;
    }
    auto tensor = THPVariable_Unpack(item);

    std::vector<std::optional<c10::SymInt>> tensor_dims_size =
        pyListToVecOptInt(dynamic_dims_sizes_py.ptr());
    std::vector<std::optional<c10::SymInt>> tensor_dims_stride =
        pyListToVecOptInt(dynamic_dims_strides_py.ptr());

    tensor_dims_size = tensor_dims_size.empty()
        ? wrapIntegersInOptional(tensor.sym_sizes())
        : tensor_dims_size;
    tensor_dims_stride = tensor_dims_stride.empty()
        ? wrapIntegersInOptional(tensor.sym_strides())
        : tensor_dims_stride;
    LocalState state;
    _tensor_check = std::make_unique<TensorCheck>(
        state,
        (PyTypeObject*)pytype.ptr(),
        std::move(tensor),
        dispatch_keys.cast<c10::DispatchKeySet>(),
        std::move(tensor_dims_size),
        std::move(tensor_dims_stride));
  }

  bool check_nopybind(PyObject* value) override { // borrowed ref
    if (Py_TYPE(value) != _tensor_check->pytype) {
      return false;
    }
    return _tensor_check->check(
        _root_guard_manager->_local_state, THPVariable_Unpack(value));
  }

  GuardDebugInfo check_verbose_nopybind(
      PyObject* value) override { // borrowed ref

    if (Py_TYPE(value) != _tensor_check->pytype) {
      std::stringstream fail_reason;
      PyObject* type = PyObject_Type(value); // new ref
      PyObject* type_str = type ? PyObject_Str(type) : nullptr; // new ref
      fail_reason << "expected type of '" << _tensor_name
                  << "' to be a tensor type, ";
      if (!type_str) {
        // PyObject_Type/PyObject_Str failed; report generically.
        PyErr_Clear();
        fail_reason << "but found a different type";
      } else {
        fail_reason << "but found " << PyUnicode_AsUTF8(type_str);
      }
      Py_XDECREF(type_str);
      Py_XDECREF(type);
      return GuardDebugInfo(false, fail_reason.str(), 0);
    }

    std::string fail_reason = _tensor_check->check_verbose(
        _root_guard_manager->_local_state,
        THPVariable_Unpack(value),
        _tensor_name);

    if (!fail_reason.empty()) {
      if (is_parameter(py::handle(value))) {
        fail_reason += ". Guard failed on a parameter, consider using ";
        fail_reason +=
            "torch._dynamo.config.force_parameter_static_shapes = False ";
        fail_reason += "to allow dynamism on parameters.";
      }
      return GuardDebugInfo(false, fail_reason, 0);
    }
    return GuardDebugInfo(true, 1);
  }

 private:
  std::string _tensor_name;
  std::unique_ptr<TensorCheck> _tensor_check;
};
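
// Rough intuition for TENSOR_MATCH (an editorial sketch; the precise rules
// live in TensorCheck): the guard first requires type(value) to be exactly
// the recorded Python type, then delegates to TensorCheck, which compares
// dispatch keys, dtype and related tensor metadata, plus sizes/strides for
// every dimension that was compiled as static; dimensions marked dynamic
// (recorded as std::nullopt above) are not pinned to a concrete value.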

/**
 * Represents __getattr__ accessor.
 */
class GetAttrGuardAccessor : public GuardAccessor {
 public:
  GetAttrGuardAccessor(
      RootGuardManager* root,
      py::str name,
      std::string source,
      py::handle example_value,
      py::handle guard_manager_enum)
      : GuardAccessor(
            root,
            name,
            std::move(source),
            example_value,
            guard_manager_enum),
        _attr_name(name.ptr()) {}

  // NB: Intentional duplication between check_nopybind and
  // check_verbose_nopybind.
  bool check_nopybind(PyObject* obj, bool matches_dict_tag = false)
      override { // borrowed ref
    PyObject* x = PyObject_GetAttr(obj, _attr_name); // new ref
    if (x == nullptr) {
      // Attribute absent, clear the exception and return false.
      PyErr_Clear();
      return false;
    }
    bool result = _guard_manager->check_nopybind(x);
    Py_DECREF(x);
    return result;
  }

  GuardDebugInfo check_verbose_nopybind(
      PyObject* obj) override { // borrowed ref
    PyObject* x = PyObject_GetAttr(obj, _attr_name); // new ref
    if (x == nullptr) {
      // Attribute absent, clear the exception and return false.
      PyErr_Clear();
      return GuardDebugInfo(
          false, "getattr failed on source " + get_source(), 0);
    }
    GuardDebugInfo result = _guard_manager->check_verbose_nopybind(x);
    Py_DECREF(x);
    return result;
  }

  std::string repr() const override {
    // Helpful when printing GuardManager tree structure.
    return "GetAttrGuardAccessor(" + py::str(_attr_name).cast<std::string>() +
        ")";
  }

 public: // cloning functions
  GetAttrGuardAccessor(GuardManager* guard_manager, GetAttrGuardAccessor* from)
      : GuardAccessor(guard_manager, from) {
    from->clone_visitor(this);
  }

  GuardAccessor* clone(
      RootGuardManager* cloned_root,
      const py::function& clone_filter_fn) override {
    return clone_common<GetAttrGuardAccessor>(cloned_root, clone_filter_fn);
  }

  void clone_visitor(GetAttrGuardAccessor* to) {
    to->_attr_name = _attr_name;
  }

 private:
  // no need of py::object here because the attr_name is already passed on to
  // the base class as accessor_key which is a py::object.
  PyObject* _attr_name{nullptr};
};

/**
 * Represents object.__getattribute__(obj, attr_name) accessor.
 */
class GenericGetAttrGuardAccessor : public GuardAccessor {
 public:
  GenericGetAttrGuardAccessor(
      RootGuardManager* root,
      py::str name,
      std::string source,
      py::handle example_value,
      py::handle guard_manager_enum)
      : GuardAccessor(
            root,
            name,
            std::move(source),
            example_value,
            guard_manager_enum),
        _attr_name(name.ptr()) {}

  // NB: Intentional duplication between check_nopybind and
  // check_verbose_nopybind.
  bool check_nopybind(PyObject* obj, bool matches_dict_tag = false)
      override { // borrowed ref
    PyObject* x = PyObject_GenericGetAttr(obj, _attr_name); // new ref
    if (x == nullptr) {
      // Attribute absent, clear the exception and return false.
      PyErr_Clear();
      return false;
    }
    bool result = _guard_manager->check_nopybind(x);
    Py_DECREF(x);
    return result;
  }

  GuardDebugInfo check_verbose_nopybind(
      PyObject* obj) override { // borrowed ref
    PyObject* x = PyObject_GenericGetAttr(obj, _attr_name); // new ref
    if (x == nullptr) {
      // Attribute absent, clear the exception and return false.
      PyErr_Clear();
      return GuardDebugInfo(
          false, "getattr failed on source " + get_source(), 0);
    }
    GuardDebugInfo result = _guard_manager->check_verbose_nopybind(x);
    Py_DECREF(x);
    return result;
  }

  std::string repr() const override {
    // Helpful when printing GuardManager tree structure.
    return "GenericGetAttrGuardAccessor(" +
        py::str(_attr_name).cast<std::string>() + ")";
  }

 public: // cloning functions
  GenericGetAttrGuardAccessor(
      GuardManager* guard_manager,
      GenericGetAttrGuardAccessor* from)
      : GuardAccessor(guard_manager, from) {
    from->clone_visitor(this);
  }

  GuardAccessor* clone(
      RootGuardManager* cloned_root,
      const py::function& clone_filter_fn) override {
    return clone_common<GenericGetAttrGuardAccessor>(
        cloned_root, clone_filter_fn);
  }

  void clone_visitor(GenericGetAttrGuardAccessor* to) {
    to->_attr_name = _attr_name;
  }

 private:
  // no need of py::object here because the attr_name is already passed on to
  // the base class as accessor_key which is a py::object.
  PyObject* _attr_name{nullptr};
};

/**
 * Represents x.__dict__ accessor.
 */
class GetGenericDictGuardAccessor : public GuardAccessor {
 public:
  GetGenericDictGuardAccessor(
      RootGuardManager* root,
      py::str name,
      std::string source,
      py::handle example_value,
      py::handle guard_manager_enum)
      : GuardAccessor(
            root,
            std::move(name),
            std::move(source),
            example_value,
            guard_manager_enum) {}

  // NB: Intentional duplication between check_nopybind and
  // check_verbose_nopybind.
  bool check_nopybind(PyObject* obj, bool matches_dict_tag = false)
      override { // borrowed ref
    PyObject* x = PyObject_GenericGetDict(obj, nullptr); // new ref
    if (x == nullptr) {
      // Attribute absent, clear the exception and return false.
      PyErr_Clear();
      return false;
    }
    bool result = _guard_manager->check_nopybind(x);
    Py_DECREF(x);
    return result;
  }

  GuardDebugInfo check_verbose_nopybind(
      PyObject* obj) override { // borrowed ref
    PyObject* x = PyObject_GenericGetDict(obj, nullptr); // new ref
    if (x == nullptr) {
      // Attribute absent, clear the exception and return false.
      PyErr_Clear();
      return GuardDebugInfo(
          false, "getattr failed on source " + get_source(), 0);
    }
    GuardDebugInfo result = _guard_manager->check_verbose_nopybind(x);
    Py_DECREF(x);
    return result;
  }

  std::string repr() const override {
    // Helpful when printing GuardManager tree structure.
    return "GetGenericDictGuardAccessor";
  }

 public: // cloning functions
  GetGenericDictGuardAccessor(
      GuardManager* guard_manager,
      GetGenericDictGuardAccessor* from)
      : GuardAccessor(guard_manager, from) {
    from->clone_visitor(this);
  }

  GuardAccessor* clone(
      RootGuardManager* cloned_root,
      const py::function& clone_filter_fn) override {
    return clone_common<GetGenericDictGuardAccessor>(
        cloned_root, clone_filter_fn);
  }

  // No extra state to copy; needed because the cloning constructor above
  // calls clone_visitor.
  void clone_visitor(GetGenericDictGuardAccessor* to) {}
};

/**
 * Represents __getitem__ accessor.
 */
class GetItemGuardAccessor : public GuardAccessor {
 public:
  GetItemGuardAccessor(
      RootGuardManager* root,
      py::object name,
      std::string source,
      py::handle example_value,
      py::handle guard_manager_enum)
      : GuardAccessor(
            root,
            name,
            std::move(source),
            example_value,
            guard_manager_enum),
        _attr_name(name.ptr()) {}

  // NB: Intentional duplication between check_nopybind and
  // check_verbose_nopybind.
  bool check_nopybind(PyObject* obj, bool matches_dict_tag = false)
      override { // borrowed ref
    PyObject* x = PyObject_GetItem(obj, _attr_name); // new ref
    if (x == nullptr) {
      PyErr_Clear();
      return false;
    }
    bool result = _guard_manager->check_nopybind(x);
    Py_DECREF(x);
    return result;
  }

  GuardDebugInfo check_verbose_nopybind(
      PyObject* obj) override { // borrowed ref
    PyObject* x = PyObject_GetItem(obj, _attr_name); // new ref
    if (x == nullptr) {
      PyErr_Clear();
      return GuardDebugInfo(
          false, std::string("KeyError on ") + get_source(), 0);
    }
    GuardDebugInfo result = _guard_manager->check_verbose_nopybind(x);
    Py_DECREF(x);
    return result;
  }

  std::string repr() const override {
    return "GetItemGuardAccessor(" + py::str(_attr_name).cast<std::string>() +
        ")";
  }

 public: // cloning functions
  GetItemGuardAccessor(GuardManager* guard_manager, GetItemGuardAccessor* from)
      : GuardAccessor(guard_manager, from) {
    from->clone_visitor(this);
  }

  GuardAccessor* clone(
      RootGuardManager* cloned_root,
      const py::function& clone_filter_fn) override {
    return clone_common<GetItemGuardAccessor>(cloned_root, clone_filter_fn);
  }

  void clone_visitor(GetItemGuardAccessor* to) {
    to->_attr_name = _attr_name;
  }

 private:
  // no need of py::object here because the attr_name is already passed on to
  // the base class as accessor_key which is a py::object.
  PyObject* _attr_name{nullptr};
};

/**
 * Represents f_locals[name] accessor. Special handling for frame locals since
 * we avoid converting it to a Python dict as much as possible.
 * NB: We don't check for name order in frame locals since it is constant
 * across frames corresponding to the same code object.
 */
class FrameLocalsGuardAccessor : public GuardAccessor {
 public:
  FrameLocalsGuardAccessor(
      RootGuardManager* root,
      const py::tuple& key,
      std::string source,
      py::handle example_value,
      py::handle guard_manager_enum)
      : GuardAccessor(
            root,
            key[0],
            std::move(source),
            example_value,
            guard_manager_enum),
        _key(key[0].ptr()),
        _framelocals_idx(key[1].cast<int>()),
        _is_immutable_object(is_immutable_object(example_value)) {}

  // Run as a result of calling run_root_guard_manager/check_nopybind.
  // NB: Intentional duplication between check_nopybind and
  // check_verbose_nopybind.
  bool check_nopybind(
      FrameLocalsMapping* obj,
      bool matches_dict_tag = false) override { // borrowed ref
    if (matches_dict_tag && _is_immutable_object) {
      // Immutable object and dict tag matches, we can skip the guard subtree.
      return true;
    }

    PyObject* x = obj->get(_framelocals_idx);
    if (x == nullptr) {
      PyErr_Clear();
      return false;
    }
    return _guard_manager->check_nopybind(x);
  }

  // Run as a result of calling check(), e.g., from Python.
  // NB: Intentional duplication between check_nopybind and
  // check_verbose_nopybind.
  bool check_nopybind(PyObject* obj, bool matches_dict_tag = false) override {
    if (!PyDict_Check(obj)) {
      // This should not cause guard failure. If this error is encountered, it
      // probably means we did not convert the FrameLocalsMapping to a dict
      // (using to_dict()).
      throw std::runtime_error(
          "FrameLocalsGuardAccessor check expected dict() input");
    }

    if (matches_dict_tag && _is_immutable_object) {
      // Immutable object and dict tag matches, we can skip the guard subtree.
      return true;
    }

    PyObject* x = PyDict_GetItem(obj, _key); // borrowed ref
    if (x == nullptr) {
      PyErr_Clear();
      return false;
    }
    bool result = _guard_manager->check_nopybind(x);
    return result;
  }

  // If we've reached here, it means the guard failed: `obj` should be the
  // FrameLocalsMapping converted into a Python dict, and we should behave
  // like DictGetItemGuardAccessor.
  GuardDebugInfo check_verbose_nopybind(
      PyObject* obj) override { // borrowed ref
    if (!PyDict_Check(obj)) {
      PyErr_Clear();
      return GuardDebugInfo(
          false, "FrameLocalsGuardAccessor check expected dict() input", 0);
    }
    PyObject* x = PyDict_GetItem(obj, _key); // borrowed ref
    if (x == nullptr) {
      PyErr_Clear();
      return GuardDebugInfo(
          false, std::string("KeyError on ") + get_source(), 0);
    }
    GuardDebugInfo result = _guard_manager->check_verbose_nopybind(x);
    return result;
  }

  std::string repr() const override {
    return "FrameLocalsGuardAccessor(key=" +
        py::repr(_key).cast<std::string>() +
        ", framelocals_idx=" + std::to_string(_framelocals_idx) + ")";
  }

 public: // cloning functions
  FrameLocalsGuardAccessor(
      GuardManager* guard_manager,
      FrameLocalsGuardAccessor* from)
      : GuardAccessor(guard_manager, from) {
    from->clone_visitor(this);
  }

  GuardAccessor* clone(
      RootGuardManager* cloned_root,
      const py::function& clone_filter_fn) override {
    return clone_common<FrameLocalsGuardAccessor>(cloned_root, clone_filter_fn);
  }

  void clone_visitor(FrameLocalsGuardAccessor* to) {
    to->_key = _key;
    to->_framelocals_idx = _framelocals_idx;
    to->_is_immutable_object = _is_immutable_object;
  }

 private:
  PyObject* _key{nullptr};
  int _framelocals_idx{-1};

  // If immutable object and dict tag matches, we can skip the guard subtree
  // and return true.
  bool _is_immutable_object{false};
};
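
// Note (editorial): FrameLocalsMapping lets the fast path fetch a local by
// its fixed index in the code object's layout (obj->get(_framelocals_idx))
// without materializing f_locals as a dict. The PyDict-based overloads above
// are only exercised on the slow/debug route, after the mapping has been
// converted with to_dict().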

/**
 * Represents dict[name] accessor. Needed since DictGuardManager does not
 * support sorting. We differentiate it from GetItemGuardAccessor because
 * PyDict_GetItem should be faster than PyObject_GetItem.
 */
class DictGetItemGuardAccessor : public GuardAccessor {
 public:
  DictGetItemGuardAccessor(
      RootGuardManager* root,
      py::object key,
      std::string source,
      py::handle example_value,
      py::handle guard_manager_enum)
      : GuardAccessor(
            root,
            key,
            std::move(source),
            example_value,
            guard_manager_enum),
        _key(key.ptr()),
        _is_immutable_object(is_immutable_object(example_value)) {}

  // NB: Intentional duplication between check_nopybind and
  // check_verbose_nopybind.
  bool check_nopybind(PyObject* obj, bool matches_dict_tag = false) override {
    if (matches_dict_tag && _is_immutable_object &&
        _guard_manager->has_no_accessors()) {
      // Immutable object and dict tag matches, we can skip the guard subtree.
      // NB: We only skip the subtree if there are no accessors in the subtree.
      // This is specifically for tensors which are used in symbolic shape C++
      // guards, and therefore have accessors on the tensor GuardManager itself.
      return true;
    }

    PyObject* x = PyDict_GetItem(obj, _key); // borrowed ref
    if (x == nullptr) {
      PyErr_Clear();
      return false;
    }
    bool result = _guard_manager->check_nopybind(x);
    return result;
  }

  GuardDebugInfo check_verbose_nopybind(
      PyObject* obj) override { // borrowed ref
    PyObject* x = PyDict_GetItem(obj, _key); // borrowed ref
    if (x == nullptr) {
      PyErr_Clear();
      return GuardDebugInfo(
          false, std::string("KeyError on ") + get_source(), 0);
    }
    GuardDebugInfo result = _guard_manager->check_verbose_nopybind(x);
    return result;
  }

  std::string repr() const override {
    return "DictGetItemGuardAccessor(" + py::repr(_key).cast<std::string>() +
        ")";
  }

 public: // cloning functions
  DictGetItemGuardAccessor(
      GuardManager* guard_manager,
      DictGetItemGuardAccessor* from)
      : GuardAccessor(guard_manager, from) {
    from->clone_visitor(this);
  }

  GuardAccessor* clone(
      RootGuardManager* cloned_root,
      const py::function& clone_filter_fn) override {
    return clone_common<DictGetItemGuardAccessor>(cloned_root, clone_filter_fn);
  }

  void clone_visitor(DictGetItemGuardAccessor* to) {
    to->_key = _key;
    to->_is_immutable_object = _is_immutable_object;
  }

 private:
  PyObject* _key{nullptr};

  // If immutable object and dict tag matches, we can skip the guard subtree
  // and return true.
  bool _is_immutable_object{false};
};
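
// Sketch of the dict-tag fast path above (editorial summary): when the
// parent dict's version tag is unchanged and the example value was immutable,
// the guarded value cannot have changed, so the entire subtree is skipped.
// The extra has_no_accessors() condition keeps the skip conservative: a
// subtree with accessors (e.g., tensor size/stride accessors used by
// symbolic shape guards) may guard state that the dict tag does not cover.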

/**
 * Represents list[index] accessor. It is faster than generic
 * GetItemGuardAccessor.
 */
class ListGetItemGuardAccessor : public GuardAccessor {
 public:
  ListGetItemGuardAccessor(
      RootGuardManager* root,
      const py::object& index,
      std::string source,
      py::handle example_value,
      py::handle guard_manager_enum)
      : GuardAccessor(
            root,
            index,
            std::move(source),
            example_value,
            guard_manager_enum),
        _index(py::cast<Py_ssize_t>(index)) {}

  // NB: Intentional duplication between check_nopybind and
  // check_verbose_nopybind.
  bool check_nopybind(PyObject* obj, bool matches_dict_tag = false)
      override { // borrowed ref
    PyObject* x = PyList_GetItem(obj, _index); // borrowed ref
    if (x == nullptr) {
      PyErr_Clear();
      return false;
    }
    bool result = _guard_manager->check_nopybind(x);
    return result;
  }

  GuardDebugInfo check_verbose_nopybind(
      PyObject* obj) override { // borrowed ref
    PyObject* x = PyList_GetItem(obj, _index); // borrowed ref
    if (x == nullptr) {
      PyErr_Clear();
      return GuardDebugInfo(
          false, std::string("IndexError on ") + get_source(), 0);
    }
    GuardDebugInfo result = _guard_manager->check_verbose_nopybind(x);
    return result;
  }

  std::string repr() const override {
    return "ListGetItemGuardAccessor(" + std::to_string(_index) + ")";
  }

 public: // cloning functions
  ListGetItemGuardAccessor(
      GuardManager* guard_manager,
      ListGetItemGuardAccessor* from)
      : GuardAccessor(guard_manager, from) {
    from->clone_visitor(this);
  }

  GuardAccessor* clone(
      RootGuardManager* cloned_root,
      const py::function& clone_filter_fn) override {
    return clone_common<ListGetItemGuardAccessor>(cloned_root, clone_filter_fn);
  }

  void clone_visitor(ListGetItemGuardAccessor* to) {
    to->_index = _index;
  }

 private:
  Py_ssize_t _index{-1};
};

/**
 * Represents tuple[index] accessor. It is faster than generic
 * GetItemGuardAccessor.
 */
class TupleGetItemGuardAccessor : public GuardAccessor {
 public:
  TupleGetItemGuardAccessor(
      RootGuardManager* root,
      const py::object& index,
      std::string source,
      py::handle example_value,
      py::handle guard_manager_enum)
      : GuardAccessor(
            root,
            index,
            std::move(source),
            example_value,
            guard_manager_enum),
        _index(py::cast<Py_ssize_t>(index)) {}

  // NB: Intentional duplication between check_nopybind and
  // check_verbose_nopybind.
  bool check_nopybind(PyObject* obj, bool matches_dict_tag = false)
      override { // borrowed ref
    PyObject* x = PyTuple_GetItem(obj, _index); // borrowed ref
    if (x == nullptr) {
      PyErr_Clear();
      return false;
    }
    bool result = _guard_manager->check_nopybind(x);
    return result;
  }

  GuardDebugInfo check_verbose_nopybind(
      PyObject* obj) override { // borrowed ref
    PyObject* x = PyTuple_GetItem(obj, _index); // borrowed ref
    if (x == nullptr) {
      PyErr_Clear();
      return GuardDebugInfo(
          false, std::string("IndexError on ") + get_source(), 0);
    }
    GuardDebugInfo result = _guard_manager->check_verbose_nopybind(x);
    return result;
  }

  std::string repr() const override {
    return "TupleGetItemGuardAccessor(" + std::to_string(_index) + ")";
  }

 public: // cloning functions
  TupleGetItemGuardAccessor(
      GuardManager* guard_manager,
      TupleGetItemGuardAccessor* from)
      : GuardAccessor(guard_manager, from) {
    from->clone_visitor(this);
  }

  GuardAccessor* clone(
      RootGuardManager* cloned_root,
      const py::function& clone_filter_fn) override {
    return clone_common<TupleGetItemGuardAccessor>(
        cloned_root, clone_filter_fn);
  }

  void clone_visitor(TupleGetItemGuardAccessor* to) {
    to->_index = _index;
  }

 private:
  Py_ssize_t _index{-1};
};

enum class TensorProperty {
  SIZE = 0,
  STRIDE = 1,
  STORAGE_OFFSET = 2,
};

std::string to_string(TensorProperty prop) {
  switch (prop) {
    case TensorProperty::SIZE:
      return "TensorProperty::SIZE";
    case TensorProperty::STRIDE:
      return "TensorProperty::STRIDE";
    case TensorProperty::STORAGE_OFFSET:
      return "TensorProperty::STORAGE_OFFSET";
    default:
      return "TensorProperty::Unknown";
  }
}
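
// For instance (illustrative), TensorPropertyGuardAccessor<TensorProperty::SIZE>
// below models tensor.size(i): it feeds the concrete integer to its child
// manager, and an instance with i == 0 prints via repr() as
// "TensorPropertyGuardAccessor<TensorProperty::SIZE>(0)".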

/**
 * Represents tensor.size/shape/storage_offset accessor.
 */
template <TensorProperty _prop>
class TensorPropertyGuardAccessor : public GuardAccessor {
 public:
  TensorPropertyGuardAccessor(
      RootGuardManager* root,
      const py::object& index,
      std::string source,
      py::handle example_value,
      py::handle guard_manager_enum)
      : GuardAccessor(
            root,
            index,
            std::move(source),
            example_value,
            guard_manager_enum) {
    if (_prop != TensorProperty::STORAGE_OFFSET) {
      _index = py::cast<Py_ssize_t>(index);
    }
  }

  // NB: Intentional duplication between check_nopybind and
  // check_verbose_nopybind.
  bool check_nopybind(PyObject* obj, bool matches_dict_tag = false)
      override { // borrowed ref
    // We need to check here that `obj` is a tensor. Usually there is a
    // TENSOR_MATCH in the parent guard manager, but in the case of
    // `tensor._base` there is not: when the tensor is not a view,
    // `tensor._base` is None, so we have to check it here.
    if (!THPVariable_CheckExact(obj) && !THPVariable_Check(obj)) {
      return false;
    }
    at::Tensor tensor = THPVariable_Unpack(obj);
    std::optional<int64_t> opt_value;
    if (_prop == TensorProperty::SIZE) {
      if (_index >= tensor.dim()) {
        return false;
      }
      opt_value = tensor.sym_size(_index).maybe_as_int();
    } else if (_prop == TensorProperty::STRIDE) {
      if (_index >= tensor.dim()) {
        return false;
      }
      opt_value = tensor.sym_stride(_index).maybe_as_int();
    } else if (_prop == TensorProperty::STORAGE_OFFSET) {
      opt_value = tensor.sym_storage_offset().maybe_as_int();
    } else {
      throw std::runtime_error("Unknown property");
    }

    if (!opt_value.has_value()) {
      return false;
    }

    PyObject* py_value =
        PyLong_FromLongLong(opt_value.value()); // New reference
    bool result = _guard_manager->check_nopybind(py_value);
    Py_DECREF(py_value);
    return result;
  }

  GuardDebugInfo check_verbose_nopybind(
      PyObject* obj) override { // borrowed ref
    // Check that it's a tensor.
    if (!THPVariable_CheckExact(obj) && !THPVariable_Check(obj)) {
      return GuardDebugInfo(false, "not a tensor on " + get_source(), 0);
    }
    at::Tensor tensor = THPVariable_Unpack(obj);
    std::optional<int64_t> opt_value;
    if (_prop == TensorProperty::SIZE) {
      if (_index >= tensor.dim()) {
        return GuardDebugInfo(false, "tensor has too few dimensions", 0);
      }
      opt_value = tensor.sym_size(_index).maybe_as_int();
    } else if (_prop == TensorProperty::STRIDE) {
      if (_index >= tensor.dim()) {
        return GuardDebugInfo(false, "tensor has too few dimensions", 0);
      }
      opt_value = tensor.sym_stride(_index).maybe_as_int();
    } else if (_prop == TensorProperty::STORAGE_OFFSET) {
      opt_value = tensor.sym_storage_offset().maybe_as_int();
    } else {
      return GuardDebugInfo(false, "unknown property", 0);
    }

    if (!opt_value.has_value()) {
      return GuardDebugInfo(false, "symbolic values found", 0);
    }

    PyObject* py_value =
        PyLong_FromLongLong(opt_value.value()); // New reference
    GuardDebugInfo result = _guard_manager->check_verbose_nopybind(py_value);
    Py_DECREF(py_value);
    return result;
  }

  std::string repr() const override {
    // Helpful when printing GuardManager tree structure.
    return "TensorPropertyGuardAccessor<" + to_string(_prop) + ">(" +
        std::to_string(_index) + ")";
  }

 public: // cloning functions
  TensorPropertyGuardAccessor(
      GuardManager* guard_manager,
      TensorPropertyGuardAccessor<_prop>* from)
      : GuardAccessor(guard_manager, from) {
    from->clone_visitor(this);
  }

  GuardAccessor* clone(
      RootGuardManager* cloned_root,
      const py::function& clone_filter_fn) override {
    return clone_common<TensorPropertyGuardAccessor<_prop>>(
        cloned_root, clone_filter_fn);
  }

  void clone_visitor(TensorPropertyGuardAccessor<_prop>* to) {
    to->_index = _index;
  }

 private:
  Py_ssize_t _index{-1};
};

/**
 * Indexed guard accessor: wraps the incoming value as the tuple
 * (index, value) before handing it to the child manager.
 */
class IndexedGuardAccessor : public GuardAccessor {
 public:
  IndexedGuardAccessor(
      RootGuardManager* root,
      py::int_ index,
      std::string source,
      py::handle example_value,
      py::handle guard_manager_enum)
      : GuardAccessor(
            root,
            index,
            std::move(source),
            example_value,
            guard_manager_enum),
        _index(index) {}

  // NB: Intentional duplication between check_nopybind and
  // check_verbose_nopybind.
  bool check_nopybind(PyObject* obj, bool matches_dict_tag = false)
      override { // borrowed ref
    PyObject* tuple = PyTuple_Pack(2, _index.ptr(), obj); // New reference
    bool result = _guard_manager->check_nopybind(tuple);
    Py_DECREF(tuple);
    return result;
  }

  GuardDebugInfo check_verbose_nopybind(
      PyObject* obj) override { // borrowed ref
    PyObject* tuple = PyTuple_Pack(2, _index.ptr(), obj); // New reference
    GuardDebugInfo result = _guard_manager->check_verbose_nopybind(tuple);
    Py_DECREF(tuple);
    return result;
  }

  std::string repr() const override {
    // Helpful when printing GuardManager tree structure.
    return "IndexedGuardAccessor(" +
        std::to_string(py::cast<Py_ssize_t>(_index)) + ")";
  }

 public: // cloning functions
  IndexedGuardAccessor(GuardManager* guard_manager, IndexedGuardAccessor* from)
      : GuardAccessor(guard_manager, from) {
    from->clone_visitor(this);
  }

  GuardAccessor* clone(
      RootGuardManager* cloned_root,
      const py::function& clone_filter_fn) override {
    return clone_common<IndexedGuardAccessor>(cloned_root, clone_filter_fn);
  }

  void clone_visitor(IndexedGuardAccessor* to) {
    to->_index = _index;
  }

 private:
  py::int_ _index{-1};
};

/**
 * Represents tensor.grad accessor.
 */
class GradGuardAccessor : public GuardAccessor {
 public:
  GradGuardAccessor(
      RootGuardManager* root,
      py::str name,
      std::string source,
      py::handle example_value,
      py::handle guard_manager_enum)
      : GuardAccessor(
            root,
            std::move(name),
            std::move(source),
            example_value,
            guard_manager_enum) {}

  // NB: Intentional duplication between check_nopybind and
  // check_verbose_nopybind.
  bool check_nopybind(PyObject* obj, bool matches_dict_tag = false)
      override { // borrowed ref
    // Check that it's a tensor.
    if (!THPVariable_CheckExact(obj) && !THPVariable_Check(obj)) {
      return false;
    }
    PyObject* grad =
        THPVariable_Wrap(THPVariable_Unpack(obj).grad()); // New reference
    bool result = _guard_manager->check_nopybind(grad);
    // For an undefined tensor, THPVariable_Wrap returns a new reference to
    // Py_None, so the result is never null and plain Py_DECREF suffices (no
    // need for Py_XDECREF).
    Py_DECREF(grad);
    return result;
  }

  GuardDebugInfo check_verbose_nopybind(
      PyObject* obj) override { // borrowed ref
    // Check that it's a tensor.
    if (!THPVariable_CheckExact(obj) && !THPVariable_Check(obj)) {
      return GuardDebugInfo(
          false, "not a tensor - grad field is accessed " + get_source(), 0);
    }
    PyObject* grad =
        THPVariable_Wrap(THPVariable_Unpack(obj).grad()); // New reference
    GuardDebugInfo result = _guard_manager->check_verbose_nopybind(grad);
    // For an undefined tensor, THPVariable_Wrap returns a new reference to
    // Py_None, so the result is never null and plain Py_DECREF suffices (no
    // need for Py_XDECREF).
    Py_DECREF(grad);
    return result;
  }

  std::string repr() const override {
    // Helpful when printing GuardManager tree structure.
    return "GradGuardAccessor(grad)";
  }

 public: // cloning functions
  GradGuardAccessor(GuardManager* guard_manager, GradGuardAccessor* from)
      : GuardAccessor(guard_manager, from) {
    from->clone_visitor(this);
  }

  GuardAccessor* clone(
      RootGuardManager* cloned_root,
      const py::function& clone_filter_fn) override {
    return clone_common<GradGuardAccessor>(cloned_root, clone_filter_fn);
  }

  // No extra state to copy; needed because the cloning constructor above
  // calls clone_visitor.
  void clone_visitor(GradGuardAccessor* to) {}
};
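
// Editorial note: guards on sources like ``x.grad`` route through
// GradGuardAccessor; the child manager then runs its usual guards (e.g., a
// TENSOR_MATCH) on the wrapped value, and an undefined/absent grad is
// presented to those guards as None.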

/**
 * Represents func.__defaults__ accessor.
 */
class FuncDefaultsGuardAccessor : public GuardAccessor {
 public:
  FuncDefaultsGuardAccessor(
      RootGuardManager* root,
      py::object name,
      std::string source,
      py::handle example_value,
      py::handle guard_manager_enum)
      : GuardAccessor(
            root,
            std::move(name),
            std::move(source),
            example_value,
            guard_manager_enum) {}

  // NB: Intentional duplication between check_nopybind and
  // check_verbose_nopybind.
  bool check_nopybind(PyObject* obj, bool matches_dict_tag = false)
      override { // borrowed ref
    PyObject* func = obj;
    if (PyMethod_Check(obj)) {
      func = PyMethod_GET_FUNCTION(obj); // borrowed ref
    } else if (PyInstanceMethod_Check(obj)) {
      func = PyInstanceMethod_GET_FUNCTION(obj); // borrowed ref
    }
    PyObject* x = PyFunction_GetDefaults(func); // borrowed ref
    if (x == nullptr) {
      PyErr_Clear();
      return false;
    }
    return _guard_manager->check_nopybind(x);
  }

  GuardDebugInfo check_verbose_nopybind(
      PyObject* obj) override { // borrowed ref
    PyObject* func = obj;
    if (PyMethod_Check(obj)) {
      func = PyMethod_GET_FUNCTION(obj); // borrowed ref
    } else if (PyInstanceMethod_Check(obj)) {
      func = PyInstanceMethod_GET_FUNCTION(obj); // borrowed ref
    }
    PyObject* x = PyFunction_GetDefaults(func);
    if (x == nullptr) {
      PyErr_Clear();
      return GuardDebugInfo(
          false,
          std::string(repr() + ": Not a function on ") + get_source(),
          0);
    }

    return _guard_manager->check_verbose_nopybind(x);
  }

  std::string repr() const override {
    return "FuncDefaultsGuardAccessor";
  }

 public: // cloning functions
  FuncDefaultsGuardAccessor(
      GuardManager* guard_manager,
      FuncDefaultsGuardAccessor* from)
      : GuardAccessor(guard_manager, from) {
    from->clone_visitor(this);
  }

  GuardAccessor* clone(
      RootGuardManager* cloned_root,
      const py::function& clone_filter_fn) override {
    return clone_common<FuncDefaultsGuardAccessor>(
        cloned_root, clone_filter_fn);
  }

  // No extra state to copy; needed because the cloning constructor above
  // calls clone_visitor.
  void clone_visitor(FuncDefaultsGuardAccessor* to) {}
};

/**
 * Represents func.__kwdefaults__ accessor.
 */
class FuncKwDefaultsGuardAccessor : public GuardAccessor {
 public:
  FuncKwDefaultsGuardAccessor(
      RootGuardManager* root,
      py::object name,
      std::string source,
      py::handle example_value,
      py::handle guard_manager_enum)
      : GuardAccessor(
            root,
            std::move(name),
            std::move(source),
            example_value,
            guard_manager_enum) {}

  // NB: Intentional duplication between check_nopybind and
  // check_verbose_nopybind.
  bool check_nopybind(PyObject* obj, bool matches_dict_tag = false)
      override { // borrowed ref
    PyObject* func = obj;
    if (PyMethod_Check(obj)) {
      func = PyMethod_GET_FUNCTION(obj); // borrowed ref
    } else if (PyInstanceMethod_Check(obj)) {
      func = PyInstanceMethod_GET_FUNCTION(obj); // borrowed ref
    }
    PyObject* x = PyFunction_GetKwDefaults(func); // borrowed ref
    if (x == nullptr) {
      PyErr_Clear();
      return false;
    }
    return _guard_manager->check_nopybind(x);
  }

  GuardDebugInfo check_verbose_nopybind(
      PyObject* obj) override { // borrowed ref
    PyObject* func = obj;
    if (PyMethod_Check(obj)) {
      func = PyMethod_GET_FUNCTION(obj); // borrowed ref
    } else if (PyInstanceMethod_Check(obj)) {
      func = PyInstanceMethod_GET_FUNCTION(obj); // borrowed ref
    }
    PyObject* x = PyFunction_GetKwDefaults(func);
    if (x == nullptr) {
      PyErr_Clear();
      return GuardDebugInfo(
          false,
          std::string(repr() + ": Not a function on ") + get_source(),
          0);
    }

    return _guard_manager->check_verbose_nopybind(x);
  }

  std::string repr() const override {
    return "FuncKwDefaultsGuardAccessor";
  }

 public: // cloning functions
  FuncKwDefaultsGuardAccessor(
      GuardManager* guard_manager,
      FuncKwDefaultsGuardAccessor* from)
      : GuardAccessor(guard_manager, from) {
    from->clone_visitor(this);
  }

  GuardAccessor* clone(
      RootGuardManager* cloned_root,
      const py::function& clone_filter_fn) override {
    return clone_common<FuncKwDefaultsGuardAccessor>(
        cloned_root, clone_filter_fn);
  }

  // No extra state to copy; needed because the cloning constructor above
  // calls clone_visitor.
  void clone_visitor(FuncKwDefaultsGuardAccessor* to) {}
};
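
// The two accessors above mirror the Python attributes func.__defaults__ and
// func.__kwdefaults__. For example (illustrative), for
//
//   def f(a, b=1, *, c=2): ...
//
// the child managers would guard the tuple (1,) and the dict {'c': 2}
// respectively; bound and instance methods are first unwrapped to the
// underlying function.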

/**
 * Represents f_globals accessor. This sits as a child accessor of the
 * RootGuardManager.
 */
class GlobalsGuardAccessor : public GuardAccessor {
 public:
  GlobalsGuardAccessor(
      RootGuardManager* root,
      py::dict globals_dict,
      std::string source,
      py::handle example_value,
      py::handle guard_manager_enum)
      : GuardAccessor(
            root,
            globals_dict,
            std::move(source),
            example_value,
            guard_manager_enum),
        _globals_dict(globals_dict.ptr()) {}

  // NB: Intentional duplication between check_nopybind and
  // check_verbose_nopybind.
  bool check_nopybind(PyObject* obj, bool matches_dict_tag = false)
      override { // borrowed ref
    // Ignore the obj arg. This is required to satisfy the function signature.
    // Just pass on the globals dict to the child manager.
    return _guard_manager->check_nopybind(_globals_dict);
  }

  GuardDebugInfo check_verbose_nopybind(
      PyObject* obj) override { // borrowed ref
    // Ignore the obj arg. This is required to satisfy the function signature.
    // Just pass on the globals dict to the child manager.
    return _guard_manager->check_verbose_nopybind(_globals_dict);
  }

  std::string repr() const override {
    return "GlobalsGuardAccessor";
  }

 public: // cloning functions
  GlobalsGuardAccessor(GuardManager* guard_manager, GlobalsGuardAccessor* from)
      : GuardAccessor(guard_manager, from) {
    from->clone_visitor(this);
  }

  GuardAccessor* clone(
      RootGuardManager* cloned_root,
      const py::function& clone_filter_fn) override {
    return clone_common<GlobalsGuardAccessor>(cloned_root, clone_filter_fn);
  }

  void clone_visitor(GlobalsGuardAccessor* to) {
    to->_globals_dict = _globals_dict;
  }

 private:
  // no need of py::object here because the globals_dict is already passed on
  // to the base class as accessor_key which is a py::object.
  PyObject* _globals_dict{nullptr};
};

/**
 * Represents type(...) accessor.
 */
class TypeGuardAccessor : public GuardAccessor {
 public:
  // name = __type_accessor__, a unique string used as attribute name.
  TypeGuardAccessor(
      RootGuardManager* root,
      py::str name,
      std::string source,
      py::handle example_value,
      py::handle guard_manager_enum)
      : GuardAccessor(
            root,
            std::move(name),
            std::move(source),
            example_value,
            guard_manager_enum) {}

  // NB: Intentional duplication between check_nopybind and
  // check_verbose_nopybind.
  bool check_nopybind(PyObject* obj, bool matches_dict_tag = false)
      override { // borrowed ref
    PyObject* x = (PyObject*)Py_TYPE(obj); // borrowed ref
    return _guard_manager->check_nopybind(x);
  }

  GuardDebugInfo check_verbose_nopybind(
      PyObject* obj) override { // borrowed ref
    PyObject* x = (PyObject*)Py_TYPE(obj); // borrowed ref
    return _guard_manager->check_verbose_nopybind(x);
  }

  std::string repr() const override {
    return "TypeGuardAccessor";
  }

 public: // cloning functions
  TypeGuardAccessor(GuardManager* guard_manager, TypeGuardAccessor* from)
      : GuardAccessor(guard_manager, from) {
    from->clone_visitor(this);
  }

  GuardAccessor* clone(
      RootGuardManager* cloned_root,
      const py::function& clone_filter_fn) override {
    return clone_common<TypeGuardAccessor>(cloned_root, clone_filter_fn);
  }

  void clone_visitor(TypeGuardAccessor* to) {}
};
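
// Illustrative: for a source like ``type(x)``, this accessor hands Py_TYPE(x)
// (a borrowed reference, so no refcounting is needed) to the child manager,
// which typically holds an identity check on the expected type object.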

/**
 * Getitem tuple_iterator accessor.
 */
class TupleIteratorGetItemAccessor : public GuardAccessor {
 public:
  TupleIteratorGetItemAccessor(
      RootGuardManager* root,
      py::object index,
      std::string source,
      py::handle example_value,
      py::handle guard_manager_enum)
      : GuardAccessor(
            root,
            index,
            std::move(source),
            example_value,
            guard_manager_enum),
        _index(py::cast<Py_ssize_t>(std::move(index))) {}

  // NB: Intentional duplication between check_nopybind and
  // check_verbose_nopybind.
  bool check_nopybind(PyObject* obj, bool matches_dict_tag = false)
      override { // borrowed ref
    _PyTupleIterObject* it = (_PyTupleIterObject*)obj;
    PyObject* x =
        PyTuple_GET_ITEM(it->it_seq, it->it_index + _index); // borrowed ref
    if (x == nullptr) {
      // Out of range.
      PyErr_Clear();
      return false;
    }
    bool result = _guard_manager->check_nopybind(x);
    return result;
  }

  GuardDebugInfo check_verbose_nopybind(
      PyObject* obj) override { // borrowed ref
    _PyTupleIterObject* it = (_PyTupleIterObject*)obj;
    PyObject* x =
        PyTuple_GET_ITEM(it->it_seq, it->it_index + _index); // borrowed ref
    if (x == nullptr) {
      // Out of range.
      PyErr_Clear();
      return GuardDebugInfo(false, std::string("IndexError ") + repr(), 0);
    }
    GuardDebugInfo result = _guard_manager->check_verbose_nopybind(x);
    return result;
  }

  std::string repr() const override {
    return "TupleIteratorGetItemAccessor(" + std::to_string(_index) + ")";
  }

 public: // cloning functions
  TupleIteratorGetItemAccessor(
      GuardManager* guard_manager,
      TupleIteratorGetItemAccessor* from)
      : GuardAccessor(guard_manager, from) {
    from->clone_visitor(this);
  }

  GuardAccessor* clone(
      RootGuardManager* cloned_root,
      const py::function& clone_filter_fn) override {
    return clone_common<TupleIteratorGetItemAccessor>(
        cloned_root, clone_filter_fn);
  }

  void clone_visitor(TupleIteratorGetItemAccessor* to) {
    to->_index = _index;
  }

 private:
  Py_ssize_t _index{-1};
};

/**
 * GlobalWeakRef accessor. Dynamo can insert a weakref object into the frame
 * globals. This accessor reads the globals and then calls the weakref object
 * to get the underlying object. This is a child of GlobalsGuardAccessor.
 * Therefore, we will get the globals dict while calling check_nopybind.
 */
class GlobalWeakRefGuardAccessor : public GuardAccessor {
 public:
  GlobalWeakRefGuardAccessor(
      RootGuardManager* root,
      py::object global_name,
      std::string source,
      py::handle example_value,
      py::handle guard_manager_enum)
      : GuardAccessor(
            root,
            global_name,
            std::move(source),
            example_value,
            guard_manager_enum),
        _global_name(global_name.ptr()) {}

  // NB: Intentional duplication between check_nopybind and
  // check_verbose_nopybind.
  bool check_nopybind(PyObject* obj, bool matches_dict_tag = false)
      override { // borrowed ref
    // obj is globals dict because GlobalWeakRefGuardAccessor has to be a
    // child of GlobalsGuardAccessor.
    PyObject* weakref = PyDict_GetItem(obj, _global_name); // borrowed ref
    if (weakref == nullptr) {
      // The weakref is not in the globals dict.
      PyErr_Clear();
      return false;
    }

    if (!PyWeakref_Check(weakref)) {
      return false;
    }

    PyObject* x = nullptr;
    if (PyWeakref_GetRef(weakref, &x) == -1) { // strong reference
      // error when attempting to call ref
      PyErr_Clear();
      return false;
    }
    if (x == nullptr) {
      // weakref is dead
      x = Py_NewRef(Py_None);
    }
    bool result = _guard_manager->check_nopybind(x);
    Py_DECREF(x);
    return result;
  }

  GuardDebugInfo check_verbose_nopybind(
      PyObject* obj) override { // borrowed ref
    // obj is globals dict because GlobalWeakRefGuardAccessor has to be a
    // child of GlobalsGuardAccessor.
    PyObject* weakref = PyDict_GetItem(obj, _global_name); // borrowed ref
    if (weakref == nullptr) {
      // The weakref is not in the globals dict.
      PyErr_Clear();
      return GuardDebugInfo(
          false, std::string("KeyError on ") + get_source(), 0);
    }

    if (!PyWeakref_Check(weakref)) {
      return GuardDebugInfo(
          false, std::string("Not a weakref ") + get_source(), 0);
    }

    PyObject* x = nullptr;
    if (PyWeakref_GetRef(weakref, &x) == -1) { // strong reference
      // error when attempting to call ref
      PyErr_Clear();
      return GuardDebugInfo(
          false, std::string("Weakref_GetRef failed ") + get_source(), 0);
    }
    if (x == nullptr) {
      // weakref is dead
      x = Py_NewRef(Py_None);
    }
    auto result = _guard_manager->check_verbose_nopybind(x);
    Py_DECREF(x);
    return result;
  }

  std::string repr() const override {
    return "GlobalWeakRefGuardAccessor(" +
        py::str(_global_name).cast<std::string>() + ")";
  }

 public: // cloning functions
  GlobalWeakRefGuardAccessor(
      GuardManager* guard_manager,
      GlobalWeakRefGuardAccessor* from)
      : GuardAccessor(guard_manager, from) {
    from->clone_visitor(this);
  }

  GuardAccessor* clone(
      RootGuardManager* cloned_root,
      const py::function& clone_filter_fn) override {
    return clone_common<GlobalWeakRefGuardAccessor>(
        cloned_root, clone_filter_fn);
  }

  void clone_visitor(GlobalWeakRefGuardAccessor* to) {
    to->_global_name = _global_name;
  }

 private:
  PyObject* _global_name{nullptr};
};
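
// Editorial summary of the weakref handling above and below: the accessor
// looks up the weakref in the frame's globals, dereferences it with
// PyWeakref_GetRef (which hands back a strong reference), and substitutes
// None when the referent is dead, so child guards always see a real object.
// WeakRefCallGuardAccessor below performs only the dereference step,
// modeling the call ``x_weak()``.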

/**
 * Implements weakref call - x_weak()
 */
class WeakRefCallGuardAccessor : public GuardAccessor {
 public:
  WeakRefCallGuardAccessor(
      RootGuardManager* root,
      py::str name,
      std::string source,
      py::handle example_value,
      py::handle guard_manager_enum)
      : GuardAccessor(
            root,
            std::move(name),
            std::move(source),
            example_value,
            guard_manager_enum) {}

  // NB: Intentional duplication between check_nopybind and
  // check_verbose_nopybind.
  bool check_nopybind(PyObject* obj, bool matches_dict_tag = false)
      override { // borrowed ref
    if (!PyWeakref_Check(obj)) {
      return false;
    }

    PyObject* x = nullptr;
    if (PyWeakref_GetRef(obj, &x) == -1) { // strong reference
      // error when attempting to call ref
      PyErr_Clear();
      return false;
    }
    if (x == nullptr) {
      // weakref is dead
      x = Py_NewRef(Py_None);
    }
    bool result = _guard_manager->check_nopybind(x);
    Py_DECREF(x);
    return result;
  }

  GuardDebugInfo check_verbose_nopybind(
      PyObject* obj) override { // borrowed ref
    if (!PyWeakref_Check(obj)) {
      return GuardDebugInfo(
          false, std::string("Not a weakref obj ") + get_source(), 0);
    }

    PyObject* x = nullptr;
    if (PyWeakref_GetRef(obj, &x) == -1) { // strong reference
      // error when attempting to call ref
      PyErr_Clear();
      return GuardDebugInfo(
          false, std::string("Weakref_GetRef failed ") + get_source(), 0);
    }
    if (x == nullptr) {
      // weakref is dead
      x = Py_NewRef(Py_None);
    }
    auto result = _guard_manager->check_verbose_nopybind(x);
    Py_DECREF(x);
    return result;
  }

  std::string repr() const override {
    return "WeakRefCallGuardAccessor()";
  }

 public: // cloning functions
  WeakRefCallGuardAccessor(
      GuardManager* guard_manager,
      WeakRefCallGuardAccessor* from)
      : GuardAccessor(guard_manager, from) {
    from->clone_visitor(this);
  }

  GuardAccessor* clone(
      RootGuardManager* cloned_root,
      const py::function& clone_filter_fn) override {
    return clone_common<WeakRefCallGuardAccessor>(cloned_root, clone_filter_fn);
  }

  void clone_visitor(WeakRefCallGuardAccessor* to) {}
};

/**
 * Implements a function call with no args - e.g., torch.cuda.current_device()
 */
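// The accessed value is recomputed by calling the guarded callable on every
// guard evaluation. If the call raises, the exception is swallowed and
// reported as an ordinary guard failure rather than propagated to the caller.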
class CallFunctionNoArgsGuardAccessor : public GuardAccessor {
 public:
  CallFunctionNoArgsGuardAccessor(
      RootGuardManager* root,
      py::str name,
      std::string source,
      py::handle example_value,
      py::handle guard_manager_enum)
      : GuardAccessor(
            root,
            std::move(name),
            std::move(source),
            example_value,
            guard_manager_enum) {}

  // NB: Intentional duplication between check_nopybind and
  // check_verbose_nopybind.
  bool check_nopybind(PyObject* obj, bool matches_dict_tag = false)
      override { // borrowed ref
    if (!PyCallable_Check(obj)) {
      return false;
    }

    PyObject* x = PyObject_CallNoArgs(obj);
    if (x == nullptr) {
      // Call failed, clear the exception and return false.
      PyErr_Clear();
      return false;
    }

    bool result = _guard_manager->check_nopybind(x);
    Py_DECREF(x);
    return result;
  }

  GuardDebugInfo check_verbose_nopybind(
      PyObject* obj) override { // borrowed ref
    if (!PyCallable_Check(obj)) {
      return GuardDebugInfo(
          false, std::string("Not a callable obj ") + get_source(), 0);
    }

    PyObject* x = PyObject_CallNoArgs(obj);
    if (x == nullptr) {
      // Call failed, clear the exception and return debug info.
      std::string exc_message = get_exception_message();
      PyErr_Clear();
      return GuardDebugInfo(false, exc_message, 0);
    }

    GuardDebugInfo result = _guard_manager->check_verbose_nopybind(x);
    Py_DECREF(x);
    return result;
  }

  std::string repr() const override {
    return "CallFunctionNoArgsGuardAccessor()";
  }

 public: // cloning functions
  CallFunctionNoArgsGuardAccessor(
      GuardManager* guard_manager,
      CallFunctionNoArgsGuardAccessor* from)
      : GuardAccessor(guard_manager, from) {
    from->clone_visitor(this);
  }

  GuardAccessor* clone(
      RootGuardManager* cloned_root,
      const py::function& clone_filter_fn) override {
    return clone_common<CallFunctionNoArgsGuardAccessor>(
        cloned_root, clone_filter_fn);
  }

  void clone_visitor(CallFunctionNoArgsGuardAccessor* to) {}
};

/**
 * Similar to PythonLambdaLeafGuard, this class allows developers to supply
 * the accessor as a Python function. This is useful for the from_numpy
 * source.
 */
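// Note that the accessor function itself is also passed to the GuardAccessor
// base class, where it doubles as the accessor key.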
class PythonLambdaGuardAccessor : public GuardAccessor {
 public:
  PythonLambdaGuardAccessor(
      RootGuardManager* root,
      py::function accessor_fn,
      std::string source,
      py::handle example_value,
      py::handle guard_manager_enum)
      : GuardAccessor(
            root,
            accessor_fn,
            std::move(source),
            example_value,
            guard_manager_enum),
        _accessor_fn(std::move(accessor_fn)) {}

  // NB: Intentional duplication between check_nopybind and
  // check_verbose_nopybind.
  bool check_nopybind(PyObject* obj, bool matches_dict_tag = false)
      override { // borrowed ref
    PyObject* x = PyObject_CallOneArg(_accessor_fn.ptr(), obj); // new ref
    if (x == nullptr) {
      // The accessor function failed.
      PyErr_Clear();
      return false;
    }
    bool result = _guard_manager->check_nopybind(x);
    Py_DECREF(x);
    return result;
  }

  GuardDebugInfo check_verbose_nopybind(
      PyObject* obj) override { // borrowed ref
    PyObject* x = PyObject_CallOneArg(_accessor_fn.ptr(), obj); // new ref
    if (x == nullptr) {
      // The accessor function failed.
      std::string exc_message = get_exception_message();
      PyErr_Clear();
      return GuardDebugInfo(false, exc_message, 0);
    }
    GuardDebugInfo result = _guard_manager->check_verbose_nopybind(x);
    Py_DECREF(x);
    return result;
  }

  std::string repr() const override {
    return "PythonLambdaGuardAccessor";
  }

 public: // cloning functions
  PythonLambdaGuardAccessor(
      GuardManager* guard_manager,
      PythonLambdaGuardAccessor* from)
      : GuardAccessor(guard_manager, from) {
    from->clone_visitor(this);
  }

  GuardAccessor* clone(
      RootGuardManager* cloned_root,
      const py::function& clone_filter_fn) override {
    return clone_common<PythonLambdaGuardAccessor>(
        cloned_root, clone_filter_fn);
  }

  void clone_visitor(PythonLambdaGuardAccessor* to) {
    to->_accessor_fn = _accessor_fn;
  }

 private:
  py::object _accessor_fn;
};
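
// The installer functions below wire one shared RelationalGuard into several
// GuardManagers. A rough sketch of the intended Python-side usage, given two
// GuardManagers mgr_x and mgr_y obtained from a RootGuardManager (the names
// and the guard expression string are illustrative placeholders only):
//
//   from torch._C._dynamo import guards
//   guards.install_object_aliasing_guard(mgr_x, mgr_y, ["L['x'] is L['y']"])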

void install_object_aliasing_guard(
    GuardManager* x,
    GuardManager* y,
    py::object verbose_code_parts) {
  // Adds a "tensor X is tensor Y" guard. This is an example of a relational
  // guard. There is one guard object that is shared between two guard
  // managers.
  std::shared_ptr<RelationalGuard> guard =
      std::make_shared<OBJECT_ALIASING>(std::move(verbose_code_parts));

  // Register the resetter on the root guard manager, so that it can reset
  // the newly added relational guard when the guard eval fails.
  x->get_root()->add_relational_guard_resetter(guard);

  // In case the guard is a DictGuardManager, OBJECT_ALIASING guard is a
  // permitted guard.
  x->add_permitted_leaf_guard(guard);
  y->add_permitted_leaf_guard(guard);
}

void install_no_tensor_aliasing_guard(
    const py::list& guard_managers,
    const py::list& tensor_names,
    py::object verbose_code_parts) {
  // Adds a guard that checks that none of the tensors alias. This is an
  // example of a relational guard. There is one guard object that is shared
  // between multiple guard managers.
  std::shared_ptr<RelationalGuard> guard = std::make_shared<NO_TENSOR_ALIASING>(
      tensor_names, std::move(verbose_code_parts));

  // Register the resetter on the root guard manager, so that it can reset
  // the newly added relational guard when the guard eval fails.
  py::cast<GuardManager*>(guard_managers[0])
      ->get_root()
      ->add_relational_guard_resetter(guard);
  for (const auto& guard_manager : guard_managers) {
    py::cast<GuardManager*>(guard_manager)->add_leaf_guard(guard);
  }
}

void install_symbolic_shape_guard(
    const py::list& guard_managers,
    py::int_ nargs_int,
    py::int_ nargs_float,
    py::int_ py_addr,
    py::object py_addr_keep_alive,
    py::object verbose_code_parts) {
  // Adds a guard that checks symbolic shapes. This is an example of a
  // relational guard. There is one guard object that is shared between
  // multiple guard managers.
  std::shared_ptr<RelationalGuard> guard =
      std::make_shared<SYMBOLIC_SHAPE_GUARD>(
          std::move(nargs_int),
          std::move(nargs_float),
          std::move(py_addr),
          std::move(py_addr_keep_alive),
          std::move(verbose_code_parts));

  // Register the resetter on the root guard manager, so that it can reset
  // the newly added relational guard when the guard eval fails.
  py::cast<GuardManager*>(guard_managers[0])
      ->get_root()
      ->add_relational_guard_resetter(guard);
  for (const auto& guard_manager : guard_managers) {
    py::cast<GuardManager*>(guard_manager)->add_leaf_guard(guard);
  }
}

void install_storage_overlapping_guard_with_checker(
    const std::shared_ptr<StorageOverlapChecker>& checker,
    const py::list& guard_managers,
    const py::object& verbose_code_parts,
    bool overlapping) {
  if (guard_managers.empty()) {
    // If there are no GuardManagers, there's no need to create a
    // STORAGE_OVERLAPPING guard.
    return;
  }

  std::shared_ptr<RelationalGuard> guard =
      std::make_shared<STORAGE_OVERLAPPING>(
          overlapping, checker, verbose_code_parts);
  py::cast<GuardManager*>(guard_managers[0])
      ->get_root()
      ->add_relational_guard_resetter(guard);
  for (const auto& guard_manager : guard_managers) {
    py::cast<GuardManager*>(guard_manager)->add_leaf_guard(guard);
  }
}

void install_storage_overlapping_guard(
    const py::list& overlapping_guard_managers,
    const py::list& non_overlapping_guard_managers,
    const py::object& verbose_code_parts) {
  // Create a single StorageOverlapChecker that will be shared amongst
  // the 2 STORAGE_OVERLAPPING guards below.
  std::shared_ptr<StorageOverlapChecker> checker =
      std::make_shared<StorageOverlapChecker>(
          overlapping_guard_managers.size(),
          non_overlapping_guard_managers.size());
  // Create the possibly overlapping storage guard.
  install_storage_overlapping_guard_with_checker(
      checker,
      overlapping_guard_managers,
      verbose_code_parts,
      /* overlapping= */ true);
  // Create the non-overlapping storage guard.
  install_storage_overlapping_guard_with_checker(
      checker,
      non_overlapping_guard_managers,
      verbose_code_parts,
      /* overlapping= */ false);
}
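
// Evict the data caches by streaming over a buffer that is (on typical
// hardware) larger than the last-level cache. The 64-byte stride below
// matches the common cache-line size, so each iteration touches a distinct
// line, and the volatile sink keeps the compiler from eliding the loop.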
char flush_cache_by_eviction() {
  constexpr size_t evict_size = 32 * 1024 * 1024;
  std::vector<char> buffer(evict_size, 1);

  volatile char sink = 0;
  for (size_t i = 0; i < buffer.size(); i += 64) {
    sink ^= buffer[i];
  }
  return sink;
}
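
// Returns the average guard-evaluation latency in microseconds over n_iters
// runs, evicting the caches before each timed run so the measurement is not
// flattered by warm caches.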
double profile_guard_manager(
    RootGuardManager* root,
    py::object f_locals,
    int n_iters) {
  PyObject* locals = f_locals.ptr();

  // Warmup to set up fast paths (like dict_tags) for the actual profiling
  for (int i = 0; i < 5; i++) {
    root->check_nopybind(locals);
  }

  std::chrono::duration<double> total_elapsed{0.0};
  for (int i = 0; i < n_iters; i++) {
    // Flush the caches to accurately measure the overhead;
    // store into a volatile to prevent optimization
    volatile char dummy = flush_cache_by_eviction();
    (void)dummy;

    auto start = std::chrono::high_resolution_clock::now();
    root->check_nopybind(locals);
    auto end = std::chrono::high_resolution_clock::now();
    total_elapsed += end - start;
  }

  // Calculate the average time per iteration in microseconds
  return (total_elapsed.count() * 1e6) / n_iters;
}

} // namespace

static void* _torchinductor_pyobject_tensor_data_ptr(PyObject* obj) {
  if (C10_UNLIKELY(
          obj == nullptr ||
          (!THPVariable_CheckExact(obj) && !THPVariable_Check(obj)))) {
    throw std::runtime_error(
        "_torchinductor_pyobject_tensor_data_ptr: non-tensor input");
  }
  return THPVariable_Unpack(obj).data_ptr();
}
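
// The next two helpers let callers (e.g. the eval frame code) hold and run
// the root guard manager through an opaque void* handle, without needing
// pybind types on the hot path.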

void* convert_to_root_guard_manager(py::object root) {
  // For invalidated guards, return nullptr
  if (root.is(py::none())) {
    return nullptr;
  }
  RootGuardManager* root_mgr = std::move(root).cast<RootGuardManager*>();
  return (void*)root_mgr;
}

bool run_root_guard_manager(void* root, FrameLocalsMapping* f_locals) {
  // For invalidated guards, return false
  if (root == nullptr) {
    return false;
  }
  return ((RootGuardManager*)root)->check_nopybind(f_locals);
}

PyObject* torch_c_dynamo_guards_init() {
  // initialize TensorGuardsType
  TensorGuardsType.tp_name = "torch._C._dynamo.guards.TensorGuards";
  TensorGuardsType.tp_basicsize = sizeof(TensorGuards);
  TensorGuardsType.tp_itemsize = 0;
  TensorGuardsType.tp_dealloc = (destructor)TensorGuards_dealloc;
  TensorGuardsType.tp_flags = Py_TPFLAGS_DEFAULT;
  TensorGuardsType.tp_doc = "Check properties of a torch.Tensor";
  TensorGuardsType.tp_methods = TensorGuards_methods;
  TensorGuardsType.tp_init = (initproc)TensorGuards_init;
  TensorGuardsType.tp_new = TensorGuards_new;

  if (PyType_Ready(&TensorGuardsType) < 0)
    return nullptr;

  GlobalStateGuardType.tp_name = "torch._C._dynamo.guards.GlobalStateGuard";
  GlobalStateGuardType.tp_basicsize = sizeof(GlobalStateGuard);
  GlobalStateGuardType.tp_itemsize = 0;
  GlobalStateGuardType.tp_flags = Py_TPFLAGS_DEFAULT;
  GlobalStateGuardType.tp_doc = "Guard on PyTorch global flags such as no_grad";
  GlobalStateGuardType.tp_methods = GlobalStateGuard_methods;
  GlobalStateGuardType.tp_init = (initproc)GlobalStateGuard_init;
  GlobalStateGuardType.tp_new = PyType_GenericNew;

  if (PyType_Ready(&GlobalStateGuardType) < 0)
    return nullptr;

  auto m = PyModule_Create(&_module);
  if (m == nullptr)
    return nullptr;

#ifdef Py_GIL_DISABLED
  PyUnstable_Module_SetGIL(m, Py_MOD_GIL_NOT_USED);
#endif

  Py_INCREF(&TensorGuardsType);
  if (PyModule_AddObject(m, "TensorGuards", (PyObject*)&TensorGuardsType) < 0) {
    Py_DECREF(&TensorGuardsType);
    Py_DECREF(m);
    return nullptr;
  }

  Py_INCREF(&GlobalStateGuardType);
  if (PyModule_AddObject(
          m, "GlobalStateGuard", (PyObject*)&GlobalStateGuardType) < 0) {
    Py_DECREF(&GlobalStateGuardType);
    Py_DECREF(m);
    return nullptr;
  }

  // We expose the address of _torchinductor_pyobject_tensor_data_ptr in order
  // to allow manual linking in our generated TorchInductor Python bindings.
  // While regular linking works in most cases, it does not work properly in
  // fbcode due to janky build setup there.
  if (PyModule_AddObject(
          m,
          "_torchinductor_pyobject_tensor_data_ptr",
          PyLong_FromVoidPtr(reinterpret_cast<void*>(
              &_torchinductor_pyobject_tensor_data_ptr))) < 0) {
    return nullptr;
  }

  auto py_m = py::handle(m).cast<py::module>();
  py::class_<GuardDebugInfo, std::unique_ptr<GuardDebugInfo>>(
      py_m, "GuardDebugInfo")
      .def(py::init<bool, py::list, int>())
      .def("__str__", &GuardDebugInfo::to_string)
      .def_readonly("result", &GuardDebugInfo::result)
      .def_readonly("verbose_code_parts", &GuardDebugInfo::verbose_code_parts)
      .def_readonly(
          "num_guards_executed", &GuardDebugInfo::num_guards_executed);

  // Leaf Guards
  py::class_<LeafGuard, std::shared_ptr<LeafGuard>>(py_m, "LeafGuard")
      .def("verbose_code_parts", &LeafGuard::verbose_code_parts);
  py::class_<LAMBDA_GUARD, LeafGuard, std::shared_ptr<LAMBDA_GUARD>>(
      py_m, "LAMBDA_GUARD")
      .def(py::init<py::function, py::list>())
      .def("__call__", &LAMBDA_GUARD::check);
  py::class_<TYPE_MATCH, LeafGuard, std::shared_ptr<TYPE_MATCH>>(
      py_m, "TYPE_MATCH")
      .def(py::init<py::object, py::list>())
      .def("__call__", &TYPE_MATCH::check);
  py::class_<ID_MATCH, LeafGuard, std::shared_ptr<ID_MATCH>>(py_m, "ID_MATCH")
      .def(py::init<py::object, py::list>())
      .def("__call__", &ID_MATCH::check);
  py::class_<NONE_MATCH, LeafGuard, std::shared_ptr<NONE_MATCH>>(
      py_m, "NONE_MATCH")
      .def(py::init<py::list>())
      .def("__call__", &NONE_MATCH::check);
  py::class_<TRUE_MATCH, LeafGuard, std::shared_ptr<TRUE_MATCH>>(
      py_m, "TRUE_MATCH")
      .def(py::init<py::list>())
      .def("__call__", &TRUE_MATCH::check);
  py::class_<FALSE_MATCH, LeafGuard, std::shared_ptr<FALSE_MATCH>>(
      py_m, "FALSE_MATCH")
      .def(py::init<py::list>())
      .def("__call__", &FALSE_MATCH::check);
  py::class_<EQUALS_MATCH, LeafGuard, std::shared_ptr<EQUALS_MATCH>>(
      py_m, "EQUALS_MATCH")
      .def(py::init<py::object, py::list>())
      .def("__call__", &EQUALS_MATCH::check);
  py::class_<LENGTH_CHECK, LeafGuard, std::shared_ptr<LENGTH_CHECK>>(
      py_m, "LENGTH_CHECK")
      .def(py::init<py::object, py::list>())
      .def("__call__", &LENGTH_CHECK::check);
  py::class_<DICT_LENGTH, LeafGuard, std::shared_ptr<DICT_LENGTH>>(
      py_m, "DICT_LENGTH")
      .def(py::init<py::object, py::list>())
      .def("__call__", &DICT_LENGTH::check);
  py::class_<DEFAULT_DEVICE, LeafGuard, std::shared_ptr<DEFAULT_DEVICE>>(
      py_m, "DEFAULT_DEVICE")
      .def(py::init<py::list>())
      .def("__call__", &DEFAULT_DEVICE::check);
  py::class_<NOT_NONE, LeafGuard, std::shared_ptr<NOT_NONE>>(py_m, "NOT_NONE")
      .def(py::init<py::list>())
      .def("__call__", &NOT_NONE::check);
  py::class_<
      MAPPING_KEYS_MATCH,
      LeafGuard,
      std::shared_ptr<MAPPING_KEYS_MATCH>>(py_m, "MAPPING_KEYS_MATCH")
      .def(py::init<py::object, py::list>())
      .def("__call__", &MAPPING_KEYS_MATCH::check);
  py::class_<
      TUPLE_ITERATOR_LEN,
      LeafGuard,
      std::shared_ptr<TUPLE_ITERATOR_LEN>>(py_m, "TUPLE_ITERATOR_LEN")
      .def(py::init<py::object, py::object, py::list>())
      .def("__call__", &TUPLE_ITERATOR_LEN::check);
  py::class_<
      RANGE_ITERATOR_MATCH,
      LeafGuard,
      std::shared_ptr<RANGE_ITERATOR_MATCH>>(py_m, "RANGE_ITERATOR_MATCH")
      .def(py::init<py::object, py::object, py::object, py::object, py::list>())
      .def("__call__", &RANGE_ITERATOR_MATCH::check);
  py::class_<GLOBAL_STATE, LeafGuard, std::shared_ptr<GLOBAL_STATE>>(
      py_m, "GLOBAL_STATE")
      .def(py::init<py::list>())
      .def("check_verbose", &GLOBAL_STATE::check_verbose)
      .def("__call__", &GLOBAL_STATE::check);
  py::class_<
      TORCH_FUNCTION_MODE_STACK,
      LeafGuard,
      std::shared_ptr<TORCH_FUNCTION_MODE_STACK>>(
      py_m, "TORCH_FUNCTION_MODE_STACK")
      .def(py::init<py::list, py::list>())
      .def("__call__", &TORCH_FUNCTION_MODE_STACK::check);
  py::class_<NO_HASATTR, LeafGuard, std::shared_ptr<NO_HASATTR>>(
      py_m, "NO_HASATTR")
      .def(py::init<py::object, py::list>())
      .def("__call__", &NO_HASATTR::check);
  py::class_<DICT_CONTAINS, LeafGuard, std::shared_ptr<DICT_CONTAINS>>(
      py_m, "DICT_CONTAINS")
      .def(py::init<bool, py::object, py::list>())
      .def("__call__", &DICT_CONTAINS::check);
  py::class_<DYNAMIC_INDICES, LeafGuard, std::shared_ptr<DYNAMIC_INDICES>>(
      py_m, "DYNAMIC_INDICES")
      .def(py::init<py::set, py::list>())
      .def("__call__", &DYNAMIC_INDICES::check);
  py::class_<DICT_VERSION, LeafGuard, std::shared_ptr<DICT_VERSION>>(
      py_m, "DICT_VERSION")
      .def(py::init<py::object, py::list>())
      .def("__call__", &DICT_VERSION::check);
  py::class_<
      DISPATCH_KEY_SET_MATCH,
      LeafGuard,
      std::shared_ptr<DISPATCH_KEY_SET_MATCH>>(py_m, "DISPATCH_KEY_SET_MATCH")
      .def(py::init<RootGuardManager*, py::object, py::list>())
      .def("__call__", &DISPATCH_KEY_SET_MATCH::check);
  py::class_<TENSOR_MATCH, LeafGuard, std::shared_ptr<TENSOR_MATCH>>(
      py_m, "TENSOR_MATCH")
      .def(py::init<
               RootGuardManager*,
               py::object,
               py::object,
               py::object,
               py::str,
               py::list,
               py::type,
               py::object>())
      .def("__call__", &TENSOR_MATCH::check);
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<RelationalGuard, LeafGuard, std::shared_ptr<RelationalGuard>>(
      py_m, "RelationalGuard");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      OBJECT_ALIASING,
      RelationalGuard,
      std::shared_ptr<OBJECT_ALIASING>>(py_m, "OBJECT_ALIASING");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      NO_TENSOR_ALIASING,
      RelationalGuard,
      std::shared_ptr<NO_TENSOR_ALIASING>>(py_m, "NO_TENSOR_ALIASING");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      STORAGE_OVERLAPPING,
      RelationalGuard,
      std::shared_ptr<STORAGE_OVERLAPPING>>(py_m, "STORAGE_OVERLAPPING");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      SYMBOLIC_SHAPE_GUARD,
      RelationalGuard,
      std::shared_ptr<SYMBOLIC_SHAPE_GUARD>>(py_m, "SYMBOLIC_SHAPE_GUARD");

  // Guard Accessors - These are present so that we can iterate over the
  // GuardManager hierarchy. We intentionally do not provide even an init
  // function on these, because these should be constructed from within C++.
  py::class_<GuardAccessor, std::unique_ptr<GuardAccessor>>(
      py_m, "GuardAccessor")
      .def("repr", &GuardAccessor::repr);
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      GetAttrGuardAccessor,
      GuardAccessor,
      std::unique_ptr<GetAttrGuardAccessor>>(py_m, "GetAttrGuardAccessor");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      GenericGetAttrGuardAccessor,
      GuardAccessor,
      std::unique_ptr<GenericGetAttrGuardAccessor>>(
      py_m, "GenericGetAttrGuardAccessor");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      GetGenericDictGuardAccessor,
      GuardAccessor,
      std::unique_ptr<GetGenericDictGuardAccessor>>(
      py_m, "GetGenericDictGuardAccessor");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      GetItemGuardAccessor,
      GuardAccessor,
      std::unique_ptr<GetItemGuardAccessor>>(py_m, "GetItemGuardAccessor");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      FrameLocalsGuardAccessor,
      GuardAccessor,
      std::unique_ptr<FrameLocalsGuardAccessor>>(
      py_m, "FrameLocalsGuardAccessor");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      DictGetItemGuardAccessor,
      GuardAccessor,
      std::unique_ptr<DictGetItemGuardAccessor>>(
      py_m, "DictGetItemGuardAccessor");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      ListGetItemGuardAccessor,
      GuardAccessor,
      std::unique_ptr<ListGetItemGuardAccessor>>(
      py_m, "ListGetItemGuardAccessor");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      TupleGetItemGuardAccessor,
      GuardAccessor,
      std::unique_ptr<TupleGetItemGuardAccessor>>(
      py_m, "TupleGetItemGuardAccessor");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      FuncDefaultsGuardAccessor,
      GuardAccessor,
      std::unique_ptr<FuncDefaultsGuardAccessor>>(
      py_m, "FuncDefaultsGuardAccessor");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      FuncKwDefaultsGuardAccessor,
      GuardAccessor,
      std::unique_ptr<FuncKwDefaultsGuardAccessor>>(
      py_m, "FuncKwDefaultsGuardAccessor");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      GlobalsGuardAccessor,
      GuardAccessor,
      std::unique_ptr<GlobalsGuardAccessor>>(py_m, "GlobalsGuardAccessor");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      TypeGuardAccessor,
      GuardAccessor,
      std::unique_ptr<TypeGuardAccessor>>(py_m, "TypeGuardAccessor");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      WeakRefCallGuardAccessor,
      GuardAccessor,
      std::unique_ptr<WeakRefCallGuardAccessor>>(
      py_m, "WeakRefCallGuardAccessor");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      CallFunctionNoArgsGuardAccessor,
      GuardAccessor,
      std::unique_ptr<CallFunctionNoArgsGuardAccessor>>(
      py_m, "CallFunctionNoArgsGuardAccessor");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      TupleIteratorGetItemAccessor,
      GuardAccessor,
      std::unique_ptr<TupleIteratorGetItemAccessor>>(
      py_m, "TupleIteratorGetItemAccessor");
  // NOLINTNEXTLINE(bugprone-unused-raii)
  py::class_<
      GlobalWeakRefGuardAccessor,
      GuardAccessor,
      std::unique_ptr<GlobalWeakRefGuardAccessor>>(
      py_m, "GlobalWeakRefGuardAccessor");

  // Guard Manager - no constructor in Python; Python code should use
  // RootGuardManager.
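  //
  // A rough sketch of how these bindings are meant to be driven from Python
  // (the source strings and the guard_manager_enum argument come from
  // Python-side helpers and are only illustrative placeholders here):
  //
  //   root = torch._C._dynamo.guards.RootGuardManager()
  //   mgr = root.getitem_manager(
  //       key="x", source="L['x']", example_value=val, guard_manager_enum=...)
  //   mgr.add_type_match_guard(type(val), ["type match on L['x']"])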
  py::class_<GuardManager, std::unique_ptr<GuardManager>>(py_m, "GuardManager")
      // return by reference because GuardManager has the ownership of accessors
      .def("get_source", &GuardManager::get_source)
      .def("fail_count", &GuardManager::fail_count)
      .def(
          "get_accessors",
          &GuardManager::get_accessors,
          py::return_value_policy::reference)
      // return by reference because GuardManager has the ownership of child
      // managers
      .def(
          "get_child_managers",
          &GuardManager::get_child_managers,
          py::return_value_policy::reference)
      // return by reference because GuardManager has the ownership of leaf
      // guards
      .def(
          "get_leaf_guards",
          &GuardManager::get_leaf_guards,
          py::return_value_policy::reference)
      .def(
          "add_lambda_guard",
          [](GuardManager& self,
             py::object lambda,
             py::object verbose_code_parts) -> void {
            self.add_leaf_guard(std::make_shared<LAMBDA_GUARD>(
                std::move(lambda), std::move(verbose_code_parts)));
          })
      .def(
          "add_type_match_guard",
          [](GuardManager& self,
             py::object value,
             py::object verbose_code_parts) -> void {
            SKIP_IF_GUARD_ALREADY_PRESENT("TYPE_MATCH");
            self.add_leaf_guard(std::make_shared<TYPE_MATCH>(
                std::move(value), std::move(verbose_code_parts)));
          })
      .def(
          "add_id_match_guard",
          [](GuardManager& self,
             py::object value,
             py::object verbose_code_parts) -> void {
            SKIP_IF_GUARD_ALREADY_PRESENT("ID_MATCH");
            self.add_leaf_guard(std::make_shared<ID_MATCH>(
                std::move(value), std::move(verbose_code_parts)));
          })
      .def(
          "add_none_match_guard",
          [](GuardManager& self, py::object verbose_code_parts) -> void {
            SKIP_IF_GUARD_ALREADY_PRESENT("NONE_MATCH");
            self.add_leaf_guard(
                std::make_shared<NONE_MATCH>(std::move(verbose_code_parts)));
          })
      .def(
          "add_true_match_guard",
          [](GuardManager& self, py::object verbose_code_parts) -> void {
            SKIP_IF_GUARD_ALREADY_PRESENT("TRUE_MATCH");
            self.add_leaf_guard(
                std::make_shared<TRUE_MATCH>(std::move(verbose_code_parts)));
          })
      .def(
          "add_false_match_guard",
          [](GuardManager& self, py::object verbose_code_parts) -> void {
            SKIP_IF_GUARD_ALREADY_PRESENT("FALSE_MATCH");
            self.add_leaf_guard(
                std::make_shared<FALSE_MATCH>(std::move(verbose_code_parts)));
          })
      .def(
          "add_equals_match_guard",
          [](GuardManager& self,
             py::object value,
             py::object verbose_code_parts) -> void {
            SKIP_IF_GUARD_ALREADY_PRESENT("EQUALS_MATCH");
            self.add_leaf_guard(std::make_shared<EQUALS_MATCH>(
                std::move(value), std::move(verbose_code_parts)));
          })
      .def(
          "add_length_check_guard",
          [](GuardManager& self,
             py::object value,
             py::object verbose_code_parts) -> void {
            SKIP_IF_GUARD_ALREADY_PRESENT("LENGTH_CHECK");
            self.add_leaf_guard(std::make_shared<LENGTH_CHECK>(
                std::move(value), std::move(verbose_code_parts)));
          })
      .def(
          "add_dict_length_check_guard",
          [](GuardManager& self,
             py::object value,
             py::object verbose_code_parts) -> void {
            SKIP_IF_GUARD_ALREADY_PRESENT("DICT_LENGTH");
            self.add_leaf_guard(std::make_shared<DICT_LENGTH>(
                std::move(value), std::move(verbose_code_parts)));
          })
      .def(
          "add_tuple_iterator_length_guard",
          [](GuardManager& self,
             py::object length,
             py::object type_id,
             py::object verbose_code_parts) -> void {
            SKIP_IF_GUARD_ALREADY_PRESENT("TUPLE_ITERATOR_LEN");
            self.add_leaf_guard(std::make_shared<TUPLE_ITERATOR_LEN>(
                std::move(length),
                std::move(type_id),
                std::move(verbose_code_parts)));
          })
      .def(
          "add_range_iterator_match_guard",
          [](GuardManager& self,
             py::object start,
             py::object stop,
             py::object step,
             py::object type_id,
             py::object verbose_code_parts) -> void {
            SKIP_IF_GUARD_ALREADY_PRESENT("RANGE_ITERATOR_MATCH");
            self.add_leaf_guard(std::make_shared<RANGE_ITERATOR_MATCH>(
                std::move(start),
                std::move(stop),
                std::move(step),
                std::move(type_id),
                std::move(verbose_code_parts)));
          })
      .def(
          "add_default_device_guard",
          [](GuardManager& self, py::object verbose_code_parts) -> void {
            self.add_leaf_guard(std::make_shared<DEFAULT_DEVICE>(
                std::move(verbose_code_parts)));
          })
      .def(
          "add_not_none_guard",
          [](GuardManager& self, py::object verbose_code_parts) -> void {
            SKIP_IF_GUARD_ALREADY_PRESENT("NOT_NONE");
            self.add_leaf_guard(
                std::make_shared<NOT_NONE>(std::move(verbose_code_parts)));
          })
      .def(
          "add_mapping_keys_guard",
          [](GuardManager& self,
             py::object value,
             py::object verbose_code_parts) -> void {
            SKIP_IF_GUARD_ALREADY_PRESENT("MAPPING_KEYS_MATCH");
            self.add_leaf_guard(std::make_shared<MAPPING_KEYS_MATCH>(
                std::move(value), std::move(verbose_code_parts)));
          })
      .def(
          "add_dispatch_key_set_guard",
          [](GuardManager& self,
             py::object value,
             py::object verbose_code_parts) -> void {
            SKIP_IF_GUARD_ALREADY_PRESENT("DISPATCH_KEY_SET_MATCH");
            self.add_leaf_guard(std::make_shared<DISPATCH_KEY_SET_MATCH>(
                self.get_root(),
                std::move(value),
                std::move(verbose_code_parts)));
          })
      .def(
          "add_global_state_guard",
          [](GuardManager& self, py::object verbose_code_parts) -> void {
            self.add_leaf_guard(
                std::make_shared<GLOBAL_STATE>(std::move(verbose_code_parts)));
          })
      .def(
          "add_torch_function_mode_stack_guard",
          [](GuardManager& self,
             const py::list& initial_stack,
             py::object verbose_code_parts) -> void {
            self.add_leaf_guard(std::make_shared<TORCH_FUNCTION_MODE_STACK>(
                initial_stack, std::move(verbose_code_parts)));
          })
      .def(
          "add_no_hasattr_guard",
          [](GuardManager& self,
             py::object attr_name,
             py::object verbose_code_parts) -> void {
            self.add_leaf_guard(std::make_shared<NO_HASATTR>(
                std::move(attr_name), std::move(verbose_code_parts)));
          })
      .def(
          "add_dict_contains_guard",
          [](GuardManager& self,
             bool contains,
             py::object key,
             py::object verbose_code_parts) -> void {
            self.add_leaf_guard(std::make_shared<DICT_CONTAINS>(
                contains, std::move(key), std::move(verbose_code_parts)));
          })
      .def(
          "add_dynamic_indices_guard",
          [](GuardManager& self,
             py::set value,
             py::object verbose_code_parts) -> void {
            self.add_leaf_guard(std::make_shared<DYNAMIC_INDICES>(
                std::move(value), std::move(verbose_code_parts)));
          })
      .def(
          "add_dict_version_guard",
          [](GuardManager& self,
             py::object value,
             py::object verbose_code_parts) -> void {
            SKIP_IF_GUARD_ALREADY_PRESENT("DICT_VERSION");
            self.add_leaf_guard(std::make_shared<DICT_VERSION>(
                std::move(value), std::move(verbose_code_parts)));
          })
      .def(
          "add_tensor_match_guard",
          [](GuardManager& self,
             py::object value,
             py::object sizes,
             py::object strides,
             py::object tensor_name,
             py::object verbose_code_parts,
             py::object pytype,
             py::object dispatch_keys) -> void {
            SKIP_IF_GUARD_ALREADY_PRESENT("TENSOR_MATCH");
            self.add_leaf_guard(std::make_shared<TENSOR_MATCH>(
                self.get_root(),
                std::move(value),
                std::move(sizes),
                std::move(strides),
                std::move(tensor_name),
                std::move(verbose_code_parts),
                std::move(pytype),
                std::move(dispatch_keys)));
          })

      // return by reference because GuardManager has the ownership of accessors
      // and guard managers
      .def(
          "getitem_manager",
          &GuardManager::get_child_manager<GetItemGuardAccessor>,
          py::arg("key"),
          py::arg("source"),
          py::arg("example_value"),
          py::arg("guard_manager_enum"),
          py::return_value_policy::reference)
      // return by reference because GuardManager has the ownership of accessors
      // and guard managers
      .def(
          "framelocals_manager",
          &GuardManager::get_child_manager<FrameLocalsGuardAccessor>,
          py::arg("key"),
          py::arg("source"),
          py::arg("example_value"),
          py::arg("guard_manager_enum"),
          py::return_value_policy::reference)
      // return by reference because GuardManager has the ownership of accessors
      // and guard managers
      .def(
          "dict_getitem_manager",
          &GuardManager::get_child_manager<DictGetItemGuardAccessor>,
          py::arg("key"),
          py::arg("source"),
          py::arg("example_value"),
          py::arg("guard_manager_enum"),
          py::return_value_policy::reference)
      // return by reference because GuardManager has the ownership of accessors
      // and guard managers
      .def(
          "list_getitem_manager",
          &GuardManager::get_child_manager<ListGetItemGuardAccessor>,
          py::arg("key"),
          py::arg("source"),
          py::arg("example_value"),
          py::arg("guard_manager_enum"),
          py::return_value_policy::reference)
      // return by reference because GuardManager has the ownership of accessors
      // and guard managers
      .def(
          "indexed_manager",
          &GuardManager::get_child_manager<IndexedGuardAccessor>,
          py::arg("idx"),
          py::arg("source"),
          py::arg("example_value"),
          py::arg("guard_manager_enum"),
          py::return_value_policy::reference)
      // return by reference because GuardManager has the ownership of accessors
      // and guard managers
      .def(
          "tensor_property_size_manager",
          &GuardManager::get_child_manager<
              TensorPropertyGuardAccessor<TensorProperty::SIZE>>,
          py::arg("idx"),
          py::arg("source"),
          py::arg("example_value"),
          py::arg("guard_manager_enum"),
          py::return_value_policy::reference)
      // return by reference because GuardManager has the ownership of accessors
      // and guard managers
      .def(
          "tensor_property_stride_manager",
          &GuardManager::get_child_manager<
              TensorPropertyGuardAccessor<TensorProperty::STRIDE>>,
          py::arg("idx"),
          py::arg("source"),
          py::arg("example_value"),
          py::arg("guard_manager_enum"),
          py::return_value_policy::reference)
      // return by reference because GuardManager has the ownership of accessors
      // and guard managers
      .def(
          "tensor_property_storage_offset_manager",
          &GuardManager::get_child_manager<
              TensorPropertyGuardAccessor<TensorProperty::STORAGE_OFFSET>>,
          py::arg("idx"),
          py::arg("source"),
          py::arg("example_value"),
          py::arg("guard_manager_enum"),
          py::return_value_policy::reference)
      // return by reference because GuardManager has the ownership of accessors
      // and guard managers
      .def(
          "tuple_getitem_manager",
          &GuardManager::get_child_manager<TupleGetItemGuardAccessor>,
          py::arg("key"),
          py::arg("source"),
          py::arg("example_value"),
          py::arg("guard_manager_enum"),
          py::return_value_policy::reference)
      // return by reference because GuardManager has the ownership of accessors
      // and guard managers
      .def(
          "func_defaults_manager",
          [](GuardManager& self,
             std::string source,
             py::object example_value,
             py::handle guard_manager_enum) -> GuardManager* {
            // A unique key is used as the accessor key.
            py::str unique_key("__defaults_accessor__");
            return self.get_child_manager<FuncDefaultsGuardAccessor>(
                std::move(unique_key),
                std::move(source),
                std::move(example_value),
                guard_manager_enum);
          },
          py::arg("source"),
          py::arg("example_value"),
          py::arg("guard_manager_enum"),
          py::return_value_policy::reference)

      // return by reference because GuardManager has the ownership of accessors
      // and guard managers
      .def(
          "func_kwdefaults_manager",
          [](GuardManager& self,
             std::string source,
             py::object example_value,
             py::handle guard_manager_enum) -> GuardManager* {
            // A unique key is used as the accessor key.
            py::str unique_key("__kwdefaults_accessor__");
            return self.get_child_manager<FuncKwDefaultsGuardAccessor>(
                std::move(unique_key),
                std::move(source),
                std::move(example_value),
                guard_manager_enum);
          },
          py::arg("source"),
          py::arg("example_value"),
          py::arg("guard_manager_enum"),
          py::return_value_policy::reference)
      // return by reference because GuardManager has the ownership of accessors
      // and guard managers
      .def(
          "globals_dict_manager",
          &GuardManager::get_child_manager<GlobalsGuardAccessor>,
          py::arg("f_globals"),
          py::arg("source"),
          py::arg("example_value"),
          py::arg("guard_manager_enum"),
          py::return_value_policy::reference)
      // return by reference because GuardManager has the ownership of accessors
      // and guard managers
      .def(
          "type_manager",
          [](GuardManager& self,
             std::string source,
             py::handle example_value,
             py::handle guard_manager_enum) -> GuardManager* {
            // A unique key is used as the accessor key.
            py::str unique_key("__type_accessor__");
            return self.get_child_manager<TypeGuardAccessor>(
                std::move(unique_key),
                std::move(source),
                example_value,
                guard_manager_enum);
          },
          py::arg("source"),
          py::arg("example_value"),
          py::arg("guard_manager_enum"),
          py::return_value_policy::reference)
      // return by reference because GuardManager has the ownership of accessors
      // and guard managers
      .def(
          "weakref_call_manager",
          [](GuardManager& self,
             std::string source,
             py::handle example_value,
             py::handle guard_manager_enum) -> GuardManager* {
            // A unique key is used as the accessor key.
            py::str unique_key("__weakref_call_accessor__");
            return self.get_child_manager<WeakRefCallGuardAccessor>(
                std::move(unique_key),
                std::move(source),
                example_value,
                guard_manager_enum);
          },
          py::arg("source"),
          py::arg("example_value"),
          py::arg("guard_manager_enum"),
          py::return_value_policy::reference)
      // return by reference because GuardManager has the ownership of accessors
      // and guard managers
      .def(
          "call_function_no_args_manager",
          [](GuardManager& self,
             std::string source,
             py::handle example_value,
             py::handle guard_manager_enum) -> GuardManager* {
            // A unique key is used as the accessor key.
            py::str unique_key("__call_function_no_args_accessor__");
            return self.get_child_manager<CallFunctionNoArgsGuardAccessor>(
                std::move(unique_key),
                std::move(source),
                example_value,
                guard_manager_enum);
          },
          py::arg("source"),
          py::arg("example_value"),
          py::arg("guard_manager_enum"),
          py::return_value_policy::reference)
      // return by reference because GuardManager has the ownership of accessors
      // and guard managers
      .def(
          "tuple_iterator_getitem_manager",
          &GuardManager::get_child_manager<TupleIteratorGetItemAccessor>,
          py::arg("index"),
          py::arg("source"),
          py::arg("example_value"),
          py::arg("guard_manager_enum"),
          py::return_value_policy::reference)
      // return by reference because GuardManager has the ownership of accessors
      // and guard managers
      .def(
          "global_weakref_manager",
          &GuardManager::get_child_manager<GlobalWeakRefGuardAccessor>,
          py::arg("global_name"),
          py::arg("source"),
          py::arg("example_value"),
          py::arg("guard_manager_enum"),
          py::return_value_policy::reference)
      // return by reference because GuardManager has the ownership of accessors
      // and guard managers
      .def(
          "lambda_manager",
          &GuardManager::get_child_manager<PythonLambdaGuardAccessor>,
          py::arg("python_lambda"),
          py::arg("source"),
          py::arg("example_value"),
          py::arg("guard_manager_enum"),
          py::return_value_policy::reference)
      // return by reference because GuardManager has the ownership of accessors
      // and guard managers
      .def(
          "grad_manager",
          [](GuardManager& self,
             std::string source,
             py::handle example_value,
             py::handle guard_manager_enum) -> GuardManager* {
            // A unique key is used as the accessor key.
            py::str unique_key("__grad_accessor__");
            return self.get_child_manager<GradGuardAccessor>(
                std::move(unique_key),
                std::move(source),
                example_value,
                guard_manager_enum);
          },
          py::arg("source"),
          py::arg("example_value"),
          py::arg("guard_manager_enum"),
          py::return_value_policy::reference)
      // return by reference because GuardManager has the ownership of accessors
      // and guard managers
      .def(
          "get_generic_dict_manager",
          [](GuardManager& self,
             std::string source,
             py::handle example_value,
             py::handle guard_manager_enum) -> GuardManager* {
            // A unique key is used as the accessor key.
            py::str unique_key("__generic_dict_accessor__");
            return self.get_child_manager<GetGenericDictGuardAccessor>(
                std::move(unique_key),
                std::move(source),
                example_value,
                guard_manager_enum);
          },
          py::arg("source"),
          py::arg("example_value"),
          py::arg("guard_manager_enum"),
          py::return_value_policy::reference)
      // return by reference because C++ GuardManager has the ownership of
      // accessors and guard managers
      .def(
          "generic_getattr_manager",
          &GuardManager::get_child_manager<GenericGetAttrGuardAccessor>,
          py::arg("attr"),
          py::arg("source"),
          py::arg("example_value"),
          py::arg("guard_manager_enum"),
          py::return_value_policy::reference)
      // return by reference because C++ GuardManager has the ownership of
      // accessors and guard managers
      .def(
          "getattr_manager",
          &GuardManager::get_child_manager<GetAttrGuardAccessor>,
          py::arg("attr"),
          py::arg("source"),
          py::arg("example_value"),
          py::arg("guard_manager_enum"),
          py::return_value_policy::reference);

  // Root Guard Manager
  py::class_<RootGuardManager, GuardManager, std::unique_ptr<RootGuardManager>>(
      py_m, "RootGuardManager")
      .def(py::init<>())
      .def("check", &RootGuardManager::check)
      .def("check_verbose", &RootGuardManager::check_verbose)
      .def(
          "clone_manager",
          &RootGuardManager::clone_manager,
          py::return_value_policy::reference)
      // return by reference because GuardManager has the ownership of leaf
      // guards
      .def(
          "get_epilogue_lambda_guards",
          &RootGuardManager::get_epilogue_lambda_guards,
          py::return_value_policy::reference)
      .def(
          "add_epilogue_lambda_guard",
          [](RootGuardManager& self,
             py::object lambda,
             py::object verbose_code_parts) -> void {
            self.add_epilogue_lambda_guard(std::make_unique<LAMBDA_GUARD>(
                std::move(lambda), std::move(verbose_code_parts)));
          });

  // Dict Guard Manager
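  // DictGuardManager guards a dict through per-index key and value child
  // managers. Accessors and leaf guards that would bypass that indexing are
  // therefore rejected below (fail_on_get_child_manager) or skipped as
  // redundant (skip_adding_guard).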
  py::class_<DictGuardManager, GuardManager, std::unique_ptr<DictGuardManager>>(
      py_m, "DictGuardManager")
      // return by reference because GuardManager has the ownership of accessors
      // and guard managers
      .def(
          "get_key_manager",
          [](DictGuardManager& self,
             py::object index,
             std::string source,
             py::handle example_value,
             py::handle guard_manager_enum) -> GuardManager* {
            return self.get_key_manager(
                std::move(index),
                std::move(source),
                example_value,
                guard_manager_enum);
          },
          py::arg("index"),
          py::arg("source"),
          py::arg("example_value"),
          py::arg("guard_manager_enum"),
          py::return_value_policy::reference)
      // return by reference because GuardManager has the ownership of accessors
      // and guard managers
      .def(
          "get_value_manager",
          [](DictGuardManager& self,
             py::object index,
             std::string source,
             py::handle example_value,
             py::handle guard_manager_enum) -> GuardManager* {
            return self.get_value_manager(
                std::move(index),
                std::move(source),
                example_value,
                guard_manager_enum);
          },
          py::arg("index"),
          py::arg("source"),
          py::arg("example_value"),
          py::arg("guard_manager_enum"),
          py::return_value_policy::reference)
      // return by reference because GuardManager has the ownership of leaf
      // guards
      .def(
          "get_key_value_managers",
          &DictGuardManager::get_key_value_managers,
          py::return_value_policy::reference)
      // Skipped leaf guards
      .def("add_type_match_guard", &DictGuardManager::skip_adding_guard)
      .def("add_dict_length_check_guard", &DictGuardManager::skip_adding_guard)
      // Permitted leaf guards
      .def(
          "add_dict_contains_guard",
          [](DictGuardManager& self,
             bool contains,
             py::object key,
             py::object verbose_code_parts) -> void {
            self.add_permitted_leaf_guard(std::make_shared<DICT_CONTAINS>(
                contains, std::move(key), std::move(verbose_code_parts)));
          })
      .def(
          "add_dict_version_guard",
          [](DictGuardManager& self,
             py::object value,
             py::object verbose_code_parts) -> void {
            SKIP_IF_GUARD_ALREADY_PRESENT("DICT_VERSION");
            self.add_permitted_leaf_guard(std::make_shared<DICT_VERSION>(
                std::move(value), std::move(verbose_code_parts)));
          })
      .def(
          "add_no_hasattr_guard",
          [](DictGuardManager& self,
             py::object attr_name,
             py::object verbose_code_parts) -> void {
            self.add_permitted_leaf_guard(std::make_shared<NO_HASATTR>(
                std::move(attr_name), std::move(verbose_code_parts)));
          })
      // Not permitted accessors
      .def("lambda_manager", &DictGuardManager::fail_on_get_child_manager)
      .def("getitem_manager", &DictGuardManager::fail_on_get_child_manager)
      .def("dict_getitem_manager", &DictGuardManager::fail_on_get_child_manager)
      .def("globals_dict_manager", &DictGuardManager::fail_on_get_child_manager)
      .def(
          "tuple_iterator_getitem_manager",
          &DictGuardManager::fail_on_get_child_manager)
      .def(
          "global_weakref_manager",
          &DictGuardManager::fail_on_get_child_manager)
      // Permitted accessors (and also type_manager)
      // return by reference because GuardManager has the ownership of accessors
      // and guard managers
      .def(
          "getattr_manager",
          [](DictGuardManager& self,
             py::object attr_name,
             std::string source,
             py::handle example_value,
             py::handle guard_manager_enum) -> GuardManager* {
            if (self.is_exact_dict_type()) {
              throw std::runtime_error(
                  "getattr_manager on a DictGuardManager is supported only for dict subclasses");
            }
            return self.get_child_manager<GetAttrGuardAccessor>(
                std::move(attr_name),
                std::move(source),
                example_value,
                guard_manager_enum);
          },
          py::arg("attr"),
          py::arg("source"),
          py::arg("example_value"),
          py::arg("guard_manager_enum"),
          py::return_value_policy::reference);

  py_m.def("install_object_aliasing_guard", install_object_aliasing_guard);
  py_m.def(
      "install_no_tensor_aliasing_guard", install_no_tensor_aliasing_guard);
  py_m.def(
      "install_storage_overlapping_guard", install_storage_overlapping_guard);
  py_m.def(
      "compute_overlapping_tensors",
      [](const std::vector<Tensor>& tensors, bool symbolic) {
        // Pick the correct Meta class, depending on whether we are
        // dealing with symbolic values or not.
        if (symbolic) {
          return compute_overlapping_tensors<DynamicMeta>(tensors);
        } else {
          return compute_overlapping_tensors<StaticMeta>(tensors);
        }
      },
      py::arg("tensors"),
      py::arg("symbolic") = true);
  py_m.def("install_symbolic_shape_guard", install_symbolic_shape_guard);
  py_m.def("profile_guard_manager", profile_guard_manager);
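
  // NB: PyDict_AddWatcher is only available from CPython 3.12. The watcher
  // callback (defined earlier in this file) maintains the dict-version
  // bookkeeping consumed by guards such as DICT_VERSION.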
  // initialize dict_version_map watcher for 3.12
#if IS_PYTHON_3_12_PLUS

  dict_version_watcher_id = PyDict_AddWatcher(dict_version_watch_callback);
  if (dict_version_watcher_id == -1) {
    throw std::runtime_error("Failed to install dict_version_watch_callback");
  }

#endif

  return m;
}

} // namespace torch::dynamo