Files
pytorch/torch/csrc/python_dimname.cpp
Richard Zou caed485873 Turn on BUILD_NAMEDTENSOR permanently (#26060)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/26060

This PR enables BUILD_NAMEDTENSOR by default. This is done via including
a header, `c10/core/EnableNamedTensor`, that sets `BUILD_NAMEDTENSOR`.
In the future, the plan is to get rid of the flag entirely: we can
incrementally delete usages after this PR goes in.

This PR also maintains the namedtensor ci vs regular ci distinction.
`test/test_namedtensor.py` only runs if TEST_NAMEDTENSOR=1 is specified.
TEST_NAMEDTENSOR=1 is set on the namedtensor ci. I'll remove this
distinction later and send out an announcement about it; devs will be
responsible for named tensor failures after that.

The initial reason why we had the BUILD_NAMEDTENSOR flag was so that we
could quickly prototype named tensor features without worrying about
adding overhead to the framework. The overheads can be categorized as
memory overhead and performance overhead.

Memory overhead: named tensors add 1 additional word per Tensor. This
is because TensorImpl stores a `unique_ptr<NamedTensorMetaInterface>`
field. This is not a lot of overhead.

Performance overhead: At all entry points to name inference, we check
if inputs to an op are named. If inputs are not named, we short-circuit
and don't do name inference. These calls should therefore be as
efficient as error-checking code and not take up a lot of time.

My plan is to benchmark a few functions and then post the results in a
comment to this PR.

Test Plan: - [namedtensor ci]

Differential Revision: D17331635

Pulled By: zou3519

fbshipit-source-id: deed901347448ae2c26066c1fa432e3dc0cadb92
2019-09-17 08:25:00 -07:00

104 lines
3.3 KiB
C++

#include <torch/csrc/python_dimname.h>
#include <torch/csrc/Exceptions.h>
#include <torch/csrc/utils/python_strings.h>
#include <c10/util/flat_hash_map.h>
#include <ATen/core/EnableNamedTensor.h>
#ifdef BUILD_NAMEDTENSOR
namespace torch {
// Maps Python string objects (keyed by pointer) to the at::Dimname that was
// registered for them. Instances can be neither copied nor moved.
struct InternedStringsTable {
  InternedStringsTable() = default;
  ~InternedStringsTable();

  // Copying and moving are disabled.
  InternedStringsTable(const InternedStringsTable&) = delete;
  InternedStringsTable(InternedStringsTable&&) = delete;
  InternedStringsTable& operator=(const InternedStringsTable&) = delete;
  InternedStringsTable& operator=(InternedStringsTable&&) = delete;

  // Returns the Dimname registered for `obj`, or at::nullopt when `obj` has
  // no entry in the table.
  at::optional<at::Dimname> lookup(PyObject* obj);

  // Registers `dimname` for `obj`.
  // Precondition: obj is an interned python string.
  void addMapping(PyObject* obj, at::Dimname dimname);

 private:
  // Keyed on the object's address, not on the string contents.
  ska::flat_hash_map<PyObject*, at::Dimname> py_interned_string_to_dimname_;
};
// The table instance consulted and populated by THPDimname_parse. It is a
// namespace-scope object, so its destructor runs during static destruction.
InternedStringsTable kPyInternedStringToDimname;
// Drops the reference held on every key stored in the table.
//
// The table is instantiated as a global, so this destructor can run during
// static destruction: possibly after the Python interpreter has been
// finalized, and possibly on a thread that does not hold the GIL. Touching
// refcounts in either situation is undefined behavior, so:
//   - if Python is already dead, the wrapped objects are deliberately leaked;
//   - otherwise the GIL is acquired for the duration of the decrefs.
InternedStringsTable::~InternedStringsTable() {
  if (!Py_IsInitialized()) {
    // The interpreter is gone; there is nothing that can safely be released.
    return;
  }
  // PyGILState_Ensure is safe to call whether or not this thread already
  // holds the GIL; it must be paired with PyGILState_Release.
  PyGILState_STATE gil_state = PyGILState_Ensure();
  for (const auto& entry : py_interned_string_to_dimname_) {
    // See Note [References to python interned strings]
    Py_DECREF(entry.first);
  }
  PyGILState_Release(gil_state);
}
// Looks `obj` up by pointer. Yields the stored Dimname when an entry exists
// and at::nullopt otherwise.
at::optional<at::Dimname> InternedStringsTable::lookup(PyObject* obj) {
  auto entry = py_interned_string_to_dimname_.find(obj);
  const bool found = entry != py_interned_string_to_dimname_.end();
  if (found) {
    return entry->second;
  }
  return at::nullopt;
}
// Registers `dimname` for `obj`. If `obj` already has an entry, the existing
// entry is kept and no additional reference is taken.
void InternedStringsTable::addMapping(PyObject* obj, at::Dimname dimname) {
  // Note [References to python interned strings]
  // If a Python interned string has no references to it, then it gets
  // deallocated, invalidating this mapping. Let's immortalize the string by
  // holding a refcount to it and releasing it in the destructor
  //
  // The destructor releases exactly one reference per stored key, so a
  // reference is taken only when a new entry was actually inserted. Doing the
  // insertion first also means a throwing emplace cannot strand a reference.
  const bool inserted =
      py_interned_string_to_dimname_.emplace(obj, dimname).second;
  if (inserted) {
    Py_INCREF(obj);
  }
}
} // namespace torch
// True when `obj` is acceptable as a single Dimname: either None or a
// Python string (as decided by THPUtils_checkString).
bool THPUtils_checkDimname(PyObject* obj) {
  if (obj == Py_None) {
    return true;
  }
  return THPUtils_checkString(obj);
}
// To avoid ambiguity with IntArrayRef, obj is treated as a DimnameList only
// when it is a tuple or a list and its first element is a Dimname. An empty
// tuple or list is accepted; only the first element is ever inspected.
bool THPUtils_checkDimnameList(PyObject* obj) {
  if (PyTuple_Check(obj)) {
    if (PyTuple_GET_SIZE(obj) == 0) {
      return true;
    }
    return THPUtils_checkDimname(PyTuple_GET_ITEM(obj, 0));
  }
  if (PyList_Check(obj)) {
    if (PyList_GET_SIZE(obj) == 0) {
      return true;
    }
    return THPUtils_checkDimname(PyList_GET_ITEM(obj, 0));
  }
  // Neither a tuple nor a list.
  return false;
}
// Converts a Python object into an at::Dimname.
//   - None yields the wildcard Dimname.
//   - A string yields the Dimname for that name; the result is cached in
//     torch::kPyInternedStringToDimname keyed on the (interned) string object.
//   - Anything else raises torch::TypeError.
at::Dimname THPDimname_parse(PyObject* obj) {
  if (obj == Py_None) {
    return at::Dimname::wildcard();
  }

  const bool is_string = THPUtils_checkString(obj);
  if (!is_string) {
    throw torch::TypeError("expected None or string for Dimname but got %s", Py_TYPE(obj)->tp_name);
  }

  if (!THPUtils_isInterned(obj)) {
    // internStringInPlace decrefs obj and increfs the result. Because we're
    // not actually returning the result to the user, we need to undo these.
    // The order of the three statements below matters: obj may point at a
    // different object after the interning call.
    // See https://docs.python.org/3/c-api/unicode.html#c.PyUnicode_InternInPlace
    Py_INCREF(obj);
    THPUtils_internStringInPlace(&obj);
    Py_DECREF(obj);
  }

  auto& table = torch::kPyInternedStringToDimname;

  // Fast path: this string object has been seen before.
  if (auto cached = table.lookup(obj)) {
    return *cached;
  }

  // Slow path: build the Dimname from the string contents and remember it.
  const auto fresh =
      at::Dimname::fromSymbol(at::Symbol::dimname(THPUtils_unpackString(obj)));
  table.addMapping(obj, fresh);
  return fresh;
}
#endif