Mirror of https://github.com/pytorch/pytorch.git (synced 2025-10-20 21:14:14 +08:00)
Summary: As part of the Variable/Tensor merge work (https://github.com/pytorch/pytorch/issues/13638), this PR makes the following changes:

1. Remove the `Variable::Impl` class and the `DifferentiableViewImpl` class.
2. Change all `Variable.data()` call sites to either use `Variable` directly, or use `Variable.tensor_data()`.
3. Remove the `Variable.data()` API.
4. Add `Variable.variable_data()`, which matches `tensor.data` in the Python API: it creates a new `Variable` that shares the same storage and tensor metadata as the original `Variable`, but with a completely new autograd history.

After this PR, Variable no longer wraps a Tensor internally, and both Variable and Tensor use the same TensorImpl class as their `impl_`. The only difference is that a Variable's TensorImpl always has AutogradMeta, while a Tensor's does not.

**Note that this PR is BC-breaking in the following use cases:**

**Use Case 1:** Previously, `x.data = y` worked even if `x` and `y` were of different TensorImpl types (e.g. `x` is a CPU dense tensor whose impl is of type TensorImpl, while `y` is a CPU sparse tensor whose impl is of type SparseTensorImpl). After this PR, `x.data = y` no longer works if `x` and `y` are of different TensorImpl types, because the underlying implementation `variable.set_data(tensor)` no longer works if `variable` and `tensor` have different TensorImpl types. (An illustrative sketch follows the PR description below.)

**Use Case 2:** If a tensor `x`'s `grad` is sparse, accumulating dense gradients into `x` will change the tensor that `x.grad` points to. This is best illustrated with the following example:

```python
params = torch.tensor([1.5, 1.5]).requires_grad_()
with torch.no_grad():
    # Change gradient to a sparse tensor
    params.grad = torch.sparse_coo_tensor(torch.tensor([[1, 1]]).long(), torch.tensor([1., 1.]))

grad_saved = params.grad
params.backward(torch.tensor([1.5, 1.5]))
assert id(grad_saved) == id(params.grad)  # This will fail after this PR
```

The assertion in the last line will fail after this PR, because adding dense gradients to sparse gradients changes the tensor that `params.grad` references.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/17072
Differential Revision: D14075257
Pulled By: yf225
fbshipit-source-id: 0e681df641270dea586042dd26db59f2e76b5957
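As an illustrative sketch of Use Case 1 (not part of the PR; the shapes and the `to_sparse()` call are just one convenient way to obtain tensors backed by different TensorImpl types):

```python
import torch

x = torch.randn(2, 3)               # dense CPU tensor, backed by TensorImpl
y = torch.randn(2, 3).to_sparse()   # sparse CPU tensor, backed by SparseTensorImpl

# Before this PR this assignment succeeded; after it, swapping in a tensor with
# a different TensorImpl type via `.data` is expected to raise an error.
x.data = y
```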
199 lines
5.8 KiB
C++
#include <torch/csrc/autograd/python_hook.h>

#include <sstream>

#include <torch/csrc/THP.h>
#include <torch/csrc/autograd/python_variable.h>
#include <torch/csrc/utils/auto_gil.h>
#include <torch/csrc/utils/object_ptr.h>
#include <torch/csrc/utils/python_strings.h>
#include <torch/csrc/Exceptions.h>

using torch::autograd::variable_list;
using torch::autograd::Variable;

static PyObject* wrap_variables(const variable_list& c_variables);
static variable_list unwrap_variables(PyObject* py_variables);
static std::string hook_name(PyObject* hook);
static void check_result(PyObject* original, PyObject* result, PyObject* hook);
static void check_single_result(PyObject* original, PyObject* result, PyObject* hook);


namespace torch { namespace autograd {

PyFunctionPreHook::PyFunctionPreHook(PyObject* dict, int value_idx)
  : dict(dict)
  , value_idx(value_idx)
{
  Py_INCREF(dict);
}

PyFunctionPreHook::~PyFunctionPreHook() {
  AutoGIL gil;
  Py_DECREF(dict);
}

auto PyFunctionPreHook::operator()(const variable_list& values) -> variable_list
{
  AutoGIL gil;

  THPObjectPtr value(THPVariable_Wrap(values.at(value_idx)));
  if (!value) throw python_error();

  PyObject *key, *hook;
  Py_ssize_t pos = 0;
  while (PyDict_Next(dict, &pos, &key, &hook)) {
    THPObjectPtr res(PyObject_CallFunctionObjArgs(hook, value.get(), nullptr));
    if (!res) throw python_error();
    if (res == Py_None) continue;
    check_single_result(value.get(), res.get(), hook);
    value = std::move(res);
  }

  variable_list results(values);
  if (value != Py_None) results[value_idx] = ((THPVariable*)value.get())->cdata;
  return results;
}

PyFunctionPostHook::PyFunctionPostHook(PyObject* dict) : dict(dict) {
  Py_INCREF(dict);
}

PyFunctionPostHook::~PyFunctionPostHook() {
  AutoGIL gil;
  Py_DECREF(dict);
}

auto PyFunctionPostHook::operator()(
    const variable_list& _outputs, /* grad_inputs */
    const variable_list& _inputs /* grad_outputs */) -> variable_list
{
  AutoGIL gil;

  THPObjectPtr outputs(wrap_variables(_outputs));
  THPObjectPtr inputs(wrap_variables(_inputs));

  PyObject *key, *hook;
  Py_ssize_t pos = 0;
  while (PyDict_Next(dict, &pos, &key, &hook)) {
    THPObjectPtr res(PyObject_CallFunctionObjArgs(
        hook, outputs.get(), inputs.get(), nullptr));
    if (!res) throw python_error();
    if (res == Py_None) continue;
    check_result(outputs, res, hook);
    outputs = std::move(res);
  }

  return unwrap_variables(outputs.get());
}

}} // namespace torch::autograd


static PyObject *wrap_variables(const variable_list& c_variables)
{
  size_t num_vars = c_variables.size();
  THPObjectPtr tuple(PyTuple_New(num_vars));
  if (!tuple) throw python_error();
  for (size_t i = 0; i < num_vars; ++i) {
    THPObjectPtr var(THPVariable_Wrap(c_variables[i]));
    if (!var) throw python_error();
    PyTuple_SET_ITEM(tuple.get(), i, var.release());
  }
  return tuple.release();
}

static variable_list unwrap_variables(PyObject* py_variables) {
  variable_list results(PyTuple_GET_SIZE(py_variables));
  for (size_t i = 0; i < results.size(); i++) {
    PyObject* item = PyTuple_GET_ITEM(py_variables, i);
    if (item == Py_None) {
      continue;
    } else if (THPVariable_Check(item)) {
      results[i] = ((THPVariable*)item)->cdata;
    } else {
      // this should never happen, but just in case...
      std::stringstream ss;
      ss << "expected variable but got " << Py_TYPE(item)->tp_name;
      throw std::runtime_error(ss.str());
    }
  }
  return results;
}

static void check_result(PyObject* prev, PyObject* result, PyObject* hook) {
  if (!PyTuple_Check(result)) {
    PyErr_Format(PyExc_TypeError, "expected tuple, but hook returned '%s'",
        THPUtils_typename(result));
    throw python_error();
  }

  auto prev_size = PyTuple_GET_SIZE(prev);
  auto result_size = PyTuple_GET_SIZE(result);
  if (prev_size != result_size) {
    std::stringstream ss;
    auto name = hook_name(hook);
    ss << "hook '" << name << "' has returned an incorrect number ";
    ss << "of values (got " << result_size << ", but expected ";
    ss << prev_size << ")";
    throw std::runtime_error(ss.str());
  }

  for (auto i = 0; i < prev_size; i++) {
    check_single_result(PyTuple_GET_ITEM(prev, i), PyTuple_GET_ITEM(result, i), hook);
  }
}

static void check_single_result(PyObject* _original, PyObject* _result, PyObject* hook) {
  if (_result == Py_None) return;

  if (_original == Py_None) {
    throw std::runtime_error("can't replace a None gradient with a non-None value");
  }

  if (!PyObject_IsInstance(_result, THPVariableClass)) {
    PyErr_Format(PyExc_TypeError, "expected Variable, but hook returned '%s'",
        THPUtils_typename(_result));
    throw python_error();
  }

  auto& original = ((THPVariable*)_original)->cdata;
  auto& result = ((THPVariable*)_result)->cdata;

  if (original.type() != result.type()) {
    std::stringstream ss;
    auto name = hook_name(hook);
    ss << "hook '" << name << "' has changed the type of value (";
    ss << "was " << original.toString() << " got ";
    ss << result.toString() << ")";
    throw std::runtime_error(ss.str());
  }

  if (original.is_cuda() != result.is_cuda()) {
    std::stringstream ss;
    auto name = hook_name(hook);
    ss << "hook '" << name << "' has changed the type of value";
    if (original.is_cuda()) {
      ss << " (was CUDA tensor got CPU tensor)";
    } else {
      ss << " (was CPU tensor got CUDA tensor)";
    }
    throw std::runtime_error(ss.str());
  }

  if (original.sizes().vec() != result.sizes().vec()) {
    std::stringstream ss;
    auto name = hook_name(hook);
    ss << "hook '" << name << "' has changed the size of value";
    throw std::runtime_error(ss.str());
  }
}

static std::string hook_name(PyObject* hook) {
  THPObjectPtr name(PyObject_GetAttrString(hook, "__name__"));
  if (name && THPUtils_checkString(name.get())) {
    return THPUtils_unpackString(name.get());
  }
  return "<unknown>";
}
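For context, the behavior these hook classes implement, and that `check_single_result` enforces (a hook may return `None` to leave a gradient untouched, or a new Variable of the same type, device, and size to replace it), is visible from the Python side. A minimal sketch using the public `register_hook` API, not part of this file:

```python
import torch

x = torch.ones(3, requires_grad=True)

# Returning a new tensor replaces the gradient flowing into `x`;
# returning None would leave it unchanged.
x.register_hook(lambda grad: grad * 2)

x.sum().backward()
print(x.grad)  # tensor([2., 2., 2.])
```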