Mirror of https://github.com/pytorch/pytorch.git, synced 2025-10-20 21:14:14 +08:00
Summary: As part of the Variable/Tensor merge work (https://github.com/pytorch/pytorch/issues/13638), this PR makes the following changes:

1. Remove the `Variable::Impl` class and the `DifferentiableViewImpl` class.
2. Change all `Variable.data()` call sites to either use `Variable` directly, or use `Variable.tensor_data()`.
3. Remove the `Variable.data()` API.
4. Add `Variable.variable_data()`, which matches `tensor.data` in the Python API: it creates a new `Variable` that shares the same storage and tensor metadata with the original `Variable`, but with a completely new autograd history.

After this PR, Variable no longer wraps a Tensor internally, and both Variable and Tensor use the same TensorImpl class as their `impl_`. The only difference is that Variable always has AutogradMeta in its TensorImpl, but Tensor doesn't.

**Note that this PR is BC-breaking in the following use cases:**

**Use Case 1:** Previously, `x.data = y` worked even if `x` and `y` were of different TensorImpl types (e.g. `x` is a CPU dense tensor whose impl is of type TensorImpl, while `y` is a CPU sparse tensor whose impl is of type SparseTensorImpl). After this PR, `x.data = y` no longer works if `x` and `y` are of different TensorImpl types, because the underlying implementation `variable.set_data(tensor)` no longer works if `variable` and `tensor` have different TensorImpl types (see the sketch after this summary).

**Use Case 2:** If a tensor `x`'s `grad` is sparse, accumulating dense gradients to `x` will change the tensor that `x.grad` points to. This is better illustrated with the following example:

```python
params = torch.tensor([1.5, 1.5]).requires_grad_()
with torch.no_grad():
    # Change gradient to a sparse tensor
    params.grad = torch.sparse_coo_tensor(torch.tensor([[1, 1]]).long(), torch.tensor([1., 1.]))

grad_saved = params.grad
params.backward(torch.tensor([1.5, 1.5]))
assert id(grad_saved) == id(params.grad)  # This will fail after this PR
```

The assertion in the last line will fail after this PR, because adding dense gradients to sparse gradients changes the `params.grad` tensor reference.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/17072
Differential Revision: D14075257
Pulled By: yf225
fbshipit-source-id: 0e681df641270dea586042dd26db59f2e76b5957
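To make Use Case 1 concrete, here is a minimal sketch (not from the PR itself) of the kind of assignment that is affected; the dense/sparse pairing and the exact failure mode are illustrative assumptions:

```python
import torch

x = torch.randn(3)              # dense CPU tensor, impl type: TensorImpl
y = torch.randn(3).to_sparse()  # sparse CPU tensor, impl type: SparseTensorImpl

# Before this PR this assignment succeeded; after it, assigning data across
# different TensorImpl types is expected to raise an error.
x.data = y
```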
208 lines
6.0 KiB
C++
#include <c10/core/TensorImpl.h>

#include <c10/core/Backend.h>
#include <c10/core/WrapDimMinimal.h>
#include <c10/util/Optional.h>

C10_DEFINE_bool(
    caffe2_keep_on_shrink,
    true,
    "If set, keeps memory when a tensor is shrinking its size.");

C10_DEFINE_int64(
    caffe2_max_keep_on_shrink_memory,
    LLONG_MAX,
    "The maximum memory in bytes to keep on shrink, if the difference between "
    "tensor sizes is bigger than this then tensor will be reset.");

namespace c10 {
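// grad() is only meaningful when autograd metadata is present (i.e. on a
// Variable); a plain Tensor without AutogradMeta reports that grad is not
// implemented.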
at::Tensor& TensorImpl::grad() {
  if (autograd_meta()) {
    return autograd_meta()->grad();
  } else {
    AT_ERROR("grad is not implemented for Tensor");
  }
}

const at::Tensor& TensorImpl::grad() const {
  if (autograd_meta()) {
    return autograd_meta()->grad();
  } else {
    AT_ERROR("grad is not implemented for Tensor");
  }
}
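// The first two constructors delegate to the full constructor below.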
TensorImpl::TensorImpl(Storage&& storage, TensorTypeId type_id)
    : TensorImpl(std::move(storage), type_id, storage.dtype(), storage.device()) {}

TensorImpl::TensorImpl(TensorTypeId type_id, const caffe2::TypeMeta& data_type, c10::optional<c10::Device> device_opt)
    : TensorImpl({}, type_id, data_type, std::move(device_opt)) {}

TensorImpl::TensorImpl(Storage&& storage, TensorTypeId type_id, const caffe2::TypeMeta& data_type,
                       c10::optional<c10::Device> device_opt)
    : storage_(std::move(storage)),
      sizes_{0},
      storage_offset_(0),
      numel_(0),
      data_type_(data_type),
      device_opt_(device_opt),
      type_id_(type_id) {
  AT_ASSERT(type_id == UndefinedTensorId() || data_type.id() == caffe2::TypeIdentifier::uninitialized() ||
            device_opt_.has_value());
  // we would also like to check that non-cpu devices have an index, but some Caffe2 operators create
  // Storages with default devices.
  strides_.push_back(1);
}

IntArrayRef TensorImpl::sizes() const {
  return sizes_;
}

IntArrayRef TensorImpl::strides() const {
  return strides_;
}
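// A tensor is contiguous when, walking dimensions from innermost to outermost,
// each stride equals the product of the sizes of all dimensions to its right
// (dimensions of size 1 are skipped). Empty tensors are considered contiguous.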
bool TensorImpl::compute_contiguous() const {
  bool is_contiguous = true;
  if (is_empty())
    return is_contiguous;
  int64_t z = 1;
  for (int64_t d = dim() - 1; d >= 0; d--) {
    if (size(d) != 1) {
      if (stride(d) == z) {
        z *= size(d);
      } else {
        is_contiguous = false;
        break;
      }
    }
  }
  return is_contiguous;
}
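// Frees the resources owned by this TensorImpl (the autograd metadata and the
// storage reference) without destroying the TensorImpl object itself.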
void TensorImpl::release_resources() {
  autograd_meta_.reset();
  if (storage_) {
    storage_ = {};
  }
}

int64_t TensorImpl::dim() const {
  return sizes_.size();
}

int64_t TensorImpl::size(int64_t d) const {
  d = at::maybe_wrap_dim(d, dim(), false);
  return sizes_[d];
}

int64_t TensorImpl::stride(int64_t d) const {
  d = at::maybe_wrap_dim(d, dim(), false);
  return strides_[d];
}
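// Legacy scalar handling: if the condition holds and this is a 1-dimensional,
// single-element tensor, collapse it into a 0-dimensional (scalar) tensor.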
TensorImpl* TensorImpl::maybe_zero_dim(bool condition_when_zero_dim) {
  bool set_zero_dim = condition_when_zero_dim && this->sizes().size() == 1 && this->size(0) == 1;
  if (set_zero_dim) {
    resize_dim(0);
  }
  return this;
}

bool TensorImpl::has_storage() const {
  return storage_;
}
bool TensorImpl::is_contiguous(at::MemoryFormat memory_format) const {
#ifdef DEBUG
  AT_ASSERT(compute_contiguous() == is_contiguous_);
#endif
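  // A 4-d tensor with logical sizes (N, C, H, W) is ChannelsLast-contiguous
  // when its strides are (H*W*C, 1, W*C, C), i.e. the NHWC memory layout.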
  if (memory_format == at::MemoryFormat::ChannelsLast) {
    if (dim() == 4) {
      auto strides_1 = 1;
      auto strides_3 = sizes_[1];
      auto strides_2 = strides_3 * sizes_[3];
      auto strides_0 = strides_2 * sizes_[2];
      if (strides_0 == strides_[0] && strides_1 == strides_[1] &&
          strides_2 == strides_[2] && strides_3 == strides_[3]) {
        return true;
      }
    }
    return false;
  }
  return is_contiguous_;
}
const Storage& TensorImpl::storage() const {
  return storage_;
}

static void deletePlacementDeleteContext(void* ptr) {
  delete static_cast<PlacementDeleteContext*>(ptr);
}
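// Wraps a DataPtr so that the placement destructor runs over the stored
// objects before the underlying allocation is released.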
at::DataPtr PlacementDeleteContext::makeDataPtr(
    at::DataPtr&& data_ptr,
    PlacementDtor placement_dtor,
    size_t size,
    at::Device device) {
  auto* ptr = data_ptr.get();
  return {ptr,
          new PlacementDeleteContext(std::move(data_ptr), placement_dtor, size),
          &deletePlacementDeleteContext,
          device};
}
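// Out-of-line definition of the virtual destructor declared by
// AutogradMetaInterface.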
AutogradMetaInterface::~AutogradMetaInterface() {}

/// NOTE [ Treating Variables as non-Variables in type dispatch ]
///
/// Previously, in VariableType_*.cpp (generated by gen_variable_type.py), when
/// a function is using the 'use_derived' strategy, we call its implementation
/// on the base non-Variable type (`baseType`), passing unwrapped tensors to the
/// call so that any `.dispatch_type()` calls in the implementation can treat the passed
/// tensors as non-Variables and won't dispatch back to functions in VariableType.
///
/// However, after the Variable/Tensor merge, there is no concept of unwrapping
/// a tensor anymore, and directly passing variables to the base type calls will
/// cause the `.dispatch_type()` dispatch in the implementation to treat the tensor as a
/// variable, and any function dispatch based on `.dispatch_type()` will dispatch back to
/// VariableType, which is not what we want.
///
/// The solution to the above problem is to add `at::NonVariableTypeMode`, which
/// when enabled will cause `legacyTensorType()` and `getType()` to always return
/// non-Variable type, even if the tensor being called on is a variable.
///
/// TODO: Since `torch::NoGradGuard` serves the same purpose in libtorch, we should
/// merge these two thread-local guards.

/// In the CAFFE2_FB_LIMITED_MOBILE_CAPABILITY build setting,
/// thread_local is not supported. In that case, we don't provide
/// `at::NonVariableTypeMode`.
#ifndef CAFFE2_FB_LIMITED_MOBILE_CAPABILITY

thread_local bool NonVariableTypeMode_enabled = false;

bool NonVariableTypeMode::is_enabled() {
  return NonVariableTypeMode_enabled;
}

void NonVariableTypeMode::set_enabled(bool enabled) {
  NonVariableTypeMode_enabled = enabled;
}

#else // defined(CAFFE2_FB_LIMITED_MOBILE_CAPABILITY)

bool NonVariableTypeMode::is_enabled() {
  throw std::runtime_error("NonVariableTypeMode is not supported on mobile");
}

void NonVariableTypeMode::set_enabled(bool enabled) {
  throw std::runtime_error("NonVariableTypeMode is not supported on mobile");
}

#endif

} // namespace c10