#pragma once

#include <algorithm>
#include <atomic>
#include <memory>
#include <numeric>

#include <c10/core/Backend.h>
#include <c10/core/CopyBytes.h>
#include <c10/core/MemoryFormat.h>
#include <c10/core/Storage.h>
#include <c10/core/TensorTypeId.h>
#include <c10/core/TensorTypeIdRegistration.h>
#include <c10/util/Exception.h>
#include <c10/util/Flags.h>
#include <c10/util/Logging.h>
#include <c10/util/Optional.h>
#include <c10/util/python_stub.h>

// A global boolean variable to control whether we free memory when a Tensor
// is shrunk to a smaller size. As a result, a Tensor always keeps the memory
// allocated for the largest capacity it has been resized to so far.
//
// This parameter is respected by "upper-case" methods which call Resize()
// (e.g., CopyFrom, ResizeLike); it is NOT respected by Tensor::resize_
// or ShrinkTo, both of which guarantee to never free memory.
C10_DECLARE_bool(caffe2_keep_on_shrink);

// Since we can have high variance in blob memory allocated across different
// inputs in the same run, we will shrink the blob only if the memory gain
// is larger than this flag in bytes. This only applies to functions which
// respect caffe2_keep_on_shrink.
C10_DECLARE_int64(caffe2_max_keep_on_shrink_memory);

namespace at {
class Tensor;
}

namespace c10 {
class Scalar;
struct Storage;

/**
 * A utility function to convert vector<int> to vector<int64_t>.
 */
inline std::vector<int64_t> ToVectorint64_t(ArrayRef<int> src) {
  return std::vector<int64_t>(src.begin(), src.end());
}

/**
 * Return product of all dimensions starting from k
 */
inline int64_t size_from_dim_(int k, IntArrayRef dims) {
  int64_t r = 1;
  for (size_t i = k; i < dims.size(); ++i) {
    r *= dims[i];
  }
  return r;
}

// Product of all dims up to k (not including dims[k])
inline int64_t size_to_dim_(int k, IntArrayRef dims) {
  TORCH_CHECK((unsigned)k <= dims.size());
  int64_t r = 1;
  for (int i = 0; i < k; ++i) {
    r *= dims[i];
  }
  return r;
}

// Product of all dims between k and l (not including dims[k] and dims[l])
inline int64_t size_between_dim_(int k, int l, IntArrayRef dims) {
  TORCH_CHECK((unsigned)l < dims.size());
  int64_t r = 1;
  if (k < l) {
    for (int i = k + 1; i < l; ++i) {
      r *= dims[i];
    }
  } else {
    for (int i = l + 1; i < k; ++i) {
      r *= dims[i];
    }
  }
  return r;
}

// Wrap around axis_index if it is negative, s.t., -1 is the last dim
inline int canonical_axis_index_(int axis_index, int ndims) {
  TORCH_CHECK(axis_index >= -ndims);
  TORCH_CHECK(axis_index < ndims);
  if (axis_index < 0) {
    return axis_index + ndims;
  }
  return axis_index;
}
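// A quick illustration of the dimension helpers above (not part of the API;
// the sizes used here are made up for the example). For dims = {2, 3, 4, 5}:
//
//   size_from_dim_(2, dims)        == 4 * 5 == 20   // trailing dims from index 2
//   size_to_dim_(2, dims)          == 2 * 3 == 6    // leading dims before index 2
//   size_between_dim_(0, 3, dims)  == 3 * 4 == 12   // dims strictly between 0 and 3
//   canonical_axis_index_(-1, 4)   == 3             // -1 wraps to the last dim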
using PlacementDtor = void (*)(void*, size_t);

/*
 * A Context that will call extra placement deleter during
 * destruction.
 *
 * Accepts an already constructed DataPtr and stores it as a member;
 * during destruction, we'll call the extra deleter on the underlying
 * data pointer before the DataPtr is destructed.
 * `data_ptr_` owns the memory.
 */
struct C10_API PlacementDeleteContext {
  DataPtr data_ptr_;
  PlacementDtor placement_dtor_;
  size_t size_;
  PlacementDeleteContext(
      DataPtr&& data_ptr,
      PlacementDtor placement_dtor,
      size_t size)
      : data_ptr_(std::move(data_ptr)),
        placement_dtor_(placement_dtor),
        size_(size) {}
  static DataPtr makeDataPtr(
      DataPtr&& data_ptr,
      PlacementDtor placement_dtor,
      size_t size,
      Device device);
  ~PlacementDeleteContext() {
    placement_dtor_(data_ptr_.get(), size_);
    // original memory will be freed when data_ptr_ is destructed
  }
};

struct TensorImpl;

struct C10_API AutogradMetaInterface {
  virtual void set_requires_grad(bool requires_grad, at::TensorImpl* self_impl) = 0;
  virtual bool requires_grad() const = 0;
  virtual at::Tensor& grad() = 0;
  virtual const at::Tensor& grad() const = 0;
  virtual ~AutogradMetaInterface();
};

struct C10_API NonVariableTypeMode {
  static bool is_enabled();
  static void set_enabled(bool enabled);
};

// NOTE [ Version Counter Sharing ]
//
// Every Tensor has a version counter. Version counters are incremented whenever
// the data or size of a tensor changes through in-place Variable operations.
// Version counters are used to detect modifications to saved variables which
// would result in incorrect gradient calculations. Version counters may be
// shared between Variables:
//
// 1. A view shares the version counter of the base Variable,
// 2. `x.detach()` shares the version counter of `x`,
// 3. Unpacked saved variables share the version counter of the source.
//
// Version counters are not shared in these scenarios:
//
// 1. When we replace a `Variable`'s underlying `Tensor` by calling `set_data(...)`,
// 2. `x.data` does not share the version counter of `x`. (See discussion at
//    https://github.com/pytorch/pytorch/issues/5396)
//
// Question: Why do we put the version counter in TensorImpl instead of AutogradMeta?
//
// Answer: After the Variable/Tensor merge, a tensor will not have AutogradMeta when
// its `requires_grad_` is false, but when we use this tensor in the forward pass of
// a function that requires saving this tensor for backward, we need to keep track of
// this tensor's version to make sure it's always valid in the autograd graph.
//
// To achieve this goal, we put the version counter in TensorImpl instead of
// AutogradMeta, and have it always be available. This allows us to have the
// optimization of not carrying AutogradMeta when a tensor doesn't require gradient.
//
// A hypothetical alternative way to achieve this goal is to initialize AutogradMeta
// and create the version counter for the non-requires-grad tensor only when it's
// saved for backward. However, since saving a tensor for backward happens in the
// forward pass, and our invariant is that the forward pass needs to be thread-safe,
// lazily initializing AutogradMeta when saving a tensor can introduce race conditions
// when we are running the forward pass in multi-thread scenarios, thus making the
// forward pass not thread-safe anymore, which breaks the invariant.
struct C10_API VariableVersion {
 public:
  // NOTE: As of C++11 and 14, default-constructing a std::atomic variable
  // leaves it in a persistently undefined state. See
  // https://cplusplus.github.io/LWG/issue2334.
  VariableVersion(uint32_t version = 0)
      : version_block_(std::make_shared<std::atomic<uint32_t>>(version)) {}

  void bump() noexcept {
    version_block_->fetch_add(1);
  }

  uint32_t current_version() const noexcept {
    return version_block_->load();
  }

 private:
  std::shared_ptr<std::atomic<uint32_t>> version_block_;
};

/**
 * The low-level representation of a tensor, which contains a pointer
 * to a storage (which contains the actual data) and metadata (e.g., sizes and
 * strides) describing this particular view of the data as a tensor.
 *
 * Some basic characteristics about our in-memory representation of
 * tensors:
 *
 *  - It contains a pointer to a storage struct (Storage/StorageImpl)
 *    which contains the pointer to the actual data and records the
 *    data type and device of the view. This allows multiple tensors
 *    to alias the same underlying data, which makes it possible to
 *    efficiently implement differing *views* on a tensor.
 *
 *  - The tensor struct itself records view-specific metadata about
 *    the tensor, e.g., sizes, strides and offset into storage.
 *    Each view of a storage can have a different size or offset.
 *
 *  - This class is intrusively refcounted.
It is refcounted so that * we can support prompt deallocation of large tensors; it is * intrusively refcounted so that we can still perform reference * counted operations on raw pointers, which is often more convenient * when passing tensors across language boundaries. * * - For backwards-compatibility reasons, a tensor may be in an * uninitialized state. A tensor may be uninitialized in the following * two ways: * * - A tensor may be DTYPE UNINITIALIZED. A tensor of this * form has an uninitialized dtype. This situation most * frequently arises when a user writes Tensor x(CPU). The dtype and * is subsequently initialized when mutable_data() is * invoked for the first time. * * - A tensor may be STORAGE UNINITIALIZED. A tensor of this form * has non-zero size, but has a storage with a null data pointer. * This situation most frequently arises when a user calls * Resize() or FreeMemory(). This is because Caffe2 historically * does lazy allocation: allocation of data doesn't occur until * mutable_data() is invoked. A tensor with zero size is * always storage initialized, because no allocation is necessary * in this case. * * All combinations of these two uninitialized states are possible. * Consider the following transcript in idiomatic Caffe2 API: * * Tensor x(CPU); // x is storage-initialized, dtype-UNINITIALIZED * x.Resize(4); // x is storage-UNINITIALIZED, dtype-UNINITIALIZED * x.mutable_data(); // x is storage-initialized, dtype-initialized * x.FreeMemory(); // x is storage-UNINITIALIZED, dtype-initialized. * * All other fields on tensor are always initialized. In particular, * size is always valid. (Historically, a tensor declared as Tensor x(CPU) * also had uninitialized size, encoded as numel == -1, but we have now * decided to default to zero size, resulting in numel == 0). * * Uninitialized storages MUST be uniquely owned, to keep our model * simple. Thus, we will reject operations which could cause an * uninitialized storage to become shared (or a shared storage to * become uninitialized, e.g., from FreeMemory). * * In practice, tensors which are storage-UNINITIALIZED and * dtype-UNINITIALIZED are *extremely* ephemeral: essentially, * after you do a Resize(), you basically always call mutable_data() * immediately afterwards. Most functions are not designed to * work if given a storage-UNINITIALIZED, dtype-UNINITIALIZED tensor. * * We intend to eliminate all uninitialized states, so that every * tensor is fully initialized in all fields. Please do not write new code * that depends on these uninitialized states. */ struct C10_API TensorImpl : public c10::intrusive_ptr_target { TensorImpl() = delete; /** * Construct a 1-dim 0-size tensor backed by the given storage. */ TensorImpl(Storage&& storage, TensorTypeId type_id); /** * Construct a 1-dim 0 size tensor that doesn't have a storage. */ TensorImpl(TensorTypeId type_id, const caffe2::TypeMeta& data_type, c10::optional device_opt); private: // This constructor is private, because the data_type is redundant with // storage. Still, we pass it in separately because it's easier to write // the initializer list if we're not worried about storage being moved out // from under us. TensorImpl(Storage&& storage, TensorTypeId type_id, const caffe2::TypeMeta& data_type, c10::optional); public: TensorImpl(const TensorImpl&) = delete; TensorImpl& operator=(const TensorImpl&) = delete; TensorImpl(TensorImpl&&) = default; TensorImpl& operator=(TensorImpl&&) = default; /** * Release (decref) storage, and any other external allocations. 
This * override is for `intrusive_ptr_target` and is used to implement weak * tensors. */ virtual void release_resources() override; // TODO: Ideally, type_id() would be the *only* key we need to consult // to do a dispatch, instead of having to grovel through three different // variables. Here's what's standing in the way: // // - To eliminate ScalarType, we have to allocate a TensorTypeId for // each ScalarType+Backend combination, and then set it appropriately // when we initially allocate a TensorImpl. // // - To eliminate is_variable, we have to allocate two classes of // TensorTypeId: ones that are variables, and ones that are not. // We may not want to eliminate this in the short term, because // hard-coding variable status into type_id() makes it more difficult // to do the "thread-local no_grad" trick (where we process Variables // "as if" they were non-Variables by setting a thread local variable.) // // TODO: type() is a very attractive name for a method, but we don't // actually want people to use it. Rename this to something else. /** * Return the TensorTypeId corresponding to this Tensor. In the future, * this will be the sole piece of information required to dispatch * to an operator; however, at the moment, it is not used for * dispatch. * * type_id() and type() are NOT in one-to-one correspondence; we only * have a single type_id() for CPU tensors, but many Types (CPUFloatTensor, * CPUDoubleTensor...) */ TensorTypeId type_id() const { return type_id_; } /** * Return a reference to the sizes of this tensor. This reference remains * valid as long as the tensor is live and not resized. */ virtual IntArrayRef sizes() const; /** * Return a reference to the strides of this tensor. This reference remains * valid as long as the tensor is live and not restrided. */ virtual IntArrayRef strides() const; /** * Return the number of dimensions of this tensor. Note that 0-dimension * represents a Tensor that is a Scalar, e.g., one that has a single element. */ virtual int64_t dim() const; /** * True if this tensor has storage. See storage() for details. */ virtual bool has_storage() const; /** * Return the underlying storage of a Tensor. Multiple tensors may share * a single storage. A Storage is an impoverished, Tensor-like class * which supports far less operations than Tensor. * * Avoid using this method if possible; try to use only Tensor APIs to perform * operations. */ virtual const Storage& storage() const; /** * The number of elements in a tensor. * * WARNING: Previously, if you were using the Caffe2 API, you could * test numel() == -1 to see if a tensor was uninitialized. This * is no longer true; numel always accurately reports the product * of sizes of a tensor. */ virtual int64_t numel() const { #ifdef DEBUG TORCH_INTERNAL_ASSERT(compute_numel() == numel_); #endif return numel_; } /** * Whether or not a tensor is laid out in contiguous memory. * * Tensors with non-trivial strides are not contiguous. See * compute_contiguous() for the exact definition of whether or not * a tensor is contiguous or not. */ virtual bool is_contiguous(at::MemoryFormat memory_format=at::MemoryFormat::Any) const; bool is_sparse() const { // NB: This method is not virtual and avoid dispatches for performance reasons. auto tid = type_id(); // NB: At the moment, variables have the same TensorTypeId as their // corresponding tensor, but if this ever changes, we need to modify this. 
return tid == SparseCPUTensorId() || tid == SparseCUDATensorId() || tid == SparseHIPTensorId(); } bool is_quantized() const { // NB: This method is not virtual and avoid dispatches for performance reasons. auto tid = type_id(); // NB: At the moment, variables have the same TensorTypeId as their // corresponding tensor, but if this ever changes, we need to modify this. return tid == QuantizedCPUTensorId(); } bool is_cuda() const { // NB: This method is not virtual and avoid dispatches for performance reasons. auto tid = type_id(); // NB: At the moment, variables have the same TensorTypeId as their // corresponding tensor, but if this ever changes, we need to modify this. return tid == CUDATensorId() || tid == SparseCUDATensorId(); } bool is_hip() const { // NB: This method is not virtual and avoid dispatches for performance reasons. auto tid = type_id(); // NB: At the moment, variables have the same TensorTypeId as their // corresponding tensor, but if this ever changes, we need to modify this. return tid == HIPTensorId() || tid == SparseHIPTensorId(); } bool is_mkldnn() const { return type_id() == MkldnnCPUTensorId(); } int64_t get_device() const { TORCH_CHECK( device_opt_.has_value(), "tensor with backend ", toString(tensorTypeIdToBackend(type_id())), " does not have a device"); // See NOTE [c10::optional operator usage in CUDA] return (*device_opt_).index(); } Device device() const { TORCH_CHECK( device_opt_.has_value(), "tensor with backend ", toString(tensorTypeIdToBackend(type_id())), " does not have a device"); // See NOTE [c10::optional operator usage in CUDA] return *device_opt_; } Layout layout() const { // NB: This method is not virtual and avoid dispatches for perf. if (is_sparse()) { return kSparse; } else if (is_mkldnn()) { return kMkldnn; } else { return kStrided; } } /** * If `condition_when_zero_dim` is true, and the tensor is a 1-dim, 1-size * tensor, reshape the tensor into a 0-dim tensor (scalar). * * This helper function is called from generated wrapper code, to help * "fix up" tensors that legacy code didn't generate in the correct shape. * For example, suppose that we have a legacy function 'add' which produces * a tensor which is the same shape as its inputs; however, if the inputs * were zero-dimensional, it produced a 1-dim 1-size tensor (don't ask). * result->maybe_zero_dim(lhs->dim() == 0 && rhs->dim() == 0) will be called, * correctly resetting the dimension to 0 when when the inputs had 0-dim. * * As we teach more and more of TH to handle 0-dim correctly, this function * will become less necessary. At the moment, it is often called from functions * that correctly handle the 0-dim case, and is just dead code in this case. * In the glorious future, this function will be eliminated entirely. */ virtual TensorImpl* maybe_zero_dim(bool condition_when_zero_dim); /** * True if a tensor was auto-wrapped from a C++ or Python number. * For example, when you write 't + 2', 2 is auto-wrapped into a Tensor * with `is_wrapped_number_` set to true. * * Wrapped numbers do not participate in the result type computation for * mixed-type operations if there are any Tensors that are not wrapped * numbers. This is useful, because we want 't + 2' to work with * any type of tensor, not just LongTensor (which is what integers * in Python represent). * * Otherwise, they behave like their non-wrapped equivalents. * See [Result type computation] in TensorIterator.h. * * Why did we opt for wrapped numbers, as opposed to just having * an extra function add(Tensor, Scalar)? 
This helps greatly reduce * the amount of code we have to write for add, when actually * a Tensor-Scalar addition is really just a Tensor-Tensor * addition when the RHS is 0-dim (except for promotion behavior.) */ bool is_wrapped_number() const { return is_wrapped_number_; } /** * Set whether or not a tensor was auto-wrapped from a C++ or Python * number. You probably don't want to call this, unless you are * writing binding code. */ void set_wrapped_number(bool value) { TORCH_INTERNAL_ASSERT(dim() == 0); is_wrapped_number_ = value; } // ~~~~~ Autograd API ~~~~~ // Some methods below are defined in TensorImpl.cpp because Tensor is an // incomplete type. // // Note [Tensor versus Variable in C++] // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ // Autograd methods are only valid for Variables (i.e. Tensors that contain // autograd metadata). /** * Set whether or not a tensor requires gradient. * * It is only valid to call this method on a Variable. * See Note [Tensor versus Variable in C++]. */ void set_requires_grad(bool requires_grad) { TORCH_INTERNAL_ASSERT(autograd_meta(), "set_requires_grad is not implemented for Tensor"); autograd_meta()->set_requires_grad(requires_grad, this); } /** * True if a tensor requires gradient. Tensors which require gradient * have history tracked for any operations performed on them, so that * we can automatically differentiate back to them. A tensor that * requires gradient and has no history is a "leaf" tensor, which we * accumulate gradients into. * * It is only valid to call this method on a Variable. * See Note [Tensor versus Variable in C++]. */ bool requires_grad() const { TORCH_INTERNAL_ASSERT(autograd_meta(), "requires_grad is not implemented for Tensor"); return autograd_meta()->requires_grad(); } /** * Return a mutable reference to the gradient. This is conventionally * used as `t.grad() = x` to set a gradient to a completely new tensor. * * It is only valid to call this method on a Variable. * See Note [Tensor versus Variable in C++]. */ at::Tensor& grad(); /** * Return the accumulated gradient of a tensor. This gradient is written * into when performing backwards, when this tensor is a leaf tensor. * * It is only valid to call this method on a Variable. * See Note [Tensor versus Variable in C++]. */ const at::Tensor& grad() const; /** * Return a typed data pointer to the actual data which this tensor refers to. * This checks that the requested type (from the template parameter) matches * the internal type of the tensor. * * It is invalid to call data() on a dtype-uninitialized tensor, even if * the size is 0. * * WARNING: If a tensor is not contiguous, you MUST use strides when * performing index calculations to determine the location of elements in * the tensor. We recommend using 'TensorAccessor' to handle this computation * for you; this class is available from 'Tensor'. */ template inline T * data() const { TORCH_CHECK(has_storage(), "Cannot access data pointer of Tensor that doesn't have storage"); TORCH_CHECK( storage_initialized(), "The tensor has a non-zero number of elements, but its data is not allocated yet. " "Caffe2 uses a lazy allocation, so you will need to call " "mutable_data() or raw_mutable_data() to actually allocate memory."); TORCH_CHECK( storage_.IsType(), "Tensor type mismatch, caller expects elements to be ", caffe2::TypeMeta::TypeName(), ", while tensor contains ", data_type_.name(), ". 
"); // We managed the type check ourselves return storage_.unsafe_data() + storage_offset_; } /** * Return a void* data pointer to the actual data which this tensor refers to. * * It is invalid to call data() on a dtype-uninitialized tensor, even if the * size is 0. * * WARNING: The data pointed to by this tensor may not contiguous; do NOT * assume that itemsize() * numel() is sufficient to compute the bytes that * can be validly read from this tensor. */ inline void* data() const { TORCH_CHECK(has_storage(), "Cannot access data pointer of Tensor that doesn't have storage"); TORCH_CHECK(dtype_initialized(), "Cannot access data pointer of Tensor that doesn't have initialized dtype " "(e.g., caffe2::Tensor x(CPU), prior to calling mutable_data() on x)"); return static_cast( static_cast(storage_.data()) + data_type_.itemsize() * storage_offset_); } /** * Like data(), but performs no checks. You are responsible for ensuring * that all invariants required by data() are upheld here. */ template inline T * unsafe_data() const { return storage_.unsafe_data() + storage_offset_; } /** * Returns the TypeMeta of a tensor, which describes what data type * it is (e.g., int, float, ...) */ const caffe2::TypeMeta& dtype() const { return data_type_; } /** * Return the size of a single element of this tensor in bytes. */ size_t itemsize() const { TORCH_CHECK(dtype_initialized(), "Cannot report itemsize of Tensor that doesn't have initialized dtype " "(e.g., caffe2::Tensor x(CPU), prior to calling mutable_data() on x)"); return data_type_.itemsize(); } /** * Return the offset in number of elements into the storage that this * tensor points to. Most tensors have storage_offset() == 0, but, * for example, an index into a tensor will have a non-zero storage_offset(). * * WARNING: This is NOT computed in bytes. * * XXX: The only thing stopping this function from being virtual is Variable. */ virtual int64_t storage_offset() const { return storage_offset_; } /** * True if a tensor has no elements (e.g., numel() == 0). */ inline bool is_empty() const { return numel() == 0; } /** * Change the dimensionality of a tensor. This is truly a resize: * old sizes, if they are still valid, are preserved (this invariant * is utilized by some call-sites, e.g., the implementation of squeeze, which * mostly wants the sizes to stay the same). New dimensions are given zero * size and zero stride; this is probably not what you want--you should * set_size/set_stride afterwards. * * TODO: This should be jettisoned in favor of `set_sizes_and_strides`, * which is harder to misuse. */ virtual void resize_dim(int64_t ndim) { TORCH_CHECK(allow_tensor_metadata_change(), "resize_dim is not allowed on Tensor created from .data or .detach()"); sizes_.resize(ndim, 0); strides_.resize(ndim, 0); refresh_numel(); refresh_contiguous(); } /** * Change the size at some dimension. This DOES NOT update strides; * thus, most changes to size will not preserve contiguity. You probably * also want to call set_stride() when you call this. * * TODO: This should be jettisoned in favor of `set_sizes_and_strides`, * which is harder to misuse. */ virtual void set_size(int64_t dim, int64_t new_size) { TORCH_CHECK(allow_tensor_metadata_change(), "set_size is not allowed on Tensor created from .data or .detach()"); sizes_.at(dim) = new_size; refresh_numel(); refresh_contiguous(); } /** * Change the stride at some dimension. * * TODO: This should be jettisoned in favor of `set_sizes_and_strides`, * which is harder to misuse. 
*/ virtual void set_stride(int64_t dim, int64_t new_stride) { TORCH_CHECK(allow_tensor_metadata_change(), "set_stride is not allowed on Tensor created from .data or .detach()"); strides_[dim] = new_stride; refresh_numel(); refresh_contiguous(); } /** * Set the offset into the storage of this tensor. * * WARNING: This does NOT check if the tensor is in bounds for the new * location at the storage; the caller is responsible for checking this * (and resizing if necessary.) */ virtual void set_storage_offset(int64_t storage_offset) { TORCH_CHECK(allow_tensor_metadata_change(), "set_storage_offset is not allowed on Tensor created from .data or .detach()"); storage_offset_ = storage_offset; } /** * Like set_sizes_and_strides but assumes contiguous strides. * * WARNING: This function does not check if the requested * sizes/strides are in bounds for the storage that is allocated; * this is the responsibility of the caller */ void set_sizes_contiguous(IntArrayRef new_size) { TORCH_CHECK(allow_tensor_metadata_change(), "set_sizes_contiguous is not allowed on Tensor created from .data or .detach()"); auto old_dim = sizes_.size(); auto new_dim = new_size.size(); sizes_.resize(new_dim); for (size_t dim = 0; dim < new_dim; ++dim) { sizes_[dim] = new_size[dim]; } update_to_contiguous_strides(old_dim); refresh_numel(); } /** * Set the sizes and strides of a tensor. * * WARNING: This function does not check if the requested * sizes/strides are in bounds for the storage that is allocated; * this is the responsibility of the caller */ void set_sizes_and_strides(IntArrayRef new_size, IntArrayRef new_stride) { TORCH_CHECK(allow_tensor_metadata_change(), "set_sizes_and_strides is not allowed on Tensor created from .data or .detach()"); TORCH_CHECK( new_size.size() == new_stride.size(), "dimensionality of sizes (", new_size.size(), ") must match dimensionality of strides (", new_stride.size(), ")"); auto new_dim = new_size.size(); sizes_.resize(new_dim); for (size_t dim = 0; dim < new_dim; ++dim) { sizes_[dim] = new_size[dim]; } strides_.resize(new_dim); if (new_dim > 0) { for (size_t dim = new_dim - 1; ; dim--) { if (new_stride[dim] >= 0) { strides_[dim] = new_stride[dim]; } else { // XXX: This behavior is surprising and may need to be removed to // support negative strides. Some pytorch functions rely on it: // for example, torch.cat (run TestTorch.test_cat_empty). if (dim == new_dim - 1) { strides_[dim] = 1; } else { // Keep stride monotonically increasing to match NumPy. strides_[dim] = std::max(sizes_[dim + 1], 1) * strides_[dim + 1]; } } if (dim == 0) break; } } refresh_numel(); refresh_contiguous(); } /** * Return the size of a tensor at some dimension. */ virtual int64_t size(int64_t d) const; /** * Return the stride of a tensor at some dimension. */ virtual int64_t stride(int64_t d) const; /** * True if a tensor is a variable. See Note [Tensor versus Variable in C++] */ bool is_variable() const { return autograd_meta_ != nullptr && !at::NonVariableTypeMode::is_enabled(); } /** * Set whether a tensor allows changes to its metadata (e.g. sizes / strides / storage / storage_offset). * See NOTE [ Metadata Change for a Detached Tensor ] for details. */ virtual void set_allow_tensor_metadata_change(bool value) { allow_tensor_metadata_change_ = value; } /** * True if a tensor allows changes to its metadata (e.g. sizes / strides / storage / storage_offset). * See NOTE [ Metadata Change for a Detached Tensor ] for details. 
*/ virtual bool allow_tensor_metadata_change() const { return allow_tensor_metadata_change_; } /** * Set the pointer to autograd metadata. */ void set_autograd_meta(std::unique_ptr autograd_meta) { autograd_meta_ = std::move(autograd_meta); } /** * Return the pointer to autograd metadata. */ c10::AutogradMetaInterface* autograd_meta() const { return autograd_meta_.get(); } /** * Detach the autograd metadata unique_ptr from this tensor, and return it. */ std::unique_ptr detach_autograd_meta() { return std::move(autograd_meta_); } // NOTE [ TensorImpl Shallow-Copying ] // // TensorImpl shallow-copying is used when we want to have two Variables share the same storage pointer // and tensor metadata, but each with a different autograd history. Example call sites: // // 1. `var_detached = var.detach()` uses `shallow_copy_and_detach()` to create `var_detached` that shares // the same storage pointer and tensor metadata with `var`, but with a completely new autograd history. // 2. `var.set_data(tensor)` uses `shallow_copy_from()` to copy storage pointer and tensor metadata from // `tensor` into `var`, while keeping `var`'s original AutogradMeta. // // Functions that shallow-copy a TensorImpl (such as `shallow_copy_and_detach()` / `shallow_copy_from()` / // `copy_tensor_data()`) copy the storage pointer and the tensor metadata fields (e.g. sizes / strides / // storage_offset) by value. However, the following fields are not copied: // // 1. the AutogradMeta pointer, because it is unique for each Variable. // 2. the version counter, because the destination TensorImpl's version counter is either set to the // passed-in `version_counter` (in `shallow_copy_and_detach()` and `copy_tensor_data()`), or it is kept // intact (in `shallow_copy_from()`). See NOTE [ Version Counter Sharing ] for details. // // In `shallow_copy_and_detach()` and `copy_tensor_data()`, the passed-in `allow_tensor_metadata_change` // determines whether the TensorImpl shallow-copy allows changes to its metadata (e.g. sizes / strides / // storage / storage_offset). See NOTE [ Metadata Change for a Detached Tensor ] for details. // // In `shallow_copy_from()`, we don't check the destination TensorImpl's `allow_tensor_metadata_change_`, // because `shallow_copy_from()` is used for implementing functions such as `var.set_data(tensor)`, which // changes `var`'s tensor metadata and expects its `allow_tensor_metadata_change_` to be ignored. /** * Return a TensorImpl that is a shallow-copy of this TensorImpl. * * For usage of `version_counter` and `allow_tensor_metadata_change`, * see NOTE [ TensorImpl Shallow-Copying ]. */ virtual c10::intrusive_ptr shallow_copy_and_detach( const c10::VariableVersion& version_counter, bool allow_tensor_metadata_change) const { auto impl = c10::make_intrusive(Storage(storage()), type_id()); copy_tensor_data( /*src_impl=*/this, /*dest_impl=*/impl.get(), /*version_counter=*/version_counter, /*allow_tensor_metadata_change=*/allow_tensor_metadata_change); impl->refresh_numel(); impl->refresh_contiguous(); return impl; } /** * Shallow-copies data from another TensorImpl into this TensorImpl. * * For why this function doesn't check this TensorImpl's `allow_tensor_metadata_change_`, * see NOTE [ TensorImpl Shallow-Copying ]. 
   */
  virtual void shallow_copy_from(const c10::intrusive_ptr<TensorImpl>& impl) {
    copy_tensor_data(
      /*src_impl=*/impl.get(),
      /*dest_impl=*/this,
      /*version_counter=*/version_counter(),
      /*allow_tensor_metadata_change=*/allow_tensor_metadata_change());
    refresh_numel();
    refresh_contiguous();
  }

  void set_version_counter(
      const c10::VariableVersion& version_counter) noexcept {
    version_counter_ = version_counter;
  }

  const c10::VariableVersion& version_counter() const noexcept {
    return version_counter_;
  }

  void bump_version() noexcept {
    version_counter_.bump();
  }

  inline void set_pyobj(PyObject* pyobj) noexcept {
    pyobj_ = pyobj;
  }

  inline PyObject* pyobj() const noexcept {
    return pyobj_;
  }

 private:
  // See NOTE [c10::optional operator usage in CUDA]
  // We probably don't want to expose this publicly until
  // the note is addressed.
  c10::optional<c10::Device> device_opt() const {
    return device_opt_;
  }

 public:
  /**
   * The device type of a Tensor, e.g., DeviceType::CPU or DeviceType::CUDA.
   */
  DeviceType device_type() const {
    // TODO: A useful internal assert would be to show that device_opt_ is null
    // only if you are an undefined tensor
    TORCH_CHECK(device_opt_.has_value(), "device_type cannot be run on undefined Tensor");
    // See NOTE [c10::optional operator usage in CUDA]
    return (*device_opt_).type();
  }

  /**
   * @brief Extends the outer-most dimension of this tensor by num elements,
   * preserving the existing data.
   *
   * The underlying data may be reallocated in order to accommodate the new
   * elements, in which case this tensor's capacity is grown by a factor of
   * growthPct. This ensures that Extend runs in amortized O(1) time.
   *
   * This op is auto-asynchronous if the underlying device (CUDA) supports it.
   */
  void Extend(int64_t num, float growthPct) {
    TORCH_CHECK(sizes_.size() >= 1u);
    TORCH_CHECK(num >= 0, "`num` must be non-negative for Extend");
    TORCH_CHECK(
        is_contiguous_,
        "Right now Extend is only supported for contiguous Tensor.");
    auto newDims = sizes_;
    newDims[0] += num;
    if (!storage_.data()) {
      Resize(newDims);
      return;
    }
    auto newNumel = std::accumulate(
        newDims.begin(),
        newDims.end(),
        static_cast<int64_t>(1),
        std::multiplies<int64_t>());
    if (newNumel * storage_.itemsize() <= storage_.capacity()) {
      sizes_ = newDims;
      numel_ = newNumel;
      return;
    }
    auto newCapacity = sizes_;
    newCapacity[0] = std::max(
        newDims[0],
        static_cast<int64_t>(std::ceil(sizes_[0] * (growthPct + 100) / 100)));
    auto oldData = std::move(storage_.data_ptr());
    auto oldSize = numel_;
    auto oldDims = sizes_;
    Resize(newCapacity);
    auto* newData = raw_mutable_data(data_type_);
    if (data_type_.copy()) {
      TORCH_CHECK(
          device_type() == DeviceType::CPU, "non-POD types work only on CPU");
      data_type_.copy()(oldData.get(), newData, oldSize);
    } else {
      // The following copy uses the current (thread local) stream for copying
      // and also takes the GPU id from the device() field passed in.
      //
      // TODO: Potentially more enforcements are necessary to avoid accidental
      // switch to sync copy if the currently set device is wrong.
      //
      // Specifically, we might need to switch to a different context device
      // here explicitly to avoid relying on user synchronizing things
      // properly.
      CopyBytes(
          oldSize * itemsize(),
          oldData.get(),
          device(),
          newData,
          device(),
          true); // non-blocking
    }
    reserved_ = true;
    sizes_ = newDims;
    numel_ = newNumel;
  }
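  // A worked example of the growth logic above (illustrative only; the shapes
  // and growthPct are made up). Starting from a contiguous tensor with
  // sizes {10, 3} whose storage holds exactly 30 elements:
  //
  //   Extend(2, 50);  // newDims = {12, 3}; 36 elements don't fit, so capacity
  //                   // grows to max(12, ceil(10 * 150 / 100)) = 15 rows and the
  //                   // old 30 elements are copied over; sizes become {12, 3}.
  //   Extend(3, 50);  // newDims = {15, 3}; 45 elements fit in the reserved
  //                   // 15-row storage, so only sizes_/numel_ are updated.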
  /**
   * @brief Reserve space for the underlying tensor.
   *
   * This must be called after Resize(), since we only specify the first
   * dimension. This does not copy over the old data to the newly allocated
   * space.
   */
  template <typename T>
  void ReserveSpace(const T& outer_dim) {
    TORCH_CHECK(
        is_contiguous_,
        "Right now ReserveSpace is only supported for contiguous Tensor.");
    TORCH_CHECK(
        storage_.unique(), "Can't call ReserveSpace on shared storage.");
    auto newCapacity = sizes_;
    newCapacity[0] = outer_dim;
    auto newNumel = std::accumulate(
        newCapacity.begin(),
        newCapacity.end(),
        static_cast<int64_t>(1),
        std::multiplies<int64_t>());
    if (newNumel * storage_.itemsize() <= storage_.capacity()) {
      return;
    }
    // Old data is discarded
    storage_.data_ptr().clear();
    auto oldSize = numel_;
    auto oldDims = sizes_;
    Resize(newCapacity);
    // Allocate new memory but don't copy over the data
    raw_mutable_data(data_type_);
    sizes_ = oldDims;
    numel_ = oldSize;
    reserved_ = true;
  }

  /**
   * @brief Resizes a tensor.
   *
   * Resize takes in a vector of ints specifying the dimensions of the tensor.
   * You can pass in an empty vector to specify that it is a scalar (i.e.
   * containing one single item).
   *
   * The underlying storage may be deleted after calling Resize: if the new
   * shape leads to a different number of items in the tensor, the old memory
   * is deleted and new memory will be allocated next time you call
   * mutable_data(). However, if the shape is different but the total number of
   * items is the same, the underlying storage is kept.
   *
   * This method respects caffe2_keep_on_shrink. Consult the internal logic
   * of this method to see exactly under what circumstances this flag matters.
   */
  template <typename... Ts>
  void Resize(Ts... dim_source) {
    bool size_changed = SetDims(dim_source...);
    if (size_changed) {
      // If needed, we will free the data. The next mutable_data() call
      // will create the data storage.
      bool reset_tensor = false;
      if (reserved_) {
        // If the tensor is reserved, then don't free its memory unless
        // capacity() is smaller than the new size.
        reset_tensor =
            storage_.capacity() < (storage_offset_ + numel_) * storage_.itemsize();
      } else {
        reset_tensor = storage_.capacity() <
                (storage_offset_ + numel_) * storage_.itemsize() ||
            !FLAGS_caffe2_keep_on_shrink ||
            storage_.capacity() -
                    (storage_offset_ + numel_) * storage_.itemsize() >
                static_cast<size_t>(FLAGS_caffe2_max_keep_on_shrink_memory);
      }

      if (reset_tensor && storage_initialized()) {
        FreeMemory();
      }
    }
  }

  /**
   * Resizes the tensor without touching underlying storage.
   * This requires the total size of the tensor to remain constant.
   */
  inline void Reshape(const std::vector<int64_t>& dims) {
    TORCH_CHECK(
        is_contiguous_,
        "Right now Reshape is only supported for contiguous Tensor.");
    int64_t new_size = 1;
    for (auto d : dims) {
      TORCH_CHECK(d >= 0);
      new_size *= d;
    }
    TORCH_CHECK(
        new_size == numel_,
        "New size and old size are not equal. You cannot use Reshape, "
        "but should use Resize."
        // TODO(jiayq): remove the following warning after pending diffs
        // stabilize.
        " The old caffe2 mixes Reshape and Resize but this behavior has "
        "been changed. If you find this error, most likely you will need "
        "to change corresponding code from Reshape to Resize.");
    auto old_dim = sizes_.size();
    sizes_ = dims;
    update_to_contiguous_strides(old_dim);
  }

  /**
   * Release whatever memory the tensor was holding but keep size and type
   * information. Subsequent call to mutable_data will trigger new memory
   * allocation.
*/ inline void FreeMemory() { // We'll detach from the old Storage and create a new one storage_ = Storage::create_legacy(storage_.device(), data_type_); storage_offset_ = 0; } /** * @brief Shares the data with another tensor. * * To share data between two tensors, the sizes of the two tensors must be * equal already. The reason we do not implicitly do a Resize to make the two * tensors have the same shape is that we want to allow tensors of different * shapes but the same number of items to still be able to share data. This * allows one to e.g. have a n-dimensional Tensor and a flattened version * sharing the same underlying storage. * * The source tensor should already have its data allocated. */ // To be deprecated void ShareData(const TensorImpl& src) { // Right now, we are assuming the device_type are the same, since it is // inherently the same in the non-templatized code. We should probably add // an assert here which might affect perf a little bit. TORCH_CHECK( src.numel_ == numel_, "Size mismatch - did you call reshape before sharing the data?"); // It is possible that the source tensor hasn't called mutable_data() yet, // in which case ShareData() doesn't make much sense since we don't really // know what to share yet. // TODO: Add the assert after all uninitialized states are eliminated // TORCH_CHECK(src.dtype_initialized(), // "Source tensor don't have a data type (did you call mutable_data on the tensor?)"); if (!src.dtype_initialized()) { C10_LOG_EVERY_MS(WARNING, 1000) << "Source tensor don't have a data type (did you call mutable_data on the tensor?)"; } TORCH_CHECK( src.storage_initialized(), "Source tensor has no content and has size > 0"); // Finally, do sharing. /* Since we create new Storage whenever we need to change data_type/capacity * this still keeps the original semantics */ storage_ = src.storage(); data_type_ = src.dtype(); device_opt_ = src.device_opt(); storage_offset_ = src.storage_offset(); } void ShareExternalPointer( DataPtr&& data_ptr, const caffe2::TypeMeta& data_type, size_t capacity) { TORCH_CHECK( data_type.id() != caffe2::TypeIdentifier::uninitialized(), "To share with a raw external pointer you need to pass in an " "initialized data_type(TypeMeta)."); if (!capacity) { capacity = numel_ * data_type.itemsize(); } if (storage_.unique()) { storage_.UniqueStorageShareExternalPointer( std::move(data_ptr), data_type, capacity); data_type_ = data_type; device_opt_ = storage_.device(); storage_offset_ = 0; } else { int64_t numel = capacity / data_type.itemsize(); // Create a new Storage storage_ = Storage( data_type, numel, std::move(data_ptr), /*allocator=*/nullptr, /*resizable=*/false); data_type_ = data_type; device_opt_ = storage_.device(); storage_offset_ = 0; } } /** * Returns a mutable raw pointer of the underlying storage. Since we will need * to know the type of the data for allocation, a TypeMeta object is passed in * to specify the necessary information. This is conceptually equivalent of * calling mutable_data() where the TypeMeta parameter meta is derived from * the type T. This function differs from mutable_data() in the sense that * the type T can be specified during runtime via the TypeMeta object. * * If the existing data does not match the desired type, it will be deleted * and a new storage will be created. 
   */
  inline void* raw_mutable_data(const caffe2::TypeMeta& meta) {
    // For 0-size tensors it's fine to return any pointer (including nullptr)
    if (data_type_ == meta && storage_initialized()) {
      return static_cast<void*>(
          static_cast<char*>(storage_.data()) +
          storage_offset_ * meta.itemsize());
    } else {
      bool had_special_dtor = data_type_.placementDelete() != nullptr;
      storage_offset_ = 0;
      if (storage_.unique()) {
        storage_.set_dtype(meta);
      } else {
        if (data_type_ != meta) {
          storage_ = Storage::create_legacy(storage_.device(), meta);
        }
      }
      data_type_ = meta;
      // NB: device is not changed

      // We can reuse the existing buffer if the current data does not have
      // a special destructor and the new data doesn't have a special
      // constructor.
      if (numel_ == 0 ||
          (meta.placementNew() == nullptr && !had_special_dtor &&
           storage_.numel() >= numel_)) {
        TORCH_INTERNAL_ASSERT(storage_offset_ == 0); // because we just reallocated
        return storage_.data();
      }
      const Allocator* allocator = storage_.allocator();
      // Storage might have nullptr allocator in rare cases, for example, if
      // an external memory segment has been wrapped with Tensor and we don't
      // know how to reallocate it. However, in order to preserve legacy C2
      // behavior, we allow reallocating the memory using default allocator.
      if (allocator == nullptr) {
        allocator = GetAllocator(storage_.device_type());
      }
      if (meta.placementNew()) {
        // For types that need placement new, we will call it, as well as
        // making sure that when the data is freed, it calls the right
        // destruction procedure.
        auto size = numel_;
        auto dtor = data_type_.placementDelete();
        auto data_ptr = allocator->allocate(numel_ * storage_.itemsize());
        storage_.set_data_ptr(PlacementDeleteContext::makeDataPtr(
            std::move(data_ptr), dtor, size, storage_.device()));
        data_type_.placementNew()(storage_.data(), numel_);
      } else {
        // For fundamental type, new and delete is easier.
        storage_.set_data_ptr(
            allocator->allocate(numel_ * storage_.itemsize()));
      }
      storage_.set_numel(numel_);
      TORCH_INTERNAL_ASSERT(storage_offset_ == 0); // because we just reallocated
      device_opt_ = storage_.device();
      return storage_.data();
    }
  }

  /**
   * Returns a typed pointer of the underlying storage.
   *
   * For fundamental types, we reuse possible existing storage if there
   * is sufficient capacity.
   */
  template <typename T>
  inline T* mutable_data() {
    if (storage_initialized() && storage_.IsType<T>()) {
      return static_cast<T*>(storage_.data()) + storage_offset_;
    }
    // Check it here statically - otherwise TypeMeta would throw the runtime
    // error in attempt to invoke TypeMeta::ctor()
    static_assert(
        std::is_default_constructible<T>::value,
        "Tensor can't hold non-default-constructible types");
    return static_cast<T*>(raw_mutable_data(caffe2::TypeMeta::Make<T>()));
  }

  /**
   * True if a tensor is storage initialized. A tensor may become
   * storage UNINITIALIZED after a Resize() or FreeMemory()
   */
  bool storage_initialized() const {
    TORCH_CHECK(has_storage(), "cannot call storage_initialized on tensor that does not have storage");
    return storage_.data() || numel_ == 0;
  }
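  // Illustrative Caffe2-style usage of the lazy-allocation API above (the
  // shapes are made up; `caffe2::Tensor` is the user-facing wrapper that
  // forwards to this class):
  //
  //   caffe2::Tensor x(CPU);              // storage-initialized, dtype-UNINITIALIZED
  //   x.Resize(4, 3);                     // metadata only; no allocation yet
  //   float* p = x.mutable_data<float>(); // sets dtype to float, allocates 12 floats
  //   x.Resize(3, 4);                     // same numel, so the storage is kept
  //   float* q = x.mutable_data<float>(); // no reallocation; q == p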
  /**
   * True if a tensor is dtype initialized. A tensor allocated with
   * Caffe2-style constructors is dtype uninitialized until the
   * first time mutable_data<T>() is called.
   */
  bool dtype_initialized() const noexcept {
    return data_type_ != caffe2::TypeMeta();
  }

  void set_storage(at::Storage storage) {
    TORCH_CHECK(allow_tensor_metadata_change(), "set_storage is not allowed on Tensor created from .data or .detach()");
    storage_ = std::move(storage);
    data_type_ = storage_.dtype();
    device_opt_ = storage_.device();
  }

 private:
  // The Caffe2 Resize() method supports being called both as Resize({2,2}) as
  // well as variadic with Resize(2, 2). These overloads provide all of the
  // supported calling configurations, while being overloads (and not templates)
  // so that implicit conversions still work.
  //
  // SetDims on ArrayRef is internally implemented as a template, so we can
  // handle both ArrayRefs of different types (there are some uses of
  // Resize in Caffe2 which pass in int, not int64_t.)

  template <
      typename T,
      typename = typename std::enable_if<std::is_integral<T>::value>::type>
  bool SetDimsTemplate(ArrayRef<T> src) {
    auto old_numel = numel_;
    auto old_dim = sizes_.size();
    sizes_.resize(src.size());
    int64_t new_numel = 1;
    for (size_t i = 0; i < src.size(); ++i) {
      new_numel *= src[i];
      sizes_[i] = src[i];
    }
    update_to_contiguous_strides(old_dim);
    numel_ = new_numel;
    return numel_ != old_numel;
  }

  bool SetDims(ArrayRef<int64_t> s) {
    return SetDimsTemplate(s);
  }

  bool SetDims(ArrayRef<int> s) {
    return SetDimsTemplate(s);
  }

  bool SetDims(ArrayRef<size_t> s) {
    return SetDimsTemplate(s);
  }

  bool SetDims() {
    return SetDims(IntArrayRef{});
  }

  bool SetDims(const int64_t d0) {
    return SetDims(IntArrayRef{d0});
  }

  bool SetDims(const int64_t d0, const int64_t d1) {
    return SetDims(IntArrayRef{d0, d1});
  }

  bool SetDims(const int64_t d0, const int64_t d1, const int64_t d2) {
    return SetDims(IntArrayRef{d0, d1, d2});
  }

  bool SetDims(const int64_t d0, const int64_t d1, const int64_t d2, const int64_t d3) {
    return SetDims(IntArrayRef{d0, d1, d2, d3});
  }

  inline void update_to_contiguous_strides(size_t old_dim) {
    strides_.resize(sizes_.size(), 0);
    if (dim() > 0) {
      int last_idx = dim() - 1;
      strides_[last_idx] = 1;
      for (auto i = last_idx - 1; i >= 0; --i) {
        strides_[i] = strides_[i + 1] * std::max<int64_t>(sizes_[i + 1], 1);
      }
    }
    is_contiguous_ = true;
  }

  /**
   * Compute the number of elements based on the sizes of a tensor.
   */
  int64_t compute_numel() const {
    int64_t n = 1;
    for (auto s : sizes()) {
      n *= s;
    }
    return n;
  }

  /**
   * Compute whether or not a tensor is contiguous based on the sizes and
   * strides of a tensor.
   */
  bool compute_contiguous() const;

 protected:
  /**
   * Recompute the cached numel of a tensor. Call this if you modify sizes.
   */
  void refresh_numel() {
    numel_ = compute_numel();
  }

  /**
   * Recompute the cached contiguity of a tensor. Call this if you modify
   * sizes or strides.
   */
  void refresh_contiguous() {
    is_contiguous_ = compute_contiguous();
  }
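  // For intuition on the contiguity check (illustrative; the exact rule lives
  // in compute_contiguous()): a tensor is contiguous when each stride equals
  // the product of all later sizes, e.g.
  //
  //   sizes {2, 3}, strides {3, 1}  -> contiguous (row-major layout)
  //   sizes {2, 3}, strides {1, 2}  -> not contiguous (column-major view)
  //   sizes {3},    strides {2}     -> not contiguous (every other element)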
  /**
   * Copy the storage pointer and the tensor metadata fields (e.g. sizes /
   * strides / storage_offset) from one TensorImpl to another TensorImpl.
   *
   * For usage of `version_counter` and `allow_tensor_metadata_change`,
   * see NOTE [ TensorImpl Shallow-Copying ].
   */
  static void copy_tensor_data(
      const TensorImpl* src_impl,
      TensorImpl* dest_impl,
      const c10::VariableVersion& version_counter,
      bool allow_tensor_metadata_change) {
    dest_impl->storage_ = src_impl->storage_;
    dest_impl->sizes_ = src_impl->sizes_;
    dest_impl->strides_ = src_impl->strides_;
    dest_impl->storage_offset_ = src_impl->storage_offset_;
    dest_impl->data_type_ = src_impl->data_type_;
    dest_impl->device_opt_ = src_impl->device_opt_;
    dest_impl->type_id_ = src_impl->type_id_;
    dest_impl->is_contiguous_ = src_impl->is_contiguous_;
    dest_impl->is_wrapped_number_ = src_impl->is_wrapped_number_;
    dest_impl->reserved_ = src_impl->reserved_;
    dest_impl->set_version_counter(version_counter);
    dest_impl->set_allow_tensor_metadata_change(allow_tensor_metadata_change);
  }

 protected:
  Storage storage_;

  // This pointer points to an AutogradMeta struct that stores autograd-specific fields
  // (such as grad_ / grad_fn_ / grad_accumulator_).
  // This pointer always has unique ownership (meaning only one TensorImpl can own it
  // at a time).
  std::unique_ptr<c10::AutogradMetaInterface> autograd_meta_ = nullptr;

  c10::VariableVersion version_counter_;

  PyObject* pyobj_ = nullptr; // weak reference

  // We could save a word or two by combining the SmallVector structs,
  // since their size is redundant, and if we need to overflow the buffer space
  // we could keep the two pointers together. However, that would require
  // implementing another struct from scratch, so only do this if we're desperate.
  SmallVector<int64_t, 5> sizes_;
  SmallVector<int64_t, 5> strides_;

  int64_t storage_offset_ = 0;
  // If sizes and strides are empty, the numel is 1!! However, most of the
  // time, we will immediately set sizes to {0} and reset numel to 0.
  // (Can't do that in the default initializers, because there's no way to
  // spell "allocate a one-element array" for strides_).
  int64_t numel_ = 1;

  // INVARIANT: When storage is non-null, this type meta must
  // agree with the type meta in storage
  caffe2::TypeMeta data_type_;

  // NOTE [c10::optional operator usage in CUDA]
  // Our optional definition doesn't compile in .cu file if `value()` or
  // `operator->` are used. Instead, we always use `operator*`.
  // See https://github.com/pytorch/pytorch/issues/18496 for more info.
  // If this is too burdensome to maintain, we can just
  // manually implement this with an additional bool.

  // INVARIANT: When storage is non-null, this Device must
  // agree with the device in storage.
  //
  // INVARIANT: device_opt_ is only nullopt for undefined tensors
  // (which do not have a device.)
  c10::optional<c10::Device> device_opt_;

  // You get to have eight byte-size fields here, before you
  // should pack this into a bitfield.
  TensorTypeId type_id_;
  bool is_contiguous_ = true;
  bool is_wrapped_number_ = false;

  // NOTE [ Metadata Change for a Detached Tensor ]
  //
  // Normally, a user is allowed to change the tensor metadata
  // (e.g. sizes / strides / storage / storage_offset) of a tensor.
  // However, if the tensor is created by `t1_detached = t1.data` in Python
  // or `t1_detached = t1.detach()` in Python/C++, those changes to the
  // tensor metadata of `t1_detached` will not be propagated back to the
  // original tensor `t1`. In order to make such changes explicitly illegal,
  // we created the `allow_tensor_metadata_change_` flag, to prevent users
  // from changing metadata of the detached tensor and expecting the original
  // tensor to also be updated.
  //
  // NOTE: For a full list of tensor metadata fields, please see
  // `shallow_copy_and_detach()` in TensorImpl and its subclasses to find
  // which fields are copied by value.
bool allow_tensor_metadata_change_ = true; // we decide to keep reserved_ and it will // live in Tensor after the split // The logic is that if Extend() or ReserveSpace() were ever called, // then subsequent Resize()s will not free up Storage. bool reserved_ = false; }; // Note [TensorImpl size constraints] // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ // Changed the size of TensorImpl? If the size went down, good for // you! Adjust the documentation below and the expected size. // Did it go up? Read on... // // Struct size matters. In some production systems at Facebook, we have // 400M live tensors during a training run. Do the math: every 64-bit // word you add to Tensor is an extra 3.2 gigabytes in RAM. // // If you are a Facebook employee, you can check if the run in question // has tipped you over the point using the command here: // https://fburl.com/q5enpv98 // // For reference, we OOMed at 160 bytes (20 words) per TensorImpl. // This is not counting overhead from strides out-of-line allocation and // StorageImpl space and this is from before we inlined sizes and strides // directly into TensorImpl as SmallVectors. // // Our memory usage on 32-bit systems is suboptimal, but we're not checking // for it at the moment (to help avoid rage inducing cycles when the // 32-bit number is wrong). // // Current breakdown: // // vtable pointer // strong refcount TODO: pack these into one word // weak refcount // storage pointer // autograd metadata pointer // version counter (word 0) // version counter (word 1) // PyObject pointer // sizes SmallVector (begin) // sizes SmallVector (end) // sizes SmallVector (capacity) // sizes SmallVector (pre-allocated 0) // sizes SmallVector (pre-allocated 1) // sizes SmallVector (pre-allocated 2) // sizes SmallVector (pre-allocated 3) // sizes SmallVector (pre-allocated 4) // strides SmallVector (begin) // strides SmallVector (end) // strides SmallVector (capacity) // strides SmallVector (pre-allocated 0) // strides SmallVector (pre-allocated 1) // strides SmallVector (pre-allocated 2) // strides SmallVector (pre-allocated 3) // strides SmallVector (pre-allocated 4) // storage offset // numel // data type pointer // (optional) device // miscellaneous bitfield // static_assert(sizeof(void*) != sizeof(int64_t) || // if 64-bit... sizeof(TensorImpl) == sizeof(int64_t) * 29, "You changed the size of TensorImpl on 64-bit arch." "See Note [TensorImpl size constraints] on how to proceed."); } // namespace c10
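// Rough scale of the size constraint above (illustrative arithmetic only,
// using the figures already quoted in the note): at 29 eight-byte words,
// sizeof(TensorImpl) is 232 bytes, so 400M live TensorImpls occupy roughly
// 93 GB before counting Storage or out-of-line allocations; every additional
// eight-byte word adds about 3.2 GB across those 400M tensors.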