Mirror of https://github.com/pytorch/pytorch.git, synced 2025-10-20 12:54:11 +08:00
Summary: #19975 was separated into 2 PRs. This one introduces a MemoryFormat argument to the `x.is_contiguous(memory_format=torch.channels_last)` and `y = x.contiguous(memory_format=torch.channels_last)` functions. At this moment both functions just operate on strides and don't store any tensor state. (Original RFC #19092)

-----

Expands the functionality of two tensor functions, `.is_contiguous` and `.contiguous` (both Python and C++ API). Note: we had several complaints about the `.to(memory_format)` function and decided not to support it.

1. `.contiguous` now supports an optional keyword-only argument, `memory_format`, which can be either `torch.contiguous_format` or `torch.channels_last`.

 - Using `torch.contiguous_format` preserves the existing `.contiguous()` behavior.

 - Calling `x.contiguous(memory_format=torch.channels_last)` returns a new tensor which maintains the same semantic layout (NCHW) but has a different memory allocation pattern. `x.contiguous(memory_format=torch.channels_last)` expects the input tensor to be 3d, 4d or 5d, and fails otherwise.

2. `.is_contiguous` now supports an optional keyword-only argument, `memory_format`, which can be either `torch.contiguous_format` or `torch.channels_last`.

 - `x.is_contiguous(memory_format=torch.contiguous_format)` preserves the same functionality as `x.is_contiguous()` and remains unchanged.

 - `x.is_contiguous(memory_format=torch.channels_last)` returns true if A) the input tensor is contiguous in memory AND B) it is allocated in memory in NHWC (or similar for 3d, 5d) format.

Note: by the end of phase one, `x.is_contiguous(memory_format=torch.channels_last)` will calculate the state of the Tensor on every call. This functionality is going to be updated later.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/20455

Differential Revision: D15341577

Pulled By: VitalyFedyunin

fbshipit-source-id: bbb6b4159a8a49149110ad321109a3742383185d
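A minimal usage sketch of the new API through ATen's C++ interface (illustrative only; it assumes the Tensor methods mirror the native functions added in this file and that `at::MemoryFormat` exposes the `Contiguous` / `ChannelsLast` values used below):

    #include <ATen/ATen.h>

    int main() {
      at::Tensor x = at::randn({2, 3, 4, 5});                       // NCHW tensor
      bool a = x.is_contiguous();                                   // true: classic check, unchanged
      bool b = x.is_contiguous(at::MemoryFormat::ChannelsLast);     // false for a freshly allocated NCHW tensor
      at::Tensor y = x.contiguous(at::MemoryFormat::ChannelsLast);  // same NCHW semantics, NHWC allocation
      bool c = y.is_contiguous(at::MemoryFormat::ChannelsLast);     // true
      return 0;
    }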
90 lines
2.9 KiB
C++
#include <ATen/ATen.h>
#include <ATen/NativeFunctions.h>
#include <ATen/WrapDimUtils.h>
#include <ATen/detail/CUDAHooksInterface.h>

#include <ATen/Config.h>

namespace at {
namespace native {

bool is_same_size(const Tensor& self, const Tensor& other) {
  return self.sizes().equals(other.sizes());
}

int64_t size(const Tensor& self, int64_t dim) {
  // false is passed to maybe_wrap_dim so behavior is identical to array access (but with wrapping)
  dim = maybe_wrap_dim(dim, self.dim(), false);
  return self.sizes()[dim];
}

int64_t stride(const Tensor& self, int64_t dim) {
  // false is passed to maybe_wrap_dim so behavior is identical to array access (but with wrapping)
  dim = maybe_wrap_dim(dim, self.dim(), false);
  return self.strides()[dim];
}
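
// Example (illustrative, not part of the original file): maybe_wrap_dim maps
// negative indices the way Python does, so for a 4-d tensor, size(t, -1)
// wraps -1 to dim 3 and returns t.sizes()[3]. Passing false for the third
// argument (wrap_scalar) means a 0-d tensor is rejected rather than being
// treated as 1-d.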

bool cudnn_is_acceptable(const Tensor& self) {
  if (!globalContext().userEnabledCuDNN()) return false;
  if (!self.is_cuda()) return false;
  auto st = self.scalar_type();
  if (!(st == kDouble || st == kFloat || st == kHalf)) return false;
  if (!detail::getCUDAHooks().compiledWithCuDNN()) return false;
  // cuDNN functions like grid_sampler return CUDNN_STATUS_BAD_PARAM on empty
  // tensors. Maybe some cuDNN functions actually support empty tensors, but
  // native/THNN kernels shouldn't be much slower because the output is also
  // likely empty.
  if (self.numel() == 0) return false;
  // NB: In the old Python code, there was also a test to see if the
  // cuDNN library was actually dynamically linked or not. I'm not
  // sure if we can actually test this.
  return true;
}

Tensor detach(const Tensor& self) {
  // this just exists to give us a hook in VariableType and an entry in Declarations.yaml
  AT_ERROR("detach is not implemented for Tensor");
  return self;
}

Tensor & detach_(Tensor & self) {
  // this just exists to give us a hook in VariableType and an entry in Declarations.yaml
  AT_ERROR("detach_ is not implemented for Tensor");
  return self;
}

Tensor contiguous(const Tensor & self) {
  return contiguous(self, MemoryFormat::Contiguous);
}

Tensor contiguous(const Tensor& self, MemoryFormat memory_format) {
  if (self.is_contiguous(memory_format)) {
    return self;
  }
  auto result = at::empty_like(self);
  switch (memory_format) {
    case MemoryFormat::Any: // Back compatibility with old defaults
    case MemoryFormat::Contiguous: {
      break;
    }
    case MemoryFormat::ChannelsLast: {
      AT_CHECK(
          result.dim() == 4,
          "required rank 4 tensor to use channels_last format");
      std::vector<int64_t> newStrides(self.dim());
      auto sizes = result.sizes();
      newStrides[1] = 1;                         // channels become the innermost dim
      newStrides[3] = sizes[1];                  // W stride = C
      newStrides[2] = newStrides[3] * sizes[3];  // H stride = C * W
      newStrides[0] = newStrides[2] * sizes[2];  // N stride = C * W * H
      result = result.as_strided(sizes, newStrides);
      break;
    }
    default: {
      AT_CHECK(false, "unsupported memory format");
    }
  }
  return result.copy_(self);
}
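
// Worked example (illustrative, not part of the original file): for an NCHW
// tensor of sizes {2, 3, 4, 5}, the ChannelsLast branch above computes
//   newStrides[1] = 1
//   newStrides[3] = C     = 3
//   newStrides[2] = C*W   = 15
//   newStrides[0] = C*W*H = 60
// so element (n, c, h, w) sits at offset n*60 + h*15 + w*3 + c: an NHWC
// memory layout, while the tensor still reports NCHW sizes.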

} // namespace native
} // namespace at