pytorch/caffe2/core/tensor.cc
Peter Bell d701357d92 Factor out TensorBase that doesn't depend on native operators (#63612)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/63612

This makes Tensor inherit from a new class TensorBase, which provides a subset
of Tensor's interface that doesn't directly depend on native_functions.yaml.
Code that only includes TensorBase.h will thus not need to be rebuilt every
time someone changes an operator signature.

Making `Tensor` inherit from this class means that `const TensorBase&` parameters will be callable
with an ordinary `Tensor`. I've also made `Tensor` constructible and assignable from `TensorBase` to
minimize friction in code mixing the two types.

To help enforce that `Tensor.h` and `Functions.h` aren't accidentally included, I've added an error
into `Operators.h` if `TORCH_ASSERT_NO_OPERATORS` is defined. We can either set this in the build
system for certain folders, or just define it at the top of any file.
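
For illustration, a minimal sketch of opting a file in (assuming it only
needs the `TensorBase` API):

    #define TORCH_ASSERT_NO_OPERATORS
    #include <ATen/core/TensorBase.h>
    // A transitive include of Operators.h (e.g. via Tensor.h or
    // Functions.h) would now fail with the error described above.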

I've also included an example of manually special-casing the commonly used `contiguous` operator.
The inline function's slow path defers to `TensorBase::__dispatch_contiguous` which is defined in
`Tensor.cpp`. I've made it so `OptionalTensorRef` is constructible from `TensorBase`, so I can
materialize a `Tensor` for use in dispatch without actually increasing its refcount.
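
Sketched (simplified; exact signatures may differ), the fast path looks like:

    // In TensorBase.h -- stays inline, no generated operator headers needed:
    TensorBase contiguous(MemoryFormat mf = MemoryFormat::Contiguous) const {
      if (is_contiguous(mf)) {
        return *this;
      }
      return __dispatch_contiguous(mf);  // out-of-line, defined in Tensor.cpp
    }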

Test Plan: Imported from OSS

Reviewed By: gchanan

Differential Revision: D30728580

Pulled By: ezyang

fbshipit-source-id: 2cbc8eee08043382ee6904ea8e743b1286921c03
2021-09-08 13:28:54 -07:00

#include "caffe2/core/tensor.h"
#include "caffe2/core/tensor_int8.h"
#include "caffe2/core/blob_stats.h"
#if defined(EXPOSE_C2_OPS) || \
    !defined(CAFFE2_IS_XPLAT_BUILD) && !defined(C10_MOBILE)
#include <ATen/core/grad_mode.h>
#include "ATen/core/Tensor.h"
#endif
namespace caffe2 {

CAFFE_KNOWN_TYPE(Tensor);

TensorPrinter::TensorPrinter(
    // NOLINTNEXTLINE(modernize-pass-by-value)
    const std::string& tensor_name,
    const std::string& file_name,
    int limit)
    : to_file_(!file_name.empty()),
      limit_(limit ? limit : k_limit_default_),
      tensor_name_(tensor_name) {
  if (to_file_) {
    // We will output to file instead of printing on screen.
    // We will write each individual tensor to its individual file.
    // NOLINTNEXTLINE(modernize-make-unique)
    log_file_.reset(new std::ofstream(
        file_name, std::ofstream::out | std::ofstream::trunc));
    CAFFE_ENFORCE(
        log_file_->good(),
        "Failed to open TensorPrinter file ",
        file_name,
        ". rdstate() = ",
        log_file_->rdstate());
  }
}

// NOLINTNEXTLINE(bugprone-exception-escape)
TensorPrinter::~TensorPrinter() {
  if (log_file_.get()) {
    log_file_->close();
  }
}

void TensorPrinter::PrintMeta(const Tensor& tensor) {
  if (to_file_) {
    (*log_file_) << MetaStr(tensor) << std::endl;
  } else {
    LOG(INFO) << MetaStr(tensor);
  }
}

std::string TensorPrinter::MetaStr(const Tensor& tensor) {
  std::stringstream meta_stream;
  meta_stream << "Tensor " << tensor_name_ << " of type "
              << tensor.dtype().name() << ". Dims: (";
  for (const auto dim : tensor.sizes()) {
    meta_stream << dim << ",";
  }
  meta_stream << "): ";
  return meta_stream.str();
}

TypeMeta GetTensorType(const void* c) {
  const Tensor* tc = static_cast<const Tensor*>(c);
  return tc->dtype();
}

TypeMeta GetInt8TensorType(const void* c) {
  const int8::Int8TensorCPU* int8_tensor =
      static_cast<const int8::Int8TensorCPU*>(c);
  return (int8_tensor->t).dtype();
}
// TODO(jerryzh): Remove
static CaffeMap<TypeIdentifier, TypeCall> type_call_registry_{
    {TypeMeta::Id<Tensor>(), GetTensorType},
    {TypeMeta::Id<int8::Int8TensorCPU>(), GetInt8TensorType},
};

TypeCall GetTypeCallFunction(TypeIdentifier id) {
  auto f = type_call_registry_.find(id);
  if (f == type_call_registry_.end()) {
    return nullptr;
  }
  return f->second;
}

void RegisterTypeCallFunction(TypeIdentifier id, TypeCall c) {
  type_call_registry_[id] = c;
}

int GetGPUIDForPointer(const void* ptr);

vector<int64_t>
GetTensorInfo(const void* c, size_t* capacity, DeviceOption* device) {
  CHECK(capacity);
  const Tensor* tc = static_cast<const Tensor*>(c);
  CHECK(tc);
  // NOLINTNEXTLINE(clang-analyzer-core.CallAndMessage)
  CHECK(tc->unsafeGetTensorImpl());
  CHECK(tc->unsafeGetTensorImpl()->storage().unsafeGetStorageImpl());
  *capacity = tc->storage().nbytes();
  ExtractDeviceOption(device, tc->GetDevice());
  return tc->sizes().vec();
}

vector<int64_t>
GetInt8TensorInfo(const void* c, size_t* capacity, DeviceOption* device) {
  const int8::Int8TensorCPU* int8_tensor =
      static_cast<const int8::Int8TensorCPU*>(c);
  return GetTensorInfo(&(int8_tensor->t), capacity, device);
}

// Since we only have one tensor type, this can probably be removed at some
// point.
static CaffeMap<TypeIdentifier, TensorInfoCall> tensor_info_call_registry_{
    {TypeMeta::Id<Tensor>(), GetTensorInfo},
    {TypeMeta::Id<int8::Int8TensorCPU>(), GetInt8TensorInfo},
};

// TODO: Remove this code in a separate diff, since we only have one
// GetTensorInfo function now
TensorInfoCall GetTensorInfoFunction(TypeIdentifier id) {
  auto f = tensor_info_call_registry_.find(id);
  if (f == tensor_info_call_registry_.end()) {
    return nullptr;
  }
  return f->second;
}

void RegisterTensorInfoFunction(TypeIdentifier id, TensorInfoCall c) {
  tensor_info_call_registry_[id] = c;
}
void TensorVectorResize(
    std::vector<Tensor>& tensors,
    int size,
    DeviceType type) {
  tensors.reserve(size);
  for (auto i = 0; i < size; ++i) {
    tensors.emplace_back(type);
  }
}
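
// Usage sketch (hypothetical): allocate an uninitialized 2x3 float tensor:
//   auto t = caffe2::empty({2, 3}, at::dtype<float>().device(CPU));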
Tensor empty(at::IntArrayRef dims, at::TensorOptions options) {
  // TODO: merge this with at::empty after Tensor is merged
  auto tensor = Tensor(dims, options.device());
  tensor.raw_mutable_data(options.dtype());
  return tensor;
}
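
// Reuses the existing allocation when both device type and dtype match,
// resizes in place when only the dims differ, and otherwise replaces *tensor
// with a freshly allocated one.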
void ReinitializeTensor(
    Tensor* tensor,
    at::IntArrayRef dims,
    at::TensorOptions options) {
  CAFFE_ENFORCE(options.device_opt() != c10::nullopt);
  if (*tensor) {
    // Note: we don't compare device_id here because of the purpose of
    // ReinitializeTensor: https://github.com/pytorch/pytorch/pull/13147
    // In the original code, we don't have device_id defined, therefore, we
    // should not include device_id in the comparison
    if (tensor->GetDeviceType() == options.device().type()) {
      if (tensor->sizes() != dims) {
        // Resize when the dims don't match
        tensor->Resize(dims);
      }
      if (tensor->dtype() == options.dtype()) {
        tensor->raw_mutable_data();
      } else {
        // This C10 logging API is not thread-safe and should not be called
        // here; it can lead to memory corruption in glog.
        // C10_LOG_FIRST_N(WARNING, 1)
        //     << "Changing the data type of Tensor is discouraged."
        //     << " Attempt to change data type from: " << tensor->dtype()
        //     << " to: " << options.dtype();
        // Create a new Tensor when the data type doesn't match.
        *tensor = caffe2::empty(dims, options);
      }
      return;
    }
    // Create a new Tensor when the device doesn't match.
  }
  VLOG(1) << "Create new mutable object " << TypeMeta::TypeName<Tensor>()
          << " dims: " << dims;
  *tensor = caffe2::empty(dims, options);
}

void ReinitializeAndCopyFrom(
    Tensor* t,
    at::TensorOptions options,
    const Tensor& src,
    bool async) {
  auto device_type = options.device().type();
  CAFFE_ENFORCE(t != nullptr, "Target tensor ptr is null.");
  if (!*t || device_type != t->GetDeviceType()) {
    *t = Tensor(device_type);
  }
  CAFFE_ENFORCE(
      !t->dtype_initialized() || t->dtype() == src.dtype(),
      "We don't allow a change of data type in ReinitializeAndCopyFrom. "
      "Attempt to change from: ",
      t->dtype(),
      " to: ",
      src.dtype());
  t->CopyFrom(src, async);
}

void Tensor::enforce_invariants() {
  if (impl_.get() == nullptr) {
    throw std::runtime_error("TensorImpl with nullptr is not supported");
  }
  // TODO: only check `!impl_->requires_grad()` after Variable and Tensor are
  // merged
#if !defined(CAFFE2_IS_XPLAT_BUILD) && !defined(C10_MOBILE)
  CAFFE_ENFORCE(
      !(impl_->requires_grad() && at::GradMode::is_enabled()),
      "Caffe2 tensor wrapper doesn't support autograd variables that require grad");
#endif
  CAFFE_ENFORCE_EQ(
      impl_->layout(),
      at::kStrided,
      "Caffe2 tensor wrapper supports only regular non-sparse tensors");
  CAFFE_ENFORCE(
      impl_->is_contiguous(),
      "Caffe2 tensor wrapper supports only contiguous tensors");
}

void Tensor::CopyFrom(const Tensor& src, bool async) {
  // TODO: only check `!impl_->requires_grad()` after Variable and Tensor are
  // merged
#if defined(EXPOSE_C2_OPS) || \
    !defined(CAFFE2_IS_XPLAT_BUILD) && !defined(C10_MOBILE)
  AT_ASSERT(!(impl_->requires_grad() && at::GradMode::is_enabled()));
#endif
  AT_ASSERTM(
      src.impl_->is_contiguous(),
      "Right now only copy of contiguous source Tensor is supported.");
  AT_ASSERTM(
      src.impl_->storage_initialized(),
      "Cannot copy from an uninitialized Tensor");
  if (src.impl_.get() == impl_.get()) {
    return;
  }
  // Test if we need to allocate a new storage.
  // Uninitialized storages are guaranteed to be uniquely owned, so we don't
  // need to swap in the dst case. If the dtype changed, we need to
  // reallocate storage.
  if (impl_->dtype() != src.impl_->dtype()) {
    // NB: copy preserves device_type.
    // This storage will get initialized by the mutable_data call below.
    impl_->set_storage_and_dtype(
        at::Storage::create_legacy(impl_->device_type()), src.impl_->dtype());
  }
  impl_->Resize(src.impl_->sizes());
  if (impl_->numel() > 0) {
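    // A non-null copy() on the TypeMeta marks a non-POD dtype (e.g.
    // std::string) that must be copied element-wise; that path is CPU-only.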
    if (impl_->dtype().copy()) {
      AT_ASSERTM(
          impl_->device_type() == ::at::DeviceType::CPU,
          "In CopyFrom source and dest tensors must both be CPU for "
          "non-POD copy, but dest tensor was ",
          impl_->device_type());
      AT_ASSERTM(
          src.impl_->device_type() == ::at::DeviceType::CPU,
          "In CopyFrom source and dest tensors must both be CPU for "
          "non-POD copy, but src tensor was ",
          src.impl_->device_type());
      impl_->dtype().copy()(
          src.impl_->data(),
          impl_->raw_mutable_data(impl_->dtype()),
          impl_->numel());
    } else {
      // The following copy uses the current (thread-local) stream for copying
      // and also takes the GPU id from the device() field passed in.
      //
      // TODO: Potentially more enforcements are necessary to avoid accidental
      // switch to sync copy if the currently set device is wrong.
      //
      // Specifically, we might need to switch to a different context device
      // here explicitly to avoid relying on the user synchronizing things
      // properly.
      //
      // Note: raw_mutable_data initializes the device here.
      void* new_data = impl_->raw_mutable_data(impl_->dtype());
      at::CopyBytes(
          impl_->numel() * impl_->itemsize(),
          src.impl_->data(),
          src.impl_->device(),
          new_data,
          impl_->device(),
          async);
    }
  }
}

#if defined(EXPOSE_C2_OPS) || \
    !defined(CAFFE2_IS_XPLAT_BUILD) && !defined(C10_MOBILE)
Tensor::Tensor(at::Tensor tensor) : impl_(tensor.unsafeReleaseIntrusivePtr()) {
  enforce_invariants();
}

Tensor::operator at::Tensor() const& {
  return at::Tensor::wrap_tensor_impl(impl_);
}
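
// The rvalue conversion moves impl_ out of this wrapper, handing the
// TensorImpl to the at::Tensor without touching the refcount.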
Tensor::operator at::Tensor() && {
  return at::Tensor::wrap_tensor_impl(std::move(impl_));
}
#endif

namespace {
struct TensorStatGetter : BlobStatGetter {
  size_t sizeBytes(const Blob& blob) const override {
    const auto& tensor = blob.Get<Tensor>();
    auto nbytes = tensor.nbytes();
    if (nbytes > 0 && tensor.IsType<std::string>()) {
      const auto* data = tensor.data<std::string>();
      for (int i = 0; i < tensor.numel(); ++i) {
        nbytes += data[i].size();
      }
    }
    return nbytes;
  }
};
REGISTER_BLOB_STAT_GETTER(Tensor, TensorStatGetter);
} // namespace
} // namespace caffe2