mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/63612 This makes Tensor inherit from a new class TensorBase, that provides a subset of Tensor that doesn't directly depend on native_functions.yaml. Code that only includes TensorBase.h will thus not need to be rebuilt every time someone changes an operator signature. Making `Tensor` inherit from this class means that `const TensorBase&` parameters will be callable with an ordinary `Tensor`. I've also made `Tensor` constructible and assignable from `TensorBase` to minimize friction in code mixing the two types. To help enforce that `Tensor.h` and `Functions.h` aren't accidentally included, I've added an error into `Operators.h` if `TORCH_ASSERT_NO_OPERATORS` is defined. We can either set this in the build system for certain folders, or just define it at the top of any file. I've also included an example of manually special-casing the commonly used `contiguous` operator. The inline function's slow path defers to `TensorBase::__dispatch_contiguous` which is defined in `Tensor.cpp`. I've made it so `OptionalTensorRef` is constructible from `TensorBase`, so I can materialize a `Tensor` for use in dispatch without actually increasing its refcount. Test Plan: Imported from OSS Reviewed By: gchanan Differential Revision: D30728580 Pulled By: ezyang fbshipit-source-id: 2cbc8eee08043382ee6904ea8e743b1286921c03
334 lines
10 KiB
C++
334 lines
10 KiB
C++
#include "caffe2/core/tensor.h"
|
|
#include "caffe2/core/tensor_int8.h"
|
|
|
|
#include "caffe2/core/blob_stats.h"
|
|
|
|
#if defined(EXPOSE_C2_OPS) || \
|
|
!defined(CAFFE2_IS_XPLAT_BUILD) && !defined(C10_MOBILE)
|
|
#include <ATen/core/grad_mode.h>
|
|
#include "ATen/core/Tensor.h"
|
|
#endif
|
|
|
|
namespace caffe2 {
|
|
|
|
// NOTE(review): presumably makes caffe2::Tensor known to the TypeMeta type
// registry so that the TypeMeta::Id<Tensor>() lookups later in this file
// resolve -- confirm against the CAFFE_KNOWN_TYPE macro definition.
CAFFE_KNOWN_TYPE(Tensor);
|
|
|
|
// Builds a printer for the tensor called `tensor_name`.
//
// tensor_name: label that is embedded in the metadata string.
// file_name:   when non-empty, output goes to this file (truncated on open)
//              instead of the log.
// limit:       stored in limit_; a value of 0 selects k_limit_default_.
//
// Throws (via CAFFE_ENFORCE) when the output file cannot be opened.
TensorPrinter::TensorPrinter(
    // NOLINTNEXTLINE(modernize-pass-by-value)
    const std::string& tensor_name,
    const std::string& file_name,
    int limit)
    : to_file_(!file_name.empty()),
      limit_(limit != 0 ? limit : k_limit_default_),
      tensor_name_(tensor_name) {
  if (!to_file_) {
    // Nothing to open: output goes to the log.
    return;
  }

  // Each tensor is written to its own file rather than to the screen.
  // NOLINTNEXTLINE(modernize-make-unique)
  log_file_.reset(new std::ofstream(
      file_name, std::ofstream::out | std::ofstream::trunc));
  CAFFE_ENFORCE(
      log_file_->good(),
      "Failed to open TensorPrinter file ",
      file_name,
      ". rdstate() = ",
      log_file_->rdstate());
}
|
|
|
|
// NOLINTNEXTLINE(bugprone-exception-escape)
|
|
TensorPrinter::~TensorPrinter() {
|
|
if (log_file_.get()) {
|
|
log_file_->close();
|
|
}
|
|
}
|
|
|
|
void TensorPrinter::PrintMeta(const Tensor& tensor) {
|
|
if (to_file_) {
|
|
(*log_file_) << MetaStr(tensor) << std::endl;
|
|
} else {
|
|
LOG(INFO) << MetaStr(tensor);
|
|
}
|
|
}
|
|
|
|
std::string TensorPrinter::MetaStr(const Tensor& tensor) {
|
|
std::stringstream meta_stream;
|
|
meta_stream << "Tensor " << tensor_name_ << " of type "
|
|
<< tensor.dtype().name() << ". Dims: (";
|
|
for (const auto dim : tensor.sizes()) {
|
|
meta_stream << dim << ",";
|
|
}
|
|
meta_stream << "): ";
|
|
return meta_stream.str();
|
|
}
|
|
|
|
// Returns the dtype of the Tensor that the type-erased pointer `c` refers to.
// `c` must point at a caffe2::Tensor; it is not checked here.
TypeMeta GetTensorType(const void* c) {
  return static_cast<const Tensor*>(c)->dtype();
}
|
|
|
|
// Returns the dtype of the tensor member `t` of the int8::Int8TensorCPU that
// the type-erased pointer `c` refers to. `c` is not checked here.
TypeMeta GetInt8TensorType(const void* c) {
  const auto& wrapper = *static_cast<const int8::Int8TensorCPU*>(c);
  return wrapper.t.dtype();
}
|
|
|
|
// TODO(jerryzh): Remove
|
|
static CaffeMap<TypeIdentifier, TypeCall> type_call_registry_{
|
|
{TypeMeta::Id<Tensor>(), GetTensorType},
|
|
{TypeMeta::Id<int8::Int8TensorCPU>(), GetInt8TensorType},
|
|
};
|
|
|
|
// Looks up the dtype-extraction function registered for `id`.
// Returns nullptr when nothing is registered for that type.
TypeCall GetTypeCallFunction(TypeIdentifier id) {
  const auto entry = type_call_registry_.find(id);
  if (entry != type_call_registry_.end()) {
    return entry->second;
  }
  return nullptr;
}
|
|
|
|
// Registers `c` as the dtype-extraction function for `id`, replacing any
// function previously stored for the same id.
void RegisterTypeCallFunction(TypeIdentifier id, TypeCall c) {
  auto& slot = type_call_registry_[id];
  slot = c;
}
|
|
|
|
// Forward declaration only: no definition or call of this function appears in
// the visible part of this file.
// NOTE(review): presumably defined in GPU-specific code -- confirm before
// removing.
int GetGPUIDForPointer(const void* ptr);
|
|
|
|
// Reports size information for the Tensor behind the type-erased pointer `c`.
//
// c:        must point at a caffe2::Tensor with a TensorImpl and a storage
//           (each is CHECKed).
// capacity: out-param, receives the byte size of the tensor's storage.
// device:   out-param, filled from the tensor's device via
//           ExtractDeviceOption.
//
// Returns a copy of the tensor's dimensions.
vector<int64_t>
GetTensorInfo(const void* c, size_t* capacity, DeviceOption* device) {
  CHECK(capacity);
  const auto* const tc = static_cast<const Tensor*>(c);
  CHECK(tc);
  // NOLINTNEXTLINE(clang-analyzer-core.CallAndMessage)
  CHECK(tc->unsafeGetTensorImpl());
  CHECK(tc->unsafeGetTensorImpl()->storage().unsafeGetStorageImpl());

  const size_t storage_bytes = tc->storage().nbytes();
  *capacity = storage_bytes;
  ExtractDeviceOption(device, tc->GetDevice());
  return tc->sizes().vec();
}
|
|
|
|
// Same as GetTensorInfo, but `c` points at an int8::Int8TensorCPU; the
// information is taken from its tensor member `t`.
vector<int64_t>
GetInt8TensorInfo(const void* c, size_t* capacity, DeviceOption* device) {
  const auto& wrapper = *static_cast<const int8::Int8TensorCPU*>(c);
  return GetTensorInfo(&wrapper.t, capacity, device);
}
|
|
|
|
// since we only have one tensor, probably need to remove this at some point?
|
|
static CaffeMap<TypeIdentifier, TensorInfoCall> tensor_info_call_registry_{
|
|
{TypeMeta::Id<Tensor>(), GetTensorInfo},
|
|
{TypeMeta::Id<int8::Int8TensorCPU>(), GetInt8TensorInfo},
|
|
};
|
|
|
|
// TODO: Remove this code in a separate diff, since we only have one
|
|
// GetTensorInfo function now
|
|
TensorInfoCall GetTensorInfoFunction(TypeIdentifier id) {
|
|
auto f = tensor_info_call_registry_.find(id);
|
|
if (f == tensor_info_call_registry_.end()) {
|
|
return nullptr;
|
|
}
|
|
return f->second;
|
|
}
|
|
|
|
// Registers `c` as the tensor-info function for `id`, replacing any function
// previously stored for the same id.
void RegisterTensorInfoFunction(TypeIdentifier id, TensorInfoCall c) {
  auto& slot = tensor_info_call_registry_[id];
  slot = c;
}
|
|
|
|
void TensorVectorResize(
|
|
std::vector<Tensor>& tensors,
|
|
int size,
|
|
DeviceType type) {
|
|
tensors.reserve(size);
|
|
for (auto i = 0; i < size; ++i) {
|
|
tensors.emplace_back(type);
|
|
}
|
|
}
|
|
|
|
// Creates a Tensor with shape `dims` on options.device() and requests its
// data buffer for options.dtype() through raw_mutable_data.
Tensor empty(at::IntArrayRef dims, at::TensorOptions options) {
  // TODO: merge this with at::empty after Tensor is merged
  Tensor result(dims, options.device());
  result.raw_mutable_data(options.dtype());
  return result;
}
|
|
|
|
// Makes `*tensor` a tensor of shape `dims` with the dtype and device type
// given by `options`, reusing the existing tensor when possible.
//
// tensor:  target; must not be null (enforced, matching
//          ReinitializeAndCopyFrom).
// dims:    requested shape.
// options: must carry an explicit device (enforced).
//
// Behaviour:
//  - defined tensor, same device type: resized if the shape differs, then
//    either its buffer is re-requested (same dtype) or it is replaced by a
//    fresh tensor (different dtype);
//  - otherwise (undefined tensor or different device type): replaced by a
//    fresh tensor from caffe2::empty.
void ReinitializeTensor(
    Tensor* tensor,
    at::IntArrayRef dims,
    at::TensorOptions options) {
  CAFFE_ENFORCE(options.device_opt() != c10::nullopt);
  // Fail with a clear error instead of dereferencing a null pointer below.
  CAFFE_ENFORCE(tensor != nullptr, "Target tensor ptr is null.");
  if (*tensor) {
    // Note: we don't compare device_id here because of the purpose of
    // ReinitializeTensor: https://github.com/pytorch/pytorch/pull/13147
    // In the original code, we don't have device_id defined, therefore, we
    // should not include device_id in the comparison
    if (tensor->GetDeviceType() == options.device().type()) {
      if (tensor->sizes() != dims) {
        // Resize when the dims don't match
        tensor->Resize(dims);
      }
      if (tensor->dtype() == options.dtype()) {
        tensor->raw_mutable_data();
      } else {
        // This C10 logging API is not thread-safe, and should not be called
        // here. This can lead to a memory corruption in glog.
        // C10_LOG_FIRST_N(WARNING, 1)
        //     << "Changing the data type of Tensor is discouraged."
        //     << " Attempt to change data type from: " << tensor->dtype()
        //     << " to: " << options.dtype();
        // create a new Tensor when the data_type doesn't match
        *tensor = caffe2::empty(dims, options);
      }
      return;
    }
    // create a new Tensor when device doesn't match
  }

  VLOG(1) << "Create new mutable object " << TypeMeta::TypeName<Tensor>()
          << " dims: " << dims;
  *tensor = caffe2::empty(dims, options);
}
|
|
|
|
// Copies `src` into `*t`, first replacing `*t` with a fresh tensor on the
// device type from `options` when `*t` is undefined or lives on a different
// device type.
//
// t:       target; must not be null (enforced).
// options: only its device type is read here.
// src:     source tensor.
// async:   forwarded to Tensor::CopyFrom.
//
// Throws (via CAFFE_ENFORCE) if the target already has an initialized dtype
// that differs from the source dtype.
void ReinitializeAndCopyFrom(
    Tensor* t,
    at::TensorOptions options,
    const Tensor& src,
    bool async) {
  CAFFE_ENFORCE(t != nullptr, "Target tensor ptr is null.");

  const auto device_type = options.device().type();
  const bool reusable = *t && device_type == t->GetDeviceType();
  if (!reusable) {
    *t = Tensor(device_type);
  }

  CAFFE_ENFORCE(
      !t->dtype_initialized() || t->dtype() == src.dtype(),
      "We don't allow a change of data type in ReinitializeAndCopyFrom. Attempt to "
      " change from: ",
      t->dtype(),
      " to: ",
      src.dtype());
  t->CopyFrom(src, async);
}
|
|
|
|
// Validates that the wrapped TensorImpl is usable as a Caffe2 tensor.
//
// Throws std::runtime_error when impl_ is null, and fails a CAFFE_ENFORCE
// when the impl
//  - requires grad while grad mode is enabled (only in builds where the ATen
//    headers are available, see the guard below),
//  - does not have strided layout, or
//  - is not contiguous.
void Tensor::enforce_invariants() {
  if (impl_.get() == nullptr) {
    throw std::runtime_error("TensorImpl with nullptr is not supported");
  }
  // TODO: only check `!impl_->requires_grad()` after Variable and Tensor are
  // merged
  // The guard matches the one used for the <ATen/core/grad_mode.h> include,
  // for CopyFrom and for the at::Tensor constructor in this file, so every
  // build that can wrap an at::Tensor also performs this check.
#if defined(EXPOSE_C2_OPS) || \
    !defined(CAFFE2_IS_XPLAT_BUILD) && !defined(C10_MOBILE)
  CAFFE_ENFORCE(
      !(impl_->requires_grad() && at::GradMode::is_enabled()),
      "Caffe2 tensor wrapper doesn't support autograd variables that require grad");
#endif
  CAFFE_ENFORCE_EQ(
      impl_->layout(),
      at::kStrided,
      "Caffe2 tensor wrapper supports only regular non-sparse tensors");
  CAFFE_ENFORCE(
      impl_->is_contiguous(),
      "Caffe2 tensor wrapper supports only contiguous tensors");
}
|
|
|
|
void Tensor::CopyFrom(const Tensor& src, bool async) {
|
|
// TODO: only check `!impl_->requires_grad()` after Variable and Tensor are
|
|
// merged
|
|
#if defined(EXPOSE_C2_OPS) || \
|
|
!defined(CAFFE2_IS_XPLAT_BUILD) && !defined(C10_MOBILE)
|
|
AT_ASSERT(!(impl_->requires_grad() && at::GradMode::is_enabled()));
|
|
#endif
|
|
AT_ASSERTM(
|
|
src.impl_->is_contiguous(),
|
|
"Right now only copy of contiguous source Tensor is supported.");
|
|
AT_ASSERTM(
|
|
src.impl_->storage_initialized(),
|
|
"Cannot copy from an uninitialized Tensor");
|
|
|
|
if (src.impl_.get() == impl_.get()) {
|
|
return;
|
|
}
|
|
|
|
// Test if we need to allocate a new storage
|
|
// Uninitialized storages are guaranteed to be uniquely owned,
|
|
// so we don't need to swap in dst case.
|
|
// If the dtype changed, we need to reallocate storage.
|
|
if (impl_->dtype() != src.impl_->dtype()) {
|
|
// NB: copy preserves device_type
|
|
// This storage will get initialized by the mutable_data call below.
|
|
impl_->set_storage_and_dtype(
|
|
at::Storage::create_legacy(impl_->device_type()), src.impl_->dtype());
|
|
}
|
|
impl_->Resize(src.impl_->sizes());
|
|
|
|
if (impl_->numel() > 0) {
|
|
if (impl_->dtype().copy()) {
|
|
AT_ASSERTM(
|
|
impl_->device_type() == ::at::DeviceType::CPU,
|
|
"In CopyFrom source and dest tensors must both be CPU for "
|
|
"non-POD copy, but dest tensor was ",
|
|
impl_->device_type());
|
|
AT_ASSERTM(
|
|
src.impl_->device_type() == ::at::DeviceType::CPU,
|
|
"In CopyFrom source and dest tensors must both be CPU for "
|
|
"non-POD copy, but src tensor was ",
|
|
src.impl_->device_type());
|
|
impl_->dtype().copy()(
|
|
src.impl_->data(),
|
|
impl_->raw_mutable_data(impl_->dtype()),
|
|
impl_->numel());
|
|
} else {
|
|
// The following copy uses the current (thread local) stream for copying
|
|
// and also takes the GPU id from the device() field passed in.
|
|
//
|
|
// TODO: Potentially more enforcements are necessary to avoid accidental
|
|
// switch to sync copy if the currently set device is wrong.
|
|
//
|
|
// Specifically, we might need to switch to a different context device
|
|
// here explicitly to avoid relying on user synchronizing things
|
|
// properly.
|
|
//
|
|
// note: raw_mutable_data initializes device here
|
|
void* new_data = impl_->raw_mutable_data(impl_->dtype());
|
|
at::CopyBytes(
|
|
impl_->numel() * impl_->itemsize(),
|
|
src.impl_->data(),
|
|
src.impl_->device(),
|
|
new_data,
|
|
impl_->device(),
|
|
async);
|
|
}
|
|
}
|
|
}
|
|
|
|
#if defined(EXPOSE_C2_OPS) || \
|
|
!defined(CAFFE2_IS_XPLAT_BUILD) && !defined(C10_MOBILE)
|
|
Tensor::Tensor(at::Tensor tensor) : impl_(tensor.unsafeReleaseIntrusivePtr()) {
|
|
enforce_invariants();
|
|
}
|
|
|
|
// Lvalue conversion: returns an at::Tensor wrapping a copy of this tensor's
// impl pointer; this tensor keeps its own pointer.
Tensor::operator at::Tensor() const& {
  auto shared_impl = impl_;
  return at::Tensor::wrap_tensor_impl(std::move(shared_impl));
}
|
|
|
|
// Rvalue conversion: moves this tensor's impl pointer into the returned
// at::Tensor instead of copying it.
Tensor::operator at::Tensor() && {
  auto released_impl = std::move(impl_);
  return at::Tensor::wrap_tensor_impl(std::move(released_impl));
}
|
|
#endif
|
|
|
|
namespace {
|
|
|
|
struct TensorStatGetter : BlobStatGetter {
|
|
size_t sizeBytes(const Blob& blob) const override {
|
|
const auto& tensor = blob.Get<Tensor>();
|
|
auto nbytes = tensor.nbytes();
|
|
if (nbytes > 0 && tensor.IsType<std::string>()) {
|
|
const auto* data = tensor.data<std::string>();
|
|
for (int i = 0; i < tensor.numel(); ++i) {
|
|
nbytes += data[i].size();
|
|
}
|
|
}
|
|
return nbytes;
|
|
}
|
|
};
|
|
REGISTER_BLOB_STAT_GETTER(Tensor, TensorStatGetter);
|
|
} // namespace
|
|
|
|
} // namespace caffe2
|