New in StaticContext returns at::DataPtr (#12029)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/12029

In order to remove the New() function from StaticContext (as a step toward removing StaticContext itself) and converge on the Allocator design, we first change the return type of New() to at::DataPtr.

Reviewed By: ezyang

Differential Revision: D9889990

fbshipit-source-id: 3257c763530b987025f428741bdd2e089d11bad4
This commit is contained in:
Jerry Zhang
2018-10-03 19:06:54 -07:00
committed by Facebook Github Bot
parent bcc2a0599b
commit 74dc4460eb
20 changed files with 179 additions and 132 deletions

View File

@ -43,6 +43,9 @@ class DataPtr {
void* release_context() { void* release_context() {
return ptr_.release_context(); return ptr_.release_context();
} }
// Forwards to ptr_.move_context() and returns its result: an rvalue
// reference to the owning std::unique_ptr<void, DeleterFnPtr>. Because a
// reference is returned, nothing is transferred until the caller actually
// move-constructs or move-assigns from it.
std::unique_ptr<void, DeleterFnPtr>&& move_context() {
return ptr_.move_context();
}
operator bool() const { operator bool() const {
return static_cast<bool>(ptr_); return static_cast<bool>(ptr_);
} }
@ -50,6 +53,9 @@ class DataPtr {
T* cast_context(DeleterFnPtr expected_deleter) const { T* cast_context(DeleterFnPtr expected_deleter) const {
return ptr_.cast_context<T>(expected_deleter); return ptr_.cast_context<T>(expected_deleter);
} }
// Returns the deleter function pointer reported by the wrapped ptr_.
DeleterFnPtr get_deleter() const {
return ptr_.get_deleter();
}
Device device() const { Device device() const {
return device_; return device_;
} }

View File

@ -109,4 +109,20 @@ const Storage& TensorImpl::storage() const {
return storage_; return storage_;
} }
static void deletePlacementDeleteContext(void* ptr) {
delete static_cast<PlacementDeleteContext*>(ptr);
}
// Wraps an existing DataPtr in a PlacementDeleteContext.
//
// The returned DataPtr exposes the same raw data pointer as `data_ptr`, but
// its context is a newly allocated PlacementDeleteContext that takes over
// `data_ptr` together with `placement_dtor` and `size`. That context is
// released through deletePlacementDeleteContext. `device` is passed through
// to the returned DataPtr unchanged.
at::DataPtr PlacementDeleteContext::makeDataPtr(
    at::DataPtr&& data_ptr,
    PlacementDtor placement_dtor,
    size_t size,
    at::Device device) {
  // Read the raw pointer first: data_ptr is moved from on the next line.
  auto* raw_data = data_ptr.get();
  auto* context =
      new PlacementDeleteContext(std::move(data_ptr), placement_dtor, size);
  return at::DataPtr(raw_data, context, &deletePlacementDeleteContext, device);
}
} // namespace at } // namespace at

View File

@ -3,13 +3,13 @@
#include <atomic> #include <atomic>
#include <memory> #include <memory>
#include "ATen/core/Storage.h" #include <ATen/core/Backend.h>
#include "ATen/core/optional.h" #include <ATen/core/LegacyTypeDispatch.h>
#include "ATen/core/TensorTypeId.h" #include <ATen/core/Storage.h>
#include "ATen/core/TensorTypeIdRegistration.h" #include <ATen/core/TensorTypeId.h>
#include "ATen/core/LegacyTypeDispatch.h" #include <ATen/core/TensorTypeIdRegistration.h>
#include "ATen/core/Backend.h" #include <ATen/core/context_base.h>
#include "ATen/core/context_base.h" #include <ATen/core/optional.h>
#include "caffe2/core/allocator.h" #include "caffe2/core/allocator.h"
#include "caffe2/core/common.h" #include "caffe2/core/common.h"
@ -99,6 +99,39 @@ inline int canonical_axis_index_(int axis_index, int ndims) {
return axis_index; return axis_index;
} }
using PlacementDtor = void (*)(void*, size_t);
/*
* A Context that will call extra placement deleter during
* deconstruction.
*
* Accept a already constructed DataPtr and store it as member
* during destruction, we'll call extra deleter on the underlying
* data pointer before the DataPtr is destructed.
* `data_ptr_` owns the memory.
*/
struct CAFFE2_API PlacementDeleteContext {
at::DataPtr data_ptr_;
PlacementDtor placement_dtor_;
size_t size_;
PlacementDeleteContext(
at::DataPtr&& data_ptr,
PlacementDtor placement_dtor,
size_t size)
: data_ptr_(std::move(data_ptr)),
placement_dtor_(placement_dtor),
size_(size) {}
static at::DataPtr makeDataPtr(
at::DataPtr&& data_ptr,
PlacementDtor placement_dtor,
size_t size,
at::Device device);
~PlacementDeleteContext() {
placement_dtor_(data_ptr_.get(), size_);
// original memory will be freed when data_ptr_ is destructed
}
};
/** /**
* The low-level representation of a tensor, which contains a storage * The low-level representation of a tensor, which contains a storage
* (which contains the actual data) and metadata (e.g., sizes and strides) * (which contains the actual data) and metadata (e.g., sizes and strides)
@ -734,29 +767,19 @@ struct CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
// destruction procedure. // destruction procedure.
auto size = numel_; auto size = numel_;
auto dtor = data_type_.dtor(); auto dtor = data_type_.dtor();
void* ptr; auto data_ptr = GetStaticContext()->New(
at::DeleterFnPtr deleter;
auto ptr_and_deleter = GetStaticContext()->New(
numel_ * storage_.itemsize()); // Removing this can get rid of numel_ * storage_.itemsize()); // Removing this can get rid of
// InefficientStdFunctionContext // InefficientStdFunctionContext
ptr = ptr_and_deleter.first; storage_.set_data_ptr(PlacementDeleteContext::makeDataPtr(
deleter = ptr_and_deleter.second; std::move(data_ptr),
storage_.set_data_ptr(at::InefficientStdFunctionContext::makeDataPtr( dtor,
ptr, size,
[size, dtor, deleter](void* local_ptr) -> void {
dtor(local_ptr, size);
deleter(local_ptr);
},
at::Device(storage_.device_type()))); at::Device(storage_.device_type())));
data_type_.ctor()(storage_.data(), numel_); data_type_.ctor()(storage_.data(), numel_);
} else { } else {
// For fundamental type, new and delete is easier. // For fundamental type, new and delete is easier.
auto ptr_and_deleter = storage_.set_data_ptr(
GetStaticContext()->New(numel_ * storage_.itemsize()); GetStaticContext()->New(numel_ * storage_.itemsize()));
storage_.set_data_ptr(at::InefficientStdFunctionContext::makeDataPtr(
ptr_and_deleter.first,
ptr_and_deleter.second,
at::Device(storage_.device_type())));
} }
storage_.set_numel(numel_); storage_.set_numel(numel_);
AT_ASSERT(storage_offset_ == 0); // because we just reallocated AT_ASSERT(storage_offset_ == 0); // because we just reallocated

View File

@ -63,6 +63,10 @@ class UniqueVoidPtr {
void* release_context() { void* release_context() {
return ctx_.release(); return ctx_.release();
} }
// Exposes the owning context ctx_ as an rvalue reference. std::move is only
// a cast: ctx_ is emptied once the caller move-constructs or move-assigns
// from the returned reference, not by this call itself.
std::unique_ptr<void, DeleterFnPtr>&& move_context() {
return std::move(ctx_);
}
template <typename T> template <typename T>
T* cast_context(DeleterFnPtr expected_deleter) const { T* cast_context(DeleterFnPtr expected_deleter) const {
if (get_deleter() != expected_deleter) if (get_deleter() != expected_deleter)

View File

@ -7,6 +7,7 @@
#include <unordered_map> #include <unordered_map>
#include <ATen/core/ATenGeneral.h> #include <ATen/core/ATenGeneral.h>
#include <ATen/core/Allocator.h>
#include <ATen/core/Device.h> #include <ATen/core/Device.h>
#include <ATen/core/Error.h> #include <ATen/core/Error.h>
#include <ATen/core/UniqueVoidPtr.h> #include <ATen/core/UniqueVoidPtr.h>
@ -30,7 +31,7 @@ class CAFFE2_API BaseStaticContext {
public: public:
virtual ~BaseStaticContext() noexcept {} virtual ~BaseStaticContext() noexcept {}
virtual std::pair<void*, DeleterFnPtr> New(size_t nbytes) const = 0; virtual at::DataPtr New(size_t nbytes) const = 0;
virtual DeviceType GetDeviceType() = 0; virtual DeviceType GetDeviceType() = 0;

View File

@ -190,9 +190,8 @@ BENCHMARK(BM_OperatorCreationCUDA);
static void BM_RawAllocDeallocCPU(benchmark::State& state) { static void BM_RawAllocDeallocCPU(benchmark::State& state) {
while (state.KeepRunning()) { while (state.KeepRunning()) {
// Allocating only 1 byte in order to measure the overhead. // Allocating only 1 byte in order to measure the overhead.
auto ptr_and_deleter = GetCPUAllocator()->New(1); auto data_ptr = GetCPUAllocator()->allocate(1);
// Deallocate. // Deallocated when it's out of scope
ptr_and_deleter.second(ptr_and_deleter.first);
} }
} }
BENCHMARK(BM_RawAllocDeallocCPU); BENCHMARK(BM_RawAllocDeallocCPU);

View File

@ -16,16 +16,17 @@ namespace caffe2 {
void NoDelete(void*) {} void NoDelete(void*) {}
static std::unique_ptr<CPUAllocator> g_cpu_allocator(new DefaultCPUAllocator()); static std::unique_ptr<at::Allocator> g_cpu_allocator(
CPUAllocator* GetCPUAllocator() { new DefaultCPUAllocator());
at::Allocator* GetCPUAllocator() {
return g_cpu_allocator.get(); return g_cpu_allocator.get();
} }
void SetCPUAllocator(CPUAllocator* alloc) { void SetCPUAllocator(at::Allocator* alloc) {
g_cpu_allocator.reset(alloc); g_cpu_allocator.reset(alloc);
} }
MemoryAllocationReporter CPUStaticContext::reporter_; MemoryAllocationReporter DefaultCPUAllocator::reporter_;
void MemoryAllocationReporter::New(void* ptr, size_t nbytes) { void MemoryAllocationReporter::New(void* ptr, size_t nbytes) {
std::lock_guard<std::mutex> guard(mutex_); std::lock_guard<std::mutex> guard(mutex_);

View File

@ -4,6 +4,7 @@
#include <cstring> #include <cstring>
#include <unordered_map> #include <unordered_map>
#include <ATen/core/Allocator.h>
#include "caffe2/core/logging.h" #include "caffe2/core/logging.h"
#include "caffe2/core/numa.h" #include "caffe2/core/numa.h"
@ -42,10 +43,10 @@ class CAFFE2_API MemoryAllocationReporter {
size_t allocated_; size_t allocated_;
}; };
struct CAFFE2_API DefaultCPUAllocator final : CPUAllocator { struct CAFFE2_API DefaultCPUAllocator final : at::Allocator {
DefaultCPUAllocator() {} DefaultCPUAllocator() {}
~DefaultCPUAllocator() override {} ~DefaultCPUAllocator() override {}
std::pair<void*, MemoryDeleter> New(size_t nbytes) override { at::DataPtr allocate(size_t nbytes) const override {
void* data = nullptr; void* data = nullptr;
#ifdef __ANDROID__ #ifdef __ANDROID__
data = memalign(gCaffe2Alignment, nbytes); data = memalign(gCaffe2Alignment, nbytes);
@ -60,7 +61,11 @@ struct CAFFE2_API DefaultCPUAllocator final : CPUAllocator {
if (FLAGS_caffe2_cpu_allocator_do_zero_fill) { if (FLAGS_caffe2_cpu_allocator_do_zero_fill) {
memset(data, 0, nbytes); memset(data, 0, nbytes);
} }
return {data, Delete}; if (FLAGS_caffe2_report_cpu_memory_usage) {
reporter_.New(data, nbytes);
return {data, data, &ReportAndDelete, at::Device(at::DeviceType::CPU)};
}
return {data, data, &Delete, at::Device(at::DeviceType::CPU)};
} }
#ifdef _MSC_VER #ifdef _MSC_VER
@ -73,16 +78,27 @@ struct CAFFE2_API DefaultCPUAllocator final : CPUAllocator {
} }
#endif #endif
MemoryDeleter GetDeleter() override { static void ReportAndDelete(void* ptr) {
return Delete; reporter_.Delete(ptr);
Delete(ptr);
} }
// Returns the deleter that matches the current memory-reporting flag:
// ReportAndDelete when FLAGS_caffe2_report_cpu_memory_usage is set, plain
// Delete otherwise.
at::DeleterFnPtr raw_deleter() const override {
  return FLAGS_caffe2_report_cpu_memory_usage ? &ReportAndDelete : &Delete;
}
protected:
static MemoryAllocationReporter reporter_;
}; };
// Get the CPU Allocator. // Get the CPU Allocator.
CAFFE2_API CPUAllocator* GetCPUAllocator(); CAFFE2_API at::Allocator* GetCPUAllocator();
// Sets the CPU allocator to the given allocator: the caller gives away the // Sets the CPU allocator to the given allocator: the caller gives away the
// ownership of the pointer. // ownership of the pointer.
CAFFE2_API void SetCPUAllocator(CPUAllocator* alloc); CAFFE2_API void SetCPUAllocator(at::Allocator* alloc);
} // namespace caffe2 } // namespace caffe2

View File

@ -13,8 +13,8 @@
#include "caffe2/core/typeid.h" #include "caffe2/core/typeid.h"
#include "caffe2/proto/caffe2_pb.h" #include "caffe2/proto/caffe2_pb.h"
#include "ATen/core/ATenCoreTest.h" #include <ATen/core/ATenCoreTest.h>
#include "ATen/core/ArrayRef.h" #include <ATen/core/ArrayRef.h>
CAFFE2_DECLARE_bool(caffe2_report_cpu_memory_usage); CAFFE2_DECLARE_bool(caffe2_report_cpu_memory_usage);
@ -85,7 +85,7 @@ class CAFFE2_API CPUContext final : public BaseContext {
return *random_generator_.get(); return *random_generator_.get();
} }
inline static std::pair<void*, MemoryDeleter> New(size_t nbytes) { inline static at::DataPtr New(size_t nbytes) {
return StaticContext()->New(nbytes); return StaticContext()->New(nbytes);
} }
@ -185,13 +185,8 @@ inline void CPUContext::CopyBytes<CPUContext, CPUContext>(
// TODO(jerryzh): merge CPUStaticContext with Allocator // TODO(jerryzh): merge CPUStaticContext with Allocator
class CAFFE2_API CPUStaticContext : public BaseStaticContext { class CAFFE2_API CPUStaticContext : public BaseStaticContext {
public: public:
std::pair<void*, MemoryDeleter> New(size_t nbytes) const override { at::DataPtr New(size_t nbytes) const override {
auto data_and_deleter = GetCPUAllocator()->New(nbytes); return GetCPUAllocator()->allocate(nbytes);
if (FLAGS_caffe2_report_cpu_memory_usage) {
reporter_.New(data_and_deleter.first, nbytes);
data_and_deleter.second = ReportAndDelete;
}
return data_and_deleter;
} }
DeviceType GetDeviceType() override { DeviceType GetDeviceType() override {
@ -204,14 +199,6 @@ class CAFFE2_API CPUStaticContext : public BaseStaticContext {
device->set_device_type(TypeToProto(GetDeviceType())); device->set_device_type(TypeToProto(GetDeviceType()));
} }
protected:
static MemoryAllocationReporter reporter_;
private:
static void ReportAndDelete(void* ptr) {
reporter_.Delete(ptr);
GetCPUAllocator()->GetDeleter()(ptr);
}
}; };
} // namespace caffe2 } // namespace caffe2

View File

@ -314,7 +314,8 @@ void TrackMemoryAlloc(size_t nbytes) {
} }
} }
std::pair<void*, MemoryDeleter> CUDAStaticContext::New(size_t nbytes) const { // TODO: wrap this function in DefaultCUDAAllocator
at::DataPtr CUDAStaticContext::New(size_t nbytes) const {
// Lock the mutex // Lock the mutex
std::lock_guard<std::mutex> lock(CUDAContext::mutex()); std::lock_guard<std::mutex> lock(CUDAContext::mutex());
// A one-time caffe2 cuda initializer. // A one-time caffe2 cuda initializer.
@ -331,7 +332,7 @@ std::pair<void*, MemoryDeleter> CUDAStaticContext::New(size_t nbytes) const {
g_size_map[ptr] = nbytes; g_size_map[ptr] = nbytes;
g_cuda_device_affiliation[ptr] = CaffeCudaGetDevice(); g_cuda_device_affiliation[ptr] = CaffeCudaGetDevice();
} }
return {ptr, Delete}; return {ptr, ptr, Delete, at::Device(CUDA)};
case CudaMemoryPoolType::CUB: case CudaMemoryPoolType::CUB:
CUDA_ENFORCE(g_cub_allocator->DeviceAllocate(&ptr, nbytes)); CUDA_ENFORCE(g_cub_allocator->DeviceAllocate(&ptr, nbytes));
g_cuda_device_affiliation[ptr] = CaffeCudaGetDevice(); g_cuda_device_affiliation[ptr] = CaffeCudaGetDevice();
@ -340,16 +341,16 @@ std::pair<void*, MemoryDeleter> CUDAStaticContext::New(size_t nbytes) const {
if (FLAGS_caffe2_gpu_memory_tracking) { if (FLAGS_caffe2_gpu_memory_tracking) {
g_size_map[ptr] = nbytes; g_size_map[ptr] = nbytes;
} }
return {ptr, Delete}; return {ptr, ptr, Delete, at::Device(CUDA)};
case CudaMemoryPoolType::THC: case CudaMemoryPoolType::THC:
CUDA_ENFORCE(g_thc_allocator->Alloc(&ptr, nbytes, 0 /* stream */)); CUDA_ENFORCE(g_thc_allocator->Alloc(&ptr, nbytes, 0 /* stream */));
if (FLAGS_caffe2_gpu_memory_tracking) { if (FLAGS_caffe2_gpu_memory_tracking) {
g_size_map[ptr] = nbytes; g_size_map[ptr] = nbytes;
g_cuda_device_affiliation[ptr] = CaffeCudaGetDevice(); g_cuda_device_affiliation[ptr] = CaffeCudaGetDevice();
} }
return {ptr, Delete}; return {ptr, ptr, Delete, at::Device(CUDA)};
} }
return {nullptr, Delete}; return {nullptr, nullptr, Delete, at::Device(CUDA)};
} }
void CUDAStaticContext::Delete(void* ptr) { void CUDAStaticContext::Delete(void* ptr) {

View File

@ -223,7 +223,7 @@ class CAFFE2_CUDA_API CUDAContext final : public BaseContext {
return curand_generator_; return curand_generator_;
} }
inline static std::pair<void*, MemoryDeleter> New(size_t nbytes) { inline static at::DataPtr New(size_t nbytes) {
return StaticContext()->New(nbytes); return StaticContext()->New(nbytes);
} }
@ -334,26 +334,28 @@ inline void CPUContext::CopyBytes<CPUContext, CUDAContext>(
* GPU present during runtime, at global initialization time we will set * GPU present during runtime, at global initialization time we will set
* the CPU memory allocator to allocate pinned memory. * the CPU memory allocator to allocate pinned memory.
*/ */
struct CAFFE2_CUDA_API PinnedCPUAllocator final : CPUAllocator { struct CAFFE2_CUDA_API PinnedCPUAllocator final : public at::Allocator {
PinnedCPUAllocator() {} PinnedCPUAllocator() {}
~PinnedCPUAllocator() override {} ~PinnedCPUAllocator() override {}
std::pair<void*, MemoryDeleter> New(size_t nbytes) override { at::DataPtr allocate(size_t nbytes) const override {
void* data; void* data;
at::DataPtr data_ptr;
std::lock_guard<std::mutex> lock(CUDAContext::mutex()); std::lock_guard<std::mutex> lock(CUDAContext::mutex());
if (IsNUMAEnabled()) { if (IsNUMAEnabled()) {
auto ptr_and_deleter = baseAllocator_.New(nbytes); data_ptr = baseAllocator_.allocate(nbytes);
data = ptr_and_deleter.first; data = data_ptr.get();
CAFFE_ENFORCE(data); CAFFE_ENFORCE(data);
CUDA_ENFORCE(cudaHostRegister(data, nbytes, cudaHostRegisterDefault)); CUDA_ENFORCE(cudaHostRegister(data, nbytes, cudaHostRegisterDefault));
} else { } else {
CUDA_ENFORCE(cudaMallocHost(&data, nbytes)); CUDA_ENFORCE(cudaMallocHost(&data, nbytes));
data_ptr = {data, data, &Delete, at::Device(CPU)};
} }
memset(data, 0, nbytes); memset(data, 0, nbytes);
return {data, Delete}; return data_ptr;
} }
MemoryDeleter GetDeleter() override { at::DeleterFnPtr raw_deleter() const override {
return Delete; return &Delete;
} }
private: private:
@ -385,13 +387,14 @@ struct CAFFE2_CUDA_API PinnedCPUAllocator final : CPUAllocator {
class CAFFE2_CUDA_API CUDAStaticContext final : public BaseStaticContext { class CAFFE2_CUDA_API CUDAStaticContext final : public BaseStaticContext {
public: public:
std::pair<void*, MemoryDeleter> New(size_t nbytes) const override; at::DataPtr New(size_t nbytes) const override;
DeviceType GetDeviceType() override { DeviceType GetDeviceType() override {
return CUDA; return CUDA;
} }
void ExtractDeviceOption(DeviceOption* device, const void* data) override { void ExtractDeviceOption(DeviceOption* device, const void* data) override {
CAFFE_ENFORCE(data, "data cannot be nullptr");
device->set_device_type(TypeToProto(GetDeviceType())); device->set_device_type(TypeToProto(GetDeviceType()));
device->set_cuda_gpu_id(GetGPUIDForPointer(data)); device->set_cuda_gpu_id(GetGPUIDForPointer(data));
} }

View File

@ -11,12 +11,6 @@ CAFFE2_DECLARE_bool(caffe2_cuda_full_device_control);
namespace caffe2 { namespace caffe2 {
namespace {
std::shared_ptr<void> shared_from_new(std::pair<void*, MemoryDeleter>&& p) {
return std::shared_ptr<void>(p.first, std::move(p.second));
}
}
TEST(CUDATest, HasCudaRuntime) { TEST(CUDATest, HasCudaRuntime) {
EXPECT_TRUE(HasCudaRuntime()); EXPECT_TRUE(HasCudaRuntime());
} }
@ -25,7 +19,7 @@ TEST(CUDAContextTest, TestAllocDealloc) {
if (!HasCudaGPU()) return; if (!HasCudaGPU()) return;
CUDAContext context(0); CUDAContext context(0);
context.SwitchToDevice(); context.SwitchToDevice();
auto data = shared_from_new(CUDAContext::New(10 * sizeof(float))); auto data = CUDAContext::New(10 * sizeof(float));
EXPECT_NE(data.get(), nullptr); EXPECT_NE(data.get(), nullptr);
} }
@ -66,20 +60,20 @@ TEST(CUDAContextTest, MemoryPoolAllocateDealloc) {
for (int i = 0; i < NumCudaDevices(); ++i) { for (int i = 0; i < NumCudaDevices(); ++i) {
LOG(INFO) << "Device " << i << " of " << NumCudaDevices(); LOG(INFO) << "Device " << i << " of " << NumCudaDevices();
DeviceGuard guard(i); DeviceGuard guard(i);
auto allocated = shared_from_new(CUDAContext::New(nbytes)); auto allocated = CUDAContext::New(nbytes);
EXPECT_NE(allocated, nullptr); EXPECT_NE(allocated, nullptr);
cudaPointerAttributes attr; cudaPointerAttributes attr;
CUDA_ENFORCE(cudaPointerGetAttributes(&attr, allocated.get())); CUDA_ENFORCE(cudaPointerGetAttributes(&attr, allocated.get()));
EXPECT_EQ(attr.memoryType, cudaMemoryTypeDevice); EXPECT_EQ(attr.memoryType, cudaMemoryTypeDevice);
EXPECT_EQ(attr.device, i); EXPECT_EQ(attr.device, i);
void* prev_allocated = allocated.get(); void* prev_allocated = allocated.get();
allocated.reset(); allocated.clear();
auto new_allocated = shared_from_new(CUDAContext::New(nbytes)); auto new_allocated = CUDAContext::New(nbytes);
// With a pool, the above allocation should yield the same address. // With a pool, the above allocation should yield the same address.
EXPECT_EQ(new_allocated.get(), prev_allocated); EXPECT_EQ(new_allocated.get(), prev_allocated);
// But, if we are allocating something larger, we will have a different // But, if we are allocating something larger, we will have a different
// chunk of memory. // chunk of memory.
auto larger_allocated = shared_from_new(CUDAContext::New(nbytes * 2)); auto larger_allocated = CUDAContext::New(nbytes * 2);
EXPECT_NE(larger_allocated.get(), prev_allocated); EXPECT_NE(larger_allocated.get(), prev_allocated);
} }
} }

View File

@ -14,17 +14,17 @@ TEST(CPUContextTest, ATenCoreTest) {
TEST(CPUContextTest, TestAllocAlignment) { TEST(CPUContextTest, TestAllocAlignment) {
for (int i = 1; i < 10; ++i) { for (int i = 1; i < 10; ++i) {
auto data = CPUContext::New(i); auto data = CPUContext::New(i);
EXPECT_EQ((reinterpret_cast<size_t>(data.first) % gCaffe2Alignment), 0); EXPECT_EQ((reinterpret_cast<size_t>(data.get()) % gCaffe2Alignment), 0);
data.second(data.first); // data is freed when out of scope
} }
} }
TEST(CPUContextTest, TestAllocDealloc) { TEST(CPUContextTest, TestAllocDealloc) {
auto data_and_deleter = CPUContext::New(10 * sizeof(float)); auto data_ptr = CPUContext::New(10 * sizeof(float));
float* data = static_cast<float*>(data_and_deleter.first); float* data = static_cast<float*>(data_ptr.get());
EXPECT_NE(data, nullptr); EXPECT_NE(data, nullptr);
auto dst_data_and_deleter = CPUContext::New(10 * sizeof(float)); auto dst_data_ptr = CPUContext::New(10 * sizeof(float));
float* dst_data = static_cast<float*>(dst_data_and_deleter.first); float* dst_data = static_cast<float*>(dst_data_ptr.get());
EXPECT_NE(dst_data, nullptr); EXPECT_NE(dst_data, nullptr);
for (int i = 0; i < 10; ++i) { for (int i = 0; i < 10; ++i) {
data[i] = i; data[i] = i;
@ -35,8 +35,7 @@ TEST(CPUContextTest, TestAllocDealloc) {
for (int i = 0; i < 10; ++i) { for (int i = 0; i < 10; ++i) {
EXPECT_FLOAT_EQ(dst_data[i], i); EXPECT_FLOAT_EQ(dst_data[i], i);
} }
data_and_deleter.second(data); // data_ptr is freed when out of scope
dst_data_and_deleter.second(dst_data);
} }
} // namespace caffe2 } // namespace caffe2

View File

@ -24,8 +24,7 @@ struct CuDNNWorkspace {
void* get(size_t nbytes) { void* get(size_t nbytes) {
if (nbytes_ < nbytes) { if (nbytes_ < nbytes) {
reset(); reset();
auto data_and_deleter = CUDAContext::New(nbytes); data_ = CUDAContext::New(nbytes);
data_ = {data_and_deleter.first, data_and_deleter.second};
nbytes_ = nbytes; nbytes_ = nbytes;
} }
CAFFE_ENFORCE_GE(nbytes_, nbytes); CAFFE_ENFORCE_GE(nbytes_, nbytes);
@ -33,12 +32,12 @@ struct CuDNNWorkspace {
} }
void reset() { void reset() {
data_ = nullptr; data_.clear();
nbytes_ = 0; nbytes_ = 0;
} }
private: private:
std::unique_ptr<void, MemoryDeleter> data_{nullptr, NoDelete}; at::DataPtr data_{nullptr, nullptr, &NoDelete, at::Device(CUDA)};
size_t nbytes_{0}; size_t nbytes_{0};
}; };

View File

@ -326,7 +326,7 @@ void TrackMemoryAlloc(size_t nbytes)
} }
} }
std::pair<void*, MemoryDeleter> HIPStaticContext::New(size_t nbytes) const { at::DataPtr HIPStaticContext::New(size_t nbytes) const {
// Lock the mutex // Lock the mutex
std::lock_guard<std::mutex> lock(HIPContext::mutex()); std::lock_guard<std::mutex> lock(HIPContext::mutex());
// A one-time caffe2 cuda initializer. // A one-time caffe2 cuda initializer.
@ -344,7 +344,7 @@ std::pair<void*, MemoryDeleter> HIPStaticContext::New(size_t nbytes) const {
g_size_map[ptr] = nbytes; g_size_map[ptr] = nbytes;
g_hip_device_affiliation[ptr] = CaffeHipGetDevice(); g_hip_device_affiliation[ptr] = CaffeHipGetDevice();
} }
return {ptr, Delete}; return {ptr, ptr, &Delete, at::Device(HIP)};
case HipMemoryPoolType::CUB: case HipMemoryPoolType::CUB:
HIP_ENFORCE(g_cub_allocator->DeviceAllocate(&ptr, nbytes)); HIP_ENFORCE(g_cub_allocator->DeviceAllocate(&ptr, nbytes));
g_hip_device_affiliation[ptr] = CaffeHipGetDevice(); g_hip_device_affiliation[ptr] = CaffeHipGetDevice();
@ -353,7 +353,7 @@ std::pair<void*, MemoryDeleter> HIPStaticContext::New(size_t nbytes) const {
{ {
g_size_map[ptr] = nbytes; g_size_map[ptr] = nbytes;
} }
return {ptr, Delete}; return {ptr, ptr, &Delete, at::Device(HIP)};
case HipMemoryPoolType::THC: case HipMemoryPoolType::THC:
HIP_ENFORCE(g_thc_allocator->Alloc(&ptr, nbytes, 0 /* stream */)); HIP_ENFORCE(g_thc_allocator->Alloc(&ptr, nbytes, 0 /* stream */));
if (FLAGS_caffe2_gpu_memory_tracking) if (FLAGS_caffe2_gpu_memory_tracking)
@ -361,9 +361,9 @@ std::pair<void*, MemoryDeleter> HIPStaticContext::New(size_t nbytes) const {
g_size_map[ptr] = nbytes; g_size_map[ptr] = nbytes;
g_hip_device_affiliation[ptr] = CaffeHipGetDevice(); g_hip_device_affiliation[ptr] = CaffeHipGetDevice();
} }
return {ptr, Delete}; return {ptr, ptr, &Delete, at::Device(HIP)};
} }
return {nullptr, Delete}; return {nullptr, nullptr, &Delete, at::Device(HIP)};
} }
void HIPStaticContext::Delete(void* ptr) { void HIPStaticContext::Delete(void* ptr) {

View File

@ -206,7 +206,7 @@ class HIPContext final : public BaseContext {
return hiprand_generator_; return hiprand_generator_;
} }
static std::pair<void*, MemoryDeleter> New(size_t nbytes) { static at::DataPtr New(size_t nbytes) {
return StaticContext()->New(nbytes); return StaticContext()->New(nbytes);
} }
@ -323,26 +323,28 @@ inline void CPUContext::CopyBytes<CPUContext, HIPContext>(
* GPU present during runtime, at global initialization time we will set * GPU present during runtime, at global initialization time we will set
* the CPU memory allocator to allocate pinned memory. * the CPU memory allocator to allocate pinned memory.
*/ */
struct PinnedCPUAllocator final : CPUAllocator { struct PinnedCPUAllocator final : public at::Allocator {
PinnedCPUAllocator() {} PinnedCPUAllocator() {}
~PinnedCPUAllocator() override {} ~PinnedCPUAllocator() override {}
std::pair<void*, MemoryDeleter> New(size_t nbytes) override { at::DataPtr allocate(size_t nbytes) const override {
void* data; void* data;
at::DataPtr data_ptr;
std::lock_guard<std::mutex> lock(HIPContext::mutex()); std::lock_guard<std::mutex> lock(HIPContext::mutex());
if (IsNUMAEnabled()) { if (IsNUMAEnabled()) {
auto ptr_and_deleter = baseAllocator_.New(nbytes); data_ptr = baseAllocator_.allocate(nbytes);
data = ptr_and_deleter.first; data = data_ptr.get();
CAFFE_ENFORCE(data); CAFFE_ENFORCE(data);
HIP_ENFORCE(hipHostRegister(data, nbytes, hipHostRegisterDefault)); HIP_ENFORCE(hipHostRegister(data, nbytes, hipHostRegisterDefault));
} else { } else {
HIP_ENFORCE(hipHostMalloc(&data, nbytes)); HIP_ENFORCE(hipHostMalloc(&data, nbytes));
data_ptr = {data, data, &Delete, at::Device(CPU)};
} }
memset(data, 0, nbytes); memset(data, 0, nbytes);
return {data, Delete}; return data_ptr;
} }
MemoryDeleter GetDeleter() override { at::DeleterFnPtr raw_deleter() const override {
return Delete; return &Delete;
} }
private: private:
@ -374,7 +376,7 @@ struct PinnedCPUAllocator final : CPUAllocator {
class HIPStaticContext final : public BaseStaticContext { class HIPStaticContext final : public BaseStaticContext {
public: public:
std::pair<void*, MemoryDeleter> New(size_t nbytes) const override; at::DataPtr New(size_t nbytes) const override;
DeviceType GetDeviceType() override { DeviceType GetDeviceType() override {
return HIP; return HIP;

View File

@ -26,8 +26,7 @@ struct MIOPENWorkspace
if(nbytes_ < nbytes) if(nbytes_ < nbytes)
{ {
reset(); reset();
auto data_and_deleter = HIPContext::New(nbytes); data_ = HIPContext::New(nbytes);
data_ = {data_and_deleter.first, data_and_deleter.second};
nbytes_ = nbytes; nbytes_ = nbytes;
} }
CAFFE_ENFORCE_GE(nbytes_, nbytes); CAFFE_ENFORCE_GE(nbytes_, nbytes);
@ -36,13 +35,13 @@ struct MIOPENWorkspace
void reset() void reset()
{ {
data_ = nullptr; data_.clear();
nbytes_ = 0; nbytes_ = 0;
} }
private: private:
std::unique_ptr<void, MemoryDeleter> data_{nullptr, NoDelete}; at::DataPtr data_;
size_t nbytes_{0}; size_t nbytes_{0};
}; };
// MIOPENState is the owner of the MIOPENWorkspace, and serializes all // MIOPENState is the owner of the MIOPENWorkspace, and serializes all

View File

@ -59,7 +59,7 @@ class C10_EXPORT QTensor {
size_t source_size = std::accumulate( size_t source_size = std::accumulate(
dim_source.begin(), dim_source.end(), 1, std::multiplies<int>()); dim_source.begin(), dim_source.end(), 1, std::multiplies<int>());
if ((source_size * (precision_ + signed_)) > capacity_) { if ((source_size * (precision_ + signed_)) > capacity_) {
data_.reset(); data_ptr_.clear();
capacity_ = 0; capacity_ = 0;
} }
dims_ = dim_source; dims_ = dim_source;
@ -104,12 +104,12 @@ class C10_EXPORT QTensor {
void SetPrecision(const unsigned char precision) { void SetPrecision(const unsigned char precision) {
precision_ = precision; precision_ = precision;
data_.reset(); data_ptr_.clear();
} }
void SetSigned(const bool make_signed = true) { void SetSigned(const bool make_signed = true) {
signed_ = make_signed; signed_ = make_signed;
data_.reset(); data_ptr_.clear();
} }
void SetScale(const double scale) { void SetScale(const double scale) {
@ -121,19 +121,16 @@ class C10_EXPORT QTensor {
} }
unsigned char* mutable_data() { unsigned char* mutable_data() {
if (!data_) { if (!data_ptr_) {
auto ptr_and_deleter = Context::New(nbytes()); data_ptr_ = Context::New(nbytes());
data_.reset(
static_cast<unsigned char*>(ptr_and_deleter.first),
ptr_and_deleter.second);
capacity_ = nbytes() * CHAR_BIT; capacity_ = nbytes() * CHAR_BIT;
} }
CAFFE_ENFORCE(capacity_ == nbytes() * CHAR_BIT); CAFFE_ENFORCE(capacity_ == nbytes() * CHAR_BIT);
return data_.get(); return static_cast<unsigned char*>(data_ptr_.get());
} }
inline const unsigned char* data() const { inline const unsigned char* data() const {
return data_.get(); return static_cast<unsigned char*>(data_ptr_.get());
} }
inline size_t size() const { inline size_t size() const {
@ -242,7 +239,7 @@ class C10_EXPORT QTensor {
unsigned char alignment_ = CHAR_BIT; unsigned char alignment_ = CHAR_BIT;
// Allocated data. // Allocated data.
std::shared_ptr<unsigned char> data_; at::DataPtr data_ptr_;
// value = scale_ * (x + bias_) // value = scale_ * (x + bias_)
double scale_; double scale_;

View File

@ -55,7 +55,7 @@ class IDEEPContext final : public BaseContext {
return *random_generator_.get(); return *random_generator_.get();
} }
inline static std::pair<void*, MemoryDeleter> New(size_t nbytes) { inline static at::DataPtr New(size_t nbytes) {
return StaticContext()->New(nbytes); return StaticContext()->New(nbytes);
} }
@ -176,8 +176,8 @@ inline void IDEEPContext::CopyBytes<IDEEPContext, CPUContext>(
class IDEEPStaticContext : public BaseStaticContext { class IDEEPStaticContext : public BaseStaticContext {
public: public:
inline std::pair<void*, MemoryDeleter> New(size_t nbytes) const override { inline at::DataPtr New(size_t nbytes) const override {
return GetCPUAllocator()->New(nbytes); return GetCPUAllocator()->allocate(nbytes);
} }
DeviceType GetDeviceType() override { DeviceType GetDeviceType() override {

View File

@ -62,7 +62,7 @@ class MKLContext : public BaseContext {
return *random_generator_.get(); return *random_generator_.get();
} }
inline static std::pair<void*, MemoryDeleter> New(size_t nbytes) { inline static at::DataPtr New(size_t nbytes) {
return StaticContext()->New(nbytes); return StaticContext()->New(nbytes);
} }
@ -153,8 +153,8 @@ inline void MKLContext::CopyBytes<MKLContext, MKLContext>(
class MKLStaticContext : public BaseStaticContext { class MKLStaticContext : public BaseStaticContext {
public: public:
inline std::pair<void*, MemoryDeleter> New(size_t nbytes) const override { inline at::DataPtr New(size_t nbytes) const override {
return GetCPUAllocator()->New(nbytes); return GetCPUAllocator()->allocate(nbytes);
} }
DeviceType GetDeviceType() override { DeviceType GetDeviceType() override {