mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
New in StaticContext returns at::DataPtr (#12029)
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/12029 In order to remove the New() function in StaticContext (so that StaticContext itself can be removed) and converge to the Allocator design, we'll first change the return type of New to at::DataPtr. Reviewed By: ezyang Differential Revision: D9889990 fbshipit-source-id: 3257c763530b987025f428741bdd2e089d11bad4
This commit is contained in:
committed by
Facebook Github Bot
parent
bcc2a0599b
commit
74dc4460eb
@ -43,6 +43,9 @@ class DataPtr {
|
|||||||
void* release_context() {
|
void* release_context() {
|
||||||
return ptr_.release_context();
|
return ptr_.release_context();
|
||||||
}
|
}
|
||||||
|
std::unique_ptr<void, DeleterFnPtr>&& move_context() {
|
||||||
|
return ptr_.move_context();
|
||||||
|
}
|
||||||
operator bool() const {
|
operator bool() const {
|
||||||
return static_cast<bool>(ptr_);
|
return static_cast<bool>(ptr_);
|
||||||
}
|
}
|
||||||
@ -50,6 +53,9 @@ class DataPtr {
|
|||||||
T* cast_context(DeleterFnPtr expected_deleter) const {
|
T* cast_context(DeleterFnPtr expected_deleter) const {
|
||||||
return ptr_.cast_context<T>(expected_deleter);
|
return ptr_.cast_context<T>(expected_deleter);
|
||||||
}
|
}
|
||||||
|
DeleterFnPtr get_deleter() const {
|
||||||
|
return ptr_.get_deleter();
|
||||||
|
}
|
||||||
Device device() const {
|
Device device() const {
|
||||||
return device_;
|
return device_;
|
||||||
}
|
}
|
||||||
|
@ -109,4 +109,20 @@ const Storage& TensorImpl::storage() const {
|
|||||||
return storage_;
|
return storage_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void deletePlacementDeleteContext(void* ptr) {
|
||||||
|
delete static_cast<PlacementDeleteContext*>(ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
at::DataPtr PlacementDeleteContext::makeDataPtr(
|
||||||
|
at::DataPtr&& data_ptr,
|
||||||
|
PlacementDtor placement_dtor,
|
||||||
|
size_t size,
|
||||||
|
at::Device device) {
|
||||||
|
auto* ptr = data_ptr.get();
|
||||||
|
return {ptr,
|
||||||
|
new PlacementDeleteContext(std::move(data_ptr), placement_dtor, size),
|
||||||
|
&deletePlacementDeleteContext,
|
||||||
|
device};
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace at
|
} // namespace at
|
||||||
|
@ -3,13 +3,13 @@
|
|||||||
#include <atomic>
|
#include <atomic>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
#include "ATen/core/Storage.h"
|
#include <ATen/core/Backend.h>
|
||||||
#include "ATen/core/optional.h"
|
#include <ATen/core/LegacyTypeDispatch.h>
|
||||||
#include "ATen/core/TensorTypeId.h"
|
#include <ATen/core/Storage.h>
|
||||||
#include "ATen/core/TensorTypeIdRegistration.h"
|
#include <ATen/core/TensorTypeId.h>
|
||||||
#include "ATen/core/LegacyTypeDispatch.h"
|
#include <ATen/core/TensorTypeIdRegistration.h>
|
||||||
#include "ATen/core/Backend.h"
|
#include <ATen/core/context_base.h>
|
||||||
#include "ATen/core/context_base.h"
|
#include <ATen/core/optional.h>
|
||||||
|
|
||||||
#include "caffe2/core/allocator.h"
|
#include "caffe2/core/allocator.h"
|
||||||
#include "caffe2/core/common.h"
|
#include "caffe2/core/common.h"
|
||||||
@ -99,6 +99,39 @@ inline int canonical_axis_index_(int axis_index, int ndims) {
|
|||||||
return axis_index;
|
return axis_index;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
using PlacementDtor = void (*)(void*, size_t);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* A Context that will call an extra placement deleter during
|
||||||
|
* destruction.
|
||||||
|
*
|
||||||
|
* Accept an already constructed DataPtr and store it as a member;
|
||||||
|
* during destruction, we'll call the extra deleter on the underlying
|
||||||
|
* data pointer before the DataPtr is destructed.
|
||||||
|
* `data_ptr_` owns the memory.
|
||||||
|
*/
|
||||||
|
struct CAFFE2_API PlacementDeleteContext {
|
||||||
|
at::DataPtr data_ptr_;
|
||||||
|
PlacementDtor placement_dtor_;
|
||||||
|
size_t size_;
|
||||||
|
PlacementDeleteContext(
|
||||||
|
at::DataPtr&& data_ptr,
|
||||||
|
PlacementDtor placement_dtor,
|
||||||
|
size_t size)
|
||||||
|
: data_ptr_(std::move(data_ptr)),
|
||||||
|
placement_dtor_(placement_dtor),
|
||||||
|
size_(size) {}
|
||||||
|
static at::DataPtr makeDataPtr(
|
||||||
|
at::DataPtr&& data_ptr,
|
||||||
|
PlacementDtor placement_dtor,
|
||||||
|
size_t size,
|
||||||
|
at::Device device);
|
||||||
|
~PlacementDeleteContext() {
|
||||||
|
placement_dtor_(data_ptr_.get(), size_);
|
||||||
|
// original memory will be freed when data_ptr_ is destructed
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The low-level representation of a tensor, which contains a storage
|
* The low-level representation of a tensor, which contains a storage
|
||||||
* (which contains the actual data) and metadata (e.g., sizes and strides)
|
* (which contains the actual data) and metadata (e.g., sizes and strides)
|
||||||
@ -734,29 +767,19 @@ struct CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
|
|||||||
// destruction procedure.
|
// destruction procedure.
|
||||||
auto size = numel_;
|
auto size = numel_;
|
||||||
auto dtor = data_type_.dtor();
|
auto dtor = data_type_.dtor();
|
||||||
void* ptr;
|
auto data_ptr = GetStaticContext()->New(
|
||||||
at::DeleterFnPtr deleter;
|
|
||||||
auto ptr_and_deleter = GetStaticContext()->New(
|
|
||||||
numel_ * storage_.itemsize()); // Removing this can get rid of
|
numel_ * storage_.itemsize()); // Removing this can get rid of
|
||||||
// InefficientStdFunctionContext
|
// InefficientStdFunctionContext
|
||||||
ptr = ptr_and_deleter.first;
|
storage_.set_data_ptr(PlacementDeleteContext::makeDataPtr(
|
||||||
deleter = ptr_and_deleter.second;
|
std::move(data_ptr),
|
||||||
storage_.set_data_ptr(at::InefficientStdFunctionContext::makeDataPtr(
|
dtor,
|
||||||
ptr,
|
size,
|
||||||
[size, dtor, deleter](void* local_ptr) -> void {
|
|
||||||
dtor(local_ptr, size);
|
|
||||||
deleter(local_ptr);
|
|
||||||
},
|
|
||||||
at::Device(storage_.device_type())));
|
at::Device(storage_.device_type())));
|
||||||
data_type_.ctor()(storage_.data(), numel_);
|
data_type_.ctor()(storage_.data(), numel_);
|
||||||
} else {
|
} else {
|
||||||
// For fundamental type, new and delete is easier.
|
// For fundamental type, new and delete is easier.
|
||||||
auto ptr_and_deleter =
|
storage_.set_data_ptr(
|
||||||
GetStaticContext()->New(numel_ * storage_.itemsize());
|
GetStaticContext()->New(numel_ * storage_.itemsize()));
|
||||||
storage_.set_data_ptr(at::InefficientStdFunctionContext::makeDataPtr(
|
|
||||||
ptr_and_deleter.first,
|
|
||||||
ptr_and_deleter.second,
|
|
||||||
at::Device(storage_.device_type())));
|
|
||||||
}
|
}
|
||||||
storage_.set_numel(numel_);
|
storage_.set_numel(numel_);
|
||||||
AT_ASSERT(storage_offset_ == 0); // because we just reallocated
|
AT_ASSERT(storage_offset_ == 0); // because we just reallocated
|
||||||
|
@ -63,6 +63,10 @@ class UniqueVoidPtr {
|
|||||||
void* release_context() {
|
void* release_context() {
|
||||||
return ctx_.release();
|
return ctx_.release();
|
||||||
}
|
}
|
||||||
|
std::unique_ptr<void, DeleterFnPtr>&& move_context() {
|
||||||
|
return std::move(ctx_);
|
||||||
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
T* cast_context(DeleterFnPtr expected_deleter) const {
|
T* cast_context(DeleterFnPtr expected_deleter) const {
|
||||||
if (get_deleter() != expected_deleter)
|
if (get_deleter() != expected_deleter)
|
||||||
|
@ -7,6 +7,7 @@
|
|||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
|
||||||
#include <ATen/core/ATenGeneral.h>
|
#include <ATen/core/ATenGeneral.h>
|
||||||
|
#include <ATen/core/Allocator.h>
|
||||||
#include <ATen/core/Device.h>
|
#include <ATen/core/Device.h>
|
||||||
#include <ATen/core/Error.h>
|
#include <ATen/core/Error.h>
|
||||||
#include <ATen/core/UniqueVoidPtr.h>
|
#include <ATen/core/UniqueVoidPtr.h>
|
||||||
@ -30,7 +31,7 @@ class CAFFE2_API BaseStaticContext {
|
|||||||
public:
|
public:
|
||||||
virtual ~BaseStaticContext() noexcept {}
|
virtual ~BaseStaticContext() noexcept {}
|
||||||
|
|
||||||
virtual std::pair<void*, DeleterFnPtr> New(size_t nbytes) const = 0;
|
virtual at::DataPtr New(size_t nbytes) const = 0;
|
||||||
|
|
||||||
virtual DeviceType GetDeviceType() = 0;
|
virtual DeviceType GetDeviceType() = 0;
|
||||||
|
|
||||||
|
@ -190,9 +190,8 @@ BENCHMARK(BM_OperatorCreationCUDA);
|
|||||||
static void BM_RawAllocDeallocCPU(benchmark::State& state) {
|
static void BM_RawAllocDeallocCPU(benchmark::State& state) {
|
||||||
while (state.KeepRunning()) {
|
while (state.KeepRunning()) {
|
||||||
// Allocating only 1 byte in order to measure the overhead.
|
// Allocating only 1 byte in order to measure the overhead.
|
||||||
auto ptr_and_deleter = GetCPUAllocator()->New(1);
|
auto data_ptr = GetCPUAllocator()->allocate(1);
|
||||||
// Deallocate.
|
// Deallocated when it's out of scope
|
||||||
ptr_and_deleter.second(ptr_and_deleter.first);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
BENCHMARK(BM_RawAllocDeallocCPU);
|
BENCHMARK(BM_RawAllocDeallocCPU);
|
||||||
|
@ -16,16 +16,17 @@ namespace caffe2 {
|
|||||||
|
|
||||||
void NoDelete(void*) {}
|
void NoDelete(void*) {}
|
||||||
|
|
||||||
static std::unique_ptr<CPUAllocator> g_cpu_allocator(new DefaultCPUAllocator());
|
static std::unique_ptr<at::Allocator> g_cpu_allocator(
|
||||||
CPUAllocator* GetCPUAllocator() {
|
new DefaultCPUAllocator());
|
||||||
|
at::Allocator* GetCPUAllocator() {
|
||||||
return g_cpu_allocator.get();
|
return g_cpu_allocator.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
void SetCPUAllocator(CPUAllocator* alloc) {
|
void SetCPUAllocator(at::Allocator* alloc) {
|
||||||
g_cpu_allocator.reset(alloc);
|
g_cpu_allocator.reset(alloc);
|
||||||
}
|
}
|
||||||
|
|
||||||
MemoryAllocationReporter CPUStaticContext::reporter_;
|
MemoryAllocationReporter DefaultCPUAllocator::reporter_;
|
||||||
|
|
||||||
void MemoryAllocationReporter::New(void* ptr, size_t nbytes) {
|
void MemoryAllocationReporter::New(void* ptr, size_t nbytes) {
|
||||||
std::lock_guard<std::mutex> guard(mutex_);
|
std::lock_guard<std::mutex> guard(mutex_);
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
|
||||||
|
#include <ATen/core/Allocator.h>
|
||||||
#include "caffe2/core/logging.h"
|
#include "caffe2/core/logging.h"
|
||||||
#include "caffe2/core/numa.h"
|
#include "caffe2/core/numa.h"
|
||||||
|
|
||||||
@ -42,10 +43,10 @@ class CAFFE2_API MemoryAllocationReporter {
|
|||||||
size_t allocated_;
|
size_t allocated_;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct CAFFE2_API DefaultCPUAllocator final : CPUAllocator {
|
struct CAFFE2_API DefaultCPUAllocator final : at::Allocator {
|
||||||
DefaultCPUAllocator() {}
|
DefaultCPUAllocator() {}
|
||||||
~DefaultCPUAllocator() override {}
|
~DefaultCPUAllocator() override {}
|
||||||
std::pair<void*, MemoryDeleter> New(size_t nbytes) override {
|
at::DataPtr allocate(size_t nbytes) const override {
|
||||||
void* data = nullptr;
|
void* data = nullptr;
|
||||||
#ifdef __ANDROID__
|
#ifdef __ANDROID__
|
||||||
data = memalign(gCaffe2Alignment, nbytes);
|
data = memalign(gCaffe2Alignment, nbytes);
|
||||||
@ -60,7 +61,11 @@ struct CAFFE2_API DefaultCPUAllocator final : CPUAllocator {
|
|||||||
if (FLAGS_caffe2_cpu_allocator_do_zero_fill) {
|
if (FLAGS_caffe2_cpu_allocator_do_zero_fill) {
|
||||||
memset(data, 0, nbytes);
|
memset(data, 0, nbytes);
|
||||||
}
|
}
|
||||||
return {data, Delete};
|
if (FLAGS_caffe2_report_cpu_memory_usage) {
|
||||||
|
reporter_.New(data, nbytes);
|
||||||
|
return {data, data, &ReportAndDelete, at::Device(at::DeviceType::CPU)};
|
||||||
|
}
|
||||||
|
return {data, data, &Delete, at::Device(at::DeviceType::CPU)};
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
@ -73,16 +78,27 @@ struct CAFFE2_API DefaultCPUAllocator final : CPUAllocator {
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
MemoryDeleter GetDeleter() override {
|
static void ReportAndDelete(void* ptr) {
|
||||||
return Delete;
|
reporter_.Delete(ptr);
|
||||||
|
Delete(ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
at::DeleterFnPtr raw_deleter() const override {
|
||||||
|
if (FLAGS_caffe2_report_cpu_memory_usage) {
|
||||||
|
return &ReportAndDelete;
|
||||||
|
}
|
||||||
|
return &Delete;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
static MemoryAllocationReporter reporter_;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Get the CPU Allocator.
|
// Get the CPU Allocator.
|
||||||
CAFFE2_API CPUAllocator* GetCPUAllocator();
|
CAFFE2_API at::Allocator* GetCPUAllocator();
|
||||||
// Sets the CPU allocator to the given allocator: the caller gives away the
|
// Sets the CPU allocator to the given allocator: the caller gives away the
|
||||||
// ownership of the pointer.
|
// ownership of the pointer.
|
||||||
CAFFE2_API void SetCPUAllocator(CPUAllocator* alloc);
|
CAFFE2_API void SetCPUAllocator(at::Allocator* alloc);
|
||||||
|
|
||||||
} // namespace caffe2
|
} // namespace caffe2
|
||||||
|
|
||||||
|
@ -13,8 +13,8 @@
|
|||||||
#include "caffe2/core/typeid.h"
|
#include "caffe2/core/typeid.h"
|
||||||
#include "caffe2/proto/caffe2_pb.h"
|
#include "caffe2/proto/caffe2_pb.h"
|
||||||
|
|
||||||
#include "ATen/core/ATenCoreTest.h"
|
#include <ATen/core/ATenCoreTest.h>
|
||||||
#include "ATen/core/ArrayRef.h"
|
#include <ATen/core/ArrayRef.h>
|
||||||
|
|
||||||
CAFFE2_DECLARE_bool(caffe2_report_cpu_memory_usage);
|
CAFFE2_DECLARE_bool(caffe2_report_cpu_memory_usage);
|
||||||
|
|
||||||
@ -85,7 +85,7 @@ class CAFFE2_API CPUContext final : public BaseContext {
|
|||||||
return *random_generator_.get();
|
return *random_generator_.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
inline static std::pair<void*, MemoryDeleter> New(size_t nbytes) {
|
inline static at::DataPtr New(size_t nbytes) {
|
||||||
return StaticContext()->New(nbytes);
|
return StaticContext()->New(nbytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -185,13 +185,8 @@ inline void CPUContext::CopyBytes<CPUContext, CPUContext>(
|
|||||||
// TODO(jerryzh): merge CPUStaticContext with Allocator
|
// TODO(jerryzh): merge CPUStaticContext with Allocator
|
||||||
class CAFFE2_API CPUStaticContext : public BaseStaticContext {
|
class CAFFE2_API CPUStaticContext : public BaseStaticContext {
|
||||||
public:
|
public:
|
||||||
std::pair<void*, MemoryDeleter> New(size_t nbytes) const override {
|
at::DataPtr New(size_t nbytes) const override {
|
||||||
auto data_and_deleter = GetCPUAllocator()->New(nbytes);
|
return GetCPUAllocator()->allocate(nbytes);
|
||||||
if (FLAGS_caffe2_report_cpu_memory_usage) {
|
|
||||||
reporter_.New(data_and_deleter.first, nbytes);
|
|
||||||
data_and_deleter.second = ReportAndDelete;
|
|
||||||
}
|
|
||||||
return data_and_deleter;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DeviceType GetDeviceType() override {
|
DeviceType GetDeviceType() override {
|
||||||
@ -204,14 +199,6 @@ class CAFFE2_API CPUStaticContext : public BaseStaticContext {
|
|||||||
device->set_device_type(TypeToProto(GetDeviceType()));
|
device->set_device_type(TypeToProto(GetDeviceType()));
|
||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
|
||||||
static MemoryAllocationReporter reporter_;
|
|
||||||
|
|
||||||
private:
|
|
||||||
static void ReportAndDelete(void* ptr) {
|
|
||||||
reporter_.Delete(ptr);
|
|
||||||
GetCPUAllocator()->GetDeleter()(ptr);
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace caffe2
|
} // namespace caffe2
|
||||||
|
@ -314,7 +314,8 @@ void TrackMemoryAlloc(size_t nbytes) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<void*, MemoryDeleter> CUDAStaticContext::New(size_t nbytes) const {
|
// TODO: wrap this function in DefaultCUDAAllocator
|
||||||
|
at::DataPtr CUDAStaticContext::New(size_t nbytes) const {
|
||||||
// Lock the mutex
|
// Lock the mutex
|
||||||
std::lock_guard<std::mutex> lock(CUDAContext::mutex());
|
std::lock_guard<std::mutex> lock(CUDAContext::mutex());
|
||||||
// A one-time caffe2 cuda initializer.
|
// A one-time caffe2 cuda initializer.
|
||||||
@ -331,7 +332,7 @@ std::pair<void*, MemoryDeleter> CUDAStaticContext::New(size_t nbytes) const {
|
|||||||
g_size_map[ptr] = nbytes;
|
g_size_map[ptr] = nbytes;
|
||||||
g_cuda_device_affiliation[ptr] = CaffeCudaGetDevice();
|
g_cuda_device_affiliation[ptr] = CaffeCudaGetDevice();
|
||||||
}
|
}
|
||||||
return {ptr, Delete};
|
return {ptr, ptr, Delete, at::Device(CUDA)};
|
||||||
case CudaMemoryPoolType::CUB:
|
case CudaMemoryPoolType::CUB:
|
||||||
CUDA_ENFORCE(g_cub_allocator->DeviceAllocate(&ptr, nbytes));
|
CUDA_ENFORCE(g_cub_allocator->DeviceAllocate(&ptr, nbytes));
|
||||||
g_cuda_device_affiliation[ptr] = CaffeCudaGetDevice();
|
g_cuda_device_affiliation[ptr] = CaffeCudaGetDevice();
|
||||||
@ -340,16 +341,16 @@ std::pair<void*, MemoryDeleter> CUDAStaticContext::New(size_t nbytes) const {
|
|||||||
if (FLAGS_caffe2_gpu_memory_tracking) {
|
if (FLAGS_caffe2_gpu_memory_tracking) {
|
||||||
g_size_map[ptr] = nbytes;
|
g_size_map[ptr] = nbytes;
|
||||||
}
|
}
|
||||||
return {ptr, Delete};
|
return {ptr, ptr, Delete, at::Device(CUDA)};
|
||||||
case CudaMemoryPoolType::THC:
|
case CudaMemoryPoolType::THC:
|
||||||
CUDA_ENFORCE(g_thc_allocator->Alloc(&ptr, nbytes, 0 /* stream */));
|
CUDA_ENFORCE(g_thc_allocator->Alloc(&ptr, nbytes, 0 /* stream */));
|
||||||
if (FLAGS_caffe2_gpu_memory_tracking) {
|
if (FLAGS_caffe2_gpu_memory_tracking) {
|
||||||
g_size_map[ptr] = nbytes;
|
g_size_map[ptr] = nbytes;
|
||||||
g_cuda_device_affiliation[ptr] = CaffeCudaGetDevice();
|
g_cuda_device_affiliation[ptr] = CaffeCudaGetDevice();
|
||||||
}
|
}
|
||||||
return {ptr, Delete};
|
return {ptr, ptr, Delete, at::Device(CUDA)};
|
||||||
}
|
}
|
||||||
return {nullptr, Delete};
|
return {nullptr, nullptr, Delete, at::Device(CUDA)};
|
||||||
}
|
}
|
||||||
|
|
||||||
void CUDAStaticContext::Delete(void* ptr) {
|
void CUDAStaticContext::Delete(void* ptr) {
|
||||||
|
@ -223,7 +223,7 @@ class CAFFE2_CUDA_API CUDAContext final : public BaseContext {
|
|||||||
return curand_generator_;
|
return curand_generator_;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline static std::pair<void*, MemoryDeleter> New(size_t nbytes) {
|
inline static at::DataPtr New(size_t nbytes) {
|
||||||
return StaticContext()->New(nbytes);
|
return StaticContext()->New(nbytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -334,26 +334,28 @@ inline void CPUContext::CopyBytes<CPUContext, CUDAContext>(
|
|||||||
* GPU present during runtime, at global initialization time we will set
|
* GPU present during runtime, at global initialization time we will set
|
||||||
* the CPU memory allocator to allocate pinned memory.
|
* the CPU memory allocator to allocate pinned memory.
|
||||||
*/
|
*/
|
||||||
struct CAFFE2_CUDA_API PinnedCPUAllocator final : CPUAllocator {
|
struct CAFFE2_CUDA_API PinnedCPUAllocator final : public at::Allocator {
|
||||||
PinnedCPUAllocator() {}
|
PinnedCPUAllocator() {}
|
||||||
~PinnedCPUAllocator() override {}
|
~PinnedCPUAllocator() override {}
|
||||||
std::pair<void*, MemoryDeleter> New(size_t nbytes) override {
|
at::DataPtr allocate(size_t nbytes) const override {
|
||||||
void* data;
|
void* data;
|
||||||
|
at::DataPtr data_ptr;
|
||||||
std::lock_guard<std::mutex> lock(CUDAContext::mutex());
|
std::lock_guard<std::mutex> lock(CUDAContext::mutex());
|
||||||
if (IsNUMAEnabled()) {
|
if (IsNUMAEnabled()) {
|
||||||
auto ptr_and_deleter = baseAllocator_.New(nbytes);
|
data_ptr = baseAllocator_.allocate(nbytes);
|
||||||
data = ptr_and_deleter.first;
|
data = data_ptr.get();
|
||||||
CAFFE_ENFORCE(data);
|
CAFFE_ENFORCE(data);
|
||||||
CUDA_ENFORCE(cudaHostRegister(data, nbytes, cudaHostRegisterDefault));
|
CUDA_ENFORCE(cudaHostRegister(data, nbytes, cudaHostRegisterDefault));
|
||||||
} else {
|
} else {
|
||||||
CUDA_ENFORCE(cudaMallocHost(&data, nbytes));
|
CUDA_ENFORCE(cudaMallocHost(&data, nbytes));
|
||||||
|
data_ptr = {data, data, &Delete, at::Device(CPU)};
|
||||||
}
|
}
|
||||||
memset(data, 0, nbytes);
|
memset(data, 0, nbytes);
|
||||||
return {data, Delete};
|
return data_ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
MemoryDeleter GetDeleter() override {
|
at::DeleterFnPtr raw_deleter() const override {
|
||||||
return Delete;
|
return &Delete;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@ -385,13 +387,14 @@ struct CAFFE2_CUDA_API PinnedCPUAllocator final : CPUAllocator {
|
|||||||
|
|
||||||
class CAFFE2_CUDA_API CUDAStaticContext final : public BaseStaticContext {
|
class CAFFE2_CUDA_API CUDAStaticContext final : public BaseStaticContext {
|
||||||
public:
|
public:
|
||||||
std::pair<void*, MemoryDeleter> New(size_t nbytes) const override;
|
at::DataPtr New(size_t nbytes) const override;
|
||||||
|
|
||||||
DeviceType GetDeviceType() override {
|
DeviceType GetDeviceType() override {
|
||||||
return CUDA;
|
return CUDA;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ExtractDeviceOption(DeviceOption* device, const void* data) override {
|
void ExtractDeviceOption(DeviceOption* device, const void* data) override {
|
||||||
|
CAFFE_ENFORCE(data, "data cannot be nullptr");
|
||||||
device->set_device_type(TypeToProto(GetDeviceType()));
|
device->set_device_type(TypeToProto(GetDeviceType()));
|
||||||
device->set_cuda_gpu_id(GetGPUIDForPointer(data));
|
device->set_cuda_gpu_id(GetGPUIDForPointer(data));
|
||||||
}
|
}
|
||||||
|
@ -11,12 +11,6 @@ CAFFE2_DECLARE_bool(caffe2_cuda_full_device_control);
|
|||||||
|
|
||||||
namespace caffe2 {
|
namespace caffe2 {
|
||||||
|
|
||||||
namespace {
|
|
||||||
std::shared_ptr<void> shared_from_new(std::pair<void*, MemoryDeleter>&& p) {
|
|
||||||
return std::shared_ptr<void>(p.first, std::move(p.second));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST(CUDATest, HasCudaRuntime) {
|
TEST(CUDATest, HasCudaRuntime) {
|
||||||
EXPECT_TRUE(HasCudaRuntime());
|
EXPECT_TRUE(HasCudaRuntime());
|
||||||
}
|
}
|
||||||
@ -25,7 +19,7 @@ TEST(CUDAContextTest, TestAllocDealloc) {
|
|||||||
if (!HasCudaGPU()) return;
|
if (!HasCudaGPU()) return;
|
||||||
CUDAContext context(0);
|
CUDAContext context(0);
|
||||||
context.SwitchToDevice();
|
context.SwitchToDevice();
|
||||||
auto data = shared_from_new(CUDAContext::New(10 * sizeof(float)));
|
auto data = CUDAContext::New(10 * sizeof(float));
|
||||||
EXPECT_NE(data.get(), nullptr);
|
EXPECT_NE(data.get(), nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -66,20 +60,20 @@ TEST(CUDAContextTest, MemoryPoolAllocateDealloc) {
|
|||||||
for (int i = 0; i < NumCudaDevices(); ++i) {
|
for (int i = 0; i < NumCudaDevices(); ++i) {
|
||||||
LOG(INFO) << "Device " << i << " of " << NumCudaDevices();
|
LOG(INFO) << "Device " << i << " of " << NumCudaDevices();
|
||||||
DeviceGuard guard(i);
|
DeviceGuard guard(i);
|
||||||
auto allocated = shared_from_new(CUDAContext::New(nbytes));
|
auto allocated = CUDAContext::New(nbytes);
|
||||||
EXPECT_NE(allocated, nullptr);
|
EXPECT_NE(allocated, nullptr);
|
||||||
cudaPointerAttributes attr;
|
cudaPointerAttributes attr;
|
||||||
CUDA_ENFORCE(cudaPointerGetAttributes(&attr, allocated.get()));
|
CUDA_ENFORCE(cudaPointerGetAttributes(&attr, allocated.get()));
|
||||||
EXPECT_EQ(attr.memoryType, cudaMemoryTypeDevice);
|
EXPECT_EQ(attr.memoryType, cudaMemoryTypeDevice);
|
||||||
EXPECT_EQ(attr.device, i);
|
EXPECT_EQ(attr.device, i);
|
||||||
void* prev_allocated = allocated.get();
|
void* prev_allocated = allocated.get();
|
||||||
allocated.reset();
|
allocated.clear();
|
||||||
auto new_allocated = shared_from_new(CUDAContext::New(nbytes));
|
auto new_allocated = CUDAContext::New(nbytes);
|
||||||
// With a pool, the above allocation should yield the same address.
|
// With a pool, the above allocation should yield the same address.
|
||||||
EXPECT_EQ(new_allocated.get(), prev_allocated);
|
EXPECT_EQ(new_allocated.get(), prev_allocated);
|
||||||
// But, if we are allocating something larger, we will have a different
|
// But, if we are allocating something larger, we will have a different
|
||||||
// chunk of memory.
|
// chunk of memory.
|
||||||
auto larger_allocated = shared_from_new(CUDAContext::New(nbytes * 2));
|
auto larger_allocated = CUDAContext::New(nbytes * 2);
|
||||||
EXPECT_NE(larger_allocated.get(), prev_allocated);
|
EXPECT_NE(larger_allocated.get(), prev_allocated);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -14,17 +14,17 @@ TEST(CPUContextTest, ATenCoreTest) {
|
|||||||
TEST(CPUContextTest, TestAllocAlignment) {
|
TEST(CPUContextTest, TestAllocAlignment) {
|
||||||
for (int i = 1; i < 10; ++i) {
|
for (int i = 1; i < 10; ++i) {
|
||||||
auto data = CPUContext::New(i);
|
auto data = CPUContext::New(i);
|
||||||
EXPECT_EQ((reinterpret_cast<size_t>(data.first) % gCaffe2Alignment), 0);
|
EXPECT_EQ((reinterpret_cast<size_t>(data.get()) % gCaffe2Alignment), 0);
|
||||||
data.second(data.first);
|
// data is freed when out of scope
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(CPUContextTest, TestAllocDealloc) {
|
TEST(CPUContextTest, TestAllocDealloc) {
|
||||||
auto data_and_deleter = CPUContext::New(10 * sizeof(float));
|
auto data_ptr = CPUContext::New(10 * sizeof(float));
|
||||||
float* data = static_cast<float*>(data_and_deleter.first);
|
float* data = static_cast<float*>(data_ptr.get());
|
||||||
EXPECT_NE(data, nullptr);
|
EXPECT_NE(data, nullptr);
|
||||||
auto dst_data_and_deleter = CPUContext::New(10 * sizeof(float));
|
auto dst_data_ptr = CPUContext::New(10 * sizeof(float));
|
||||||
float* dst_data = static_cast<float*>(dst_data_and_deleter.first);
|
float* dst_data = static_cast<float*>(dst_data_ptr.get());
|
||||||
EXPECT_NE(dst_data, nullptr);
|
EXPECT_NE(dst_data, nullptr);
|
||||||
for (int i = 0; i < 10; ++i) {
|
for (int i = 0; i < 10; ++i) {
|
||||||
data[i] = i;
|
data[i] = i;
|
||||||
@ -35,8 +35,7 @@ TEST(CPUContextTest, TestAllocDealloc) {
|
|||||||
for (int i = 0; i < 10; ++i) {
|
for (int i = 0; i < 10; ++i) {
|
||||||
EXPECT_FLOAT_EQ(dst_data[i], i);
|
EXPECT_FLOAT_EQ(dst_data[i], i);
|
||||||
}
|
}
|
||||||
data_and_deleter.second(data);
|
// data_ptr is freed when out of scope
|
||||||
dst_data_and_deleter.second(dst_data);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace caffe2
|
} // namespace caffe2
|
||||||
|
@ -24,8 +24,7 @@ struct CuDNNWorkspace {
|
|||||||
void* get(size_t nbytes) {
|
void* get(size_t nbytes) {
|
||||||
if (nbytes_ < nbytes) {
|
if (nbytes_ < nbytes) {
|
||||||
reset();
|
reset();
|
||||||
auto data_and_deleter = CUDAContext::New(nbytes);
|
data_ = CUDAContext::New(nbytes);
|
||||||
data_ = {data_and_deleter.first, data_and_deleter.second};
|
|
||||||
nbytes_ = nbytes;
|
nbytes_ = nbytes;
|
||||||
}
|
}
|
||||||
CAFFE_ENFORCE_GE(nbytes_, nbytes);
|
CAFFE_ENFORCE_GE(nbytes_, nbytes);
|
||||||
@ -33,12 +32,12 @@ struct CuDNNWorkspace {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void reset() {
|
void reset() {
|
||||||
data_ = nullptr;
|
data_.clear();
|
||||||
nbytes_ = 0;
|
nbytes_ = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::unique_ptr<void, MemoryDeleter> data_{nullptr, NoDelete};
|
at::DataPtr data_{nullptr, nullptr, &NoDelete, at::Device(CUDA)};
|
||||||
size_t nbytes_{0};
|
size_t nbytes_{0};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -326,7 +326,7 @@ void TrackMemoryAlloc(size_t nbytes)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<void*, MemoryDeleter> HIPStaticContext::New(size_t nbytes) const {
|
at::DataPtr HIPStaticContext::New(size_t nbytes) const {
|
||||||
// Lock the mutex
|
// Lock the mutex
|
||||||
std::lock_guard<std::mutex> lock(HIPContext::mutex());
|
std::lock_guard<std::mutex> lock(HIPContext::mutex());
|
||||||
// A one-time caffe2 cuda initializer.
|
// A one-time caffe2 cuda initializer.
|
||||||
@ -344,7 +344,7 @@ std::pair<void*, MemoryDeleter> HIPStaticContext::New(size_t nbytes) const {
|
|||||||
g_size_map[ptr] = nbytes;
|
g_size_map[ptr] = nbytes;
|
||||||
g_hip_device_affiliation[ptr] = CaffeHipGetDevice();
|
g_hip_device_affiliation[ptr] = CaffeHipGetDevice();
|
||||||
}
|
}
|
||||||
return {ptr, Delete};
|
return {ptr, ptr, &Delete, at::Device(HIP)};
|
||||||
case HipMemoryPoolType::CUB:
|
case HipMemoryPoolType::CUB:
|
||||||
HIP_ENFORCE(g_cub_allocator->DeviceAllocate(&ptr, nbytes));
|
HIP_ENFORCE(g_cub_allocator->DeviceAllocate(&ptr, nbytes));
|
||||||
g_hip_device_affiliation[ptr] = CaffeHipGetDevice();
|
g_hip_device_affiliation[ptr] = CaffeHipGetDevice();
|
||||||
@ -353,7 +353,7 @@ std::pair<void*, MemoryDeleter> HIPStaticContext::New(size_t nbytes) const {
|
|||||||
{
|
{
|
||||||
g_size_map[ptr] = nbytes;
|
g_size_map[ptr] = nbytes;
|
||||||
}
|
}
|
||||||
return {ptr, Delete};
|
return {ptr, ptr, &Delete, at::Device(HIP)};
|
||||||
case HipMemoryPoolType::THC:
|
case HipMemoryPoolType::THC:
|
||||||
HIP_ENFORCE(g_thc_allocator->Alloc(&ptr, nbytes, 0 /* stream */));
|
HIP_ENFORCE(g_thc_allocator->Alloc(&ptr, nbytes, 0 /* stream */));
|
||||||
if (FLAGS_caffe2_gpu_memory_tracking)
|
if (FLAGS_caffe2_gpu_memory_tracking)
|
||||||
@ -361,9 +361,9 @@ std::pair<void*, MemoryDeleter> HIPStaticContext::New(size_t nbytes) const {
|
|||||||
g_size_map[ptr] = nbytes;
|
g_size_map[ptr] = nbytes;
|
||||||
g_hip_device_affiliation[ptr] = CaffeHipGetDevice();
|
g_hip_device_affiliation[ptr] = CaffeHipGetDevice();
|
||||||
}
|
}
|
||||||
return {ptr, Delete};
|
return {ptr, ptr, &Delete, at::Device(HIP)};
|
||||||
}
|
}
|
||||||
return {nullptr, Delete};
|
return {nullptr, nullptr, &Delete, at::Device(HIP)};
|
||||||
}
|
}
|
||||||
|
|
||||||
void HIPStaticContext::Delete(void* ptr) {
|
void HIPStaticContext::Delete(void* ptr) {
|
||||||
|
@ -206,7 +206,7 @@ class HIPContext final : public BaseContext {
|
|||||||
return hiprand_generator_;
|
return hiprand_generator_;
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::pair<void*, MemoryDeleter> New(size_t nbytes) {
|
static at::DataPtr New(size_t nbytes) {
|
||||||
return StaticContext()->New(nbytes);
|
return StaticContext()->New(nbytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -323,26 +323,28 @@ inline void CPUContext::CopyBytes<CPUContext, HIPContext>(
|
|||||||
* GPU present during runtime, at global initialization time we will set
|
* GPU present during runtime, at global initialization time we will set
|
||||||
* the CPU memory allocator to allocate pinned memory.
|
* the CPU memory allocator to allocate pinned memory.
|
||||||
*/
|
*/
|
||||||
struct PinnedCPUAllocator final : CPUAllocator {
|
struct PinnedCPUAllocator final : public at::Allocator {
|
||||||
PinnedCPUAllocator() {}
|
PinnedCPUAllocator() {}
|
||||||
~PinnedCPUAllocator() override {}
|
~PinnedCPUAllocator() override {}
|
||||||
std::pair<void*, MemoryDeleter> New(size_t nbytes) override {
|
at::DataPtr allocate(size_t nbytes) const override {
|
||||||
void* data;
|
void* data;
|
||||||
|
at::DataPtr data_ptr;
|
||||||
std::lock_guard<std::mutex> lock(HIPContext::mutex());
|
std::lock_guard<std::mutex> lock(HIPContext::mutex());
|
||||||
if (IsNUMAEnabled()) {
|
if (IsNUMAEnabled()) {
|
||||||
auto ptr_and_deleter = baseAllocator_.New(nbytes);
|
data_ptr = baseAllocator_.allocate(nbytes);
|
||||||
data = ptr_and_deleter.first;
|
data = data_ptr.get();
|
||||||
CAFFE_ENFORCE(data);
|
CAFFE_ENFORCE(data);
|
||||||
HIP_ENFORCE(hipHostRegister(data, nbytes, hipHostRegisterDefault));
|
HIP_ENFORCE(hipHostRegister(data, nbytes, hipHostRegisterDefault));
|
||||||
} else {
|
} else {
|
||||||
HIP_ENFORCE(hipHostMalloc(&data, nbytes));
|
HIP_ENFORCE(hipHostMalloc(&data, nbytes));
|
||||||
|
data_ptr = {data, data, &Delete, at::Device(CPU)};
|
||||||
}
|
}
|
||||||
memset(data, 0, nbytes);
|
memset(data, 0, nbytes);
|
||||||
return {data, Delete};
|
return data_ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
MemoryDeleter GetDeleter() override {
|
at::DeleterFnPtr raw_deleter() const override {
|
||||||
return Delete;
|
return &Delete;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@ -374,7 +376,7 @@ struct PinnedCPUAllocator final : CPUAllocator {
|
|||||||
|
|
||||||
class HIPStaticContext final : public BaseStaticContext {
|
class HIPStaticContext final : public BaseStaticContext {
|
||||||
public:
|
public:
|
||||||
std::pair<void*, MemoryDeleter> New(size_t nbytes) const override;
|
at::DataPtr New(size_t nbytes) const override;
|
||||||
|
|
||||||
DeviceType GetDeviceType() override {
|
DeviceType GetDeviceType() override {
|
||||||
return HIP;
|
return HIP;
|
||||||
|
@ -26,8 +26,7 @@ struct MIOPENWorkspace
|
|||||||
if(nbytes_ < nbytes)
|
if(nbytes_ < nbytes)
|
||||||
{
|
{
|
||||||
reset();
|
reset();
|
||||||
auto data_and_deleter = HIPContext::New(nbytes);
|
data_ = HIPContext::New(nbytes);
|
||||||
data_ = {data_and_deleter.first, data_and_deleter.second};
|
|
||||||
nbytes_ = nbytes;
|
nbytes_ = nbytes;
|
||||||
}
|
}
|
||||||
CAFFE_ENFORCE_GE(nbytes_, nbytes);
|
CAFFE_ENFORCE_GE(nbytes_, nbytes);
|
||||||
@ -36,12 +35,12 @@ struct MIOPENWorkspace
|
|||||||
|
|
||||||
void reset()
|
void reset()
|
||||||
{
|
{
|
||||||
data_ = nullptr;
|
data_.clear();
|
||||||
nbytes_ = 0;
|
nbytes_ = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::unique_ptr<void, MemoryDeleter> data_{nullptr, NoDelete};
|
at::DataPtr data_;
|
||||||
size_t nbytes_{0};
|
size_t nbytes_{0};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -59,7 +59,7 @@ class C10_EXPORT QTensor {
|
|||||||
size_t source_size = std::accumulate(
|
size_t source_size = std::accumulate(
|
||||||
dim_source.begin(), dim_source.end(), 1, std::multiplies<int>());
|
dim_source.begin(), dim_source.end(), 1, std::multiplies<int>());
|
||||||
if ((source_size * (precision_ + signed_)) > capacity_) {
|
if ((source_size * (precision_ + signed_)) > capacity_) {
|
||||||
data_.reset();
|
data_ptr_.clear();
|
||||||
capacity_ = 0;
|
capacity_ = 0;
|
||||||
}
|
}
|
||||||
dims_ = dim_source;
|
dims_ = dim_source;
|
||||||
@ -104,12 +104,12 @@ class C10_EXPORT QTensor {
|
|||||||
|
|
||||||
void SetPrecision(const unsigned char precision) {
|
void SetPrecision(const unsigned char precision) {
|
||||||
precision_ = precision;
|
precision_ = precision;
|
||||||
data_.reset();
|
data_ptr_.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
void SetSigned(const bool make_signed = true) {
|
void SetSigned(const bool make_signed = true) {
|
||||||
signed_ = make_signed;
|
signed_ = make_signed;
|
||||||
data_.reset();
|
data_ptr_.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
void SetScale(const double scale) {
|
void SetScale(const double scale) {
|
||||||
@ -121,19 +121,16 @@ class C10_EXPORT QTensor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
unsigned char* mutable_data() {
|
unsigned char* mutable_data() {
|
||||||
if (!data_) {
|
if (!data_ptr_) {
|
||||||
auto ptr_and_deleter = Context::New(nbytes());
|
data_ptr_ = Context::New(nbytes());
|
||||||
data_.reset(
|
|
||||||
static_cast<unsigned char*>(ptr_and_deleter.first),
|
|
||||||
ptr_and_deleter.second);
|
|
||||||
capacity_ = nbytes() * CHAR_BIT;
|
capacity_ = nbytes() * CHAR_BIT;
|
||||||
}
|
}
|
||||||
CAFFE_ENFORCE(capacity_ == nbytes() * CHAR_BIT);
|
CAFFE_ENFORCE(capacity_ == nbytes() * CHAR_BIT);
|
||||||
return data_.get();
|
return static_cast<unsigned char*>(data_ptr_.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
inline const unsigned char* data() const {
|
inline const unsigned char* data() const {
|
||||||
return data_.get();
|
return static_cast<unsigned char*>(data_ptr_.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
inline size_t size() const {
|
inline size_t size() const {
|
||||||
@ -242,7 +239,7 @@ class C10_EXPORT QTensor {
|
|||||||
unsigned char alignment_ = CHAR_BIT;
|
unsigned char alignment_ = CHAR_BIT;
|
||||||
|
|
||||||
// Allocated data.
|
// Allocated data.
|
||||||
std::shared_ptr<unsigned char> data_;
|
at::DataPtr data_ptr_;
|
||||||
|
|
||||||
// value = scale_ * (x + bias_)
|
// value = scale_ * (x + bias_)
|
||||||
double scale_;
|
double scale_;
|
||||||
|
@ -55,7 +55,7 @@ class IDEEPContext final : public BaseContext {
|
|||||||
return *random_generator_.get();
|
return *random_generator_.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
inline static std::pair<void*, MemoryDeleter> New(size_t nbytes) {
|
inline static at::DataPtr New(size_t nbytes) {
|
||||||
return StaticContext()->New(nbytes);
|
return StaticContext()->New(nbytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -176,8 +176,8 @@ inline void IDEEPContext::CopyBytes<IDEEPContext, CPUContext>(
|
|||||||
|
|
||||||
class IDEEPStaticContext : public BaseStaticContext {
|
class IDEEPStaticContext : public BaseStaticContext {
|
||||||
public:
|
public:
|
||||||
inline std::pair<void*, MemoryDeleter> New(size_t nbytes) const override {
|
inline at::DataPtr New(size_t nbytes) const override {
|
||||||
return GetCPUAllocator()->New(nbytes);
|
return GetCPUAllocator()->allocate(nbytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
DeviceType GetDeviceType() override {
|
DeviceType GetDeviceType() override {
|
||||||
|
@ -62,7 +62,7 @@ class MKLContext : public BaseContext {
|
|||||||
return *random_generator_.get();
|
return *random_generator_.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
inline static std::pair<void*, MemoryDeleter> New(size_t nbytes) {
|
inline static at::DataPtr New(size_t nbytes) {
|
||||||
return StaticContext()->New(nbytes);
|
return StaticContext()->New(nbytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -153,8 +153,8 @@ inline void MKLContext::CopyBytes<MKLContext, MKLContext>(
|
|||||||
|
|
||||||
class MKLStaticContext : public BaseStaticContext {
|
class MKLStaticContext : public BaseStaticContext {
|
||||||
public:
|
public:
|
||||||
inline std::pair<void*, MemoryDeleter> New(size_t nbytes) const override {
|
inline at::DataPtr New(size_t nbytes) const override {
|
||||||
return GetCPUAllocator()->New(nbytes);
|
return GetCPUAllocator()->allocate(nbytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
DeviceType GetDeviceType() override {
|
DeviceType GetDeviceType() override {
|
||||||
|
Reference in New Issue
Block a user