mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/12714 This is a short change to enable c10 namespace in caffe2. We did not enable it before due to gflags global variable confusion, but it should have been mostly cleaned now. Right now, the plan on record is that namespace caffe2 and namespace aten will fully be supersets of namespace c10. Most of the diff is codemod, and only two places of non-codemod is in caffe2/core/common.h, where ``` using namespace c10; ``` is added, and in Flags.h, where instead of creating aliasing variables in c10 namespace, we directly put it in the global namespace to match gflags (and same behavior if gflags is not being built with). Reviewed By: dzhulgakov Differential Revision: D10390486 fbshipit-source-id: 5e2df730e28e29a052f513bddc558d9f78a23b9b
106 lines
2.8 KiB
C++
106 lines
2.8 KiB
C++
#ifndef CAFFE2_CORE_ALLOCATOR_H_
|
|
#define CAFFE2_CORE_ALLOCATOR_H_
|
|
|
|
#include <cstring>
|
|
#include <unordered_map>
|
|
|
|
#include <ATen/core/Allocator.h>
|
|
#include "caffe2/core/logging.h"
|
|
#include "caffe2/core/numa.h"
|
|
|
|
C10_DECLARE_bool(caffe2_report_cpu_memory_usage);
|
|
C10_DECLARE_bool(caffe2_cpu_allocator_do_zero_fill);
|
|
|
|
namespace caffe2 {
|
|
|
|
// Use 64-byte alignment should be enough for computation up to AVX512.
|
|
constexpr size_t gCaffe2Alignment = 64;
|
|
|
|
using MemoryDeleter = void (*)(void*);
|
|
|
|
// A helper function that is basically doing nothing.
|
|
CAFFE2_API void NoDelete(void*);
|
|
|
|
// A virtual allocator class to do memory allocation and deallocation.
|
|
struct CAFFE2_API CPUAllocator {
|
|
CPUAllocator() {}
|
|
virtual ~CPUAllocator() noexcept {}
|
|
virtual std::pair<void*, MemoryDeleter> New(size_t nbytes) = 0;
|
|
virtual MemoryDeleter GetDeleter() = 0;
|
|
};
|
|
|
|
// A virtual struct that is used to report Caffe2's memory allocation and
|
|
// deallocation status
|
|
class CAFFE2_API MemoryAllocationReporter {
|
|
public:
|
|
MemoryAllocationReporter() : allocated_(0) {}
|
|
void New(void* ptr, size_t nbytes);
|
|
void Delete(void* ptr);
|
|
|
|
private:
|
|
std::mutex mutex_;
|
|
std::unordered_map<void*, size_t> size_table_;
|
|
size_t allocated_;
|
|
};
|
|
|
|
struct CAFFE2_API DefaultCPUAllocator final : at::Allocator {
|
|
DefaultCPUAllocator() {}
|
|
~DefaultCPUAllocator() override {}
|
|
at::DataPtr allocate(size_t nbytes) const override {
|
|
void* data = nullptr;
|
|
#ifdef __ANDROID__
|
|
data = memalign(gCaffe2Alignment, nbytes);
|
|
#elif defined(_MSC_VER)
|
|
data = _aligned_malloc(nbytes, gCaffe2Alignment);
|
|
#else
|
|
CAFFE_ENFORCE_EQ(posix_memalign(&data, gCaffe2Alignment, nbytes), 0);
|
|
#endif
|
|
CAFFE_ENFORCE(data);
|
|
// move data to a thread's NUMA node
|
|
NUMAMove(data, nbytes, GetCurrentNUMANode());
|
|
if (FLAGS_caffe2_cpu_allocator_do_zero_fill) {
|
|
memset(data, 0, nbytes);
|
|
}
|
|
if (FLAGS_caffe2_report_cpu_memory_usage) {
|
|
reporter_.New(data, nbytes);
|
|
return {data, data, &ReportAndDelete, at::Device(at::DeviceType::CPU)};
|
|
}
|
|
return {data, data, &Delete, at::Device(at::DeviceType::CPU)};
|
|
}
|
|
|
|
#ifdef _MSC_VER
|
|
static void Delete(void* data) {
|
|
_aligned_free(data);
|
|
}
|
|
#else
|
|
static void Delete(void* data) {
|
|
free(data);
|
|
}
|
|
#endif
|
|
|
|
static void ReportAndDelete(void* ptr) {
|
|
reporter_.Delete(ptr);
|
|
Delete(ptr);
|
|
}
|
|
|
|
at::DeleterFnPtr raw_deleter() const override {
|
|
if (FLAGS_caffe2_report_cpu_memory_usage) {
|
|
return &ReportAndDelete;
|
|
}
|
|
return &Delete;
|
|
}
|
|
|
|
protected:
|
|
static MemoryAllocationReporter reporter_;
|
|
};
|
|
|
|
// Get the CPU Alloctor.
|
|
CAFFE2_API at::Allocator* GetCPUAllocator();
|
|
// Sets the CPU allocator to the given allocator: the caller gives away the
|
|
// ownership of the pointer.
|
|
CAFFE2_API void SetCPUAllocator(at::Allocator* alloc);
|
|
|
|
} // namespace caffe2
|
|
|
|
#endif // CAFFE2_CORE_ALLOCATOR_H_
|