[CUDACachingAllocator] Turn Allocator::allocate into non-const (#120969)

Ideally, `Allocator::allocate` should be non-const since it changes the allocator state. Some const_casts are also removed along the way.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/120969
Approved by: https://github.com/albanD
This commit is contained in:
cyy
2024-03-05 09:53:01 +00:00
committed by PyTorch MergeBot
parent 46c9d646dd
commit 507611f9ae
18 changed files with 24 additions and 30 deletions

View File

@ -316,7 +316,7 @@ struct MetaAllocator final : public at::Allocator {
static void deleter(void* const pointer) {
TORCH_INTERNAL_ASSERT(!pointer);
}
DataPtr allocate(const size_t nbytes) const override {
DataPtr allocate(const size_t nbytes) override {
return {nullptr, nullptr, &deleter, at::Device(DeviceType::Meta)};
}
DeleterFnPtr raw_deleter() const override {

View File

@ -492,7 +492,7 @@ void CachingHostAllocator_emptyCache() {
}
struct CUDAHostAllocatorWrapper final : public at::Allocator {
at::DataPtr allocate(size_t size) const override {
at::DataPtr allocate(size_t size) override {
auto ptr_and_ctx = getCUDAHostAllocator().allocate(size);
return {
ptr_and_ctx.first,

View File

@ -15,7 +15,7 @@ class HIPAllocatorMasqueradingAsCUDA final : public Allocator {
public:
explicit HIPAllocatorMasqueradingAsCUDA(Allocator* allocator)
: allocator_(allocator) {}
DataPtr allocate(size_t size) const override {
DataPtr allocate(size_t size) override {
DataPtr r = allocator_->allocate(size);
r.unsafe_set_device(Device(c10::DeviceType::CUDA, r.device().index()));
return r;

View File

@ -748,7 +748,7 @@ struct TORCH_API MPSAllocator final : public IMPSAllocator {
return &Delete;
}
DataPtr allocate(const size_t nbytes) const override {
DataPtr allocate(const size_t nbytes) override {
__block id<MTLBuffer> buf = nbytes > 0 ? _getAllocImpl().malloc(nbytes, m_usage) : nullptr;
return {buf, buf, &Delete, at::Device(at::DeviceType::MPS, 0)};
}

View File

@ -124,7 +124,7 @@ struct ZeroTensorAllocator final : public at::Allocator {
static void deleter(void* const pointer) {
TORCH_INTERNAL_ASSERT(!pointer);
}
DataPtr allocate(const size_t /*nbytes*/) const override {
DataPtr allocate(const size_t /*nbytes*/) override {
return {nullptr, nullptr, &deleter, device_};
}
DeleterFnPtr raw_deleter() const override {

View File

@ -17,7 +17,7 @@ void* XLAMalloc(ptrdiff_t size) {
}
struct XLAAllocator final : public at::Allocator {
at::DataPtr allocate(size_t size) const override {
at::DataPtr allocate(size_t size) override {
auto* ptr = XLAMalloc(size);
return {ptr, ptr, &XLAFree, at::DeviceType::XLA};
}

View File

@ -4,7 +4,7 @@
namespace c10 {
DataPtr Allocator::clone(const void* data, std::size_t n) const {
DataPtr Allocator::clone(const void* data, std::size_t n) {
DataPtr new_data = allocate(n);
copy_data(new_data.mutable_get(), data, n);
return new_data;

View File

@ -160,7 +160,7 @@ inline bool operator!=(std::nullptr_t, const DataPtr& dp) noexcept {
struct C10_API Allocator {
virtual ~Allocator() = default;
virtual DataPtr allocate(size_t n) const = 0;
virtual DataPtr allocate(size_t n) = 0;
// Clones an allocation that came from this allocator.
//
@ -171,7 +171,7 @@ struct C10_API Allocator {
// attached to the input data.
//
// Requires: input data was allocated by the same allocator.
DataPtr clone(const void* data, std::size_t n) const;
DataPtr clone(const void* data, std::size_t n);
// Checks if DataPtr has a simple context, not wrapped with any out of the
// ordinary contexts.

View File

@ -17,7 +17,7 @@ namespace c10 {
struct C10_API DefaultCPUAllocator final : at::Allocator {
DefaultCPUAllocator() = default;
at::DataPtr allocate(size_t nbytes) const override {
at::DataPtr allocate(size_t nbytes) override {
void* data = nullptr;
try {
data = c10::alloc_cpu(nbytes);
@ -103,7 +103,7 @@ class DefaultMobileCPUAllocator final : public at::Allocator {
}
}
DataPtr allocate(const size_t nbytes) const override {
DataPtr allocate(const size_t nbytes) override {
if (C10_UNLIKELY(0u == nbytes)) {
return {
nullptr,

View File

@ -2262,7 +2262,7 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
storage_offset_ == 0); // because we just reallocated
return storage_.mutable_data();
}
const Allocator* allocator = storage_.allocator();
Allocator* allocator = storage_.allocator();
// Storage might have nullptr allocator in rare cases, for example, if
// an external memory segment has been wrapped with Tensor and we don't
// know how to reallocate it. However, in order to preserve legacy C2

View File

@ -3106,7 +3106,7 @@ class NativeCachingAllocator : public CUDAAllocator {
return cpd;
}
DataPtr allocate(size_t size) const override {
DataPtr allocate(size_t size) override {
constexpr size_t one_exa_bytes = 1152921504606846976ULL;
TORCH_CHECK_WITH(
OutOfMemoryError,
@ -3131,9 +3131,7 @@ class NativeCachingAllocator : public CUDAAllocator {
}
} else {
if (size != 0) {
// Allocator declars allocate const!?
const_cast<NativeCachingAllocator*>(this)->malloc(
&devPtr, device, size, stream);
this->malloc(&devPtr, device, size, stream);
}
}

View File

@ -405,7 +405,7 @@ void local_raw_delete(void* ptr);
// Same pattern as CUDACachingAllocator.cpp.
struct CudaMallocAsyncAllocator : public CUDAAllocator {
DataPtr allocate(size_t size) const override {
DataPtr allocate(size_t size) override {
constexpr size_t one_exa_bytes = 1152921504606846976ULL;
TORCH_CHECK_WITH(
OutOfMemoryError,

View File

@ -497,13 +497,11 @@ class XPUAllocator : public Allocator {
device_allocators[block->device]->recordStream(block, stream);
}
DataPtr allocate(size_t size) const override {
DataPtr allocate(size_t size) override {
auto device = c10::xpu::current_device();
void* r = nullptr;
if (size != 0) {
// Allocator declares allocate const!
const_cast<XPUAllocator*>(this)->malloc(
&r, device, size, xpu::getCurrentXPUStream(device));
this->malloc(&r, device, size, xpu::getCurrentXPUStream(device));
}
return {r, r, &local_raw_delete, Device(DeviceType::XPU, device)};
}

View File

@ -306,7 +306,7 @@ struct CAFFE2_CUDA_API PinnedCPUAllocator final : public at::Allocator {
baseAllocator_ = GetDefaultCPUAllocator();
}
~PinnedCPUAllocator() override {}
at::DataPtr allocate(size_t nbytes) const override {
at::DataPtr allocate(size_t nbytes) override {
if (nbytes == 0) {
// replicate c10::alloc_cpu behavior - return nullptr
return {nullptr, nullptr, &Delete, at::Device(CPU)};
@ -513,7 +513,7 @@ void TrackMemoryAlloc(size_t nbytes) {
struct DefaultCUDAAllocator final : public at::Allocator {
DefaultCUDAAllocator() {}
~DefaultCUDAAllocator() override {}
at::DataPtr allocate(size_t nbytes) const override {
at::DataPtr allocate(size_t nbytes) override {
// Lock the mutex
std::lock_guard<std::mutex> lock(CUDAContext::mutex());
// A one-time caffe2 cuda initializer.

View File

@ -173,7 +173,7 @@ at::Tensor& custom_abs_out(const at::Tensor& self, at::Tensor& out) {
// A dummy allocator for our custom device, that secretly uses the CPU
struct DummyCustomAllocator final : at::Allocator {
DummyCustomAllocator() = default;
at::DataPtr allocate(size_t nbytes) const override {
at::DataPtr allocate(size_t nbytes) override {
void* data = c10::alloc_cpu(nbytes);
return {data, data, &ReportAndDelete, at::Device(at::DeviceType::PrivateUse1, custom_device_index)};
}

View File

@ -66,7 +66,7 @@ at::Tensor custom_to_device(
// A dummy allocator for our custom device, that secretly uses the CPU
struct DummyCustomAllocator final : at::Allocator {
DummyCustomAllocator() = default;
at::DataPtr allocate(size_t nbytes) const override {
at::DataPtr allocate(size_t nbytes) override {
void* data = c10::alloc_cpu(nbytes);
return {data, data, &ReportAndDelete, at::Device(at::DeviceType::PrivateUse1, 0)};
}

View File

@ -94,13 +94,11 @@ void* CUDAPluggableAllocator::malloc(
return r;
}
c10::DataPtr CUDAPluggableAllocator::allocate(size_t size) const {
c10::DataPtr CUDAPluggableAllocator::allocate(size_t size) {
c10::DeviceIndex device = -1;
C10_CUDA_CHECK(c10::cuda::GetDevice(&device));
cudaStream_t stream = c10::cuda::getCurrentCUDAStream(device);
void* r =
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
const_cast<CUDAPluggableAllocator*>(this)->malloc(size, device, stream);
void* r = this->malloc(size, device, stream);
c10::DataPtr data_ptr = {
r, r, raw_deleter(), c10::Device(c10::DeviceType::CUDA, device)};
return data_ptr;

View File

@ -71,7 +71,7 @@ struct CUDAPluggableAllocator
void* malloc(size_t size, c10::DeviceIndex device, cudaStream_t stream);
c10::DataPtr allocate(size_t size) const override;
c10::DataPtr allocate(size_t size) override;
c10::DeleterFnPtr raw_deleter() const override;
void* raw_alloc(size_t nbytes) override;